# SeleniumBase/seleniumbase/undetected/cdp_driver/cdp_util.py
# (Listing metadata: 600 lines, 21 KiB, Python)

"""CDP-Driver is based on NoDriver"""
from __future__ import annotations
import asyncio
import fasteners
import logging
import os
import sys
import time
import types
import typing
from contextlib import suppress
from seleniumbase import config as sb_config
from seleniumbase.config import settings
from seleniumbase.core import detect_b_ver
from seleniumbase.core import proxy_helper
from seleniumbase.fixtures import constants
from seleniumbase.fixtures import shared_utils
from typing import Optional, List, Union, Callable
from .element import Element
from .browser import Browser
from .browser import PathLike
from .config import Config
from .tab import Tab
import mycdp as cdp
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Platform flag, as reported by shared_utils.is_linux().
IS_LINUX = shared_utils.is_linux()
# Lock path reused from proxy_helper; it is passed to
# fasteners.InterProcessLock when the proxy extension dir is created.
PROXY_DIR_LOCK = proxy_helper.PROXY_DIR_LOCK
# Generic type variable used by the filter_recurse* annotations.
T = typing.TypeVar("T")
def __activate_standard_virtual_display():
    """Start a plain, non-visible virtual display.

    The display size comes from ``settings.HEADLESS_START_WIDTH`` and
    ``settings.HEADLESS_START_HEIGHT``. On success, the display object is
    stored on ``sb_config._virtual_display`` and
    ``sb_config.headless_active`` is set to True. Any exception raised while
    creating or starting the display is suppressed.
    """
    from sbvirtualdisplay import Display

    display_size = (
        settings.HEADLESS_START_WIDTH,
        settings.HEADLESS_START_HEIGHT,
    )
    with suppress(Exception):
        virtual_display = Display(visible=0, size=display_size)
        virtual_display.start()
        sb_config._virtual_display = virtual_display
        sb_config.headless_active = True
def __activate_virtual_display_as_needed(
    headless, headed, xvfb, xvfb_metrics
):
    """Set up a virtual display when one is called for. (Linux only.)

    Nothing happens unless running on Linux with ``headed`` falsy or
    ``xvfb`` truthy. All work is done while holding the inter-process
    lock at ``constants.PipInstall.FINDLOCK``.

    * If ``headless`` is truthy, the standard virtual display is started.
    * Otherwise an Xvfb-backed display (with xauth) is started. Its size is
      parsed from ``xvfb_metrics`` (a "Width,Height" string), with a minimum
      of 1024x768 and a default of 1366x768. If that fails, or if DISPLAY
      is missing from the environment afterwards, the standard virtual
      display is used instead.
    * After a successful X11 start, PyAutoGUI is imported (and pip-installed
      at ``constants.PyAutoGUI.VER`` if missing or at a different version),
      and its X11 backend is pointed at the current DISPLAY.
    """
    if not IS_LINUX or (headed and not xvfb):
        return
    from sbvirtualdisplay import Display

    def _print_error(error):
        # Prefer the exception's "msg" attribute when it has one.
        if hasattr(error, "msg"):
            print("\n" + str(error.msg))
        else:
            print(error)

    find_lock = fasteners.InterProcessLock(constants.PipInstall.FINDLOCK)
    with find_lock:  # Prevent issues with multiple processes
        if headless:
            __activate_standard_virtual_display()
            return
        import Xlib.display

        try:
            display_width = None
            display_height = None
            if xvfb_metrics:
                # Malformed metrics are ignored (defaults apply below).
                with suppress(Exception):
                    dimensions = xvfb_metrics.replace(" ", "").split(",")[0:2]
                    display_width = int(dimensions[0])
                    display_height = int(dimensions[1])
                    # The minimum width,height is: 1024,768
                    display_width = max(display_width, 1024)
                    sb_config._xvfb_width = display_width
                    display_height = max(display_height, 768)
                    sb_config._xvfb_height = display_height
                    xvfb = True
            if not display_width:
                display_width = 1366
            if not display_height:
                display_height = 768
            x11_display = Display(
                visible=True,
                size=(display_width, display_height),
                backend="xvfb",
                use_xauth=True,
            )
            x11_display.start()
            if "DISPLAY" in os.environ:
                sb_config._virtual_display = x11_display
                sb_config.headless_active = True
            else:
                print("\nX11 display failed! Will use regular xvfb!")
                __activate_standard_virtual_display()
        except Exception as err:
            _print_error(err)
            print("\nX11 display failed! Will use regular xvfb!")
            __activate_standard_virtual_display()
            return
        pyautogui_ready = False
        try:
            import pyautogui

            with suppress(Exception):
                wanted_version = constants.PyAutoGUI.VER
                if pyautogui.__version__ != wanted_version:
                    del pyautogui  # To get newer ver
                    shared_utils.pip_install(
                        "pyautogui", version=wanted_version
                    )
                    import pyautogui
            pyautogui_ready = True
        except Exception:
            print(
                "\n"
                + "PyAutoGUI is required for UC Mode on Linux! "
                "Installing now..."
            )
            shared_utils.pip_install(
                "pyautogui", version=constants.PyAutoGUI.VER
            )
            import pyautogui

            pyautogui_ready = True
        if pyautogui_ready and hasattr(pyautogui, "_pyautogui_x11"):
            try:
                x11_backend = pyautogui._pyautogui_x11
                x11_backend._display = Xlib.display.Display(
                    os.environ["DISPLAY"]
                )
                sb_config._pyautogui_x11_display = x11_backend._display
            except Exception as err:
                _print_error(err)
def __set_proxy_filenames():
    """Point ``proxy_helper.PROXY_DIR_PATH`` at an unused proxy-ext folder.

    Candidates are "proxy_ext_dir_0" through "proxy_ext_dir_999" inside
    ``constants.Files.DOWNLOADS_FOLDER``. The first path that does not exist
    yet is chosen. If all of them exist, "proxy_ext_dir" is used instead.
    """
    downloads_dir = constants.Files.DOWNLOADS_FOLDER
    candidates = (
        os.path.join(downloads_dir, "proxy_ext_dir_%s" % index)
        for index in range(1000)
    )
    chosen = next(
        (path for path in candidates if not os.path.exists(path)), None
    )
    if chosen is None:
        # Exceeded upper bound. Use Defaults:
        chosen = os.path.join(downloads_dir, "proxy_ext_dir")
    proxy_helper.PROXY_DIR_PATH = chosen
def __add_chrome_ext_dir(extension_dir, dir_path):
    """Return ``extension_dir`` extended with the real path of ``dir_path``.

    :param extension_dir: Existing comma-separated extension dirs (or falsy).
    :param dir_path: Directory to add; resolved with ``os.path.realpath``.
    :return: "<extension_dir>,<real path>" when ``extension_dir`` is truthy,
        otherwise just the real path.
    """
    resolved_path = os.path.realpath(dir_path)
    if extension_dir:
        return "%s,%s" % (extension_dir, resolved_path)
    return resolved_path
def __add_chrome_proxy_extension(
    extension_dir,
    proxy_string,
    proxy_user,
    proxy_pass,
    proxy_bypass_list=None,
    multi_proxy=False,
):
    """Implementation of https://stackoverflow.com/a/35293284/7058266
    for https://stackoverflow.com/q/12848327/7058266
    (Run Selenium on a proxy server that requires authentication.)

    Creates the unzipped proxy extension via ``proxy_helper`` while holding
    the inter-process lock at ``PROXY_DIR_LOCK``, then returns
    ``extension_dir`` with the proxy extension folder appended.

    In the multi-threaded / multi-proxy case, the extension is only created
    if its folder does not exist yet. With ``multi_proxy`` set, a fresh
    folder name is picked first."""
    joined_args = " ".join(sys.argv)
    parallel_run = (
        "-n" in sys.argv or " -n=" in joined_args or joined_args == "-c"
    )
    dir_lock = fasteners.InterProcessLock(PROXY_DIR_LOCK)
    with dir_lock:
        if not parallel_run and not multi_proxy:
            # Single-threaded
            proxy_helper.create_proxy_ext(
                proxy_string,
                proxy_user,
                proxy_pass,
                proxy_bypass_list,
                zip_it=False,
            )
        else:
            # Multi-threaded
            with suppress(Exception):
                shared_utils.make_writable(PROXY_DIR_LOCK)
            if multi_proxy:
                __set_proxy_filenames()
            if not os.path.exists(proxy_helper.PROXY_DIR_PATH):
                proxy_helper.create_proxy_ext(
                    proxy_string,
                    proxy_user,
                    proxy_pass,
                    proxy_bypass_list,
                    zip_it=False,
                )
        return __add_chrome_ext_dir(
            extension_dir, proxy_helper.PROXY_DIR_PATH
        )
async def start(
    config: Optional[Config] = None,
    *,
    user_data_dir: Optional[PathLike] = None,
    headless: Optional[bool] = False,
    incognito: Optional[bool] = False,
    guest: Optional[bool] = False,
    browser_executable_path: Optional[PathLike] = None,
    browser_args: Optional[List[str]] = None,
    xvfb_metrics: Optional[List[str]] = None,  # "Width,Height" for Linux
    sandbox: Optional[bool] = True,
    lang: Optional[str] = None,  # Set the Language Locale Code
    host: Optional[str] = None,  # Chrome remote-debugging-host
    port: Optional[int] = None,  # Chrome remote-debugging-port
    xvfb: Optional[int] = None,  # Use a special virtual display on Linux
    headed: Optional[bool] = None,  # Override default Xvfb mode on Linux
    expert: Optional[bool] = None,  # Open up closed Shadow-root elements
    proxy: Optional[str] = None,  # "host:port" or "user:pass@host:port"
    extension_dir: Optional[str] = None,  # Chrome extension directory
    **kwargs: Optional[dict],
) -> Browser:
    """
    Helper function to launch a browser. It accepts several keyword parameters.
    Conveniently, you can just call it bare (no parameters) to quickly launch
    an instance with best practice defaults.
    Note: Due to a Chrome-130 bug, use start_async or start_sync instead.
     (Calling this method directly could lead to an unresponsive browser)
    Note: New args are expected: Use kwargs only!
    Note: This should be called ``await start()``

    :param config: A ready-made Config. When given, no Config is built
     from the other keyword arguments.
    :type config: Config
    :param user_data_dir:
    :type user_data_dir: PathLike
    :param headless:
    :type headless: bool
    :param incognito: Passed through to Config.
    :type incognito: bool
    :param guest: Passed through to Config.
    :type guest: bool
    :param browser_executable_path:
    :type browser_executable_path: PathLike
    :param browser_args:
     ["--some-chromeparam=somevalue", "some-other-param=someval"]
    :type browser_args: List[str]
    :param xvfb_metrics: "Width,Height" for the virtual display on Linux.
    :param sandbox: Default True, but when set to False it adds --no-sandbox
     to the params, also when using linux under a root user,
     it adds False automatically (else Chrome won't start).
    :type sandbox: bool
    :param lang: language string
    :type lang: str
    :param port: If you connect to an existing debuggable session,
     you can specify the port here.
     If both host and port are provided,
     then a local Chrome browser will not be started!
    :type port: int
    :param host: If you connect to an existing debuggable session,
     you can specify the host here.
     If both host and port are provided,
     then a local Chrome browser will not be started!
    :type host: str
    :param xvfb: Use a special virtual display on Linux.
     (This is the default on Linux when not headless and not headed.)
    :param headed: Override the default Xvfb mode on Linux.
    :type headed: bool
    :param expert: When set to True, "expert" mode is enabled.
     This means adding: --disable-web-security --disable-site-isolation-trials,
     as well as some scripts and patching useful for debugging.
     (For example, ensuring shadow-root is always in "open" mode.)
    :type expert: bool
    :param proxy: "host:port" or "user:pass@host:port". With credentials,
     a proxy-auth extension folder is added to ``extension_dir``.
    :type proxy: str
    :param extension_dir: Chrome extension directory.
    :type extension_dir: str
    :param kwargs: Extra keyword arguments passed through to Config.
     "locale" / "locale_code" are also used for ``sb_config._cdp_locale``
     when ``lang`` is not set.
    :return: The Browser created from the config.
    :rtype: Browser
    """
    if IS_LINUX and not headless and not headed and not xvfb:
        xvfb = True  # The default setting on Linux
    __activate_virtual_display_as_needed(headless, headed, xvfb, xvfb_metrics)
    proxy_has_credentials = bool(proxy and "@" in str(proxy))
    if proxy_has_credentials:
        # Split on the LAST "@" and the FIRST ":" of the credentials,
        # so that passwords containing "@" or ":" are kept intact.
        credentials, _, proxy_string = proxy.rpartition("@")
        proxy_user, separator, proxy_pass = credentials.partition(":")
        if separator:
            extension_dir = __add_chrome_proxy_extension(
                extension_dir,
                proxy_string,
                proxy_user,
                proxy_pass,
            )
    if not config:
        config = Config(
            user_data_dir,
            headless,
            incognito,
            guest,
            browser_executable_path,
            browser_args,
            sandbox,
            lang,
            host=host,
            port=port,
            expert=expert,
            proxy=proxy,
            extension_dir=extension_dir,
            **kwargs,
        )
    driver = None
    try:
        driver = await Browser.create(config)
    except Exception:
        # Retry once after a short pause. A second failure propagates.
        # (asyncio.sleep is used so that the event loop is not blocked.)
        await asyncio.sleep(0.15)
        driver = await Browser.create(config)
    if proxy_has_credentials:
        await asyncio.sleep(0.15)
    if lang:
        sb_config._cdp_locale = lang
    elif "locale" in kwargs:
        sb_config._cdp_locale = kwargs["locale"]
    elif "locale_code" in kwargs:
        sb_config._cdp_locale = kwargs["locale_code"]
    else:
        sb_config._cdp_locale = None
    return driver
async def start_async(*args, **kwargs) -> Browser:
    """Launch a browser from a coroutine, working around a Chrome-130 issue.

    When the detected Chrome is version 130 or newer and a truthy
    ``user_data_dir`` is given, a headless "decoy" browser is started and
    stopped first, and the real launch then reuses the decoy's
    ``user_data_dir``. All arguments are forwarded to ``start()``.

    :param args: Positional arguments for ``start()``.
    :param kwargs: Keyword arguments for ``start()``. The keys
        "browser_executable_path", "user_data_dir", and "headless"
        are inspected here.
    :return: The Browser from the final ``start()`` call.
    """
    if "browser_executable_path" in kwargs:
        binary_location = kwargs["browser_executable_path"]
    else:
        binary_location = detect_b_ver.get_binary_location("google-chrome")
        if binary_location and not os.path.exists(binary_location):
            binary_location = None
    if (
        shared_utils.is_chrome_130_or_newer(binary_location)
        and kwargs.get("user_data_dir")
    ):
        headless = kwargs.get("headless", False)
        # Build a separate dict for the decoy launch, rather than
        # temporarily mutating the kwargs used for the real launch.
        decoy_args = dict(kwargs, headless=True)
        driver = await start(**decoy_args)
        kwargs["headless"] = headless
        kwargs["user_data_dir"] = driver.config.user_data_dir
        # Non-blocking pauses: time.sleep() would stall the event loop.
        await asyncio.sleep(0.2)
        driver.stop()  # Due to Chrome-130, must stop & start
        await asyncio.sleep(0.1)
    return await start(*args, **kwargs)
def start_sync(*args, **kwargs) -> Browser:
    """Launch a browser synchronously by driving ``start()`` on a loop.

    A truthy ``loop`` kwarg that has a ``create_task`` attribute is used as
    the event loop; otherwise a new event loop is created. When the
    detected Chrome is version 130 or newer and a truthy ``user_data_dir``
    is given, a headless decoy browser is started and stopped first, and
    the real launch then reuses the decoy's ``user_data_dir``.

    :return: The Browser from the final ``start()`` call.
    """
    supplied_loop = kwargs["loop"] if "loop" in kwargs else None
    if supplied_loop and hasattr(supplied_loop, "create_task"):
        event_loop = supplied_loop
    else:
        event_loop = asyncio.new_event_loop()
    if "browser_executable_path" in kwargs:
        chrome_binary = kwargs["browser_executable_path"]
    else:
        chrome_binary = detect_b_ver.get_binary_location("google-chrome")
        if chrome_binary and not os.path.exists(chrome_binary):
            chrome_binary = None
    needs_decoy = (
        shared_utils.is_chrome_130_or_newer(chrome_binary)
        and "user_data_dir" in kwargs
        and kwargs["user_data_dir"]
    )
    if needs_decoy:
        requested_headless = (
            kwargs["headless"] if "headless" in kwargs else False
        )
        # The decoy launch is always headless; the real value is restored
        # right after.
        kwargs["headless"] = True
        decoy = event_loop.run_until_complete(start(**kwargs))
        kwargs["headless"] = requested_headless
        kwargs["user_data_dir"] = decoy.config.user_data_dir
        time.sleep(0.2)
        decoy.stop()  # Due to Chrome-130, must stop & start
        time.sleep(0.1)
    return event_loop.run_until_complete(start(*args, **kwargs))
async def create_from_driver(driver) -> Browser:
    """Create a Browser instance from a running driver instance.

    The Browser connects to the host and port taken from
    ``driver.options.debugger_address``. Afterwards, the driver's service
    is asked to shut down and its process is terminated, and the driver's
    ``browser_pid`` / ``user_data_dir`` are reset.

    :param driver: A running driver that exposes ``options``,
        ``service``, and ``browser_pid``.
    :return: The Browser attached to the driver's browser.
    """
    from .config import Config

    attach_config = Config()
    debug_host, debug_port = driver.options.debugger_address.split(":")
    attach_config.host = debug_host
    attach_config.port = int(debug_port)
    # Create Browser instance
    browser = await start(attach_config)
    browser._process_pid = driver.browser_pid
    # Stop chromedriver binary
    try:
        with suppress(TypeError):
            driver.service.send_remote_shutdown_command()
    finally:
        with suppress(Exception):
            driver.service._terminate_process()
    driver.browser_pid = -1
    driver.user_data_dir = None
    return browser
def free_port() -> int:
    """Determines a free port using sockets.

    Binds a TCP socket to port 0 on 127.0.0.1, and reads back the port
    that was assigned. The socket is closed before returning (even if
    binding or listening fails), so the port is not reserved afterwards.

    :return: The port number that was assigned to the socket.
    """
    import socket

    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as free_socket:
        free_socket.bind(("127.0.0.1", 0))
        free_socket.listen(5)
        port: int = free_socket.getsockname()[1]
    return port
def filter_recurse_all(
    doc: T, predicate: Callable[[cdp.dom.Node, Element], bool]
) -> List[T]:
    """
    Test each child using predicate(child),
    and return all children for which predicate(child) == True

    The search is depth-first. For each child, the child itself is tested,
    then the children of its first shadow root (if any) are searched,
    and then the child's own children are searched.

    :param doc: The cdp.dom.Node object or :py:class:`cdp_driver.Element`
    :param predicate: A function which takes a node as first parameter
     and returns a boolean, where True means include.
    :return: A list of all matching descendants (empty if none match).
    :raises TypeError: If ``doc`` has no ``children`` attribute.
    """
    if not hasattr(doc, "children"):
        raise TypeError("Object should have a .children attribute!")
    out = []
    if doc and doc.children:
        for child in doc.children:
            if predicate(child):
                out.append(child)
            # Truthiness check (same as filter_recurse): an empty
            # shadow_roots list is skipped, instead of failing on [0].
            if child.shadow_roots:
                out.extend(
                    filter_recurse_all(child.shadow_roots[0], predicate)
                )
            out.extend(filter_recurse_all(child, predicate))
    return out
def filter_recurse(
    doc: T, predicate: Callable[[cdp.dom.Node, Element], bool]
) -> T:
    """
    Depth-first search for the first descendant accepted by the predicate.

    For each child, the child itself is tested first, then the first
    shadow root (if any) is searched, and then the child's own children.

    :param doc: the cdp.dom.Node object or :py:class:`cdp_driver.Element`
    :param predicate: a function which takes a node as first parameter
     and returns a boolean, where True means include.
    :return: The first matching node, or None when nothing matches.
    :raises TypeError: If ``doc`` has no ``children`` attribute.
    """
    if not hasattr(doc, "children"):
        raise TypeError("Object should have a .children attribute!")
    if not (doc and doc.children):
        return None
    for node in doc.children:
        if predicate(node):
            return node
        if node.shadow_roots:
            match = filter_recurse(node.shadow_roots[0], predicate)
            if match:
                return match
        match = filter_recurse(node, predicate)
        if match:
            return match
    return None
def circle(
    x, y=None, radius=10, num=10, dir=0
) -> typing.Generator[typing.Tuple[float, float], None, None]:
    """
    A generator will calculate coordinates around a circle.

    :param x: start x position
    :type x: int
    :param y: start y position (defaults to ``x`` when not given)
    :type y: int
    :param radius: size of the circle
    :type radius: int
    :param num: the amount of points calculated
     (higher => slower, more cpu, but more detailed)
    :type num: int
    :param dir: 0 for the regular direction;
     any other value for the opposite direction
    :type dir: int
    :return: a generator of (x, y) coordinate tuples
    """
    import math

    r = radius
    w = num
    # Only fall back to x when y was omitted: 0 is a valid y coordinate.
    if y is None:
        y = x
    a = int(x - r * 2)
    b = int(y - r * 2)
    m = (2 * math.pi) / w
    if dir == 0:
        # Regular direction
        ran = 0, w + 1, 1
    else:
        # Opposite direction
        # NOTE(review): This range runs from w + 1 down to 1, so it is not
        # an exact mirror of the regular range (0 to w). Confirm intent.
        ran = w + 1, 0, -1
    for i in range(*ran):
        x = a + r * math.sin(m * i)
        y = b + r * math.cos(m * i)
        yield x, y
def remove_from_tree(tree: cdp.dom.Node, node: cdp.dom.Node) -> cdp.dom.Node:
    """Remove ``node`` (matched by backend_node_id) from anywhere in ``tree``.

    The tree is modified in place: every descendant that has the same
    ``backend_node_id`` as ``node`` is removed from its parent's
    ``children`` list.

    :param tree: The root node to search (it must have ``.children``).
    :param node: The node to remove; only its ``backend_node_id`` is used.
    :return: The same ``tree`` object.
    :raises TypeError: If ``tree`` has no ``children`` attribute.
    """
    if not hasattr(tree, "children"):
        raise TypeError("Object should have a .children attribute!")
    if tree and tree.children:
        # Iterate over a snapshot: removing from the list that is being
        # iterated would make the loop skip the following sibling.
        for child in list(tree.children):
            if child.backend_node_id == node.backend_node_id:
                tree.children.remove(child)
                continue
            remove_from_tree(child, node)
    return tree
async def html_from_tree(
    tree: Union[cdp.dom.Node, Element], target: Tab
):
    """Concatenate the HTML of every descendant of ``tree``, depth-first.

    For each child, its own HTML comes first (``get_html()`` for an Element;
    otherwise a ``cdp.dom.get_outer_html`` command sent through ``target``),
    followed by the HTML collected recursively from that child's subtree.

    :param tree: The node or element whose children are walked.
    :param target: The tab used to send the outer-HTML command.
    :return: The concatenated HTML string ("" when there are no children).
    :raises TypeError: If ``tree`` has no ``children`` attribute.
    """
    if not hasattr(tree, "children"):
        raise TypeError("Object should have a .children attribute!")
    if not (tree and tree.children):
        return ""
    pieces = [""]
    for node in tree.children:
        if isinstance(node, Element):
            pieces.append(await node.get_html())
        else:
            outer_html_command = cdp.dom.get_outer_html(
                backend_node_id=node.backend_node_id
            )
            pieces.append(await target.send(outer_html_command))
        pieces.append(await html_from_tree(node, target))
    return "".join(pieces)
def compare_target_info(
    info1: cdp.target.TargetInfo, info2: cdp.target.TargetInfo
) -> List[typing.Tuple[str, typing.Any, typing.Any]]:
    """
    Report which attributes differ between two target-info objects.

    When logging mode is set to debug, browser object will log when target
    info is changed. This function is called to show what has actually
    changed between the 2, by comparing their ``__dict__`` entries.

    :param info1: The old target info.
    :param info2: The new target info. (It must have every key of info1.)
    :return: A list of tuples:
     [ ... (key_which_has_changed, old_value, new_value) ]
    """
    before = info1.__dict__
    after = info2.__dict__
    changes = []
    for key, old_value in before.items():
        new_value = after[key]
        if new_value != old_value:
            changes.append((key, old_value, new_value))
    return changes
def loop():
    """Create a new asyncio event loop, set it as current, and return it."""
    new_loop = asyncio.new_event_loop()
    asyncio.set_event_loop(new_loop)
    return new_loop
def cdp_get_module(domain: Union[str, types.ModuleType]):
    """
    Resolve a cdp domain to its module.

    A module object is returned unchanged. A string is first looked up as
    an attribute of the ``cdp`` package ("input" is looked up as "input_").
    If that fails, the string is imported as a regular module name.

    :param domain: A module, or the name of a cdp domain / module.
    :return: The resolved module.
    :raises ModuleNotFoundError: If the name cannot be resolved either way.
    """
    import importlib

    if isinstance(domain, types.ModuleType):
        return domain
    try:
        if domain in ("input",):
            domain = "input_"
        resolved = getattr(cdp, domain)
        if not resolved:
            raise AttributeError
    except AttributeError:
        try:
            resolved = importlib.import_module(domain)
        except ModuleNotFoundError:
            raise ModuleNotFoundError(
                "Could not find cdp module from input '%s'" % domain
            )
    return resolved