SeleniumBase/examples/cdp_mode/raw_xhr_async.py

92 lines
2.9 KiB
Python

"""CDP.network.ResponseReceived with CDP.network.ResourceType.XHR."""
import ast
import asyncio
import colorama
import mycdp
import sys
import time
from seleniumbase import cdp_driver
xhr_requests = []
last_xhr_request = None
c1 = colorama.Fore.BLUE + colorama.Back.LIGHTYELLOW_EX
c2 = colorama.Fore.BLUE + colorama.Back.LIGHTGREEN_EX
cr = colorama.Style.RESET_ALL
if "linux" in sys.platform:
c1 = c2 = cr = ""
def listenXHR(page):
async def handler(evt):
# Get AJAX requests
if evt.type_ is mycdp.network.ResourceType.XHR:
xhr_requests.append([evt.response.url, evt.request_id])
global last_xhr_request
last_xhr_request = time.time()
page.add_handler(mycdp.network.ResponseReceived, handler)
async def receiveXHR(page, requests):
responses = []
retries = 0
max_retries = 5
# Wait at least 2 seconds after last XHR request for more
while True:
if last_xhr_request is None or retries > max_retries:
break
if time.time() - last_xhr_request <= 2:
retries = retries + 1
time.sleep(2)
continue
else:
break
await page
# Loop through gathered requests and get response body
for request in requests:
try:
res = await page.send(mycdp.network.get_response_body(request[1]))
if res is None:
continue
responses.append({
"url": request[0],
"body": res[0],
"is_base64": res[1],
})
except Exception as e:
print("Error getting response:", e)
return responses
async def crawl():
driver = await cdp_driver.start_async()
tab = await driver.get("about:blank")
listenXHR(tab)
# Change url to something that makes ajax requests
tab = await driver.get("https://learn.microsoft.com/en-us/")
time.sleep(2)
for i in range(20):
await tab.scroll_down(4)
time.sleep(0.02)
xhr_responses = await receiveXHR(tab, xhr_requests)
for response in xhr_responses:
print(c1 + "*** ==> XHR Request URL <== ***" + cr)
print(f'{response["url"]}')
is_base64 = response["is_base64"]
b64_data = "Base64 encoded data"
try:
headers = ast.literal_eval(response["body"])["headers"]
print(c2 + "*** ==> XHR Response Headers <== ***" + cr)
print(headers if not is_base64 else b64_data)
except Exception:
response_body = response["body"]
print(c2 + "*** ==> XHR Response Body <== ***" + cr)
print(response_body if not is_base64 else b64_data)
if __name__ == "__main__":
print("<============= START: XHR Example =============>")
asyncio.run(crawl())
print("<============== END: XHR Example ==============>")