265 lines
8.7 KiB
Python
265 lines
8.7 KiB
Python
"""Convert XPath selectors into CSS selectors"""
|
|
import re
|
|
|
|
# Fragments that are %-substituted into the node pattern below.
_sub_regexes = {
    # An element name (a letter followed by up to 40 letters, digits or
    # hyphens) or the "*" wildcard.
    "tag": r"([a-zA-Z][-a-zA-Z0-9]{0,40}|\*)",
    # An attribute / node-test name, optionally followed by "()".
    # NOTE: the final ")" is unbalanced on purpose. It closes the named
    # group (mattr / cattr) that is opened immediately before each place
    # this fragment is substituted into _validation_re.
    "attribute": r"[.a-zA-Z_:][-\w:.]*(\(\))?)",
    # The text found between the quotes of a predicate value.
    "value": r"\s*[\w/:][-/\w\s,:;.\S]*",
}

# Pattern for a single step of an XPath. The adjacent literals are joined
# into one string before the fragments above are substituted in.
_validation_re = (
    r"(?P<node>"
    r"("
    # Alternative 1: the id("value") form, only at the start of the text.
    r"^id\([\"\']?(?P<idvalue>%(value)s)[\"\']?\)"
    r"|"
    # Alternative 2: "/" or "//" followed by a tag name ...
    r"(?P<nav>//?)(?P<tag>%(tag)s)"
    # ... then an optional predicate, which is either attr="value" ...
    r"(\[("
    r"(?P<matched>(?P<mattr>@?%(attribute)s=[\"\']"
    r"(?P<mvalue>%(value)s))[\"\']"
    r"|"
    # ... or contains(attr, "value") ...
    r"(?P<contained>contains\((?P<cattr>@?%(attribute)s,\s*[\"\']"
    r"(?P<cvalue>%(value)s)[\"\']\))"
    r")\])?"
    # ... then an optional numeric position such as [2].
    r"(\[(?P<nth>\d+)\])?"
    r")"
    r")"
) % _sub_regexes

# Compiled once at import time; matched repeatedly while converting.
prog = re.compile(_validation_re)
|
|
|
|
|
|
class XpathException(Exception):
    """Raised when an XPath selector cannot be converted to CSS."""
|
|
|
|
|
|
def _handle_brackets_in_strings(xpath):
|
|
# Edge Case: Brackets in strings.
|
|
# Example from GitHub.com -
|
|
# '<input type="text" id="user[login]">' => '//*[@id="user[login]"]'
|
|
# Need to tell apart string-brackets from regular brackets
|
|
new_xpath = ""
|
|
chunks = xpath.split('"')
|
|
len_chunks = len(chunks)
|
|
for chunk_num in range(len_chunks):
|
|
if chunk_num % 2 != 0:
|
|
chunks[chunk_num] = chunks[chunk_num].replace(
|
|
"[", "_STR_L_bracket_"
|
|
)
|
|
chunks[chunk_num] = chunks[chunk_num].replace(
|
|
"]", "_STR_R_bracket_"
|
|
)
|
|
new_xpath += chunks[chunk_num]
|
|
if chunk_num != len_chunks - 1:
|
|
new_xpath += '"'
|
|
return new_xpath
|
|
|
|
|
|
def _filter_xpath_grouping(xpath, original):
|
|
"""
|
|
This method removes the outer parentheses for xpath grouping.
|
|
The xpath converter will break otherwise.
|
|
Example:
|
|
"(//button[@type='submit'])[1]" becomes "//button[@type='submit'][1]"
|
|
"""
|
|
|
|
# First remove the first open parentheses
|
|
xpath = xpath[1:]
|
|
|
|
# Next remove the last closed parentheses
|
|
index = xpath.rfind(")")
|
|
index_p1 = index + 1 # Make "flake8" and "black" agree
|
|
if index == -1:
|
|
raise XpathException(
|
|
"\nInvalid or unsupported XPath:\n%s\n"
|
|
"(Unable to convert XPath Selector to CSS Selector)"
|
|
"" % original
|
|
)
|
|
xpath = xpath[:index] + xpath[index_p1:]
|
|
return xpath
|
|
|
|
|
|
def _get_raw_css_from_xpath(xpath, original):
|
|
css = ""
|
|
attr = ""
|
|
position = 0
|
|
|
|
while position < len(xpath):
|
|
node = prog.match(xpath[position:])
|
|
if node is None:
|
|
raise XpathException(
|
|
"\nInvalid or unsupported XPath:\n%s\n"
|
|
"(Unable to convert XPath Selector to CSS Selector)"
|
|
"" % original
|
|
)
|
|
match = node.groupdict()
|
|
|
|
if position != 0:
|
|
nav = " " if match["nav"] == "//" else " > "
|
|
else:
|
|
nav = ""
|
|
|
|
tag = "" if match["tag"] == "*" else match["tag"] or ""
|
|
|
|
if match["idvalue"]:
|
|
attr = "#%s" % match["idvalue"].replace(" ", "#")
|
|
elif match["matched"]:
|
|
if match["mattr"] == "@id":
|
|
attr = "#%s" % match["mvalue"].replace(" ", "#")
|
|
elif match["mattr"] == "@class":
|
|
attr = ".%s" % match["mvalue"].replace(" ", ".")
|
|
elif match["mattr"] in ["text()", "."]:
|
|
attr = ":contains('%s')" % match["mvalue"]
|
|
elif match["mattr"]:
|
|
attr = '[%s="%s"]' % (
|
|
match["mattr"].replace("@", ""),
|
|
match["mvalue"],
|
|
)
|
|
elif match["contained"]:
|
|
if match["cattr"].startswith("@"):
|
|
attr = '[%s*="%s"]' % (
|
|
match["cattr"].replace("@", ""),
|
|
match["cvalue"],
|
|
)
|
|
elif match["cattr"] == "text()":
|
|
attr = ':contains("%s")' % match["cvalue"]
|
|
elif match["cattr"] == ".":
|
|
attr = ':contains("%s")' % match["cvalue"]
|
|
else:
|
|
attr = ""
|
|
|
|
if match["nth"]:
|
|
nth = ":nth-of-type(%s)" % match["nth"]
|
|
else:
|
|
nth = ""
|
|
|
|
node_css = nav + tag + attr + nth
|
|
css += node_css
|
|
position += node.end()
|
|
else:
|
|
css = css.strip()
|
|
return css
|
|
|
|
|
|
def convert_xpath_to_css(xpath):
    """Convert an XPath selector string into a CSS selector string.

    Three common XPath shapes are recognised first and converted directly.
    Anything else is normalised (string brackets masked, descendant axes
    rewritten, grouping parentheses removed), converted step by step with
    ``_get_raw_css_from_xpath`` and then cleaned up with a series of
    textual replacements. Text predicates are emitted as ``:contains(...)``.

    Args:
        xpath: The XPath selector to convert.

    Returns:
        The CSS selector text.

    Raises:
        XpathException: If ``xpath`` is empty, or if it cannot be handled
            by the step-by-step converter.
    """
    original = xpath
    if not xpath:
        # An empty selector used to escape as an IndexError from the
        # first/last character check below. Report it the same way as any
        # other input that cannot be converted.
        raise XpathException(
            "\nInvalid or unsupported XPath:\n%s\n"
            "(Unable to convert XPath Selector to CSS Selector)"
            "" % original
        )
    xpath = xpath.replace(" = '", "='")

    # **** Start of handling special xpath edge cases instantly ****

    # Handle a special edge case that converts to: 'tag.class:contains("TEXT")'
    c3 = "@class and contains(concat(' ', normalize-space(@class), ' '), ' "
    if xpath.count(c3) == 1 and xpath.count("[@") == 1:
        p2 = " ') and (contains(., '"
        if (
            xpath.count(p2) == 1
            and xpath.endswith("'))]")
            and xpath.count("//") == 1
            and xpath.count(" ') and (") == 1
        ):
            s_contains = xpath.split(p2)[1].split("'))]")[0]
            s_tag = xpath.split("//")[1].split("[@class")[0]
            s_class = xpath.split(c3)[1].split(" ') and (")[0]
            return '%s.%s:contains("%s")' % (s_tag, s_class, s_contains)

    # Find instance of: //tag[@attribute='value' and (contains(., 'TEXT'))]
    data = re.match(
        r"""^\s*//(\S+)\[@(\S+)='(\S+)'\s+and\s+"""
        r"""\(contains\(\.,\s'(\S+)'\)\)\]""",
        xpath,
    )
    if data:
        s_tag, s_atr, s_val, s_contains = data.group(1, 2, 3, 4)
        return '%s[%s="%s"]:contains("%s")' % (s_tag, s_atr, s_val, s_contains)

    # Find instance of: //tag[@attribute1='value1' and (@attribute2='value2')]
    data = re.match(
        r"""^\s*//(\S+)\[@(\S+)='(\S+)'\s+and\s+"""
        r"""\(@(\S+)='(\S+)'\)\]""",
        xpath,
    )
    if data:
        s_tag, s_atr1, s_val1, s_atr2, s_val2 = data.group(1, 2, 3, 4, 5)
        return '%s[%s="%s"][%s="%s"]' % (s_tag, s_atr1, s_val1, s_atr2, s_val2)

    # **** End of handling special xpath edge cases instantly ****

    # Mask brackets inside double-quoted strings, but only when the quotes
    # are balanced and the selector itself is not wrapped in quotes.
    if (
        not xpath.startswith('"')
        and not xpath.endswith('"')
        and xpath.count('"') % 2 == 0
    ):
        xpath = _handle_brackets_in_strings(xpath)
    xpath = xpath.replace("descendant-or-self::*/", "descORself/")
    if len(xpath) > 3:
        # Leave a leading "//" alone; mark every later "//" as a
        # descendant-or-self step.
        xpath = xpath[0:3] + xpath[3:].replace("//", "/descORself/")

    # When exactly one " and contains(@attr, " is present, remove the
    # character just before the next "]" and replace the marker text with
    # the "_STAR_=" placeholder (turned into "*=" at the end).
    marker = " and contains(@"
    if xpath.count(marker) == 1:
        spot1 = xpath.find(marker) + len(marker)
        spot2 = xpath.find(",", spot1)
        attr = xpath[spot1:spot2]
        swap = " and contains(@%s, " % attr
        if swap in xpath:
            swap_spot = xpath.find(swap)
            close_paren = xpath.find("]", swap_spot) - 1
            close_paren_p1 = close_paren + 1  # Make "flake8" and "black" agree
            if close_paren > 1:
                xpath = xpath[:close_paren] + xpath[close_paren_p1:]
                xpath = xpath.replace(swap, "_STAR_=")

    if xpath.startswith("("):
        xpath = _filter_xpath_grouping(xpath, original)

    if "/descORself/" in xpath and ("@id" in xpath or "@class" in xpath):
        # Convert each section separately (each must start with "//") and
        # re-join on the marker, which is replaced further down.
        css_sections = [
            _get_raw_css_from_xpath(
                section if section.startswith("//") else "//" + section,
                original,
            )
            for section in xpath.split("/descORself/")
        ]
        css = "/descORself/".join(css_sections)
    else:
        css = _get_raw_css_from_xpath(xpath, original)

    # Add quotes around attribute values that ended up without any.
    attribute_defs = re.findall(r"(\[\w+\=\S+\])", css)
    for attr_def in attribute_defs:
        if (
            attr_def.count("[") == 1
            and attr_def.count("]") == 1
            and attr_def.count("=") == 1
            and attr_def.count('"') == 0
            and attr_def.count("'") == 0
            and attr_def.count(" ") == 0
        ):
            # Now safe to manipulate
            q1 = attr_def.find("=") + 1
            q2 = attr_def.find("]")
            new_attr_def = attr_def[:q1] + "'" + attr_def[q1:q2] + "']"
            css = css.replace(attr_def, new_attr_def)

    # Replace the string-brackets with escaped ones
    css = css.replace("_STR_L_bracket_", "\\[")
    css = css.replace("_STR_R_bracket_", "\\]")

    # Handle a lot of edge cases with conversion
    # (the order of these replacements matters: longer forms come first)
    css = css.replace(" > descORself > ", " ")
    css = css.replace(" descORself > ", " ")
    css = css.replace("/descORself/*", " ")
    css = css.replace("/descORself/", " ")
    css = css.replace("descORself > ", "")
    css = css.replace("descORself/", " ")
    css = css.replace("descORself", " ")
    css = css.replace("_STAR_=", "*=")
    css = css.replace("]/", "] ")
    css = css.replace("] *[", "] > [")
    css = css.replace("'", '"')
    css = css.replace("[@", "[")

    return css
|