399 lines
14 KiB
Swift
399 lines
14 KiB
Swift
//
|
|
// QueryParser.swift
|
|
// SwiftSoup
|
|
//
|
|
// Created by Nabil Chatbi on 23/10/16.
|
|
// Copyright © 2016 Nabil Chatbi.. All rights reserved.
|
|
//
|
|
|
|
import Foundation
|
|
|
|
/**
|
|
* Parses a CSS selector into an Evaluator tree.
|
|
*/
|
|
public class QueryParser {
|
|
/// Tokens that separate one simple selector from the next. A query that starts
/// with one of these is evaluated against the root element.
private static let combinators: [String] = [",", ">", "+", "~", " "]
/// Operators that may follow an attribute key inside `[...]`.
private static let AttributeEvals: [String] = ["=", "!=", "^=", "$=", "*=", "~="]

/// Token queue over the query text being parsed.
private var tq: TokenQueue
/// The query text as given to the initializer; used in error messages.
private var query: String
/// Evaluators collected so far.
private var evals: [Evaluator] = []
|
|
|
|
/// Builds a parser for a single CSS query.
///
/// - Parameter query: CSS query
private init(_ query: String) {
    tq = TokenQueue(query)
    self.query = query
}
|
|
|
|
/// Parses a CSS query into an `Evaluator`.
///
/// - Parameter query: CSS query
/// - Returns: the evaluator built from the query
/// - Throws: any error raised while parsing the query
public static func parse(_ query: String) throws -> Evaluator {
    return try QueryParser(query).parse()
}
|
|
|
|
/// Parses the query held by this parser.
///
/// - Returns: the only collected evaluator when there is exactly one, otherwise
///   a `CombiningEvaluator.And` over everything collected
/// - Throws: any error raised by the selector handlers
public func parse() throws -> Evaluator {
    tq.consumeWhitespace()

    // A query that starts with a combinator is relative to the root element.
    if tq.matchesAny(QueryParser.combinators) {
        evals.append(StructuralEvaluator.Root())
        try combinator(tq.consume())
    } else {
        try findElements()
    }

    // Hierarchy and extras.
    while !tq.isEmpty() {
        let sawWhitespace: Bool = tq.consumeWhitespace()

        if tq.matchesAny(QueryParser.combinators) {
            try combinator(tq.consume())
            continue
        }
        if sawWhitespace {
            // Whitespace with no explicit combinator is handled as the " " combinator.
            try combinator(" " as Character)
            continue
        }
        // E.class, E#id, E[attr] etc.: take the next el, #, . etc. off the queue (AND).
        try findElements()
    }

    guard evals.count == 1 else {
        return CombiningEvaluator.And(evals)
    }
    return evals[0]
}
|
|
|
|
/// Folds the sub-query that follows `combinator` into the evaluators collected so far.
///
/// After this call `evals` holds exactly one evaluator: the new topmost one.
///
/// - Parameter combinator: one of `,`, `>`, `+`, `~` or a space
/// - Throws: a `SelectorParseException` error for any other character, and any
///   error raised while parsing the sub-query
private func combinator(_ combinator: Character) throws {
    tq.consumeWhitespace()
    let subQuery: String = consumeSubQuery() // support multi > childs
    // The evaluator to add into the target evaluator.
    let newEval: Evaluator = try QueryParser.parse(subQuery)

    var rootEval: Evaluator?    // the new topmost evaluator
    var currentEval: Evaluator? // the evaluator the new one is combined with: the root, or the rightmost OR operand
    // Set when the root is an OR whose rightmost operand must be replaced afterwards.
    var orRootToPatch: CombiningEvaluator.Or? = nil

    if evals.count == 1 {
        currentEval = evals[0]
        rootEval = currentEval
        // Make sure OR (,) has precedence: combine with its rightmost operand only.
        if combinator != ",", let orRoot = rootEval as? CombiningEvaluator.Or {
            currentEval = orRoot.rightMostEvaluator()
            orRootToPatch = orRoot
        }
    } else {
        currentEval = CombiningEvaluator.And(evals)
        rootEval = currentEval
    }
    evals.removeAll()

    // For most combinators: turn the current eval into an AND of the new eval
    // and a structural evaluator wrapping the current eval.
    switch combinator {
    case ">":
        currentEval = CombiningEvaluator.And(newEval, StructuralEvaluator.ImmediateParent(currentEval!))
    case " ":
        currentEval = CombiningEvaluator.And(newEval, StructuralEvaluator.Parent(currentEval!))
    case "+":
        currentEval = CombiningEvaluator.And(newEval, StructuralEvaluator.ImmediatePreviousSibling(currentEval!))
    case "~":
        currentEval = CombiningEvaluator.And(newEval, StructuralEvaluator.PreviousSibling(currentEval!))
    case ",":
        // Group OR: extend an existing OR, or start a new one with both operands.
        let group: CombiningEvaluator.Or
        if let existing = currentEval as? CombiningEvaluator.Or {
            group = existing
            group.add(newEval)
        } else {
            group = CombiningEvaluator.Or()
            group.add(currentEval!)
            group.add(newEval)
        }
        currentEval = group
    default:
        throw Exception.Error(type: ExceptionType.SelectorParseException, Message: "Unknown combinator: \(String(combinator))")
    }

    if let orRoot = orRootToPatch {
        orRoot.replaceRightMostEvaluator(currentEval!)
    } else {
        rootEval = currentEval
    }
    evals.append(rootEval!)
}
|
|
|
|
|
|
/// Consumes queue content up to, but not including, the next combinator.
///
/// Parenthesised and bracketed sections are taken whole via `chompBalanced`
/// and re-wrapped in their delimiters, so a combinator character inside them
/// does not end the sub-query.
///
/// - Returns: the consumed sub-query text
private func consumeSubQuery() -> String {
    let buffer: StringBuilder = StringBuilder()
    while !tq.isEmpty() {
        if tq.matches("(") {
            buffer.append("(").append(tq.chompBalanced("(", ")")).append(")")
            continue
        }
        if tq.matches("[") {
            buffer.append("[").append(tq.chompBalanced("[", "]")).append("]")
            continue
        }
        if tq.matchesAny(QueryParser.combinators) {
            break
        }
        buffer.append(tq.consume())
    }
    return buffer.toString()
}
|
|
|
|
/// Reads one simple selector from the front of the queue and dispatches to its handler.
///
/// Candidates are tried in a fixed order and the first one that matches wins.
///
/// - Throws: a `SelectorParseException` error when nothing matches, and any
///   error raised by the selected handler
private func findElements() throws {
    if tq.matchChomp("#") { try byId(); return }
    if tq.matchChomp(".") { try byClass(); return }
    if tq.matchesWord() || tq.matches("*|") { try byTag(); return }
    if tq.matches("[") { try byAttribute(); return }
    if tq.matchChomp("*") { allElements(); return }
    if tq.matchChomp(":lt(") { try indexLessThan(); return }
    if tq.matchChomp(":gt(") { try indexGreaterThan(); return }
    if tq.matchChomp(":eq(") { try indexEquals(); return }
    if tq.matches(":has(") { try has(); return }
    if tq.matches(":contains(") { try contains(false); return }
    if tq.matches(":containsOwn(") { try contains(true); return }
    if tq.matches(":matches(") { try matches(false); return }
    if tq.matches(":matchesOwn(") { try matches(true); return }
    if tq.matches(":not(") { try not(); return }

    // nth-style pseudo selectors: (prefix, backwards, ofType).
    let nthSelectors: [(String, Bool, Bool)] = [
        (":nth-child(", false, false),
        (":nth-last-child(", true, false),
        (":nth-of-type(", false, true),
        (":nth-last-of-type(", true, true)
    ]
    for (prefix, backwards, ofType) in nthSelectors {
        if tq.matchChomp(prefix) {
            try cssNthChild(backwards, ofType)
            return
        }
    }

    // Argument-less pseudo selectors; the evaluator is only built on a match.
    let plainSelectors: [(String, () -> Evaluator)] = [
        (":first-child", { Evaluator.IsFirstChild() }),
        (":last-child", { Evaluator.IsLastChild() }),
        (":first-of-type", { Evaluator.IsFirstOfType() }),
        (":last-of-type", { Evaluator.IsLastOfType() }),
        (":only-child", { Evaluator.IsOnlyChild() }),
        (":only-of-type", { Evaluator.IsOnlyOfType() }),
        (":empty", { Evaluator.IsEmpty() }),
        (":root", { Evaluator.IsRoot() })
    ]
    for (token, makeEvaluator) in plainSelectors {
        if tq.matchChomp(token) {
            evals.append(makeEvaluator())
            return
        }
    }

    // unhandled
    throw Exception.Error(type: ExceptionType.SelectorParseException, Message:"Could not parse query \(query): unexpected token at \(tq.remainder())")
}
|
|
|
|
/// Handles `#id`; the `#` has already been consumed by the caller.
///
/// - Throws: whatever `Validate.notEmpty` throws for the consumed identifier
private func byId() throws {
    let identifier: String = tq.consumeCssIdentifier()
    try Validate.notEmpty(string: identifier)
    let evaluator = Evaluator.Id(identifier)
    evals.append(evaluator)
}
|
|
|
|
/// Handles `.class`; the `.` has already been consumed by the caller.
///
/// - Throws: whatever `Validate.notEmpty` throws for the consumed identifier
private func byClass() throws {
    let name: String = tq.consumeCssIdentifier()
    try Validate.notEmpty(string: name)
    let evaluator = Evaluator.Class(name.trim())
    evals.append(evaluator)
}
|
|
|
|
/// Handles a tag selector, including the `ns|tag` and `*|tag` namespace forms.
///
/// - Throws: whatever `Validate.notEmpty` throws for the consumed element selector
private func byTag() throws {
    let rawName = tq.consumeElementSelector()

    try Validate.notEmpty(string: rawName)

    guard rawName.startsWith("*|") else {
        // Namespaces: if the element name is "abc:def", the selector must be "abc|def", so flip.
        let flipped = rawName.contains("|") ? rawName.replacingOccurrences(of: "|", with: ":") : rawName
        evals.append(Evaluator.Tag(flipped.trim()))
        return
    }

    // Namespaces: wildcard match equals(tagName) or ending in ":" + tagName.
    let exactTag = Evaluator.Tag(rawName.trim().lowercased())
    let suffixTag = Evaluator.TagEndsWith(rawName.replacingOccurrences(of: "*|", with: ":").trim().lowercased())
    evals.append(CombiningEvaluator.Or(exactTag, suffixTag))
}
|
|
|
|
/// Handles `[attr]`, `[^attrPrefix]` and `[attr<op>value]` selectors, where
/// `<op>` is one of `=`, `!=`, `^=`, `$=`, `*=`, `~=`.
///
/// - Throws: a `SelectorParseException` error when the text after the key is
///   not a known operator, and any error raised by validation or by the
///   evaluator initializers
private func byAttribute() throws {
    let cq: TokenQueue = TokenQueue(tq.chompBalanced("[", "]")) // content queue
    // Everything up to the operator: eq, not, start, end, contain, match, (no val).
    let key: String = cq.consumeToAny(QueryParser.AttributeEvals)
    try Validate.notEmpty(string: key)
    cq.consumeWhitespace()

    let evaluator: Evaluator
    if cq.isEmpty() {
        // No operator and no value: match on the key alone.
        if key.startsWith("^") {
            evaluator = try Evaluator.AttributeStarting(key.substring(1))
        } else {
            evaluator = Evaluator.Attribute(key)
        }
    } else if cq.matchChomp("=") {
        evaluator = try Evaluator.AttributeWithValue(key, cq.remainder())
    } else if cq.matchChomp("!=") {
        evaluator = try Evaluator.AttributeWithValueNot(key, cq.remainder())
    } else if cq.matchChomp("^=") {
        evaluator = try Evaluator.AttributeWithValueStarting(key, cq.remainder())
    } else if cq.matchChomp("$=") {
        evaluator = try Evaluator.AttributeWithValueEnding(key, cq.remainder())
    } else if cq.matchChomp("*=") {
        evaluator = try Evaluator.AttributeWithValueContaining(key, cq.remainder())
    } else if cq.matchChomp("~=") {
        evaluator = Evaluator.AttributeWithValueMatching(key, Pattern.compile(cq.remainder()))
    } else {
        throw Exception.Error(type: ExceptionType.SelectorParseException, Message:"Could not parse attribute query '\(query)': unexpected token at '\(cq.remainder())'")
    }
    evals.append(evaluator)
}
|
|
|
|
/// Handles `*` by appending an `Evaluator.AllElements` evaluator.
private func allElements() {
    let everything = Evaluator.AllElements()
    evals.append(everything)
}
|
|
|
|
// pseudo selectors :lt, :gt, :eq
|
|
/// Handles `:lt(n)`; the `:lt(` prefix has already been consumed by the caller.
///
/// - Throws: any error raised while reading the index
private func indexLessThan() throws {
    let index = try consumeIndex()
    evals.append(Evaluator.IndexLessThan(index))
}
|
|
|
|
/// Handles `:gt(n)`; the `:gt(` prefix has already been consumed by the caller.
///
/// - Throws: any error raised while reading the index
private func indexGreaterThan() throws {
    let index = try consumeIndex()
    evals.append(Evaluator.IndexGreaterThan(index))
}
|
|
|
|
/// Handles `:eq(n)`; the `:eq(` prefix has already been consumed by the caller.
///
/// - Throws: any error raised while reading the index
private func indexEquals() throws {
    let index = try consumeIndex()
    evals.append(Evaluator.IndexEquals(index))
}
|
|
|
|
//pseudo selectors :first-child, :last-child, :nth-child, ...
|
|
/// Pattern for the `an+b` form of an nth-index argument (compiled case-insensitively).
private static let NTH_AB: Pattern = Pattern.compile(
    "((\\+|-)?(\\d+)?)n(\\s*(\\+|-)?\\s*\\d+)?",
    Pattern.CASE_INSENSITIVE)
/// Pattern for a plain, optionally signed, integer nth-index argument.
private static let NTH_B: Pattern = Pattern.compile("(\\+|-)?(\\d+)")
|
|
|
|
/// Handles the nth-style pseudo selectors once their `:nth-...(` prefix has
/// been consumed by the caller.
///
/// The argument up to the closing `)` is read from the queue, trimmed and
/// lowercased, then interpreted as `odd` (a=2, b=1), `even` (a=2, b=0), the
/// `an+b` form, or a plain integer `b` (a=0).
///
/// - Parameters:
///   - backwards: selects the `IsNthLast...` evaluators instead of `IsNth...`
///   - ofType: selects the `...OfType` evaluators instead of `...Child`
/// - Throws: a `SelectorParseException` error when the argument matches none
///   of the accepted forms or one of its numbers cannot be converted to `Int`
private func cssNthChild(_ backwards: Bool, _ ofType: Bool) throws {
    let argS: String = tq.chompTo(")").trim().lowercased()
    let mAB: Matcher = QueryParser.NTH_AB.matcher(in: argS)
    let mB: Matcher = QueryParser.NTH_B.matcher(in: argS)

    // Converts a captured number such as "3", "+3", "-2" or " + 1" to an Int.
    // NTH_AB allows whitespace around the sign of b, which Int(_:) rejects, so
    // whitespace is removed first. Int(_:) itself accepts a leading "+" or "-".
    // A failed conversion (for example a value too large for Int) is reported
    // as a parse error instead of crashing on a force-unwrap.
    func number(_ text: String) throws -> Int {
        let compact = text.components(separatedBy: CharacterSet.whitespacesAndNewlines).joined()
        guard let value = Int(compact) else {
            throw Exception.Error(type: ExceptionType.SelectorParseException, Message:"Could not parse nth-index '\(argS)': unexpected format")
        }
        return value
    }

    let a: Int
    let b: Int
    if "odd" == argS {
        a = 2
        b = 1
    } else if "even" == argS {
        a = 2
        b = 0
    } else if mAB.matches.count > 0 {
        // NOTE(review): this accepts any match found in argS; whether the match
        // must span the whole argument depends on Matcher — confirm.
        mAB.find()
        if mAB.group(3) != nil, let coefficient = mAB.group(1) {
            // Explicit coefficient, e.g. "2n", "+2n", "-3n".
            a = try number(coefficient)
        } else {
            // Bare "n" / "+n" means a = 1; "-n" means a = -1.
            a = mAB.group(2) == "-" ? -1 : 1
        }
        if let offset = mAB.group(4) {
            b = try number(offset)
        } else {
            b = 0
        }
    } else if mB.matches.count > 0 {
        a = 0
        mB.find()
        guard let whole = mB.group() else {
            throw Exception.Error(type: ExceptionType.SelectorParseException, Message:"Could not parse nth-index '\(argS)': unexpected format")
        }
        b = try number(whole)
    } else {
        throw Exception.Error(type: ExceptionType.SelectorParseException, Message:"Could not parse nth-index '\(argS)': unexpected format")
    }

    if ofType {
        if backwards {
            evals.append(Evaluator.IsNthLastOfType(a, b))
        } else {
            evals.append(Evaluator.IsNthOfType(a, b))
        }
    } else {
        if backwards {
            evals.append(Evaluator.IsNthLastChild(a, b))
        } else {
            evals.append(Evaluator.IsNthChild(a, b))
        }
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// Reads the index argument of `:lt(`, `:gt(` or `:eq(` up to the closing `)`.
///
/// - Returns: the index as an `Int`
/// - Throws: whatever `Validate.isTrue` throws when the text fails the numeric
///   check, or a `SelectorParseException` error when the text cannot be
///   converted to `Int`
private func consumeIndex() throws -> Int {
    let indexS: String = tq.chompTo(")").trim()
    try Validate.isTrue(val: StringUtil.isNumeric(indexS), msg: "Index must be numeric")
    // Int(_:) returns nil when the text is not representable as Int (for
    // example more digits than Int can hold); report that as a parse error
    // rather than crashing on a force-unwrap.
    guard let index = Int(indexS) else {
        throw Exception.Error(type: ExceptionType.SelectorParseException, Message:"Could not parse index '\(indexS)': value is not a valid integer")
    }
    return index
}
|
|
|
|
/// Handles the pseudo selector `:has(el)`: parses the parenthesised
/// sub-selector and wraps it in `StructuralEvaluator.Has`.
///
/// - Throws: any error raised while consuming `:has`, validating the
///   sub-selector, or parsing it
private func has() throws {
    try tq.consume(":has")
    let inner: String = tq.chompBalanced("(", ")")
    try Validate.notEmpty(string: inner, msg: ":has(el) subselect must not be empty")
    let innerEvaluator = try QueryParser.parse(inner)
    evals.append(StructuralEvaluator.Has(innerEvaluator))
}
|
|
|
|
/// Handles the pseudo selectors `:contains(text)` and `:containsOwn(text)`.
///
/// - Parameter own: `true` for `:containsOwn`, `false` for `:contains`
/// - Throws: any error raised while consuming the selector name or validating
///   the search text
private func contains(_ own: Bool) throws {
    let keyword = own ? ":containsOwn" : ":contains"
    try tq.consume(keyword)
    let searchText: String = TokenQueue.unescape(tq.chompBalanced("(", ")"))
    try Validate.notEmpty(string: searchText, msg: ":contains(text) query must not be empty")

    guard own else {
        evals.append(Evaluator.ContainsText(searchText))
        return
    }
    evals.append(Evaluator.ContainsOwnText(searchText))
}
|
|
|
|
/// Handles the pseudo selectors `:matches(regex)` and `:matchesOwn(regex)`.
///
/// - Parameter own: `true` for `:matchesOwn`, `false` for `:matches`
/// - Throws: any error raised while consuming the selector name or validating
///   the regex text
private func matches(_ own: Bool) throws {
    let keyword = own ? ":matchesOwn" : ":matches"
    try tq.consume(keyword)
    // Not unescaped, as regex bits will be escaped.
    let regex: String = tq.chompBalanced("(", ")")
    try Validate.notEmpty(string: regex, msg: ":matches(regex) query must not be empty")

    let pattern = Pattern.compile(regex)
    guard own else {
        evals.append(Evaluator.Matches(pattern))
        return
    }
    evals.append(Evaluator.MatchesOwn(pattern))
}
|
|
|
|
/// Handles the pseudo selector `:not(selector)`: parses the parenthesised
/// sub-selector and wraps it in `StructuralEvaluator.Not`.
///
/// - Throws: any error raised while consuming `:not`, validating the
///   sub-selector, or parsing it
private func not() throws {
    try tq.consume(":not")
    let inner: String = tq.chompBalanced("(", ")")
    try Validate.notEmpty(string: inner, msg: ":not(selector) subselect must not be empty")

    let innerEvaluator = try QueryParser.parse(inner)
    evals.append(StructuralEvaluator.Not(innerEvaluator))
}
|
|
|
|
}
|