Compare commits
10 Commits
Author | SHA1 | Date |
---|---|---|
![]() |
b2e20da314 | |
![]() |
33b0fccade | |
![]() |
efb95005d1 | |
![]() |
72919d7292 | |
![]() |
c36d7daf42 | |
![]() |
ed4761b42d | |
![]() |
f9fd88731e | |
![]() |
dbed44d543 | |
![]() |
d987bb6ed8 | |
![]() |
bdffeac923 |
|
@ -119,7 +119,7 @@ open class Attribute {
|
|||
}
|
||||
|
||||
public func isBooleanAttribute() -> Bool {
|
||||
return Attribute.booleanAttributes.contains(key)
|
||||
return Attribute.booleanAttributes.contains(key.lowercased())
|
||||
}
|
||||
|
||||
public func hashCode() -> Int {
|
||||
|
|
|
@ -25,9 +25,9 @@ open class Attributes: NSCopying {
|
|||
|
||||
public static var dataPrefix: String = "data-"
|
||||
|
||||
var attributes: OrderedDictionary<String, Attribute> = OrderedDictionary<String, Attribute>()
|
||||
// linked hash map to preserve insertion order.
|
||||
// null be default as so many elements have no attributes -- saves a good chunk of memory
|
||||
// Stored as an array in insertion order. Lookups compare against the key held
|
||||
// inside each Attribute: exactly by default, case-insensitively in the IgnoreCase variants.
|
||||
var attributes: [Attribute] = []
|
||||
|
||||
public init() {}
|
||||
|
||||
|
@ -37,9 +37,11 @@ open class Attributes: NSCopying {
|
|||
@return the attribute value if set; or empty string if not set.
|
||||
@see #hasKey(String)
|
||||
*/
|
||||
open func get(key: String) -> String {
|
||||
let attr: Attribute? = attributes.get(key: key)
|
||||
return attr != nil ? attr!.getValue() : ""
|
||||
open func get(key: String) -> String {
    // Exact (case-sensitive) key comparison; the first stored attribute that matches wins.
    // A missing key yields the empty string rather than nil.
    return attributes.first { $0.getKey() == key }?.getValue() ?? ""
}
|
||||
|
||||
/**
|
||||
|
@ -49,11 +51,8 @@ open class Attributes: NSCopying {
|
|||
*/
|
||||
open func getIgnoreCase(key: String )throws -> String {
|
||||
try Validate.notEmpty(string: key)
|
||||
|
||||
for attrKey in (attributes.keySet()) {
|
||||
if attrKey.equalsIgnoreCase(string: key) {
|
||||
return attributes.get(key: attrKey)!.getValue()
|
||||
}
|
||||
if let attr = attributes.first(where: { $0.getKey().caseInsensitiveCompare(key) == .orderedSame }) {
|
||||
return attr.getValue()
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
@ -82,11 +81,16 @@ open class Attributes: NSCopying {
|
|||
}
|
||||
|
||||
/**
|
||||
Set a new attribute, or replace an existing one by key.
|
||||
Set a new attribute, or replace an existing one by (case-sensitive) key.
|
||||
@param attribute attribute
|
||||
*/
|
||||
open func put(attribute: Attribute) {
|
||||
attributes.put(value: attribute, forKey: attribute.getKey())
|
||||
let key = attribute.getKey()
|
||||
if let ix = attributes.firstIndex(where: { $0.getKey() == key }) {
|
||||
attributes[ix] = attribute
|
||||
} else {
|
||||
attributes.append(attribute)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -95,7 +99,8 @@ open class Attributes: NSCopying {
|
|||
*/
|
||||
open func remove(key: String)throws {
|
||||
try Validate.notEmpty(string: key)
|
||||
attributes.remove(key: key)
|
||||
if let ix = attributes.firstIndex(where: { $0.getKey() == key }) {
|
||||
attributes.remove(at: ix) }
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -104,10 +109,8 @@ open class Attributes: NSCopying {
|
|||
*/
|
||||
open func removeIgnoreCase(key: String ) throws {
|
||||
try Validate.notEmpty(string: key)
|
||||
for attrKey in attributes.keySet() {
|
||||
if (attrKey.equalsIgnoreCase(string: key)) {
|
||||
attributes.remove(key: attrKey)
|
||||
}
|
||||
if let ix = attributes.firstIndex(where: { $0.getKey().caseInsensitiveCompare(key) == .orderedSame}) {
|
||||
attributes.remove(at: ix)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -117,7 +120,7 @@ open class Attributes: NSCopying {
|
|||
@return true if key exists, false otherwise
|
||||
*/
|
||||
open func hasKey(key: String) -> Bool {
|
||||
return attributes.containsKey(key: key)
|
||||
return attributes.contains(where: { $0.getKey() == key })
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -126,12 +129,7 @@ open class Attributes: NSCopying {
|
|||
@return true if key exists, false otherwise
|
||||
*/
|
||||
open func hasKeyIgnoreCase(key: String) -> Bool {
|
||||
for attrKey in attributes.keySet() {
|
||||
if (attrKey.equalsIgnoreCase(string: key)) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
return attributes.contains(where: { $0.getKey().caseInsensitiveCompare(key) == .orderedSame})
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -139,7 +137,7 @@ open class Attributes: NSCopying {
|
|||
@return size
|
||||
*/
|
||||
open func size() -> Int {
|
||||
return attributes.count//TODO: check retyrn right size
|
||||
return attributes.count
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -147,35 +145,19 @@ open class Attributes: NSCopying {
|
|||
@param incoming attributes to add to these attributes.
|
||||
*/
|
||||
open func addAll(incoming: Attributes?) {
|
||||
guard let incoming = incoming else {
|
||||
return
|
||||
guard let incoming = incoming else { return }
|
||||
for attr in incoming.attributes {
|
||||
put(attribute: attr)
|
||||
}
|
||||
|
||||
if (incoming.size() == 0) {
|
||||
return
|
||||
}
|
||||
attributes.putAll(all: incoming.attributes)
|
||||
}
|
||||
|
||||
// open func iterator() -> IndexingIterator<Array<Attribute>> {
|
||||
// if (attributes.isEmpty) {
|
||||
// let args: [Attribute] = []
|
||||
// return args.makeIterator()
|
||||
// }
|
||||
// return attributes.orderedValues.makeIterator()
|
||||
// }
|
||||
|
||||
/**
|
||||
Get the attributes as a List, for iteration. Do not modify the keys of the attributes via this view, as changes
|
||||
to keys will not be recognised in the containing set.
|
||||
@return a view of the attributes as a List.
|
||||
*/
|
||||
open func asList() -> Array<Attribute> {
|
||||
var list: Array<Attribute> = Array(/*attributes.size()*/)
|
||||
for entry in attributes.orderedValues {
|
||||
list.append(entry)
|
||||
}
|
||||
return list
|
||||
open func asList() -> [Attribute] {
    // Hand back the backing array as-is; element order is the stored order.
    let list: [Attribute] = attributes
    return list
}
|
||||
|
||||
/**
|
||||
|
@ -183,17 +165,11 @@ open class Attributes: NSCopying {
|
|||
* starting with {@code data-}.
|
||||
* @return map of custom data attributes.
|
||||
*/
|
||||
//Map<String, String>
|
||||
open func dataset() -> Dictionary<String, String> {
|
||||
var dataset = Dictionary<String, String>()
|
||||
for attribute in attributes {
|
||||
let attr = attribute.1
|
||||
if(attr.isDataAttribute()) {
|
||||
let key = attr.getKey().substring(Attributes.dataPrefix.count)
|
||||
dataset[key] = attribute.1.getValue()
|
||||
}
|
||||
}
|
||||
return dataset
|
||||
open func dataset() -> [String: String] {
    // Builds a map of the custom data attributes: every attribute for which
    // isDataAttribute() is true, keyed by its name with the leading
    // Attributes.dataPrefix characters removed.
    //
    // Keys are not guaranteed to be unique here: lowercaseAllKeys() rewrites keys in
    // place without de-duplicating, so "data-ID" and "data-id" can both end up as
    // "data-id". Dictionary(uniqueKeysWithValues:) traps at runtime on a duplicate key,
    // so assign entries one by one instead; a later attribute overwrites an earlier one.
    let prefixLength = Attributes.dataPrefix.count
    var result: [String: String] = [:]
    for attr in attributes where attr.isDataAttribute() {
        result[attr.getKey().substring(prefixLength)] = attr.getValue()
    }
    return result
}
|
||||
|
||||
/**
|
||||
|
@ -208,9 +184,9 @@ open class Attributes: NSCopying {
|
|||
}
|
||||
|
||||
public func html(accum: StringBuilder, out: OutputSettings ) throws {
|
||||
for attribute in attributes.orderedValues {
|
||||
for attr in attributes {
|
||||
accum.append(" ")
|
||||
attribute.html(accum: accum, out: out)
|
||||
attr.html(accum: accum, out: out)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -226,21 +202,19 @@ open class Attributes: NSCopying {
|
|||
open func equals(o: AnyObject?) -> Bool {
|
||||
if(o == nil) {return false}
|
||||
if (self === o.self) {return true}
|
||||
guard let that: Attributes = o as? Attributes else {return false}
|
||||
guard let that = o as? Attributes else {return false}
|
||||
return (attributes == that.attributes)
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the hashcode of these attributes, by iterating all attributes and summing their hashcodes.
|
||||
* @return calculated hashcode
|
||||
*/
|
||||
open func hashCode() -> Int {
|
||||
return attributes.hashCode()
|
||||
|
||||
open func lowercaseAllKeys() {
    // Replace each stored attribute's key with its lowercased form, in place.
    // Order and values are left untouched.
    for position in attributes.indices {
        let folded = attributes[position].key.lowercased()
        attributes[position].key = folded
    }
}
|
||||
|
||||
public func copy(with zone: NSZone? = nil) -> Any {
|
||||
let clone = Attributes()
|
||||
clone.attributes = attributes.clone()
|
||||
clone.attributes = attributes
|
||||
return clone
|
||||
}
|
||||
|
||||
|
@ -255,10 +229,7 @@ open class Attributes: NSCopying {
|
|||
}
|
||||
|
||||
extension Attributes: Sequence {
|
||||
public func makeIterator() -> AnyIterator<Attribute> {
|
||||
var list = attributes.orderedValues
|
||||
return AnyIterator {
|
||||
return list.count > 0 ? list.removeFirst() : nil
|
||||
}
|
||||
}
|
||||
public func makeIterator() -> AnyIterator<Attribute> {
    // Type-erase the backing array's iterator; attributes are yielded in stored order.
    let base = attributes.makeIterator()
    return AnyIterator(base)
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,95 @@
|
|||
//
|
||||
// BinarySearch.swift
|
||||
// SwiftSoup-iOS
|
||||
//
|
||||
// Created by Garth Snyder on 2/28/19.
|
||||
// Copyright © 2019 Nabil Chatbi. All rights reserved.
|
||||
//
|
||||
// Adapted from https://stackoverflow.com/questions/31904396/swift-binary-search-for-standard-array
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
extension Collection {

    /// Generalized binary search (partition point) for ordered Collections.
    ///
    /// Behavior is undefined if the collection is not partitioned by `predicate`,
    /// i.e. if the predicate is not true for a prefix of the elements and false
    /// for the rest (a collection sorted by `<` satisfies this for `{ $0 < x }`).
    ///
    /// This is only O(log N) for RandomAccessCollections; Collections in
    /// general may implement offsetting of indexes as an O(K) operation (e.g.,
    /// Strings are like this).
    ///
    /// - Note: If you are using this for searching only (not insertion), you
    ///   must always test the element at the returned index to ensure that
    ///   it's a genuine match. If the element is not present, you still get an
    ///   index back: the location where it should be inserted. That index may
    ///   be `endIndex`, so check for that before subscripting.
    ///
    /// - Parameter predicate: Reports the ordering of a given Element relative
    ///   to the desired Element. Typically, this is `<`.
    ///
    /// - Returns: Index N such that the predicate is true for all elements up to
    ///   but not including N, and is false for all elements N and beyond.
    func binarySearch(predicate: (Element) -> Bool) -> Index {
        var low = startIndex
        var high = endIndex
        while low != high {
            // Midpoint of the half-open range [low, high); always < high, so it is subscriptable.
            let mid = index(low, offsetBy: distance(from: low, to: high) / 2)
            if predicate(self[mid]) {
                low = index(after: mid)
            } else {
                high = mid
            }
        }
        return low
    }

    /// Binary search lookup for ordered Collections using a KeyPath
    /// relative to Element.
    ///
    /// Behavior is undefined if the collection is not sorted ascending (by `<`)
    /// on the value at `keyPath`.
    ///
    /// This is only O(log N) for RandomAccessCollections; see `binarySearch`.
    ///
    /// Unlike `binarySearch`, the result needs no further checking: a non-nil
    /// index always refers to an element whose keyed value equals `value`.
    /// When several elements match, the index of the first one is returned.
    ///
    /// - Parameters:
    ///   - value: The value to look for.
    ///   - keyPath: KeyPath that extracts the Element value on which the
    ///     Collection is presorted.
    ///
    /// - Returns: The index of the first matching element, or nil if not found.
    func indexOfElement<T>(withValue value: T, atKeyPath keyPath: KeyPath<Element, T>) -> Index? where T: Comparable {
        // Lower bound: first position whose keyed value is not < value.
        let ix = binarySearch { $0[keyPath: keyPath] < value }
        guard ix < endIndex, self[ix][keyPath: keyPath] == value else { return nil }
        return ix
    }

    /// Returns the first element whose value at `keyPath` equals `value`, or nil
    /// if there is none. Same sorting precondition as `indexOfElement`.
    func element<T>(withValue value: T, atKeyPath keyPath: KeyPath<Element, T>) -> Element? where T: Comparable {
        return indexOfElement(withValue: value, atKeyPath: keyPath).map { self[$0] }
    }

    /// Returns every element whose value at `keyPath` equals `value`, in
    /// collection order, or an empty array if there is none. Same sorting
    /// precondition as `indexOfElement`; matches are contiguous in a sorted
    /// collection, so the run is extended linearly from the first match.
    func elements<T>(withValue value: T, atKeyPath keyPath: KeyPath<Element, T>) -> [Element] where T: Comparable {
        guard let start = indexOfElement(withValue: value, atKeyPath: keyPath) else { return [] }
        var end = index(after: start)
        while end < endIndex && self[end][keyPath: keyPath] == value {
            end = index(after: end)
        }
        return Array(self[start..<end])
    }
}
|
|
@ -14,43 +14,47 @@ import Foundation
|
|||
public final class CharacterReader {
|
||||
private static let empty = ""
|
||||
public static let EOF: UnicodeScalar = "\u{FFFF}"//65535
|
||||
private let input: [UnicodeScalar]
|
||||
private let length: Int
|
||||
private var pos: Int = 0
|
||||
private var mark: Int = 0
|
||||
private let input: String.UnicodeScalarView
|
||||
private var pos: String.UnicodeScalarView.Index
|
||||
private var mark: String.UnicodeScalarView.Index
|
||||
//private let stringCache: Array<String?> // holds reused strings in this doc, to lessen garbage
|
||||
|
||||
public init(_ input: String) {
|
||||
self.input = Array(input.unicodeScalars)
|
||||
self.length = self.input.count
|
||||
//stringCache = Array(repeating:nil, count:512)
|
||||
self.input = input.unicodeScalars
|
||||
self.pos = input.startIndex
|
||||
self.mark = input.startIndex
|
||||
}
|
||||
|
||||
public func getPos() -> Int {
|
||||
return self.pos
|
||||
return input.distance(from: input.startIndex, to: pos)
|
||||
}
|
||||
|
||||
public func isEmpty() -> Bool {
|
||||
return pos >= length
|
||||
return pos >= input.endIndex
|
||||
}
|
||||
|
||||
public func current() -> UnicodeScalar {
|
||||
return (pos >= length) ? CharacterReader.EOF : input[pos]
|
||||
return (pos >= input.endIndex) ? CharacterReader.EOF : input[pos]
|
||||
}
|
||||
|
||||
@discardableResult
|
||||
public func consume() -> UnicodeScalar {
|
||||
let val = (pos >= length) ? CharacterReader.EOF : input[pos]
|
||||
pos += 1
|
||||
guard pos < input.endIndex else {
|
||||
return CharacterReader.EOF
|
||||
}
|
||||
let val = input[pos]
|
||||
pos = input.index(after: pos)
|
||||
return val
|
||||
}
|
||||
|
||||
public func unconsume() {
|
||||
pos -= 1
|
||||
guard pos > input.startIndex else { return }
|
||||
pos = input.index(before: pos)
|
||||
}
|
||||
|
||||
public func advance() {
|
||||
pos += 1
|
||||
guard pos < input.endIndex else { return }
|
||||
pos = input.index(after: pos)
|
||||
}
|
||||
|
||||
public func markPos() {
|
||||
|
@ -62,221 +66,169 @@ public final class CharacterReader {
|
|||
}
|
||||
|
||||
public func consumeAsString() -> String {
|
||||
let p = pos
|
||||
pos+=1
|
||||
return String(input[p])
|
||||
//return String(input, pos+=1, 1)
|
||||
guard pos < input.endIndex else { return "" }
|
||||
let str = String(input[pos])
|
||||
pos = input.index(after: pos)
|
||||
return str
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of characters between the current position and the next instance of the input char
|
||||
* @param c scan target
|
||||
* @return offset between current position and next instance of target. -1 if not found.
|
||||
*/
|
||||
public func nextIndexOf(_ c: UnicodeScalar) -> Int {
|
||||
// doesn't handle scanning for surrogates
|
||||
for i in pos..<length {
|
||||
if (c == input[i]) {
|
||||
return i - pos
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of characters between the current position and the next instance of the input sequence
|
||||
* Locate the next occurrence of a Unicode scalar
|
||||
*
|
||||
* @param seq scan target
|
||||
* @return offset between current position and next instance of target. -1 if not found.
|
||||
* - Parameter c: scan target
|
||||
* - Returns: offset between current position and next instance of target. -1 if not found.
|
||||
*/
|
||||
public func nextIndexOf(_ seq: String) -> Int {
|
||||
public func nextIndexOf(_ c: UnicodeScalar) -> String.UnicodeScalarView.Index? {
|
||||
// doesn't handle scanning for surrogates
|
||||
if(seq.isEmpty) {return -1}
|
||||
let startChar: UnicodeScalar = seq.unicodeScalar(0)
|
||||
for var offset in pos..<length {
|
||||
// scan to first instance of startchar:
|
||||
if (startChar != input[offset]) {
|
||||
offset+=1
|
||||
while(offset < length && startChar != input[offset]) { offset+=1 }
|
||||
}
|
||||
var i = offset + 1
|
||||
let last = i + seq.unicodeScalars.count-1
|
||||
if (offset < length && last <= length) {
|
||||
var j = 1
|
||||
while i < last && seq.unicodeScalar(j) == input[i] {
|
||||
j+=1
|
||||
i+=1
|
||||
}
|
||||
// found full sequence
|
||||
if (i == last) {
|
||||
return offset - pos
|
||||
}
|
||||
}
|
||||
}
|
||||
return -1
|
||||
return input[pos...].firstIndex(of: c)
|
||||
}
|
||||
|
||||
/**
 * Locate the next occurrence of a target string, scanning forward from the
 * current position.
 *
 * - Parameter seq: scan target
 * - Returns: index of the first scalar of the next instance of target;
 *            the current position for an empty target; nil if not found.
 */
public func nextIndexOf(_ seq: String) -> String.UnicodeScalarView.Index? {
    // doesn't handle scanning for surrogates
    let targetScalars = seq.unicodeScalars
    guard let firstChar = targetScalars.first else { return pos } // search for "" -> current place
    var searchStart = pos
    // Jump from one occurrence of the first scalar to the next and test the
    // whole target at each candidate position.
    while let candidate = input[searchStart...].firstIndex(of: firstChar) {
        if input[candidate...].starts(with: targetScalars) {
            return candidate
        }
        searchStart = input.index(after: candidate)
    }
    return nil
}
|
||||
|
||||
public func consumeTo(_ c: UnicodeScalar) -> String {
|
||||
let offset = nextIndexOf(c)
|
||||
if (offset != -1) {
|
||||
let consumed = cacheString(pos, offset)
|
||||
pos += offset
|
||||
return consumed
|
||||
} else {
|
||||
guard let targetIx = nextIndexOf(c) else {
|
||||
return consumeToEnd()
|
||||
}
|
||||
let consumed = cacheString(pos, targetIx)
|
||||
pos = targetIx
|
||||
return consumed
|
||||
}
|
||||
|
||||
public func consumeTo(_ seq: String) -> String {
|
||||
let offset = nextIndexOf(seq)
|
||||
if (offset != -1) {
|
||||
let consumed = cacheString(pos, offset)
|
||||
pos += offset
|
||||
return consumed
|
||||
} else {
|
||||
guard let targetIx = nextIndexOf(seq) else {
|
||||
return consumeToEnd()
|
||||
}
|
||||
let consumed = cacheString(pos, targetIx)
|
||||
pos = targetIx
|
||||
return consumed
|
||||
}
|
||||
|
||||
public func consumeToAny(_ chars: UnicodeScalar...) -> String {
|
||||
return consumeToAny(chars)
|
||||
}
|
||||
|
||||
public func consumeToAny(_ chars: [UnicodeScalar]) -> String {
|
||||
let start: Int = pos
|
||||
let remaining: Int = length
|
||||
let val = input
|
||||
OUTER: while (pos < remaining) {
|
||||
if chars.contains(val[pos]) {
|
||||
break OUTER
|
||||
}
|
||||
// for c in chars {
|
||||
// if (val[pos] == c){
|
||||
// break OUTER
|
||||
// }
|
||||
// }
|
||||
pos += 1
|
||||
let start = pos
|
||||
while pos < input.endIndex {
|
||||
if chars.contains(input[pos]) {
|
||||
break
|
||||
}
|
||||
pos = input.index(after: pos)
|
||||
}
|
||||
|
||||
return pos > start ? cacheString(start, pos-start) : CharacterReader.empty
|
||||
return cacheString(start, pos)
|
||||
}
|
||||
|
||||
public func consumeToAnySorted(_ chars: UnicodeScalar...) -> String {
|
||||
return consumeToAnySorted(chars)
|
||||
return consumeToAny(chars)
|
||||
}
|
||||
|
||||
public func consumeToAnySorted(_ chars: [UnicodeScalar]) -> String {
|
||||
let start = pos
|
||||
let remaining = length
|
||||
let val = input
|
||||
|
||||
while (pos < remaining) {
|
||||
|
||||
if chars.contains(val[pos]) {
|
||||
break
|
||||
}
|
||||
pos += 1
|
||||
}
|
||||
|
||||
return pos > start ? cacheString(start, pos-start) : CharacterReader.empty
|
||||
return consumeToAny(chars)
|
||||
}
|
||||
|
||||
static let dataTerminators: [UnicodeScalar] = [.Ampersand, .LessThan, TokeniserStateVars.nullScalr]
|
||||
// read to &, <, or null
|
||||
public func consumeData() -> String {
|
||||
// &, <, null
|
||||
let start = pos
|
||||
let remaining = length
|
||||
let val = input
|
||||
|
||||
while (pos < remaining) {
|
||||
let c: UnicodeScalar = val[pos]
|
||||
if (c == UnicodeScalar.Ampersand || c == UnicodeScalar.LessThan || c == TokeniserStateVars.nullScalr) {
|
||||
break
|
||||
}
|
||||
pos += 1
|
||||
}
|
||||
|
||||
return pos > start ? cacheString(start, pos-start) : CharacterReader.empty
|
||||
return consumeToAny(CharacterReader.dataTerminators)
|
||||
}
|
||||
|
||||
static let tagNameTerminators: [UnicodeScalar] = [.BackslashT, .BackslashN, .BackslashR, .BackslashF, .Space, .Slash, .GreaterThan, TokeniserStateVars.nullScalr]
|
||||
// read to '\t', '\n', '\r', '\f', ' ', '/', '>', or nullChar
|
||||
public func consumeTagName() -> String {
|
||||
// '\t', '\n', '\r', '\f', ' ', '/', '>', nullChar
|
||||
let start = pos
|
||||
let remaining = length
|
||||
let val = input
|
||||
|
||||
while (pos < remaining) {
|
||||
let c: UnicodeScalar = val[pos]
|
||||
if (c == UnicodeScalar.BackslashT || c == UnicodeScalar.BackslashN || c == UnicodeScalar.BackslashR || c == UnicodeScalar.BackslashF || c == UnicodeScalar.Space || c == UnicodeScalar.Slash || c == UnicodeScalar.GreaterThan || c == TokeniserStateVars.nullScalr) {
|
||||
break
|
||||
}
|
||||
pos += 1
|
||||
}
|
||||
return pos > start ? cacheString(start, pos-start) : CharacterReader.empty
|
||||
return consumeToAny(CharacterReader.tagNameTerminators)
|
||||
}
|
||||
|
||||
public func consumeToEnd() -> String {
|
||||
let data = cacheString(pos, length-pos)
|
||||
pos = length
|
||||
return data
|
||||
let consumed = cacheString(pos, input.endIndex)
|
||||
pos = input.endIndex
|
||||
return consumed
|
||||
}
|
||||
|
||||
public func consumeLetterSequence() -> String {
|
||||
let start = pos
|
||||
while (pos < length) {
|
||||
let c: UnicodeScalar = input[pos]
|
||||
while pos < input.endIndex {
|
||||
let c = input[pos]
|
||||
if ((c >= "A" && c <= "Z") || (c >= "a" && c <= "z") || c.isMemberOfCharacterSet(CharacterSet.letters)) {
|
||||
pos += 1
|
||||
pos = input.index(after: pos)
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
return cacheString(start, pos - start)
|
||||
return cacheString(start, pos)
|
||||
}
|
||||
|
||||
public func consumeLetterThenDigitSequence() -> String {
|
||||
let start = pos
|
||||
while (pos < length) {
|
||||
while pos < input.endIndex {
|
||||
let c = input[pos]
|
||||
if ((c >= "A" && c <= "Z") || (c >= "a" && c <= "z") || c.isMemberOfCharacterSet(CharacterSet.letters)) {
|
||||
pos += 1
|
||||
pos = input.index(after: pos)
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
while (!isEmpty()) {
|
||||
while pos < input.endIndex {
|
||||
let c = input[pos]
|
||||
if (c >= "0" && c <= "9") {
|
||||
pos += 1
|
||||
pos = input.index(after: pos)
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return cacheString(start, pos - start)
|
||||
return cacheString(start, pos)
|
||||
}
|
||||
|
||||
public func consumeHexSequence() -> String {
|
||||
let start = pos
|
||||
while (pos < length) {
|
||||
while pos < input.endIndex {
|
||||
let c = input[pos]
|
||||
if ((c >= "0" && c <= "9") || (c >= "A" && c <= "F") || (c >= "a" && c <= "f")) {
|
||||
pos+=1
|
||||
pos = input.index(after: pos)
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
return cacheString(start, pos - start)
|
||||
return cacheString(start, pos)
|
||||
}
|
||||
|
||||
public func consumeDigitSequence() -> String {
|
||||
let start = pos
|
||||
while (pos < length) {
|
||||
while pos < input.endIndex {
|
||||
let c = input[pos]
|
||||
if (c >= "0" && c <= "9") {
|
||||
pos+=1
|
||||
pos = input.index(after: pos)
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
return cacheString(start, pos - start)
|
||||
return cacheString(start, pos)
|
||||
}
|
||||
|
||||
public func matches(_ c: UnicodeScalar) -> Bool {
|
||||
|
@ -284,180 +236,85 @@ public final class CharacterReader {
|
|||
|
||||
}
|
||||
|
||||
public func matches(_ seq: String) -> Bool {
|
||||
let scanLength = seq.unicodeScalars.count
|
||||
if (scanLength > length - pos) {
|
||||
return false
|
||||
}
|
||||
|
||||
for offset in 0..<scanLength {
|
||||
if (seq.unicodeScalar(offset) != input[pos+offset]) {
|
||||
return false
|
||||
public func matches(_ seq: String, ignoreCase: Bool = false, consume: Bool = false) -> Bool {
    // Compares the input, scalar by scalar from the current position, with `seq`.
    // - ignoreCase: compare the `uppercase` form of each scalar instead of the scalar itself.
    // - consume: on a full match, move the current position past the matched scalars.
    // Returns false as soon as the input runs out or a scalar differs; the position is
    // left unchanged in that case. An empty `seq` matches.
    var cursor = pos
    for expected in seq.unicodeScalars {
        if cursor >= input.endIndex {
            return false
        }
        let actual = input[cursor]
        let same = ignoreCase ? actual.uppercase == expected.uppercase : actual == expected
        if !same {
            return false
        }
        cursor = input.index(after: cursor)
    }
    if consume {
        pos = cursor
    }
    return true
}
|
||||
|
||||
public func matchesIgnoreCase(_ seq: String ) -> Bool {
|
||||
|
||||
let scanLength = seq.unicodeScalars.count
|
||||
if(scanLength == 0) {
|
||||
return false
|
||||
}
|
||||
if (scanLength > length - pos) {
|
||||
return false
|
||||
}
|
||||
|
||||
for offset in 0..<scanLength {
|
||||
let upScan: UnicodeScalar = seq.unicodeScalar(offset).uppercase
|
||||
let upTarget: UnicodeScalar = input[pos+offset].uppercase
|
||||
if (upScan != upTarget) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
return matches(seq, ignoreCase: true)
|
||||
}
|
||||
|
||||
public func matchesAny(_ seq: UnicodeScalar...) -> Bool {
|
||||
if (isEmpty()) {
|
||||
return false
|
||||
}
|
||||
|
||||
let c: UnicodeScalar = input[pos]
|
||||
for seek in seq {
|
||||
if (seek == c) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
return matchesAny(seq)
|
||||
}
|
||||
|
||||
public func matchesAny(_ seq: [UnicodeScalar]) -> Bool {
    // True when there is a scalar at the current position and it is one of `seq`.
    // At end of input the subscript is never evaluated (short-circuit).
    return pos < input.endIndex && seq.contains(input[pos])
}
|
||||
|
||||
public func matchesAnySorted(_ seq: [UnicodeScalar]) -> Bool {
|
||||
return !isEmpty() && seq.contains(input[pos])
|
||||
return matchesAny(seq)
|
||||
}
|
||||
|
||||
public func matchesLetter() -> Bool {
|
||||
if (isEmpty()) {
|
||||
return false
|
||||
}
|
||||
let c = input[pos]
|
||||
guard pos < input.endIndex else { return false }
|
||||
let c = input[pos]
|
||||
return (c >= "A" && c <= "Z") || (c >= "a" && c <= "z") || c.isMemberOfCharacterSet(CharacterSet.letters)
|
||||
}
|
||||
|
||||
public func matchesDigit() -> Bool {
|
||||
if (isEmpty()) {
|
||||
return false
|
||||
}
|
||||
let c = input[pos]
|
||||
return (c >= "0" && c <= "9")
|
||||
guard pos < input.endIndex else { return false }
|
||||
let c = input[pos]
|
||||
return c >= "0" && c <= "9"
|
||||
}
|
||||
|
||||
@discardableResult
|
||||
public func matchConsume(_ seq: String) -> Bool {
|
||||
if (matches(seq)) {
|
||||
pos += seq.unicodeScalars.count
|
||||
return true
|
||||
} else {
|
||||
return false
|
||||
}
|
||||
return matches(seq, consume: true)
|
||||
}
|
||||
|
||||
@discardableResult
|
||||
public func matchConsumeIgnoreCase(_ seq: String) -> Bool {
|
||||
if (matchesIgnoreCase(seq)) {
|
||||
pos += seq.unicodeScalars.count
|
||||
return true
|
||||
} else {
|
||||
return false
|
||||
}
|
||||
return matches(seq, ignoreCase: true, consume: true)
|
||||
}
|
||||
|
||||
public func containsIgnoreCase(_ seq: String ) -> Bool {
|
||||
// used to check presence of </title>, </style>. only finds consistent case.
|
||||
let loScan = seq.lowercased(with: Locale(identifier: "en"))
|
||||
let hiScan = seq.uppercased(with: Locale(identifier: "eng"))
|
||||
return (nextIndexOf(loScan) > -1) || (nextIndexOf(hiScan) > -1)
|
||||
return nextIndexOf(loScan) != nil || nextIndexOf(hiScan) != nil
|
||||
}
|
||||
|
||||
public func toString() -> String {
|
||||
return String(input[pos..<length])
|
||||
//return String.unicodescalars(Array(input[pos..<length]))
|
||||
//return input.string(pos, length - pos)
|
||||
return String(input[pos...])
|
||||
}
|
||||
|
||||
/**
|
||||
* Caches short strings, as a flywheel pattern, to reduce GC load. Just for this doc, to prevent leaks.
|
||||
* <p />
|
||||
* Simplistic, and on hash collisions just falls back to creating a new string, vs a full HashMap with Entry list.
|
||||
* That saves both having to create objects as hash keys, and running through the entry list, at the expense of
|
||||
* some more duplicates.
|
||||
* Originally intended as a caching mechanism for strings, but caching doesn't
|
||||
* seem to improve performance. Now just a stub.
|
||||
*/
|
||||
private func cacheString(_ start: Int, _ count: Int) -> String {
|
||||
return String(input[start..<start+count])
|
||||
// Too Slow
|
||||
// var cache: [String?] = stringCache
|
||||
//
|
||||
// // limit (no cache):
|
||||
// if (count > CharacterReader.maxCacheLen) {
|
||||
// return String(val[start..<start+count].flatMap { Character($0) })
|
||||
// }
|
||||
//
|
||||
// // calculate hash:
|
||||
// var hash: Int = 0
|
||||
// var offset = start
|
||||
// for _ in 0..<count {
|
||||
// let ch = val[offset].value
|
||||
// hash = Int.addWithOverflow(Int.multiplyWithOverflow(31, hash).0, Int(ch)).0
|
||||
// offset+=1
|
||||
// }
|
||||
//
|
||||
// // get from cache
|
||||
// hash = abs(hash)
|
||||
// let i = hash % cache.count
|
||||
// let index: Int = abs(i) //Int(hash & Int(cache.count) - 1)
|
||||
// var cached = cache[index]
|
||||
//
|
||||
// if (cached == nil) { // miss, add
|
||||
// cached = String(val[start..<start+count].flatMap { Character($0) })
|
||||
// //cached = val.string(start, count)
|
||||
// cache[Int(index)] = cached
|
||||
// } else { // hashcode hit, check equality
|
||||
// if (rangeEquals(start, count, cached!)) { // hit
|
||||
// return cached!
|
||||
// } else { // hashcode conflict
|
||||
// cached = String(val[start..<start+count].flatMap { Character($0) })
|
||||
// //cached = val.string(start, count)
|
||||
// cache[index] = cached // update the cache, as recently used strings are more likely to show up again
|
||||
// }
|
||||
// }
|
||||
// return cached!
|
||||
private func cacheString(_ start: String.UnicodeScalarView.Index, _ end: String.UnicodeScalarView.Index) -> String {
    // No caching is performed: simply materialise the half-open range [start, end) as a String.
    let slice = input[start..<end]
    return String(slice)
}
|
||||
|
||||
// /**
|
||||
// * Check if the value of the provided range equals the string.
|
||||
// */
|
||||
// public func rangeEquals(_ start: Int, _ count: Int, _ cached: String) -> Bool {
|
||||
// if (count == cached.unicodeScalars.count) {
|
||||
// var count = count
|
||||
// let one = input
|
||||
// var i = start
|
||||
// var j = 0
|
||||
// while (count != 0) {
|
||||
// count -= 1
|
||||
// if (one[i] != cached.unicodeScalar(j) ) {
|
||||
// return false
|
||||
// }
|
||||
// j += 1
|
||||
// i += 1
|
||||
// }
|
||||
// return true
|
||||
// }
|
||||
// return false
|
||||
// }
|
||||
}
|
||||
|
||||
extension CharacterReader: CustomDebugStringConvertible {
|
||||
public var debugDescription: String {
|
||||
return self.toString()
|
||||
return toString()
|
||||
}
|
||||
}
|
||||
|
|
|
@ -925,9 +925,9 @@ open class Element: Node {
|
|||
if let textNode = (node as? TextNode) {
|
||||
Element.appendNormalisedText(accum, textNode)
|
||||
} else if let element = (node as? Element) {
|
||||
if (accum.length > 0 &&
|
||||
if !accum.isEmpty &&
|
||||
(element.isBlock() || element._tag.getName() == "br") &&
|
||||
!TextNode.lastCharIsWhitespace(accum)) {
|
||||
!TextNode.lastCharIsWhitespace(accum) {
|
||||
accum.append(" ")
|
||||
}
|
||||
}
|
||||
|
@ -1201,7 +1201,7 @@ open class Element: Node {
|
|||
|
||||
override func outerHtmlHead(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings)throws {
|
||||
if (out.prettyPrint() && (_tag.formatAsBlock() || (parent() != nil && parent()!.tag().formatAsBlock()) || out.outline())) {
|
||||
if (accum.length > 0) {
|
||||
if !accum.isEmpty {
|
||||
indent(accum, depth, out)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -189,7 +189,7 @@ open class Elements: NSCopying {
|
|||
open func text()throws->String {
|
||||
let sb: StringBuilder = StringBuilder()
|
||||
for element: Element in this {
|
||||
if (sb.length != 0) {
|
||||
if !sb.isEmpty {
|
||||
sb.append(" ")
|
||||
}
|
||||
sb.append(try element.text())
|
||||
|
@ -216,7 +216,7 @@ open class Elements: NSCopying {
|
|||
open func html()throws->String {
|
||||
let sb: StringBuilder = StringBuilder()
|
||||
for element: Element in this {
|
||||
if (sb.length != 0) {
|
||||
if !sb.isEmpty {
|
||||
sb.append("\n")
|
||||
}
|
||||
sb.append(try element.html())
|
||||
|
@ -233,7 +233,7 @@ open class Elements: NSCopying {
|
|||
open func outerHtml()throws->String {
|
||||
let sb: StringBuilder = StringBuilder()
|
||||
for element in this {
|
||||
if (sb.length != 0) {
|
||||
if !sb.isEmpty {
|
||||
sb.append("\n")
|
||||
}
|
||||
sb.append(try element.outerHtml())
|
||||
|
|
|
@ -18,7 +18,7 @@ public class Entities {
|
|||
private static let emptyName = ""
|
||||
private static let codepointRadix: Int = 36
|
||||
|
||||
public struct EscapeMode: Equatable {
|
||||
public class EscapeMode: Equatable {
|
||||
|
||||
/** Restricted entities suitable for XHTML output: lt, gt, amp, and quot only. */
|
||||
public static let xhtml: EscapeMode = EscapeMode(string: Entities.xhtml, size: 4, id: 0)
|
||||
|
@ -29,13 +29,19 @@ public class Entities {
|
|||
|
||||
fileprivate let value: Int
|
||||
|
||||
// table of named references to their codepoints. sorted so we can binary search. built by BuildEntities.
|
||||
fileprivate var nameKeys: [String]
|
||||
fileprivate var codeVals: [Int] // limitation is the few references with multiple characters; those go into multipoints.
|
||||
/// One named character reference: an entity name paired with a Unicode scalar.
struct NamedCodepoint {
|
||||
// The scalar the name maps to. The initializer stores only the first parsed
// codepoint here; two-codepoint entities additionally go into `multipoints`.
let scalar: UnicodeScalar
|
||||
// The entity name, as read from the text before "=" in the entity table.
let name: String
|
||||
}
|
||||
|
||||
// Array of named references, sorted by name for binary search. built by BuildEntities.
|
||||
// The few entities that map to a multi-codepoint sequence go into multipoints.
|
||||
fileprivate var entitiesByName: [NamedCodepoint] = []
|
||||
|
||||
// table of codepoints to named entities.
|
||||
fileprivate var codeKeys: [Int] // we don' support multicodepoints to single named value currently
|
||||
fileprivate var nameVals: [String]
|
||||
// Array of entities in first-codepoint order. We don't currently support
|
||||
// multicodepoints to single named value. Lazy because this index
|
||||
// is used only when generating HTML text.
|
||||
fileprivate lazy var entitiesByCodepoint = entitiesByName.sorted() { a, b in a.scalar < b.scalar }
|
||||
|
||||
public static func == (left: EscapeMode, right: EscapeMode) -> Bool {
|
||||
return left.value == right.value
|
||||
|
@ -46,23 +52,14 @@ public class Entities {
|
|||
}
|
||||
|
||||
private static let codeDelims: [UnicodeScalar] = [",", ";"]
|
||||
|
||||
|
||||
init(string: String, size: Int, id: Int) {
|
||||
nameKeys = [String](repeating: "", count: size)
|
||||
codeVals = [Int](repeating: 0, count: size)
|
||||
codeKeys = [Int](repeating: 0, count: size)
|
||||
nameVals = [String](repeating: "", count: size)
|
||||
value = id
|
||||
|
||||
//Load()
|
||||
|
||||
var i = 0
|
||||
|
||||
|
||||
value = id
|
||||
let reader: CharacterReader = CharacterReader(string)
|
||||
|
||||
while (!reader.isEmpty()) {
|
||||
// NotNestedLessLess=10913,824;1887
|
||||
|
||||
|
||||
entitiesByName.reserveCapacity(size)
|
||||
while !reader.isEmpty() {
|
||||
let name: String = reader.consumeTo("=")
|
||||
reader.advance()
|
||||
let cp1: Int = Int(reader.consumeToAny(EscapeMode.codeDelims), radix: codepointRadix) ?? 0
|
||||
|
@ -75,100 +72,46 @@ public class Entities {
|
|||
} else {
|
||||
cp2 = empty
|
||||
}
|
||||
let index: Int = Int(reader.consumeTo("\n"), radix: codepointRadix) ?? 0
|
||||
let _ = Int(reader.consumeTo("\n"), radix: codepointRadix) ?? 0
|
||||
reader.advance()
|
||||
|
||||
nameKeys[i] = name
|
||||
codeVals[i] = cp1
|
||||
codeKeys[index] = cp1
|
||||
nameVals[index] = name
|
||||
entitiesByName.append(NamedCodepoint(scalar: UnicodeScalar(cp1)!, name: name))
|
||||
|
||||
if (cp2 != empty) {
|
||||
var s = String()
|
||||
s.append(Character(UnicodeScalar(cp1)!))
|
||||
s.append(Character(UnicodeScalar(cp2)!))
|
||||
multipoints[name] = s
|
||||
}
|
||||
i = i + 1
|
||||
}
|
||||
}
|
||||
|
||||
// init(string: String, size: Int, id: Int) {
|
||||
// nameKeys = [String](repeating: "", count: size)
|
||||
// codeVals = [Int](repeating: 0, count: size)
|
||||
// codeKeys = [Int](repeating: 0, count: size)
|
||||
// nameVals = [String](repeating: "", count: size)
|
||||
// value = id
|
||||
//
|
||||
// let components = string.components(separatedBy: "\n")
|
||||
//
|
||||
// var i = 0
|
||||
// for entry in components {
|
||||
// let match = Entities.entityPattern.matcher(in: entry)
|
||||
// if (match.find()) {
|
||||
// let name = match.group(1)!
|
||||
// let cp1 = Int(match.group(2)!, radix: codepointRadix)
|
||||
// //let cp2 = Int(Int.parseInt(s: match.group(3), radix: codepointRadix))
|
||||
// let cp2 = match.group(3) != nil ? Int(match.group(3)!, radix: codepointRadix) : empty
|
||||
// let index = Int(match.group(4)!, radix: codepointRadix)
|
||||
//
|
||||
// nameKeys[i] = name
|
||||
// codeVals[i] = cp1!
|
||||
// codeKeys[index!] = cp1!
|
||||
// nameVals[index!] = name
|
||||
//
|
||||
// if (cp2 != empty) {
|
||||
// var s = String()
|
||||
// s.append(Character(UnicodeScalar(cp1!)!))
|
||||
// s.append(Character(UnicodeScalar(cp2!)!))
|
||||
// multipoints[name] = s
|
||||
// }
|
||||
// i += 1
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
public func codepointForName(_ name: String) -> Int {
|
||||
// for s in nameKeys {
|
||||
// if s == name {
|
||||
// return codeVals[nameKeys.index(of: s)!]
|
||||
// }
|
||||
// }
|
||||
guard let index = nameKeys.firstIndex(of: name) else {
|
||||
return empty
|
||||
}
|
||||
return codeVals[index]
|
||||
}
|
||||
|
||||
public func nameForCodepoint(_ codepoint: Int ) -> String {
|
||||
//let ss = codeKeys.index(of: codepoint)
|
||||
|
||||
var index = -1
|
||||
for s in codeKeys {
|
||||
if s == codepoint {
|
||||
index = codeKeys.firstIndex(of: codepoint)!
|
||||
multipoints[name] = [UnicodeScalar(cp1)!, UnicodeScalar(cp2)!]
|
||||
}
|
||||
}
|
||||
// Entities should start in name order, but better safe than sorry...
|
||||
entitiesByName.sort() { a, b in a.name < b.name }
|
||||
}
|
||||
|
||||
if (index >= 0) {
|
||||
// the results are ordered so lower case versions of same codepoint come after uppercase, and we prefer to emit lower
|
||||
// (and binary search for same item with multi results is undefined
|
||||
return (index < nameVals.count-1 && codeKeys[index+1] == codepoint) ?
|
||||
nameVals[index+1] : nameVals[index]
|
||||
/// Looks up the Unicode scalar for a named entity in `entitiesByName`.
///
/// Only returns the first of potentially multiple codepoints.
///
/// - Parameter name: The entity name to look up; compared with `==`, so the match is exact.
/// - Returns: The scalar stored for `name`, or `nil` when no entry with that exact name is found.
|
||||
public func codepointForName(_ name: String) -> UnicodeScalar? {
|
||||
// NOTE(review): `binarySearch` is a project helper not visible here; presumably it
// returns the first index whose element does not satisfy the predicate — confirm.
let ix = entitiesByName.binarySearch { $0.name < name }
|
||||
// An index at or past `endIndex` means there is no candidate entry to inspect.
guard ix < entitiesByName.endIndex else { return nil }
|
||||
let entity = entitiesByName[ix]
|
||||
// The search only locates a position; verify the entry there really is `name`.
guard entity.name == name else { return nil }
|
||||
return entity.scalar
|
||||
}
|
||||
|
||||
// Search by first codepoint only
|
||||
public func nameForCodepoint(_ codepoint: UnicodeScalar ) -> String? {
|
||||
var ix = entitiesByCodepoint.binarySearch { $0.scalar < codepoint }
|
||||
var matches: [String] = []
|
||||
while ix < entitiesByCodepoint.endIndex && entitiesByCodepoint[ix].scalar == codepoint {
|
||||
matches.append(entitiesByCodepoint[ix].name)
|
||||
ix = entitiesByCodepoint.index(after: ix)
|
||||
}
|
||||
return emptyName
|
||||
return matches.isEmpty ? nil : matches.sorted().last!
|
||||
}
|
||||
|
||||
private func size() -> Int {
|
||||
return nameKeys.count
|
||||
return entitiesByName.count
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private static var multipoints: Dictionary<String, String> = Dictionary<String, String>() // name -> multiple character references
|
||||
|
||||
private init() {
|
||||
}
|
||||
private static var multipoints: [String: [UnicodeScalar]] = [:] // name -> multiple character references
|
||||
|
||||
/**
|
||||
* Check if the input is a known named entity
|
||||
|
@ -176,7 +119,7 @@ public class Entities {
|
|||
* @return true if a known named entity
|
||||
*/
|
||||
public static func isNamedEntity(_ name: String ) -> Bool {
|
||||
return (EscapeMode.extended.codepointForName(name) != empty)
|
||||
return (EscapeMode.extended.codepointForName(name) != nil)
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -186,17 +129,7 @@ public class Entities {
|
|||
* @see #isNamedEntity(String)
|
||||
*/
|
||||
public static func isBaseNamedEntity(_ name: String) -> Bool {
|
||||
return EscapeMode.base.codepointForName(name) != empty
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the Character value of the named entity
|
||||
* @param name named entity (e.g. "lt" or "amp")
|
||||
* @return the Character value of the named entity (e.g. '{@literal <}' or '{@literal &}')
|
||||
* @deprecated does not support characters outside the BMP or multiple character names
|
||||
*/
|
||||
public static func getCharacterByName(name: String) -> Character {
|
||||
return Character.convertFromIntegerLiteral(value: EscapeMode.extended.codepointForName(name))
|
||||
return EscapeMode.base.codepointForName(name) != nil
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -204,30 +137,20 @@ public class Entities {
|
|||
* @param name entity (e.g. "lt" or "amp")
|
||||
* @return the string value of the character(s) represented by this entity, or "" if not defined
|
||||
*/
|
||||
public static func getByName(name: String) -> String {
|
||||
let val = multipoints[name]
|
||||
if (val != nil) {return val!}
|
||||
let codepoint = EscapeMode.extended.codepointForName(name)
|
||||
if (codepoint != empty) {
|
||||
return String(Character(UnicodeScalar(codepoint)!))
|
||||
public static func getByName(name: String) -> String? {
|
||||
if let scalars = codepointsForName(name) {
|
||||
return String(String.UnicodeScalarView(scalars))
|
||||
}
|
||||
return emptyName
|
||||
return nil
|
||||
}
|
||||
|
||||
public static func codepointsForName(_ name: String, codepoints: inout [UnicodeScalar]) -> Int {
|
||||
|
||||
if let val: String = multipoints[name] {
|
||||
codepoints[0] = val.unicodeScalar(0)
|
||||
codepoints[1] = val.unicodeScalar(1)
|
||||
return 2
|
||||
public static func codepointsForName(_ name: String) -> [UnicodeScalar]? {
|
||||
if let scalars = multipoints[name] {
|
||||
return scalars
|
||||
} else if let scalar = EscapeMode.extended.codepointForName(name) {
|
||||
return [scalar]
|
||||
}
|
||||
|
||||
let codepoint = EscapeMode.extended.codepointForName(name)
|
||||
if (codepoint != empty) {
|
||||
codepoints[0] = UnicodeScalar(codepoint)!
|
||||
return 1
|
||||
}
|
||||
return 0
|
||||
return nil
|
||||
}
|
||||
|
||||
public static func escape(_ string: String, _ encode: String.Encoding = .utf8 ) -> String {
|
||||
|
@ -326,9 +249,9 @@ public class Entities {
|
|||
}
|
||||
|
||||
private static func appendEncoded(accum: StringBuilder, escapeMode: EscapeMode, codePoint: UnicodeScalar) {
|
||||
let name = escapeMode.nameForCodepoint(Int(codePoint.value))
|
||||
if (name != emptyName) // ok for identity check
|
||||
{accum.append(UnicodeScalar.Ampersand).append(name).append(";")
|
||||
if let name = escapeMode.nameForCodepoint(codePoint) {
|
||||
// ok for identity check
|
||||
accum.append(UnicodeScalar.Ampersand).append(name).append(";")
|
||||
} else {
|
||||
accum.append("&#x").append(String.toHexString(n: Int(codePoint.value)) ).append(";")
|
||||
}
|
||||
|
|
|
@ -12,21 +12,27 @@ import Foundation
|
|||
* HTML Tree Builder; creates a DOM from Tokens.
|
||||
*/
|
||||
class HtmlTreeBuilder: TreeBuilder {
|
||||
// tag searches
|
||||
public static let TagsSearchInScope: [String] = ["applet", "caption", "html", "table", "td", "th", "marquee", "object"]
|
||||
private static let TagSearchList: [String] = ["ol", "ul"]
|
||||
private static let TagSearchButton: [String] = ["button"]
|
||||
private static let TagSearchTableScope: [String] = ["html", "table"]
|
||||
private static let TagSearchSelectScope: [String] = ["optgroup", "option"]
|
||||
private static let TagSearchEndTags: [String] = ["dd", "dt", "li", "option", "optgroup", "p", "rp", "rt"]
|
||||
private static let TagSearchSpecial: [String] = ["address", "applet", "area", "article", "aside", "base", "basefont", "bgsound",
|
||||
"blockquote", "body", "br", "button", "caption", "center", "col", "colgroup", "command", "dd",
|
||||
"details", "dir", "div", "dl", "dt", "embed", "fieldset", "figcaption", "figure", "footer", "form",
|
||||
"frame", "frameset", "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html",
|
||||
"iframe", "img", "input", "isindex", "li", "link", "listing", "marquee", "menu", "meta", "nav",
|
||||
"noembed", "noframes", "noscript", "object", "ol", "p", "param", "plaintext", "pre", "script",
|
||||
"section", "select", "style", "summary", "table", "tbody", "td", "textarea", "tfoot", "th", "thead",
|
||||
"title", "tr", "ul", "wbr", "xmp"]
|
||||
|
||||
/// Namespace for the fixed tag-name lists that the tree builder tests element names against.
private enum TagSets {
|
||||
// tag searches
|
||||
// Base boundary tags passed as `baseTypes` by the `inScope` overloads.
static let inScope = ["applet", "caption", "html", "table", "td", "th", "marquee", "object"]
|
||||
// Extra boundary tags supplied by `inListItemScope`.
static let list = ["ol", "ul"]
|
||||
// Extra boundary tag supplied by `inButtonScope`.
static let button = ["button"]
|
||||
// Boundary tags used by `inTableScope`.
static let tableScope = ["html", "table"]
|
||||
// The only tags `inSelectScope` will look past while walking the stack.
static let selectScope = ["optgroup", "option"]
|
||||
// Tags that `generateImpliedEndTags` pops from the stack.
static let endTags = ["dd", "dt", "li", "option", "optgroup", "p", "rp", "rt"]
|
||||
// Context tags for which the tokeniser is started in `Rcdata`.
static let titleTextarea = ["title", "textarea"]
|
||||
// Context tags for which the tokeniser is started in `Rawtext`.
static let frames = ["iframe", "noembed", "noframes", "style", "xmp"]
|
||||
|
||||
// Tag names reported as special by `isSpecial`; declared as a `Set`, unlike the arrays above.
static let special: Set<String> = ["address", "applet", "area", "article", "aside", "base", "basefont", "bgsound",
|
||||
"blockquote", "body", "br", "button", "caption", "center", "col", "colgroup", "command", "dd",
|
||||
"details", "dir", "div", "dl", "dt", "embed", "fieldset", "figcaption", "figure", "footer", "form",
|
||||
"frame", "frameset", "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html",
|
||||
"iframe", "img", "input", "isindex", "li", "link", "listing", "marquee", "menu", "meta", "nav",
|
||||
"noembed", "noframes", "noscript", "object", "ol", "p", "param", "plaintext", "pre", "script",
|
||||
"section", "select", "style", "summary", "table", "tbody", "td", "textarea", "tfoot", "th", "thead",
|
||||
"title", "tr", "ul", "wbr", "xmp"]
|
||||
}
|
||||
|
||||
private var _state: HtmlTreeBuilderState = HtmlTreeBuilderState.Initial // the current state
|
||||
private var _originalState: HtmlTreeBuilderState = HtmlTreeBuilderState.Initial // original / marked state
|
||||
|
@ -71,19 +77,19 @@ class HtmlTreeBuilder: TreeBuilder {
|
|||
}
|
||||
|
||||
// initialise the tokeniser state:
|
||||
let contextTag: String = context.tagName()
|
||||
if (StringUtil.inString(contextTag, haystack: "title", "textarea")) {
|
||||
tokeniser.transition(TokeniserState.Rcdata)
|
||||
} else if (StringUtil.inString(contextTag, haystack: "iframe", "noembed", "noframes", "style", "xmp")) {
|
||||
tokeniser.transition(TokeniserState.Rawtext)
|
||||
} else if (contextTag=="script") {
|
||||
tokeniser.transition(TokeniserState.ScriptData)
|
||||
} else if (contextTag==("noscript")) {
|
||||
tokeniser.transition(TokeniserState.Data) // if scripting enabled, rawtext
|
||||
} else if (contextTag=="plaintext") {
|
||||
tokeniser.transition(TokeniserState.Data)
|
||||
} else {
|
||||
tokeniser.transition(TokeniserState.Data) // default
|
||||
switch context.tagName() {
|
||||
case TagSets.titleTextarea:
|
||||
tokeniser.transition(TokeniserState.Rcdata)
|
||||
case TagSets.frames:
|
||||
tokeniser.transition(TokeniserState.Rawtext)
|
||||
case "script":
|
||||
tokeniser.transition(TokeniserState.ScriptData)
|
||||
case "noscript":
|
||||
tokeniser.transition(TokeniserState.Data) // if scripting enabled, rawtext
|
||||
case "plaintext":
|
||||
tokeniser.transition(TokeniserState.Data)
|
||||
default:
|
||||
tokeniser.transition(TokeniserState.Data)
|
||||
}
|
||||
|
||||
root = try Element(Tag.valueOf("html", settings), baseUri)
|
||||
|
@ -346,7 +352,7 @@ class HtmlTreeBuilder: TreeBuilder {
|
|||
for pos in (0..<stack.count).reversed() {
|
||||
let next: Element = stack[pos]
|
||||
stack.remove(at: pos)
|
||||
if (StringUtil.inString(next.nodeName(), elNames)) {
|
||||
if elNames.contains(next.nodeName()) {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
@ -381,7 +387,8 @@ class HtmlTreeBuilder: TreeBuilder {
|
|||
private func clearStackToContext(_ nodeNames: [String]) {
|
||||
for pos in (0..<stack.count).reversed() {
|
||||
let next: Element = stack[pos]
|
||||
if (StringUtil.inString(next.nodeName(), nodeNames) || next.nodeName()=="html") {
|
||||
let nextName = next.nodeName()
|
||||
if nodeNames.contains(nextName) || nextName == "html" {
|
||||
break
|
||||
} else {
|
||||
stack.remove(at: pos)
|
||||
|
@ -482,25 +489,21 @@ class HtmlTreeBuilder: TreeBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
// todo: tidy up in specific scope methods
|
||||
private var specificScopeTarget: [String?] = [nil]
|
||||
|
||||
private func inSpecificScope(_ targetName: String, _ baseTypes: [String], _ extraTypes: [String]?)throws->Bool {
|
||||
specificScopeTarget[0] = targetName
|
||||
return try inSpecificScope(specificScopeTarget, baseTypes, extraTypes)
|
||||
/// Single-target convenience for the array-based `inSpecificScope` overload.
///
/// - Parameters:
///   - targetName: The one node name to search for.
///   - baseTypes: Forwarded unchanged to the array-based overload.
///   - extraTypes: Forwarded unchanged; defaults to `nil`.
/// - Returns: Whatever the array-based overload returns for `[targetName]`.
/// - Throws: Rethrows any error from the array-based overload.
private func inSpecificScope(_ targetName: String, _ baseTypes: [String], _ extraTypes: [String]? = nil)throws->Bool {
|
||||
// Wrap the single name in a one-element array and delegate.
return try inSpecificScope([targetName], baseTypes, extraTypes)
|
||||
}
|
||||
|
||||
private func inSpecificScope(_ targetNames: [String?], _ baseTypes: [String], _ extraTypes: [String]?)throws->Bool {
|
||||
private func inSpecificScope(_ targetNames: [String], _ baseTypes: [String], _ extraTypes: [String]? = nil)throws->Bool {
|
||||
for pos in (0..<stack.count).reversed() {
|
||||
let el: Element = stack[pos]
|
||||
let elName: String = el.nodeName()
|
||||
if (StringUtil.inString(elName, targetNames)) {
|
||||
let el = stack[pos]
|
||||
let elName = el.nodeName()
|
||||
if targetNames.contains(elName) {
|
||||
return true
|
||||
}
|
||||
if (StringUtil.inString(elName, baseTypes)) {
|
||||
if baseTypes.contains(elName) {
|
||||
return false
|
||||
}
|
||||
if (extraTypes != nil && StringUtil.inString(elName, extraTypes!)) {
|
||||
if let extraTypes = extraTypes, extraTypes.contains(elName) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
@ -509,39 +512,34 @@ class HtmlTreeBuilder: TreeBuilder {
|
|||
}
|
||||
|
||||
func inScope(_ targetNames: [String])throws->Bool {
|
||||
return try inSpecificScope(targetNames, HtmlTreeBuilder.TagsSearchInScope, nil)
|
||||
return try inSpecificScope(targetNames, TagSets.inScope)
|
||||
}
|
||||
|
||||
func inScope(_ targetName: String)throws->Bool {
|
||||
return try inScope(targetName, nil)
|
||||
}
|
||||
|
||||
func inScope(_ targetName: String, _ extras: [String]?)throws->Bool {
|
||||
return try inSpecificScope(targetName, HtmlTreeBuilder.TagsSearchInScope, extras)
|
||||
/// Checks for `targetName` using `TagSets.inScope` as the base boundary tags.
///
/// - Parameters:
///   - targetName: The node name to search for.
///   - extras: Optional additional tag names passed through as `extraTypes`; defaults to `nil`.
/// - Returns: The result of `inSpecificScope` for these arguments.
/// - Throws: Rethrows any error from `inSpecificScope`.
func inScope(_ targetName: String, _ extras: [String]? = nil)throws->Bool {
|
||||
return try inSpecificScope(targetName, TagSets.inScope, extras)
|
||||
// todo: in mathml namespace: mi, mo, mn, ms, mtext annotation-xml
|
||||
// todo: in svg namespace: foreignObject, desc, title
|
||||
}
|
||||
|
||||
func inListItemScope(_ targetName: String)throws->Bool {
|
||||
return try inScope(targetName, HtmlTreeBuilder.TagSearchList)
|
||||
return try inScope(targetName, TagSets.list)
|
||||
}
|
||||
|
||||
func inButtonScope(_ targetName: String)throws->Bool {
|
||||
return try inScope(targetName, HtmlTreeBuilder.TagSearchButton)
|
||||
return try inScope(targetName, TagSets.button)
|
||||
}
|
||||
|
||||
func inTableScope(_ targetName: String)throws->Bool {
|
||||
return try inSpecificScope(targetName, HtmlTreeBuilder.TagSearchTableScope, nil)
|
||||
return try inSpecificScope(targetName, TagSets.tableScope)
|
||||
}
|
||||
|
||||
func inSelectScope(_ targetName: String)throws->Bool {
|
||||
for pos in (0..<stack.count).reversed() {
|
||||
let el: Element = stack[pos]
|
||||
let elName: String = el.nodeName()
|
||||
if (elName.equals(targetName)) {
|
||||
let elName = stack[pos].nodeName()
|
||||
if elName == targetName {
|
||||
return true
|
||||
}
|
||||
if (!StringUtil.inString(elName, HtmlTreeBuilder.TagSearchSelectScope)) { // all elements except
|
||||
if !TagSets.selectScope.contains(elName) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
@ -595,23 +593,28 @@ class HtmlTreeBuilder: TreeBuilder {
|
|||
process, then the UA must perform the above steps as if that element was not in the above list.
|
||||
*/
|
||||
|
||||
func generateImpliedEndTags(_ excludeTag: String?) {
|
||||
|
||||
while ((excludeTag != nil && !currentElement()!.nodeName().equals(excludeTag!)) &&
|
||||
StringUtil.inString(currentElement()!.nodeName(), HtmlTreeBuilder.TagSearchEndTags)) {
|
||||
pop()
|
||||
func generateImpliedEndTags(_ excludeTag: String? = nil) {
|
||||
// Is this correct? I get the sense that something is supposed to happen here
|
||||
// even if excludeTag == nil. But the original code doesn't seem to do that. -GS
|
||||
//
|
||||
// while ((excludeTag != nil && !currentElement()!.nodeName().equals(excludeTag!)) &&
|
||||
// StringUtil.inString(currentElement()!.nodeName(), HtmlTreeBuilder.TagSearchEndTags)) {
|
||||
// pop()
|
||||
// }
|
||||
guard let excludeTag = excludeTag else { return }
|
||||
while true {
|
||||
let nodeName = currentElement()!.nodeName()
|
||||
guard nodeName != excludeTag else { return }
|
||||
guard TagSets.endTags.contains(nodeName) else { return }
|
||||
pop()
|
||||
}
|
||||
}
|
||||
|
||||
func generateImpliedEndTags() {
|
||||
generateImpliedEndTags(nil)
|
||||
}
|
||||
|
||||
func isSpecial(_ el: Element) -> Bool {
|
||||
// todo: mathml's mi, mo, mn
|
||||
// todo: svg's foreignObject, desc, title
|
||||
let name: String = el.nodeName()
|
||||
return StringUtil.inString(name, HtmlTreeBuilder.TagSearchSpecial)
|
||||
return TagSets.special.contains(name)
|
||||
}
|
||||
|
||||
func lastFormattingElement() -> Element? {
|
||||
|
@ -769,3 +772,10 @@ class HtmlTreeBuilder: TreeBuilder {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Pattern-match operator that lets a `[String]` serve as a `case` pattern for a `String`
/// value, as in `case TagSets.titleTextarea:` inside a `switch` over a tag name.
///
/// - Parameters:
///   - pattern: The list of candidate strings.
///   - value: The string being matched.
/// - Returns: `true` when `pattern` contains `value`.
fileprivate func ~= (pattern: [String], value: String) -> Bool {
|
||||
return pattern.contains(value)
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -36,6 +36,29 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
|
|||
case AfterAfterBody
|
||||
case AfterAfterFrameset
|
||||
case ForeignContent
|
||||
|
||||
/// Namespace for the fixed tag-name lists that the state handlers test tag names
/// against with `contains`, replacing inline `StringUtil.inString(..., haystack: ...)` lists.
private enum TagSets {
|
||||
static let outer = ["head", "body", "html", "br"]
|
||||
static let outer2 = ["body", "html", "br"]
|
||||
static let outer3 = ["body", "html"]
|
||||
static let baseEtc = ["base", "basefont", "bgsound", "command", "link"]
|
||||
static let baseEtc2 = ["basefont", "bgsound", "link", "meta", "noframes", "style"]
|
||||
static let baseEtc3 = ["base", "basefont", "bgsound", "link", "meta", "noframes", "script", "style", "title"]
|
||||
static let headNoscript = ["head", "noscript"]
|
||||
// Checked against the current element's node name before foster-parenting inserts are enabled.
static let table = ["table", "tbody", "tfoot", "thead", "tr"]
|
||||
static let tableSections = ["tbody", "tfoot", "thead"]
|
||||
static let tableMix = ["body", "caption", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr"]
|
||||
static let tableMix2 = ["body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr"]
|
||||
static let tableMix3 = ["caption", "col", "colgroup", "tbody", "tfoot", "thead"]
|
||||
static let tableMix4 = ["body", "caption", "col", "colgroup", "html", "td", "th", "tr"]
|
||||
static let tableMix5 = ["caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr"]
|
||||
static let tableMix6 = ["body", "caption", "col", "colgroup", "html", "td", "th"]
|
||||
static let tableMix7 = ["body", "caption", "col", "colgroup", "html"]
|
||||
static let tableMix8 = ["caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"]
|
||||
static let tableRowsAndCols = ["caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr"]
|
||||
static let thTd = ["th", "td"]
|
||||
static let inputKeygenTextarea = ["input", "keygen", "textarea"]
|
||||
}
|
||||
|
||||
private static let nullString: String = "\u{0000}"
|
||||
|
||||
|
@ -83,10 +106,10 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
|
|||
try tb.insert(t.asComment())
|
||||
} else if (HtmlTreeBuilderState.isWhitespace(t)) {
|
||||
return true // ignore whitespace
|
||||
} else if (t.isStartTag() && (t.asStartTag().normalName()?.equals("html"))!) {
|
||||
} else if t.startTagNormalName() == "html" {
|
||||
try tb.insert(t.asStartTag())
|
||||
tb.transition(.BeforeHead)
|
||||
} else if (t.isEndTag() && (StringUtil.inString(t.asEndTag().normalName()!, haystack: "head", "body", "html", "br"))) {
|
||||
} else if let nName = t.endTagNormalName(), TagSets.outer.contains(nName) {
|
||||
return try anythingElse(t, tb)
|
||||
} else if (t.isEndTag()) {
|
||||
tb.error(self)
|
||||
|
@ -103,13 +126,13 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
|
|||
} else if (t.isDoctype()) {
|
||||
tb.error(self)
|
||||
return false
|
||||
} else if (t.isStartTag() && (t.asStartTag().normalName()?.equals("html"))!) {
|
||||
} else if t.startTagNormalName() == "html" {
|
||||
return try HtmlTreeBuilderState.InBody.process(t, tb) // does not transition
|
||||
} else if (t.isStartTag() && (t.asStartTag().normalName()?.equals("head"))!) {
|
||||
} else if t.startTagNormalName() == "head" {
|
||||
let head: Element = try tb.insert(t.asStartTag())
|
||||
tb.setHeadElement(head)
|
||||
tb.transition(.InHead)
|
||||
} else if (t.isEndTag() && (StringUtil.inString(t.asEndTag().normalName()!, haystack: "head", "body", "html", "br"))) {
|
||||
} else if let nName = t.endTagNormalName(), TagSets.outer.contains(nName) {
|
||||
try tb.processStartTag("head")
|
||||
return try tb.process(t)
|
||||
} else if (t.isEndTag()) {
|
||||
|
@ -142,7 +165,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
|
|||
var name: String = start.normalName()!
|
||||
if (name.equals("html")) {
|
||||
return try HtmlTreeBuilderState.InBody.process(t, tb)
|
||||
} else if (StringUtil.inString(name, haystack: "base", "basefont", "bgsound", "command", "link")) {
|
||||
} else if TagSets.baseEtc.contains(name) {
|
||||
let el: Element = try tb.insertEmpty(start)
|
||||
// jsoup special: update base the first time it is seen
|
||||
if (name.equals("base") && el.hasAttr("href")) {
|
||||
|
@ -153,7 +176,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
|
|||
// todo: charset switches
|
||||
} else if (name.equals("title")) {
|
||||
try HtmlTreeBuilderState.handleRcData(start, tb)
|
||||
} else if (StringUtil.inString(name, haystack: "noframes", "style")) {
|
||||
} else if name == "noframes" || name == "style" {
|
||||
try HtmlTreeBuilderState.handleRawtext(start, tb)
|
||||
} else if (name.equals("noscript")) {
|
||||
// else if noscript && scripting flag = true: rawtext (jsoup doesn't run script, to handle as noscript)
|
||||
|
@ -179,7 +202,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
|
|||
if (name?.equals("head"))! {
|
||||
tb.pop()
|
||||
tb.transition(.AfterHead)
|
||||
} else if (name != nil && StringUtil.inString(name!, haystack: "body", "html", "br")) {
|
||||
} else if let name = name, TagSets.outer2.contains(name) {
|
||||
return try anythingElse(t, tb)
|
||||
} else {
|
||||
tb.error(self)
|
||||
|
@ -198,17 +221,16 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
|
|||
}
|
||||
if (t.isDoctype()) {
|
||||
tb.error(self)
|
||||
} else if (t.isStartTag() && (t.asStartTag().normalName()?.equals("html"))!) {
|
||||
} else if t.startTagNormalName() == "html" {
|
||||
return try tb.process(t, .InBody)
|
||||
} else if (t.isEndTag() && (t.asEndTag().normalName()?.equals("noscript"))!) {
|
||||
} else if t.endTagNormalName() == "noscript" {
|
||||
tb.pop()
|
||||
tb.transition(.InHead)
|
||||
} else if (HtmlTreeBuilderState.isWhitespace(t) || t.isComment() || (t.isStartTag() && StringUtil.inString(t.asStartTag().normalName()!,
|
||||
haystack: "basefont", "bgsound", "link", "meta", "noframes", "style"))) {
|
||||
} else if HtmlTreeBuilderState.isWhitespace(t) || t.isComment() || (t.isStartTag() && TagSets.baseEtc2.contains(t.asStartTag().normalName()!)) {
|
||||
return try tb.process(t, .InHead)
|
||||
} else if (t.isEndTag() && (t.asEndTag().normalName()?.equals("br"))!) {
|
||||
} else if t.endTagNormalName() == "br" {
|
||||
return try anythingElse(t, tb)
|
||||
} else if ((t.isStartTag() && StringUtil.inString(t.asStartTag().normalName()!, haystack: "head", "noscript")) || t.isEndTag()) {
|
||||
} else if (t.isStartTag() && TagSets.headNoscript.contains(t.asStartTag().normalName()!)) || t.isEndTag() {
|
||||
tb.error(self)
|
||||
return false
|
||||
} else {
|
||||
|
@ -241,7 +263,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
|
|||
} else if (name.equals("frameset")) {
|
||||
try tb.insert(startTag)
|
||||
tb.transition(.InFrameset)
|
||||
} else if (StringUtil.inString(name, haystack: "base", "basefont", "bgsound", "link", "meta", "noframes", "script", "style", "title")) {
|
||||
} else if TagSets.baseEtc3.contains(name) {
|
||||
tb.error(self)
|
||||
let head: Element = tb.getHeadElement()!
|
||||
tb.push(head)
|
||||
|
@ -254,7 +276,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
|
|||
try anythingElse(t, tb)
|
||||
}
|
||||
} else if (t.isEndTag()) {
|
||||
if (StringUtil.inString(t.asEndTag().normalName()!, haystack: "body", "html")) {
|
||||
if TagSets.outer3.contains(t.asEndTag().normalName()!) {
|
||||
try anythingElse(t, tb)
|
||||
} else {
|
||||
tb.error(self)
|
||||
|
@ -853,7 +875,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
|
|||
func anythingElse(_ t: Token, _ tb: HtmlTreeBuilder)throws->Bool {
|
||||
tb.error(self)
|
||||
var processed: Bool
|
||||
if (tb.currentElement() != nil && StringUtil.inString(tb.currentElement()!.nodeName(), haystack: "table", "tbody", "tfoot", "thead", "tr")) {
|
||||
if let cur = tb.currentElement(), TagSets.table.contains(cur.nodeName()) {
|
||||
tb.setFosterInserts(true)
|
||||
processed = try tb.process(t, .InBody)
|
||||
tb.setFosterInserts(false)
|
||||
|
@ -889,11 +911,11 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
|
|||
} else if (name.equals("col")) {
|
||||
try tb.processStartTag("colgroup")
|
||||
return try tb.process(t)
|
||||
} else if (StringUtil.inString(name, haystack: "tbody", "tfoot", "thead")) {
|
||||
} else if TagSets.tableSections.contains(name) {
|
||||
tb.clearStackToTableContext()
|
||||
try tb.insert(startTag)
|
||||
tb.transition(.InTableBody)
|
||||
} else if (StringUtil.inString(name, haystack: "td", "th", "tr")) {
|
||||
} else if ["td", "th", "tr"].contains(name) {
|
||||
try tb.processStartTag("tbody")
|
||||
return try tb.process(t)
|
||||
} else if (name.equals("table")) {
|
||||
|
@ -901,7 +923,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
|
|||
let processed: Bool = try tb.processEndTag("table")
|
||||
if (processed) // only ignored if in fragment
|
||||
{return try tb.process(t)}
|
||||
} else if (StringUtil.inString(name, haystack: "style", "script")) {
|
||||
} else if ["style", "script"].contains(name) {
|
||||
return try tb.process(t, .InHead)
|
||||
} else if (name.equals("input")) {
|
||||
if (!startTag._attributes.get(key: "type").equalsIgnoreCase(string: "hidden")) {
|
||||
|
@ -932,8 +954,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
|
|||
tb.popStackToClose("table")
|
||||
}
|
||||
tb.resetInsertionMode()
|
||||
} else if (StringUtil.inString(name,
|
||||
haystack: "body", "caption", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr")) {
|
||||
} else if TagSets.tableMix.contains(name) {
|
||||
tb.error(self)
|
||||
return false
|
||||
} else {
|
||||
|
@ -970,7 +991,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
|
|||
if (!HtmlTreeBuilderState.isWhitespace(character)) {
|
||||
// InTable anything else section:
|
||||
tb.error(self)
|
||||
if (tb.currentElement() != nil && StringUtil.inString(tb.currentElement()!.nodeName(), haystack: "table", "tbody", "tfoot", "thead", "tr")) {
|
||||
if tb.currentElement() != nil && TagSets.table.contains(tb.currentElement()!.nodeName()) {
|
||||
tb.setFosterInserts(true)
|
||||
try tb.process(Token.Char().data(character), .InBody)
|
||||
tb.setFosterInserts(false)
|
||||
|
@ -988,7 +1009,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
|
|||
}
|
||||
return true
|
||||
case .InCaption:
|
||||
if (t.isEndTag() && t.asEndTag().normalName()!.equals("caption")) {
|
||||
if t.endTagNormalName() == "caption" {
|
||||
let endTag: Token.EndTag = t.asEndTag()
|
||||
let name: String? = endTag.normalName()
|
||||
if (try name != nil && !tb.inTableScope(name!)) {
|
||||
|
@ -1003,18 +1024,21 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
|
|||
tb.clearFormattingElementsToLastMarker()
|
||||
tb.transition(.InTable)
|
||||
}
|
||||
} else if ((
|
||||
t.isStartTag() && StringUtil.inString(t.asStartTag().normalName()!,
|
||||
haystack: "caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr") ||
|
||||
t.isEndTag() && t.asEndTag().normalName()!.equals("table"))
|
||||
) {
|
||||
} else if (t.isStartTag() && TagSets.tableRowsAndCols.contains(t.asStartTag().normalName()!)) ||
|
||||
(t.isEndTag() && t.asEndTag().normalName()!.equals("table"))
|
||||
{
|
||||
// Note: original code relies on && precedence being higher than ||
|
||||
//
|
||||
// if ((t.isStartTag() && StringUtil.inString(t.asStartTag().normalName()!,
|
||||
// haystack: "caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr") ||
|
||||
// t.isEndTag() && t.asEndTag().normalName()!.equals("table"))) {
|
||||
|
||||
tb.error(self)
|
||||
let processed: Bool = try tb.processEndTag("caption")
|
||||
if (processed) {
|
||||
return try tb.process(t)
|
||||
}
|
||||
} else if (t.isEndTag() && StringUtil.inString(t.asEndTag().normalName()!,
|
||||
haystack: "body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr")) {
|
||||
} else if let nName = t.endTagNormalName(), TagSets.tableMix2.contains(nName) {
|
||||
tb.error(self)
|
||||
return false
|
||||
} else {
|
||||
|
@ -1102,11 +1126,11 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
|
|||
tb.clearStackToTableBodyContext()
|
||||
try tb.insert(startTag)
|
||||
tb.transition(.InRow)
|
||||
} else if (StringUtil.inString(name, haystack: "th", "td")) {
|
||||
} else if let name = name, TagSets.thTd.contains(name) {
|
||||
tb.error(self)
|
||||
try tb.processStartTag("tr")
|
||||
return try tb.process(startTag)
|
||||
} else if (StringUtil.inString(name, haystack: "caption", "col", "colgroup", "tbody", "tfoot", "thead")) {
|
||||
} else if let name = name, TagSets.tableMix3.contains(name) {
|
||||
return try exitTableBody(t, tb)
|
||||
} else {
|
||||
return try anythingElse(t, tb)
|
||||
|
@ -1115,8 +1139,8 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
|
|||
case .EndTag:
|
||||
let endTag: Token.EndTag = t.asEndTag()
|
||||
let name = endTag.normalName()
|
||||
if (StringUtil.inString(name, haystack: "tbody", "tfoot", "thead")) {
|
||||
if (try !tb.inTableScope(name!)) {
|
||||
if let name = name, TagSets.tableSections.contains(name) {
|
||||
if (try !tb.inTableScope(name)) {
|
||||
tb.error(self)
|
||||
return false
|
||||
} else {
|
||||
|
@ -1126,7 +1150,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
|
|||
}
|
||||
} else if ("table".equals(name)) {
|
||||
return try exitTableBody(t, tb)
|
||||
} else if (StringUtil.inString(name, haystack: "body", "caption", "col", "colgroup", "html", "td", "th", "tr")) {
|
||||
} else if let name = name, TagSets.tableMix4.contains(name) {
|
||||
tb.error(self)
|
||||
return false
|
||||
} else {
|
||||
|
@ -1155,12 +1179,12 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
|
|||
let startTag: Token.StartTag = t.asStartTag()
|
||||
let name: String? = startTag.normalName()
|
||||
|
||||
if (StringUtil.inString(name, haystack: "th", "td")) {
|
||||
if let name = name, TagSets.thTd.contains(name) {
|
||||
tb.clearStackToTableRowContext()
|
||||
try tb.insert(startTag)
|
||||
tb.transition(.InCell)
|
||||
tb.insertMarkerToFormattingElements()
|
||||
} else if (StringUtil.inString(name, haystack: "caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr")) {
|
||||
} else if let name = name, TagSets.tableMix5.contains(name) {
|
||||
return try handleMissingTr(t, tb)
|
||||
} else {
|
||||
return try anythingElse(t, tb)
|
||||
|
@ -1179,14 +1203,14 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
|
|||
tb.transition(.InTableBody)
|
||||
} else if ("table".equals(name)) {
|
||||
return try handleMissingTr(t, tb)
|
||||
} else if (StringUtil.inString(name, haystack: "tbody", "tfoot", "thead")) {
|
||||
if (try !tb.inTableScope(name!)) {
|
||||
} else if let name = name, TagSets.tableSections.contains(name) {
|
||||
if (try !tb.inTableScope(name)) {
|
||||
tb.error(self)
|
||||
return false
|
||||
}
|
||||
try tb.processEndTag("tr")
|
||||
return try tb.process(t)
|
||||
} else if (StringUtil.inString(name, haystack: "body", "caption", "col", "colgroup", "html", "td", "th")) {
|
||||
} else if let name = name, TagSets.tableMix6.contains(name) {
|
||||
tb.error(self)
|
||||
return false
|
||||
} else {
|
||||
|
@ -1213,24 +1237,24 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
|
|||
let endTag: Token.EndTag = t.asEndTag()
|
||||
let name: String? = endTag.normalName()
|
||||
|
||||
if (StringUtil.inString(name, haystack: "td", "th")) {
|
||||
if (try !tb.inTableScope(name!)) {
|
||||
if let name = name, TagSets.thTd.contains(name) {
|
||||
if (try !tb.inTableScope(name)) {
|
||||
tb.error(self)
|
||||
tb.transition(.InRow) // might not be in scope if empty: <td /> and processing fake end tag
|
||||
return false
|
||||
}
|
||||
tb.generateImpliedEndTags()
|
||||
if (!name!.equals(tb.currentElement()?.nodeName())) {
|
||||
if (!name.equals(tb.currentElement()?.nodeName())) {
|
||||
tb.error(self)
|
||||
}
|
||||
tb.popStackToClose(name!)
|
||||
tb.popStackToClose(name)
|
||||
tb.clearFormattingElementsToLastMarker()
|
||||
tb.transition(.InRow)
|
||||
} else if (StringUtil.inString(name, haystack: "body", "caption", "col", "colgroup", "html")) {
|
||||
} else if let name = name, TagSets.tableMix7.contains(name) {
|
||||
tb.error(self)
|
||||
return false
|
||||
} else if (StringUtil.inString(name, haystack: "table", "tbody", "tfoot", "thead", "tr")) {
|
||||
if (try !tb.inTableScope(name!)) {
|
||||
} else if let name = name, TagSets.table.contains(name) {
|
||||
if (try !tb.inTableScope(name)) {
|
||||
tb.error(self)
|
||||
return false
|
||||
}
|
||||
|
@ -1239,9 +1263,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
|
|||
} else {
|
||||
return try anythingElse(t, tb)
|
||||
}
|
||||
} else if (t.isStartTag() &&
|
||||
StringUtil.inString(t.asStartTag().normalName(),
|
||||
haystack: "caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr")) {
|
||||
} else if let nName = t.startTagNormalName(), TagSets.tableRowsAndCols.contains(nName) {
|
||||
if (try !(tb.inTableScope("td") || tb.inTableScope("th"))) {
|
||||
tb.error(self)
|
||||
return false
|
||||
|
@ -1293,7 +1315,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
|
|||
} else if ("select".equals(name)) {
|
||||
tb.error(self)
|
||||
return try tb.processEndTag("select")
|
||||
} else if (StringUtil.inString(name, haystack: "input", "keygen", "textarea")) {
|
||||
} else if let name = name, TagSets.inputKeygenTextarea.contains(name) {
|
||||
tb.error(self)
|
||||
if (try !tb.inSelectScope("select")) {
|
||||
return false // frag
|
||||
|
@ -1346,17 +1368,17 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
|
|||
}
|
||||
return true
|
||||
case .InSelectInTable:
|
||||
if (t.isStartTag() && StringUtil.inString(t.asStartTag().normalName(), haystack: "caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th")) {
|
||||
if let nName = t.startTagNormalName(), TagSets.tableMix8.contains(nName) {
|
||||
tb.error(self)
|
||||
try tb.processEndTag("select")
|
||||
return try tb.process(t)
|
||||
} else if (t.isEndTag() && StringUtil.inString(t.asEndTag().normalName(), haystack: "caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th")) {
|
||||
} else if let nName = t.endTagNormalName(), TagSets.tableMix8.contains(nName) {
|
||||
tb.error(self)
|
||||
if (try t.asEndTag().normalName() != nil && tb.inTableScope(t.asEndTag().normalName()!)) {
|
||||
if try tb.inTableScope(nName) {
|
||||
try tb.processEndTag("select")
|
||||
return try (tb.process(t))
|
||||
} else {
|
||||
return false
|
||||
return false
|
||||
}
|
||||
} else {
|
||||
return try tb.process(t, .InSelect)
|
||||
|
@ -1369,9 +1391,9 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
|
|||
} else if (t.isDoctype()) {
|
||||
tb.error(self)
|
||||
return false
|
||||
} else if (t.isStartTag() && "html".equals(t.asStartTag().normalName())) {
|
||||
} else if t.startTagNormalName() == "html" {
|
||||
return try tb.process(t, .InBody)
|
||||
} else if (t.isEndTag() && "html".equals(t.asEndTag().normalName())) {
|
||||
} else if t.endTagNormalName() == "html" {
|
||||
if (tb.isFragmentParsing()) {
|
||||
tb.error(self)
|
||||
return false
|
||||
|
@ -1410,7 +1432,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
|
|||
tb.error(self)
|
||||
return false
|
||||
}
|
||||
} else if (t.isEndTag() && "frameset".equals(t.asEndTag().normalName())) {
|
||||
} else if t.endTagNormalName() == "frameset" {
|
||||
if ("html".equals(tb.currentElement()?.nodeName())) { // frag
|
||||
tb.error(self)
|
||||
return false
|
||||
|
@ -1439,11 +1461,11 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
|
|||
} else if (t.isDoctype()) {
|
||||
tb.error(self)
|
||||
return false
|
||||
} else if (t.isStartTag() && "html".equals(t.asStartTag().normalName())) {
|
||||
} else if t.startTagNormalName() == "html" {
|
||||
return try tb.process(t, .InBody)
|
||||
} else if (t.isEndTag() && "html".equals(t.asEndTag().normalName())) {
|
||||
} else if t.endTagNormalName() == "html" {
|
||||
tb.transition(.AfterAfterFrameset)
|
||||
} else if (t.isStartTag() && "noframes".equals(t.asStartTag().normalName())) {
|
||||
} else if t.startTagNormalName() == "noframes" {
|
||||
return try tb.process(t, .InHead)
|
||||
} else if (t.isEOF()) {
|
||||
// cool your heels, we're complete
|
||||
|
@ -1470,11 +1492,11 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
|
|||
|
||||
if (t.isComment()) {
|
||||
try tb.insert(t.asComment())
|
||||
} else if (t.isDoctype() || HtmlTreeBuilderState.isWhitespace(t) || (t.isStartTag() && "html".equals(t.asStartTag().normalName()))) {
|
||||
} else if (t.isDoctype() || HtmlTreeBuilderState.isWhitespace(t) || (t.startTagNormalName() == "html")) {
|
||||
return try tb.process(t, .InBody)
|
||||
} else if (t.isEOF()) {
|
||||
// nice work chuck
|
||||
} else if (t.isStartTag() && "noframes".equals(t.asStartTag().normalName())) {
|
||||
} else if t.startTagNormalName() == "noframes" {
|
||||
return try tb.process(t, .InHead)
|
||||
} else {
|
||||
tb.error(self)
|
||||
|
@ -1547,3 +1569,17 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
|
|||
fileprivate static let InBodyEndTableFosters: [String] = ["table", "tbody", "tfoot", "thead", "tr"]
|
||||
}
|
||||
}
|
||||
|
||||
fileprivate extension Token {
|
||||
|
||||
func endTagNormalName() -> String? {
|
||||
guard isEndTag() else { return nil }
|
||||
return asEndTag().normalName()
|
||||
}
|
||||
|
||||
func startTagNormalName() -> String? {
|
||||
guard isStartTag() else { return nil }
|
||||
return asStartTag().normalName()
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -49,9 +49,7 @@ open class ParseSettings {
|
|||
|
||||
open func normalizeAttributes(_ attributes: Attributes)throws ->Attributes {
|
||||
if (!preserveAttributeCase) {
|
||||
for attr in attributes {
|
||||
try attr.setKey(key: attr.getKey().lowercased())
|
||||
}
|
||||
attributes.lowercaseAllKeys()
|
||||
}
|
||||
return attributes
|
||||
}
|
||||
|
|
|
@ -25,8 +25,9 @@ extension String {
|
|||
}
|
||||
|
||||
func unicodeScalar(_ i: Int) -> UnicodeScalar {
|
||||
return self.unicodeScalars.prefix(i+1).last!
|
||||
}
|
||||
let ix = unicodeScalars.index(unicodeScalars.startIndex, offsetBy: i)
|
||||
return unicodeScalars[ix]
|
||||
}
|
||||
|
||||
func string(_ offset: Int, _ count: Int) -> String {
|
||||
let truncStart = self.unicodeScalars.count-offset
|
||||
|
@ -55,7 +56,7 @@ extension String {
|
|||
func startsWith(_ string: String) -> Bool {
|
||||
return self.hasPrefix(string)
|
||||
}
|
||||
|
||||
|
||||
func indexOf(_ substring: String, _ offset: Int ) -> Int {
|
||||
if(offset > count) {return -1}
|
||||
|
||||
|
@ -81,12 +82,22 @@ extension String {
|
|||
}
|
||||
|
||||
func trim() -> String {
|
||||
return trimmingCharacters(in: NSCharacterSet.whitespacesAndNewlines)
|
||||
// trimmingCharacters() in the stdlib is not very efficiently
|
||||
// implemented, perhaps because it always creates a new string.
|
||||
// Avoid actually calling it if it's not needed.
|
||||
guard count > 0 else { return self }
|
||||
let (firstChar, lastChar) = (first!, last!)
|
||||
if firstChar.isWhitespace || lastChar.isWhitespace || firstChar == "\n" || lastChar == "\n" {
|
||||
return trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
}
|
||||
return self
|
||||
}
|
||||
|
||||
func equalsIgnoreCase(string: String?) -> Bool {
|
||||
if(string == nil) {return false}
|
||||
return string!.lowercased() == lowercased()
|
||||
if let string = string {
|
||||
return caseInsensitiveCompare(string) == .orderedSame
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
static func toHexString(n: Int) -> String {
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
/**
|
||||
Supports creation of a String from pieces
|
||||
https://gist.github.com/kristopherjohnson/1fc55e811d944a430289
|
||||
Based on https://gist.github.com/kristopherjohnson/1fc55e811d944a430289
|
||||
*/
|
||||
open class StringBuilder {
|
||||
fileprivate var stringValue: Array<Character>
|
||||
fileprivate var buffer: [String] = []
|
||||
|
||||
/**
|
||||
Construct with initial String contents
|
||||
|
@ -11,11 +11,13 @@ open class StringBuilder {
|
|||
:param: string Initial value; defaults to empty string
|
||||
*/
|
||||
public init(string: String = "") {
|
||||
self.stringValue = Array(string)
|
||||
if string != "" {
|
||||
buffer.append(string)
|
||||
}
|
||||
}
|
||||
|
||||
public init(_ size: Int) {
|
||||
self.stringValue = Array()
|
||||
self.buffer = Array()
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -24,15 +26,18 @@ open class StringBuilder {
|
|||
:return: String
|
||||
*/
|
||||
open func toString() -> String {
|
||||
return String(stringValue)
|
||||
return buffer.reduce("", +)
|
||||
}
|
||||
|
||||
/**
|
||||
Return the current length of the String object
|
||||
*/
|
||||
open var length: Int {
|
||||
return self.stringValue.count
|
||||
//return countElements(stringValue)
|
||||
open var xlength: Int {
|
||||
return buffer.map { $0.count }.reduce(0, +)
|
||||
}
|
||||
|
||||
open var isEmpty: Bool {
|
||||
return buffer.isEmpty
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -43,29 +48,27 @@ open class StringBuilder {
|
|||
:return: reference to this StringBuilder instance
|
||||
*/
|
||||
open func append(_ string: String) {
|
||||
stringValue.append(contentsOf: string)
|
||||
buffer.append(string)
|
||||
}
|
||||
|
||||
open func appendCodePoint(_ chr: Character) {
|
||||
stringValue.append(chr)
|
||||
buffer.append(String(chr))
|
||||
}
|
||||
|
||||
open func appendCodePoints(_ chr: [Character]) {
|
||||
stringValue.append(contentsOf: chr)
|
||||
buffer.append(String(chr))
|
||||
}
|
||||
|
||||
open func appendCodePoint(_ ch: Int) {
|
||||
stringValue.append(Character(UnicodeScalar(ch)!))
|
||||
buffer.append(String(UnicodeScalar(ch)!))
|
||||
}
|
||||
|
||||
open func appendCodePoint(_ ch: UnicodeScalar) {
|
||||
stringValue.append(Character(ch))
|
||||
buffer.append(String(ch))
|
||||
}
|
||||
|
||||
open func appendCodePoints(_ chr: [UnicodeScalar]) {
|
||||
for c in chr {
|
||||
appendCodePoint(c)
|
||||
}
|
||||
buffer.append(String(String.UnicodeScalarView(chr)))
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -77,19 +80,13 @@ open class StringBuilder {
|
|||
*/
|
||||
@discardableResult
|
||||
open func append<T: CustomStringConvertible>(_ value: T) -> StringBuilder {
|
||||
stringValue.append(contentsOf: value.description)
|
||||
buffer.append(value.description)
|
||||
return self
|
||||
}
|
||||
|
||||
@discardableResult
|
||||
open func append(_ value: UnicodeScalar) -> StringBuilder {
|
||||
stringValue.append(contentsOf: value.description)
|
||||
return self
|
||||
}
|
||||
|
||||
@discardableResult
|
||||
open func insert<T: CustomStringConvertible>(_ offset: Int, _ value: T) -> StringBuilder {
|
||||
stringValue.insert(contentsOf: value.description, at: offset)
|
||||
buffer.append(value.description)
|
||||
return self
|
||||
}
|
||||
|
||||
|
@ -102,7 +99,8 @@ open class StringBuilder {
|
|||
*/
|
||||
@discardableResult
|
||||
open func appendLine(_ string: String) -> StringBuilder {
|
||||
stringValue.append(contentsOf: "\n")
|
||||
buffer.append(string)
|
||||
buffer.append("\n")
|
||||
return self
|
||||
}
|
||||
|
||||
|
@ -115,8 +113,8 @@ open class StringBuilder {
|
|||
*/
|
||||
@discardableResult
|
||||
open func appendLine<T: CustomStringConvertible>(_ value: T) -> StringBuilder {
|
||||
stringValue.append(contentsOf: value.description)
|
||||
stringValue.append(contentsOf: "\n")
|
||||
buffer.append(value.description)
|
||||
buffer.append("\n")
|
||||
return self
|
||||
}
|
||||
|
||||
|
@ -127,7 +125,7 @@ open class StringBuilder {
|
|||
*/
|
||||
@discardableResult
|
||||
open func clear() -> StringBuilder {
|
||||
stringValue = Array()
|
||||
buffer.removeAll(keepingCapacity: true)
|
||||
return self
|
||||
}
|
||||
}
|
||||
|
|
|
@ -165,19 +165,6 @@ open class StringUtil {
|
|||
}
|
||||
}
|
||||
|
||||
public static func inString(_ needle: String?, haystack: String...) -> Bool {
|
||||
return inString(needle, haystack)
|
||||
}
|
||||
public static func inString(_ needle: String?, _ haystack: [String?]) -> Bool {
|
||||
if(needle == nil) {return false}
|
||||
for hay in haystack {
|
||||
if(hay != nil && hay! == needle!) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// open static func inSorted(_ needle: String, haystack: [String]) -> Bool {
|
||||
// return binarySearch(haystack, searchItem: needle) >= 0
|
||||
// }
|
||||
|
|
|
@ -116,7 +116,7 @@ open class Token {
|
|||
if (_pendingAttributeName != nil) {
|
||||
var attribute: Attribute
|
||||
if (_hasPendingAttributeValue) {
|
||||
attribute = try Attribute(key: _pendingAttributeName!, value: _pendingAttributeValue.length > 0 ? _pendingAttributeValue.toString() : _pendingAttributeValueS!)
|
||||
attribute = try Attribute(key: _pendingAttributeName!, value: !_pendingAttributeValue.isEmpty ? _pendingAttributeValue.toString() : _pendingAttributeValueS!)
|
||||
} else if (_hasEmptyAttributeValue) {
|
||||
attribute = try Attribute(key: _pendingAttributeName!, value: "")
|
||||
} else {
|
||||
|
@ -183,7 +183,7 @@ open class Token {
|
|||
|
||||
func appendAttributeValue(_ append: String) {
|
||||
ensureAttributeValue()
|
||||
if (_pendingAttributeValue.length == 0) {
|
||||
if _pendingAttributeValue.isEmpty {
|
||||
_pendingAttributeValueS = append
|
||||
} else {
|
||||
_pendingAttributeValue.append(append)
|
||||
|
|
|
@ -47,7 +47,7 @@ final class Tokeniser {
|
|||
}
|
||||
|
||||
// if emit is pending, a non-character token was found: return any chars in buffer, and leave token for next read:
|
||||
if (charsBuilder.length > 0) {
|
||||
if !charsBuilder.isEmpty {
|
||||
let str: String = charsBuilder.toString()
|
||||
charsBuilder.clear()
|
||||
charsString = nil
|
||||
|
@ -88,7 +88,7 @@ final class Tokeniser {
|
|||
if (charsString == nil) {
|
||||
charsString = str
|
||||
} else {
|
||||
if (charsBuilder.length == 0) { // switching to string builder as more than one emit before read
|
||||
if charsBuilder.isEmpty { // switching to string builder as more than one emit before read
|
||||
charsBuilder.append(charsString!)
|
||||
}
|
||||
charsBuilder.append(str)
|
||||
|
@ -124,9 +124,6 @@ final class Tokeniser {
|
|||
selfClosingFlagAcknowledged = true
|
||||
}
|
||||
|
||||
private var codepointHolder: [UnicodeScalar] = [UnicodeScalar(0)!] // holder to not have to keep creating arrays
|
||||
private var multipointHolder: [UnicodeScalar] = [UnicodeScalar(0)!, UnicodeScalar(0)!]
|
||||
|
||||
func consumeCharacterReference(_ additionalAllowedCharacter: UnicodeScalar?, _ inAttribute: Bool)throws->[UnicodeScalar]? {
|
||||
if (reader.isEmpty()) {
|
||||
return nil
|
||||
|
@ -138,7 +135,6 @@ final class Tokeniser {
|
|||
return nil
|
||||
}
|
||||
|
||||
var codeRef: [UnicodeScalar] = codepointHolder
|
||||
reader.markPos()
|
||||
if (reader.matchConsume("#")) { // numbered
|
||||
let isHexMode: Bool = reader.matchConsumeIgnoreCase("X")
|
||||
|
@ -160,13 +156,11 @@ final class Tokeniser {
|
|||
|
||||
if (charval == -1 || (charval >= 0xD800 && charval <= 0xDFFF) || charval > 0x10FFFF) {
|
||||
characterReferenceError("character outside of valid range")
|
||||
codeRef[0] = Tokeniser.replacementChar
|
||||
return codeRef
|
||||
return [Tokeniser.replacementChar]
|
||||
} else {
|
||||
// todo: implement number replacement table
|
||||
// todo: check for extra illegal unicode points as parse errors
|
||||
codeRef[0] = UnicodeScalar(charval)!
|
||||
return codeRef
|
||||
return [UnicodeScalar(charval)!]
|
||||
}
|
||||
} else { // named
|
||||
// get as many letters as possible, and look for matching entities.
|
||||
|
@ -190,16 +184,14 @@ final class Tokeniser {
|
|||
if (!reader.matchConsume(";")) {
|
||||
characterReferenceError("missing semicolon") // missing semi
|
||||
}
|
||||
let numChars: Int = Entities.codepointsForName(nameRef, codepoints: &multipointHolder)
|
||||
if (numChars == 1) {
|
||||
codeRef[0] = multipointHolder[0]
|
||||
return codeRef
|
||||
} else if (numChars == 2) {
|
||||
return multipointHolder
|
||||
} else {
|
||||
try Validate.fail(msg: "Unexpected characters returned for \(nameRef) num: \(numChars)")
|
||||
return multipointHolder
|
||||
if let points = Entities.codepointsForName(nameRef) {
|
||||
if points.count > 2 {
|
||||
try Validate.fail(msg: "Unexpected characters returned for \(nameRef) num: \(points.count)")
|
||||
}
|
||||
return points
|
||||
}
|
||||
try Validate.fail(msg: "Entity name not found: \(nameRef)")
|
||||
return []
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -84,12 +84,22 @@
|
|||
8CE418721DAA568700240B42 /* SerializationException.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE4184E1DAA568600240B42 /* SerializationException.swift */; };
|
||||
8CE418731DAA568700240B42 /* ArrayExt.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418501DAA568600240B42 /* ArrayExt.swift */; };
|
||||
8CE418741DAA568700240B42 /* CharacterExt.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418511DAA568600240B42 /* CharacterExt.swift */; };
|
||||
8CE418761DAA568700240B42 /* OrderedDictionary.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418531DAA568600240B42 /* OrderedDictionary.swift */; };
|
||||
8CE418781DAA568700240B42 /* StreamReader.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418551DAA568600240B42 /* StreamReader.swift */; };
|
||||
8CE4187A1DAA568700240B42 /* SwiftSoup.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418571DAA568600240B42 /* SwiftSoup.swift */; };
|
||||
8CEA29591DAC112B0064A341 /* CharacterReader.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CEA29581DAC112B0064A341 /* CharacterReader.swift */; };
|
||||
8CEA295B1DAC23820064A341 /* String.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CEA295A1DAC23820064A341 /* String.swift */; };
|
||||
BD3B5B6A1FBED933001FDB3B /* OrderedDictionary.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418531DAA568600240B42 /* OrderedDictionary.swift */; };
|
||||
BB57C2D3222CAF8E008933AA /* SwiftSoup.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = BD3B5BA91FBED933001FDB3B /* SwiftSoup.framework */; };
|
||||
BB57C2DB222CB0C6008933AA /* ParserBenchmark.swift in Sources */ = {isa = PBXBuildFile; fileRef = BB57C2D9222CB0BE008933AA /* ParserBenchmark.swift */; };
|
||||
BB57C2E2222CB0E3008933AA /* Google.html in Resources */ = {isa = PBXBuildFile; fileRef = BB57C2DC222CB0E2008933AA /* Google.html */; };
|
||||
BB57C2E3222CB0E3008933AA /* Wikipedia.html in Resources */ = {isa = PBXBuildFile; fileRef = BB57C2DD222CB0E2008933AA /* Wikipedia.html */; };
|
||||
BB57C2E4222CB0E3008933AA /* Reuters.html in Resources */ = {isa = PBXBuildFile; fileRef = BB57C2DE222CB0E2008933AA /* Reuters.html */; };
|
||||
BB57C2E5222CB0E3008933AA /* Wirecutter.html in Resources */ = {isa = PBXBuildFile; fileRef = BB57C2DF222CB0E2008933AA /* Wirecutter.html */; };
|
||||
BB57C2E6222CB0E3008933AA /* GitHub.html in Resources */ = {isa = PBXBuildFile; fileRef = BB57C2E0222CB0E3008933AA /* GitHub.html */; };
|
||||
BB57C2E7222CB0E3008933AA /* Amazon.html in Resources */ = {isa = PBXBuildFile; fileRef = BB57C2E1222CB0E3008933AA /* Amazon.html */; };
|
||||
BB57C2EA222CCCB6008933AA /* BinarySearch.swift in Sources */ = {isa = PBXBuildFile; fileRef = BB57C2E9222CCCB5008933AA /* BinarySearch.swift */; };
|
||||
BB57C2EB222CCCC3008933AA /* BinarySearch.swift in Sources */ = {isa = PBXBuildFile; fileRef = BB57C2E9222CCCB5008933AA /* BinarySearch.swift */; };
|
||||
BB57C2EC222CCCC5008933AA /* BinarySearch.swift in Sources */ = {isa = PBXBuildFile; fileRef = BB57C2E9222CCCB5008933AA /* BinarySearch.swift */; };
|
||||
BB57C2ED222CCCC6008933AA /* BinarySearch.swift in Sources */ = {isa = PBXBuildFile; fileRef = BB57C2E9222CCCB5008933AA /* BinarySearch.swift */; };
|
||||
BD3B5B6B1FBED933001FDB3B /* Whitelist.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CC2FD8C1DB12382002CB469 /* Whitelist.swift */; };
|
||||
BD3B5B6C1FBED933001FDB3B /* Tokeniser.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8C19C82E1DB7E5D200B8FC22 /* Tokeniser.swift */; };
|
||||
BD3B5B6D1FBED933001FDB3B /* Pattern.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418541DAA568600240B42 /* Pattern.swift */; };
|
||||
|
@ -146,7 +156,6 @@
|
|||
BD3B5BA01FBED933001FDB3B /* DataUtil.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418321DAA568600240B42 /* DataUtil.swift */; };
|
||||
BD3B5BA11FBED933001FDB3B /* ParseError.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8C19C8301DB7E8CD00B8FC22 /* ParseError.swift */; };
|
||||
BD3B5BA41FBED933001FDB3B /* SwiftSoup.h in Headers */ = {isa = PBXBuildFile; fileRef = 8CE418191DAA54A900240B42 /* SwiftSoup.h */; settings = {ATTRIBUTES = (Public, ); }; };
|
||||
BD3B5BAD1FC063BD001FDB3B /* OrderedDictionary.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418531DAA568600240B42 /* OrderedDictionary.swift */; };
|
||||
BD3B5BAE1FC063BD001FDB3B /* Whitelist.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CC2FD8C1DB12382002CB469 /* Whitelist.swift */; };
|
||||
BD3B5BAF1FC063BD001FDB3B /* Tokeniser.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8C19C82E1DB7E5D200B8FC22 /* Tokeniser.swift */; };
|
||||
BD3B5BB01FC063BD001FDB3B /* Pattern.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418541DAA568600240B42 /* Pattern.swift */; };
|
||||
|
@ -203,7 +212,6 @@
|
|||
BD3B5BE31FC063BD001FDB3B /* DataUtil.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418321DAA568600240B42 /* DataUtil.swift */; };
|
||||
BD3B5BE41FC063BD001FDB3B /* ParseError.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8C19C8301DB7E8CD00B8FC22 /* ParseError.swift */; };
|
||||
BD3B5BE71FC063BD001FDB3B /* SwiftSoup.h in Headers */ = {isa = PBXBuildFile; fileRef = 8CE418191DAA54A900240B42 /* SwiftSoup.h */; settings = {ATTRIBUTES = (Public, ); }; };
|
||||
BD3B5BF01FC06423001FDB3B /* OrderedDictionary.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418531DAA568600240B42 /* OrderedDictionary.swift */; };
|
||||
BD3B5BF11FC06423001FDB3B /* Whitelist.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CC2FD8C1DB12382002CB469 /* Whitelist.swift */; };
|
||||
BD3B5BF21FC06423001FDB3B /* Tokeniser.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8C19C82E1DB7E5D200B8FC22 /* Tokeniser.swift */; };
|
||||
BD3B5BF31FC06423001FDB3B /* Pattern.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418541DAA568600240B42 /* Pattern.swift */; };
|
||||
|
@ -270,6 +278,13 @@
|
|||
remoteGlobalIDString = 8CE418151DAA54A900240B42;
|
||||
remoteInfo = SwiftSoup;
|
||||
};
|
||||
BB57C2D4222CAF8E008933AA /* PBXContainerItemProxy */ = {
|
||||
isa = PBXContainerItemProxy;
|
||||
containerPortal = 8CE4180D1DAA54A900240B42 /* Project object */;
|
||||
proxyType = 1;
|
||||
remoteGlobalIDString = BD3B5B681FBED933001FDB3B;
|
||||
remoteInfo = "SwiftSoup-macOS";
|
||||
};
|
||||
/* End PBXContainerItemProxy section */
|
||||
|
||||
/* Begin PBXFileReference section */
|
||||
|
@ -353,12 +368,21 @@
|
|||
8CE4184E1DAA568600240B42 /* SerializationException.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = SerializationException.swift; sourceTree = "<group>"; };
|
||||
8CE418501DAA568600240B42 /* ArrayExt.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ArrayExt.swift; sourceTree = "<group>"; };
|
||||
8CE418511DAA568600240B42 /* CharacterExt.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = CharacterExt.swift; sourceTree = "<group>"; };
|
||||
8CE418531DAA568600240B42 /* OrderedDictionary.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = OrderedDictionary.swift; sourceTree = "<group>"; };
|
||||
8CE418541DAA568600240B42 /* Pattern.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Pattern.swift; sourceTree = "<group>"; };
|
||||
8CE418551DAA568600240B42 /* StreamReader.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = StreamReader.swift; sourceTree = "<group>"; };
|
||||
8CE418571DAA568600240B42 /* SwiftSoup.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = SwiftSoup.swift; sourceTree = "<group>"; };
|
||||
8CEA29581DAC112B0064A341 /* CharacterReader.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = CharacterReader.swift; sourceTree = "<group>"; };
|
||||
8CEA295A1DAC23820064A341 /* String.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = String.swift; sourceTree = "<group>"; };
|
||||
BB57C2CE222CAF8E008933AA /* SwiftSoupTests-macOS.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = "SwiftSoupTests-macOS.xctest"; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
BB57C2D2222CAF8E008933AA /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
|
||||
BB57C2D9222CB0BE008933AA /* ParserBenchmark.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ParserBenchmark.swift; sourceTree = "<group>"; };
|
||||
BB57C2DC222CB0E2008933AA /* Google.html */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.html; path = Google.html; sourceTree = "<group>"; };
|
||||
BB57C2DD222CB0E2008933AA /* Wikipedia.html */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.html; path = Wikipedia.html; sourceTree = "<group>"; };
|
||||
BB57C2DE222CB0E2008933AA /* Reuters.html */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.html; path = Reuters.html; sourceTree = "<group>"; };
|
||||
BB57C2DF222CB0E2008933AA /* Wirecutter.html */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.html; path = Wirecutter.html; sourceTree = "<group>"; };
|
||||
BB57C2E0222CB0E3008933AA /* GitHub.html */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.html; path = GitHub.html; sourceTree = "<group>"; };
|
||||
BB57C2E1222CB0E3008933AA /* Amazon.html */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.html; path = Amazon.html; sourceTree = "<group>"; };
|
||||
BB57C2E9222CCCB5008933AA /* BinarySearch.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BinarySearch.swift; sourceTree = "<group>"; };
|
||||
BD36975B20135EBB00D8FAC6 /* SwiftSoup.podspec */ = {isa = PBXFileReference; lastKnownFileType = text; path = SwiftSoup.podspec; sourceTree = "<group>"; };
|
||||
BD3B5BA91FBED933001FDB3B /* SwiftSoup.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = SwiftSoup.framework; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
BD3B5BAA1FBED934001FDB3B /* InfoMac.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; name = InfoMac.plist; path = /Users/nabil/Documents/nabil/SwiftSoup/Sources/InfoMac.plist; sourceTree = "<absolute>"; };
|
||||
|
@ -385,6 +409,14 @@
|
|||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
BB57C2CB222CAF8E008933AA /* Frameworks */ = {
|
||||
isa = PBXFrameworksBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
BB57C2D3222CAF8E008933AA /* SwiftSoup.framework in Frameworks */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
BD3B5BA21FBED933001FDB3B /* Frameworks */ = {
|
||||
isa = PBXFrameworksBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
|
@ -420,9 +452,9 @@
|
|||
8C7ED6731E00B0690032A27C /* shared */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
BB57C2E9222CCCB5008933AA /* BinarySearch.swift */,
|
||||
8CE418501DAA568600240B42 /* ArrayExt.swift */,
|
||||
8CE418511DAA568600240B42 /* CharacterExt.swift */,
|
||||
8CE418531DAA568600240B42 /* OrderedDictionary.swift */,
|
||||
8CE418541DAA568600240B42 /* Pattern.swift */,
|
||||
8CE418551DAA568600240B42 /* StreamReader.swift */,
|
||||
8CEA295A1DAC23820064A341 /* String.swift */,
|
||||
|
@ -526,6 +558,7 @@
|
|||
BD76883E206D8B6900B7F940 /* CHANGELOG.md */,
|
||||
8CE418181DAA54A900240B42 /* Sources */,
|
||||
8CE418231DAA54A900240B42 /* Tests */,
|
||||
BB57C2CF222CAF8E008933AA /* Tests-macOS */,
|
||||
8CE418171DAA54A900240B42 /* Products */,
|
||||
);
|
||||
indentWidth = 4;
|
||||
|
@ -540,6 +573,7 @@
|
|||
BD3B5BA91FBED933001FDB3B /* SwiftSoup.framework */,
|
||||
BD3B5BEC1FC063BD001FDB3B /* SwiftSoup.framework */,
|
||||
BD3B5C2F1FC06423001FDB3B /* SwiftSoup.framework */,
|
||||
BB57C2CE222CAF8E008933AA /* SwiftSoupTests-macOS.xctest */,
|
||||
);
|
||||
name = Products;
|
||||
sourceTree = "<group>";
|
||||
|
@ -623,6 +657,29 @@
|
|||
name = select;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
BB57C2CF222CAF8E008933AA /* Tests-macOS */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
BB57C2D9222CB0BE008933AA /* ParserBenchmark.swift */,
|
||||
BB57C2E8222CB0EE008933AA /* corpus */,
|
||||
BB57C2D2222CAF8E008933AA /* Info.plist */,
|
||||
);
|
||||
path = "Tests-macOS";
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
BB57C2E8222CB0EE008933AA /* corpus */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
BB57C2E1222CB0E3008933AA /* Amazon.html */,
|
||||
BB57C2E0222CB0E3008933AA /* GitHub.html */,
|
||||
BB57C2DC222CB0E2008933AA /* Google.html */,
|
||||
BB57C2DE222CB0E2008933AA /* Reuters.html */,
|
||||
BB57C2DD222CB0E2008933AA /* Wikipedia.html */,
|
||||
BB57C2DF222CB0E2008933AA /* Wirecutter.html */,
|
||||
);
|
||||
path = corpus;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
/* End PBXGroup section */
|
||||
|
||||
/* Begin PBXHeadersBuildPhase section */
|
||||
|
@ -697,6 +754,24 @@
|
|||
productReference = 8CE4181F1DAA54A900240B42 /* SwiftSoupTests.xctest */;
|
||||
productType = "com.apple.product-type.bundle.unit-test";
|
||||
};
|
||||
BB57C2CD222CAF8E008933AA /* SwiftSoupTests-macOS */ = {
|
||||
isa = PBXNativeTarget;
|
||||
buildConfigurationList = BB57C2D6222CAF8E008933AA /* Build configuration list for PBXNativeTarget "SwiftSoupTests-macOS" */;
|
||||
buildPhases = (
|
||||
BB57C2CA222CAF8E008933AA /* Sources */,
|
||||
BB57C2CB222CAF8E008933AA /* Frameworks */,
|
||||
BB57C2CC222CAF8E008933AA /* Resources */,
|
||||
);
|
||||
buildRules = (
|
||||
);
|
||||
dependencies = (
|
||||
BB57C2D5222CAF8E008933AA /* PBXTargetDependency */,
|
||||
);
|
||||
name = "SwiftSoupTests-macOS";
|
||||
productName = "SwiftSoupTests-macOS";
|
||||
productReference = BB57C2CE222CAF8E008933AA /* SwiftSoupTests-macOS.xctest */;
|
||||
productType = "com.apple.product-type.bundle.unit-test";
|
||||
};
|
||||
BD3B5B681FBED933001FDB3B /* SwiftSoup-macOS */ = {
|
||||
isa = PBXNativeTarget;
|
||||
buildConfigurationList = BD3B5BA61FBED933001FDB3B /* Build configuration list for PBXNativeTarget "SwiftSoup-macOS" */;
|
||||
|
@ -757,8 +832,8 @@
|
|||
8CE4180D1DAA54A900240B42 /* Project object */ = {
|
||||
isa = PBXProject;
|
||||
attributes = {
|
||||
LastSwiftUpdateCheck = 0800;
|
||||
LastUpgradeCheck = 1020;
|
||||
LastSwiftUpdateCheck = 1020;
|
||||
LastUpgradeCheck = 0930;
|
||||
ORGANIZATIONNAME = "Nabil Chatbi";
|
||||
TargetAttributes = {
|
||||
8CE418151DAA54A900240B42 = {
|
||||
|
@ -771,6 +846,11 @@
|
|||
LastSwiftMigration = 1020;
|
||||
ProvisioningStyle = Manual;
|
||||
};
|
||||
BB57C2CD222CAF8E008933AA = {
|
||||
CreatedOnToolsVersion = 10.2;
|
||||
DevelopmentTeam = 5MC4PNHTX6;
|
||||
ProvisioningStyle = Automatic;
|
||||
};
|
||||
BD3B5BAB1FC063BD001FDB3B = {
|
||||
ProvisioningStyle = Manual;
|
||||
};
|
||||
|
@ -797,6 +877,7 @@
|
|||
BD3B5BAB1FC063BD001FDB3B /* SwiftSoup-tvOS */,
|
||||
BD3B5BEE1FC06423001FDB3B /* SwiftSoup-watchOS */,
|
||||
8CE4181E1DAA54A900240B42 /* SwiftSoupTests */,
|
||||
BB57C2CD222CAF8E008933AA /* SwiftSoupTests-macOS */,
|
||||
);
|
||||
};
|
||||
/* End PBXProject section */
|
||||
|
@ -816,6 +897,19 @@
|
|||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
BB57C2CC222CAF8E008933AA /* Resources */ = {
|
||||
isa = PBXResourcesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
BB57C2E5222CB0E3008933AA /* Wirecutter.html in Resources */,
|
||||
BB57C2E4222CB0E3008933AA /* Reuters.html in Resources */,
|
||||
BB57C2E3222CB0E3008933AA /* Wikipedia.html in Resources */,
|
||||
BB57C2E6222CB0E3008933AA /* GitHub.html in Resources */,
|
||||
BB57C2E7222CB0E3008933AA /* Amazon.html in Resources */,
|
||||
BB57C2E2222CB0E3008933AA /* Google.html in Resources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
BD3B5BA51FBED933001FDB3B /* Resources */ = {
|
||||
isa = PBXResourcesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
|
@ -844,7 +938,6 @@
|
|||
isa = PBXSourcesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
8CE418761DAA568700240B42 /* OrderedDictionary.swift in Sources */,
|
||||
8CC2FD8D1DB12382002CB469 /* Whitelist.swift in Sources */,
|
||||
8C19C82F1DB7E5D200B8FC22 /* Tokeniser.swift in Sources */,
|
||||
8CD4E8F01E12B0FF0039B951 /* Pattern.swift in Sources */,
|
||||
|
@ -877,6 +970,7 @@
|
|||
8CE4186D1DAA568700240B42 /* FormElement.swift in Sources */,
|
||||
8C73DB4B1DDA605900233A68 /* UnicodeScalar.swift in Sources */,
|
||||
8CE418601DAA568600240B42 /* Validate.swift in Sources */,
|
||||
BB57C2EA222CCCB6008933AA /* BinarySearch.swift in Sources */,
|
||||
8C3617C11DBAC2AE00E00CFE /* Selector.swift in Sources */,
|
||||
8CE418711DAA568700240B42 /* Parser.swift in Sources */,
|
||||
8CE418701DAA568700240B42 /* XmlDeclaration.swift in Sources */,
|
||||
|
@ -934,11 +1028,18 @@
|
|||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
BB57C2CA222CAF8E008933AA /* Sources */ = {
|
||||
isa = PBXSourcesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
BB57C2DB222CB0C6008933AA /* ParserBenchmark.swift in Sources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
BD3B5B691FBED933001FDB3B /* Sources */ = {
|
||||
isa = PBXSourcesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
BD3B5B6A1FBED933001FDB3B /* OrderedDictionary.swift in Sources */,
|
||||
BD3B5B6B1FBED933001FDB3B /* Whitelist.swift in Sources */,
|
||||
BD3B5B6C1FBED933001FDB3B /* Tokeniser.swift in Sources */,
|
||||
BD3B5B6D1FBED933001FDB3B /* Pattern.swift in Sources */,
|
||||
|
@ -971,6 +1072,7 @@
|
|||
BD3B5B881FBED933001FDB3B /* FormElement.swift in Sources */,
|
||||
BD3B5B891FBED933001FDB3B /* UnicodeScalar.swift in Sources */,
|
||||
BD3B5B8A1FBED933001FDB3B /* Validate.swift in Sources */,
|
||||
BB57C2EB222CCCC3008933AA /* BinarySearch.swift in Sources */,
|
||||
BD3B5B8B1FBED933001FDB3B /* Selector.swift in Sources */,
|
||||
BD3B5B8C1FBED933001FDB3B /* Parser.swift in Sources */,
|
||||
BD3B5B8D1FBED933001FDB3B /* XmlDeclaration.swift in Sources */,
|
||||
|
@ -1001,7 +1103,6 @@
|
|||
isa = PBXSourcesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
BD3B5BAD1FC063BD001FDB3B /* OrderedDictionary.swift in Sources */,
|
||||
BD3B5BAE1FC063BD001FDB3B /* Whitelist.swift in Sources */,
|
||||
BD3B5BAF1FC063BD001FDB3B /* Tokeniser.swift in Sources */,
|
||||
BD3B5BB01FC063BD001FDB3B /* Pattern.swift in Sources */,
|
||||
|
@ -1034,6 +1135,7 @@
|
|||
BD3B5BCB1FC063BD001FDB3B /* FormElement.swift in Sources */,
|
||||
BD3B5BCC1FC063BD001FDB3B /* UnicodeScalar.swift in Sources */,
|
||||
BD3B5BCD1FC063BD001FDB3B /* Validate.swift in Sources */,
|
||||
BB57C2EC222CCCC5008933AA /* BinarySearch.swift in Sources */,
|
||||
BD3B5BCE1FC063BD001FDB3B /* Selector.swift in Sources */,
|
||||
BD3B5BCF1FC063BD001FDB3B /* Parser.swift in Sources */,
|
||||
BD3B5BD01FC063BD001FDB3B /* XmlDeclaration.swift in Sources */,
|
||||
|
@ -1064,7 +1166,6 @@
|
|||
isa = PBXSourcesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
BD3B5BF01FC06423001FDB3B /* OrderedDictionary.swift in Sources */,
|
||||
BD3B5BF11FC06423001FDB3B /* Whitelist.swift in Sources */,
|
||||
BD3B5BF21FC06423001FDB3B /* Tokeniser.swift in Sources */,
|
||||
BD3B5BF31FC06423001FDB3B /* Pattern.swift in Sources */,
|
||||
|
@ -1097,6 +1198,7 @@
|
|||
BD3B5C0E1FC06423001FDB3B /* FormElement.swift in Sources */,
|
||||
BD3B5C0F1FC06423001FDB3B /* UnicodeScalar.swift in Sources */,
|
||||
BD3B5C101FC06423001FDB3B /* Validate.swift in Sources */,
|
||||
BB57C2ED222CCCC6008933AA /* BinarySearch.swift in Sources */,
|
||||
BD3B5C111FC06423001FDB3B /* Selector.swift in Sources */,
|
||||
BD3B5C121FC06423001FDB3B /* Parser.swift in Sources */,
|
||||
BD3B5C131FC06423001FDB3B /* XmlDeclaration.swift in Sources */,
|
||||
|
@ -1131,6 +1233,11 @@
|
|||
target = 8CE418151DAA54A900240B42 /* SwiftSoup-iOS */;
|
||||
targetProxy = 8CE418211DAA54A900240B42 /* PBXContainerItemProxy */;
|
||||
};
|
||||
BB57C2D5222CAF8E008933AA /* PBXTargetDependency */ = {
|
||||
isa = PBXTargetDependency;
|
||||
target = BD3B5B681FBED933001FDB3B /* SwiftSoup-macOS */;
|
||||
targetProxy = BB57C2D4222CAF8E008933AA /* PBXContainerItemProxy */;
|
||||
};
|
||||
/* End PBXTargetDependency section */
|
||||
|
||||
/* Begin XCBuildConfiguration section */
|
||||
|
@ -1361,6 +1468,53 @@
|
|||
};
|
||||
name = Release;
|
||||
};
|
||||
BB57C2D7222CAF8E008933AA /* Debug */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
|
||||
CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
|
||||
CLANG_ENABLE_OBJC_WEAK = YES;
|
||||
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
|
||||
CODE_SIGN_IDENTITY = "-";
|
||||
CODE_SIGN_STYLE = Automatic;
|
||||
COMBINE_HIDPI_IMAGES = YES;
|
||||
DEVELOPMENT_TEAM = 5MC4PNHTX6;
|
||||
GCC_C_LANGUAGE_STANDARD = gnu11;
|
||||
INFOPLIST_FILE = "Tests-macOS/Info.plist";
|
||||
LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/../Frameworks @loader_path/../Frameworks";
|
||||
MACOSX_DEPLOYMENT_TARGET = 10.14;
|
||||
MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
|
||||
MTL_FAST_MATH = YES;
|
||||
PRODUCT_BUNDLE_IDENTIFIER = "com.scinfu.SwiftSoupTests-macOS";
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
SDKROOT = macosx;
|
||||
SWIFT_VERSION = 5.0;
|
||||
};
|
||||
name = Debug;
|
||||
};
|
||||
BB57C2D8222CAF8E008933AA /* Release */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
|
||||
CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
|
||||
CLANG_ENABLE_OBJC_WEAK = YES;
|
||||
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
|
||||
CODE_SIGN_IDENTITY = "-";
|
||||
CODE_SIGN_STYLE = Automatic;
|
||||
COMBINE_HIDPI_IMAGES = YES;
|
||||
DEVELOPMENT_TEAM = 5MC4PNHTX6;
|
||||
GCC_C_LANGUAGE_STANDARD = gnu11;
|
||||
INFOPLIST_FILE = "Tests-macOS/Info.plist";
|
||||
LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/../Frameworks @loader_path/../Frameworks";
|
||||
MACOSX_DEPLOYMENT_TARGET = 10.14;
|
||||
MTL_FAST_MATH = YES;
|
||||
PRODUCT_BUNDLE_IDENTIFIER = "com.scinfu.SwiftSoupTests-macOS";
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
SDKROOT = macosx;
|
||||
SWIFT_VERSION = 5.0;
|
||||
};
|
||||
name = Release;
|
||||
};
|
||||
BD3B5BA71FBED933001FDB3B /* Debug */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
|
@ -1580,6 +1734,15 @@
|
|||
defaultConfigurationIsVisible = 0;
|
||||
defaultConfigurationName = Release;
|
||||
};
|
||||
BB57C2D6222CAF8E008933AA /* Build configuration list for PBXNativeTarget "SwiftSoupTests-macOS" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
BB57C2D7222CAF8E008933AA /* Debug */,
|
||||
BB57C2D8222CAF8E008933AA /* Release */,
|
||||
);
|
||||
defaultConfigurationIsVisible = 0;
|
||||
defaultConfigurationName = Release;
|
||||
};
|
||||
BD3B5BA61FBED933001FDB3B /* Build configuration list for PBXNativeTarget "SwiftSoup-macOS" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
|
|
|
@ -0,0 +1,113 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<Scheme
|
||||
LastUpgradeVersion = "1020"
|
||||
version = "1.3">
|
||||
<BuildAction
|
||||
parallelizeBuildables = "YES"
|
||||
buildImplicitDependencies = "YES">
|
||||
<BuildActionEntries>
|
||||
<BuildActionEntry
|
||||
buildForTesting = "YES"
|
||||
buildForRunning = "YES"
|
||||
buildForProfiling = "YES"
|
||||
buildForArchiving = "YES"
|
||||
buildForAnalyzing = "YES">
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "BD3B5B681FBED933001FDB3B"
|
||||
BuildableName = "SwiftSoup.framework"
|
||||
BlueprintName = "SwiftSoup-macOS"
|
||||
ReferencedContainer = "container:SwiftSoup.xcodeproj">
|
||||
</BuildableReference>
|
||||
</BuildActionEntry>
|
||||
<BuildActionEntry
|
||||
buildForTesting = "YES"
|
||||
buildForRunning = "NO"
|
||||
buildForProfiling = "NO"
|
||||
buildForArchiving = "NO"
|
||||
buildForAnalyzing = "NO">
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "BB57C2CD222CAF8E008933AA"
|
||||
BuildableName = "SwiftSoupTests-macOS.xctest"
|
||||
BlueprintName = "SwiftSoupTests-macOS"
|
||||
ReferencedContainer = "container:SwiftSoup.xcodeproj">
|
||||
</BuildableReference>
|
||||
</BuildActionEntry>
|
||||
</BuildActionEntries>
|
||||
</BuildAction>
|
||||
<TestAction
|
||||
buildConfiguration = "Release"
|
||||
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
|
||||
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
|
||||
shouldUseLaunchSchemeArgsEnv = "YES">
|
||||
<Testables>
|
||||
<TestableReference
|
||||
skipped = "NO">
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "BB57C2CD222CAF8E008933AA"
|
||||
BuildableName = "SwiftSoupTests-macOS.xctest"
|
||||
BlueprintName = "SwiftSoupTests-macOS"
|
||||
ReferencedContainer = "container:SwiftSoup.xcodeproj">
|
||||
</BuildableReference>
|
||||
</TestableReference>
|
||||
</Testables>
|
||||
<MacroExpansion>
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "BD3B5B681FBED933001FDB3B"
|
||||
BuildableName = "SwiftSoup.framework"
|
||||
BlueprintName = "SwiftSoup-macOS"
|
||||
ReferencedContainer = "container:SwiftSoup.xcodeproj">
|
||||
</BuildableReference>
|
||||
</MacroExpansion>
|
||||
<AdditionalOptions>
|
||||
</AdditionalOptions>
|
||||
</TestAction>
|
||||
<LaunchAction
|
||||
buildConfiguration = "Debug"
|
||||
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
|
||||
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
|
||||
launchStyle = "0"
|
||||
useCustomWorkingDirectory = "NO"
|
||||
ignoresPersistentStateOnLaunch = "NO"
|
||||
debugDocumentVersioning = "YES"
|
||||
debugServiceExtension = "internal"
|
||||
allowLocationSimulation = "YES">
|
||||
<MacroExpansion>
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "BD3B5B681FBED933001FDB3B"
|
||||
BuildableName = "SwiftSoup.framework"
|
||||
BlueprintName = "SwiftSoup-macOS"
|
||||
ReferencedContainer = "container:SwiftSoup.xcodeproj">
|
||||
</BuildableReference>
|
||||
</MacroExpansion>
|
||||
<AdditionalOptions>
|
||||
</AdditionalOptions>
|
||||
</LaunchAction>
|
||||
<ProfileAction
|
||||
buildConfiguration = "Release"
|
||||
shouldUseLaunchSchemeArgsEnv = "YES"
|
||||
savedToolIdentifier = ""
|
||||
useCustomWorkingDirectory = "NO"
|
||||
debugDocumentVersioning = "YES">
|
||||
<MacroExpansion>
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "BD3B5B681FBED933001FDB3B"
|
||||
BuildableName = "SwiftSoup.framework"
|
||||
BlueprintName = "SwiftSoup-macOS"
|
||||
ReferencedContainer = "container:SwiftSoup.xcodeproj">
|
||||
</BuildableReference>
|
||||
</MacroExpansion>
|
||||
</ProfileAction>
|
||||
<AnalyzeAction
|
||||
buildConfiguration = "Debug">
|
||||
</AnalyzeAction>
|
||||
<ArchiveAction
|
||||
buildConfiguration = "Release"
|
||||
revealArchiveInOrganizer = "YES">
|
||||
</ArchiveAction>
|
||||
</Scheme>
|
|
@ -20,6 +20,20 @@
|
|||
ReferencedContainer = "container:SwiftSoup.xcodeproj">
|
||||
</BuildableReference>
|
||||
</BuildActionEntry>
|
||||
<BuildActionEntry
|
||||
buildForTesting = "YES"
|
||||
buildForRunning = "NO"
|
||||
buildForProfiling = "NO"
|
||||
buildForArchiving = "NO"
|
||||
buildForAnalyzing = "NO">
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "8CE4181E1DAA54A900240B42"
|
||||
BuildableName = "SwiftSoupTests.xctest"
|
||||
BlueprintName = "SwiftSoupTests"
|
||||
ReferencedContainer = "container:SwiftSoup.xcodeproj">
|
||||
</BuildableReference>
|
||||
</BuildActionEntry>
|
||||
</BuildActionEntries>
|
||||
</BuildAction>
|
||||
<TestAction
|
||||
|
@ -29,9 +43,7 @@
|
|||
shouldUseLaunchSchemeArgsEnv = "YES">
|
||||
<Testables>
|
||||
<TestableReference
|
||||
skipped = "NO"
|
||||
parallelizable = "YES"
|
||||
testExecutionOrdering = "random">
|
||||
skipped = "NO">
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "8CE4181E1DAA54A900240B42"
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>CFBundleDevelopmentRegion</key>
|
||||
<string>$(DEVELOPMENT_LANGUAGE)</string>
|
||||
<key>CFBundleExecutable</key>
|
||||
<string>$(EXECUTABLE_NAME)</string>
|
||||
<key>CFBundleIdentifier</key>
|
||||
<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
|
||||
<key>CFBundleInfoDictionaryVersion</key>
|
||||
<string>6.0</string>
|
||||
<key>CFBundleName</key>
|
||||
<string>$(PRODUCT_NAME)</string>
|
||||
<key>CFBundlePackageType</key>
|
||||
<string>BNDL</string>
|
||||
<key>CFBundleShortVersionString</key>
|
||||
<string>1.0</string>
|
||||
<key>CFBundleVersion</key>
|
||||
<string>1</string>
|
||||
</dict>
|
||||
</plist>
|
|
@ -0,0 +1,39 @@
|
|||
//
|
||||
// ParserBenchmark.swift
|
||||
// SwiftSoupTests
|
||||
//
|
||||
// Created by garth on 2/26/19.
|
||||
// Copyright © 2019 Nabil Chatbi. All rights reserved.
|
||||
//
|
||||
|
||||
import XCTest
|
||||
import SwiftSoup
|
||||
|
||||
/// Benchmarks `SwiftSoup.parse` over the HTML files bundled with this test target.
class ParserBenchmark: XCTestCase {

    enum Const {
        /// Decoded HTML documents found in the test bundle; reloaded by `setUp()`.
        static var corpusHTMLData: [String] = []
        /// How many times each document is parsed inside one measured pass.
        static let repetitions = 5
    }

    /// Loads every bundled HTML resource into `Const.corpusHTMLData`.
    ///
    /// Files that cannot be read are skipped; the remaining data is decoded as UTF-8.
    override func setUp() {
        super.setUp()
        let bundle = Bundle(for: type(of: self))
        // The lookup returns an optional array; fall back to an empty corpus
        // instead of crashing the whole test run on a force-unwrap.
        let urls = bundle.urls(forResourcesWithExtension: ".html", subdirectory: nil) ?? []
        Const.corpusHTMLData = urls
            .compactMap { try? Data(contentsOf: $0) }
            .map { String(decoding: $0, as: UTF8.self) }
    }

    /// Measures parsing each corpus document `Const.repetitions` times.
    ///
    /// Parse errors are reported through `XCTFail` because the `measure`
    /// closure itself cannot throw.
    func testParserPerformance() throws {
        // An empty corpus would otherwise "pass" while measuring nothing.
        XCTAssertFalse(Const.corpusHTMLData.isEmpty, "No HTML corpus files found in the test bundle")
        measure {
            for htmlDoc in Const.corpusHTMLData {
                for _ in 1...Const.repetitions {
                    do {
                        _ = try SwiftSoup.parse(htmlDoc)
                    } catch {
                        // Surface the underlying error so a failure is diagnosable.
                        XCTFail("Exception while parsing HTML: \(error)")
                    }
                }
            }
        }
    }

}
|
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -53,10 +53,12 @@ class CharacterReaderTest: XCTestCase {
|
|||
XCTAssertEqual("e", r.consume())
|
||||
XCTAssertTrue(r.isEmpty())
|
||||
|
||||
XCTAssertEqual(CharacterReader.EOF, r.consume())
|
||||
r.unconsume()
|
||||
XCTAssertTrue(r.isEmpty())
|
||||
XCTAssertEqual(CharacterReader.EOF, r.current())
|
||||
// Indexes beyond the end are not allowed in native indexing
|
||||
//
|
||||
// XCTAssertEqual(CharacterReader.EOF, r.consume())
|
||||
// r.unconsume()
|
||||
// XCTAssertTrue(r.isEmpty())
|
||||
// XCTAssertEqual(CharacterReader.EOF, r.current())
|
||||
}
|
||||
|
||||
func testMark() {
|
||||
|
@ -82,31 +84,31 @@ class CharacterReaderTest: XCTestCase {
|
|||
let input = "blah blah"
|
||||
let r = CharacterReader(input)
|
||||
|
||||
XCTAssertEqual(-1, r.nextIndexOf("x"))
|
||||
XCTAssertEqual(3, r.nextIndexOf("h"))
|
||||
XCTAssertEqual(nil, r.nextIndexOf("x"))
|
||||
XCTAssertEqual(input.index(input.startIndex, offsetBy: 3), r.nextIndexOf("h"))
|
||||
let pull = r.consumeTo("h")
|
||||
XCTAssertEqual("bla", pull)
|
||||
XCTAssertEqual("h", r.consume())
|
||||
XCTAssertEqual(2, r.nextIndexOf("l"))
|
||||
XCTAssertEqual(input.index(input.startIndex, offsetBy: 6), r.nextIndexOf("l"))
|
||||
XCTAssertEqual(" blah", r.consumeToEnd())
|
||||
XCTAssertEqual(-1, r.nextIndexOf("x"))
|
||||
XCTAssertEqual(nil, r.nextIndexOf("x"))
|
||||
}
|
||||
|
||||
func testNextIndexOfString() {
|
||||
let input = "One Two something Two Three Four"
|
||||
let r = CharacterReader(input)
|
||||
|
||||
XCTAssertEqual(-1, r.nextIndexOf("Foo"))
|
||||
XCTAssertEqual(4, r.nextIndexOf("Two"))
|
||||
XCTAssertEqual(nil, r.nextIndexOf("Foo"))
|
||||
XCTAssertEqual(input.index(input.startIndex, offsetBy: 4), r.nextIndexOf("Two"))
|
||||
XCTAssertEqual("One Two ", r.consumeTo("something"))
|
||||
XCTAssertEqual(10, r.nextIndexOf("Two"))
|
||||
XCTAssertEqual(input.index(input.startIndex, offsetBy: 18), r.nextIndexOf("Two"))
|
||||
XCTAssertEqual("something Two Three Four", r.consumeToEnd())
|
||||
XCTAssertEqual(-1, r.nextIndexOf("Two"))
|
||||
XCTAssertEqual(nil, r.nextIndexOf("Two"))
|
||||
}
|
||||
|
||||
func testNextIndexOfUnmatched() {
|
||||
let r = CharacterReader("<[[one]]")
|
||||
XCTAssertEqual(-1, r.nextIndexOf("]]>"))
|
||||
XCTAssertEqual(nil, r.nextIndexOf("]]>"))
|
||||
}
|
||||
|
||||
func testConsumeToChar() {
|
||||
|
|
|
@ -50,15 +50,15 @@ class EntitiesTest: XCTestCase {
|
|||
|
||||
func testXhtml() {
|
||||
//let text = "& > < "";
|
||||
XCTAssertEqual(38, Entities.EscapeMode.xhtml.codepointForName("amp"))
|
||||
XCTAssertEqual(62, Entities.EscapeMode.xhtml.codepointForName("gt"))
|
||||
XCTAssertEqual(60, Entities.EscapeMode.xhtml.codepointForName("lt"))
|
||||
XCTAssertEqual(34, Entities.EscapeMode.xhtml.codepointForName("quot"))
|
||||
XCTAssertEqual(UnicodeScalar(38), Entities.EscapeMode.xhtml.codepointForName("amp"))
|
||||
XCTAssertEqual(UnicodeScalar(62), Entities.EscapeMode.xhtml.codepointForName("gt"))
|
||||
XCTAssertEqual(UnicodeScalar(60), Entities.EscapeMode.xhtml.codepointForName("lt"))
|
||||
XCTAssertEqual(UnicodeScalar(34), Entities.EscapeMode.xhtml.codepointForName("quot"))
|
||||
|
||||
XCTAssertEqual("amp", Entities.EscapeMode.xhtml.nameForCodepoint(38))
|
||||
XCTAssertEqual("gt", Entities.EscapeMode.xhtml.nameForCodepoint(62))
|
||||
XCTAssertEqual("lt", Entities.EscapeMode.xhtml.nameForCodepoint(60))
|
||||
XCTAssertEqual("quot", Entities.EscapeMode.xhtml.nameForCodepoint(34))
|
||||
XCTAssertEqual("amp", Entities.EscapeMode.xhtml.nameForCodepoint(UnicodeScalar(38)!))
|
||||
XCTAssertEqual("gt", Entities.EscapeMode.xhtml.nameForCodepoint(UnicodeScalar(62)!))
|
||||
XCTAssertEqual("lt", Entities.EscapeMode.xhtml.nameForCodepoint(UnicodeScalar(60)!))
|
||||
XCTAssertEqual("quot", Entities.EscapeMode.xhtml.nameForCodepoint(UnicodeScalar(34)!))
|
||||
}
|
||||
|
||||
func testGetByName() {
|
||||
|
|
Loading…
Reference in New Issue