Compare commits

...

10 Commits

Author SHA1 Message Date
Nabil Chatbi b2e20da314 Merge branch 'master' into pr/110 2019-06-06 23:05:57 +02:00
Garth Snyder 33b0fccade Don't use Character.isNewline - Linux doesn't like 2019-03-04 13:36:40 -08:00
Garth Snyder efb95005d1 Avoid actually trimming strings unless they need it 2019-03-03 21:03:11 -08:00
Garth Snyder 72919d7292 Convert all tag sets to constant arrays, use native contains() 2019-03-03 21:03:11 -08:00
Garth Snyder c36d7daf42 Use binary search when handling HTML named entities 2019-03-03 21:03:06 -08:00
Garth Snyder ed4761b42d Convert StringBuilder to String array basis 2019-03-03 21:03:06 -08:00
Garth Snyder f9fd88731e Convert Attributes to use a simple array as backing store 2019-03-03 21:03:06 -08:00
Garth Snyder dbed44d543 Convert CharacterReader to native string indexing 2019-03-03 21:03:06 -08:00
Garth Snyder d987bb6ed8 Set up macOS test target to allow performance testing 2019-03-03 21:03:00 -08:00
Garth Snyder bdffeac923 Add benchmark tests for parser, with HTML corpus 2019-03-03 16:21:55 -08:00
28 changed files with 12463 additions and 732 deletions

View File

@ -119,7 +119,7 @@ open class Attribute {
} }
public func isBooleanAttribute() -> Bool { public func isBooleanAttribute() -> Bool {
return Attribute.booleanAttributes.contains(key) return Attribute.booleanAttributes.contains(key.lowercased())
} }
public func hashCode() -> Int { public func hashCode() -> Int {

View File

@ -25,9 +25,9 @@ open class Attributes: NSCopying {
public static var dataPrefix: String = "data-" public static var dataPrefix: String = "data-"
var attributes: OrderedDictionary<String, Attribute> = OrderedDictionary<String, Attribute>() // Stored by lowercased key, but key case is checked against the copy inside
// linked hash map to preserve insertion order. // the Attribute on retrieval.
// null be default as so many elements have no attributes -- saves a good chunk of memory var attributes: [Attribute] = []
public init() {} public init() {}
@ -37,9 +37,11 @@ open class Attributes: NSCopying {
@return the attribute value if set; or empty string if not set. @return the attribute value if set; or empty string if not set.
@see #hasKey(String) @see #hasKey(String)
*/ */
open func get(key: String) -> String { open func get(key: String) -> String {
let attr: Attribute? = attributes.get(key: key) if let attr = attributes.first(where: { $0.getKey() == key }) {
return attr != nil ? attr!.getValue() : "" return attr.getValue()
}
return ""
} }
/** /**
@ -49,11 +51,8 @@ open class Attributes: NSCopying {
*/ */
open func getIgnoreCase(key: String )throws -> String { open func getIgnoreCase(key: String )throws -> String {
try Validate.notEmpty(string: key) try Validate.notEmpty(string: key)
if let attr = attributes.first(where: { $0.getKey().caseInsensitiveCompare(key) == .orderedSame }) {
for attrKey in (attributes.keySet()) { return attr.getValue()
if attrKey.equalsIgnoreCase(string: key) {
return attributes.get(key: attrKey)!.getValue()
}
} }
return "" return ""
} }
@ -82,11 +81,16 @@ open class Attributes: NSCopying {
} }
/** /**
Set a new attribute, or replace an existing one by key. Set a new attribute, or replace an existing one by (case-sensitive) key.
@param attribute attribute @param attribute attribute
*/ */
open func put(attribute: Attribute) { open func put(attribute: Attribute) {
attributes.put(value: attribute, forKey: attribute.getKey()) let key = attribute.getKey()
if let ix = attributes.firstIndex(where: { $0.getKey() == key }) {
attributes[ix] = attribute
} else {
attributes.append(attribute)
}
} }
/** /**
@ -95,7 +99,8 @@ open class Attributes: NSCopying {
*/ */
open func remove(key: String)throws { open func remove(key: String)throws {
try Validate.notEmpty(string: key) try Validate.notEmpty(string: key)
attributes.remove(key: key) if let ix = attributes.firstIndex(where: { $0.getKey() == key }) {
attributes.remove(at: ix) }
} }
/** /**
@ -104,10 +109,8 @@ open class Attributes: NSCopying {
*/ */
open func removeIgnoreCase(key: String ) throws { open func removeIgnoreCase(key: String ) throws {
try Validate.notEmpty(string: key) try Validate.notEmpty(string: key)
for attrKey in attributes.keySet() { if let ix = attributes.firstIndex(where: { $0.getKey().caseInsensitiveCompare(key) == .orderedSame}) {
if (attrKey.equalsIgnoreCase(string: key)) { attributes.remove(at: ix)
attributes.remove(key: attrKey)
}
} }
} }
@ -117,7 +120,7 @@ open class Attributes: NSCopying {
@return true if key exists, false otherwise @return true if key exists, false otherwise
*/ */
open func hasKey(key: String) -> Bool { open func hasKey(key: String) -> Bool {
return attributes.containsKey(key: key) return attributes.contains(where: { $0.getKey() == key })
} }
/** /**
@ -126,12 +129,7 @@ open class Attributes: NSCopying {
@return true if key exists, false otherwise @return true if key exists, false otherwise
*/ */
open func hasKeyIgnoreCase(key: String) -> Bool { open func hasKeyIgnoreCase(key: String) -> Bool {
for attrKey in attributes.keySet() { return attributes.contains(where: { $0.getKey().caseInsensitiveCompare(key) == .orderedSame})
if (attrKey.equalsIgnoreCase(string: key)) {
return true
}
}
return false
} }
/** /**
@ -139,7 +137,7 @@ open class Attributes: NSCopying {
@return size @return size
*/ */
open func size() -> Int { open func size() -> Int {
return attributes.count//TODO: check retyrn right size return attributes.count
} }
/** /**
@ -147,35 +145,19 @@ open class Attributes: NSCopying {
@param incoming attributes to add to these attributes. @param incoming attributes to add to these attributes.
*/ */
open func addAll(incoming: Attributes?) { open func addAll(incoming: Attributes?) {
guard let incoming = incoming else { guard let incoming = incoming else { return }
return for attr in incoming.attributes {
put(attribute: attr)
} }
if (incoming.size() == 0) {
return
}
attributes.putAll(all: incoming.attributes)
} }
// open func iterator() -> IndexingIterator<Array<Attribute>> {
// if (attributes.isEmpty) {
// let args: [Attribute] = []
// return args.makeIterator()
// }
// return attributes.orderedValues.makeIterator()
// }
/** /**
Get the attributes as a List, for iteration. Do not modify the keys of the attributes via this view, as changes Get the attributes as a List, for iteration. Do not modify the keys of the attributes via this view, as changes
to keys will not be recognised in the containing set. to keys will not be recognised in the containing set.
@return an view of the attributes as a List. @return an view of the attributes as a List.
*/ */
open func asList() -> Array<Attribute> { open func asList() -> [Attribute] {
var list: Array<Attribute> = Array(/*attributes.size()*/) return attributes
for entry in attributes.orderedValues {
list.append(entry)
}
return list
} }
/** /**
@ -183,17 +165,11 @@ open class Attributes: NSCopying {
* starting with {@code data-}. * starting with {@code data-}.
* @return map of custom data attributes. * @return map of custom data attributes.
*/ */
//Map<String, String> open func dataset() -> [String: String] {
open func dataset() -> Dictionary<String, String> { let prefixLength = Attributes.dataPrefix.count
var dataset = Dictionary<String, String>() let pairs = attributes.filter { $0.isDataAttribute() }
for attribute in attributes { .map { ($0.getKey().substring(prefixLength), $0.getValue()) }
let attr = attribute.1 return Dictionary(uniqueKeysWithValues: pairs)
if(attr.isDataAttribute()) {
let key = attr.getKey().substring(Attributes.dataPrefix.count)
dataset[key] = attribute.1.getValue()
}
}
return dataset
} }
/** /**
@ -208,9 +184,9 @@ open class Attributes: NSCopying {
} }
public func html(accum: StringBuilder, out: OutputSettings ) throws { public func html(accum: StringBuilder, out: OutputSettings ) throws {
for attribute in attributes.orderedValues { for attr in attributes {
accum.append(" ") accum.append(" ")
attribute.html(accum: accum, out: out) attr.html(accum: accum, out: out)
} }
} }
@ -226,21 +202,19 @@ open class Attributes: NSCopying {
open func equals(o: AnyObject?) -> Bool { open func equals(o: AnyObject?) -> Bool {
if(o == nil) {return false} if(o == nil) {return false}
if (self === o.self) {return true} if (self === o.self) {return true}
guard let that: Attributes = o as? Attributes else {return false} guard let that = o as? Attributes else {return false}
return (attributes == that.attributes) return (attributes == that.attributes)
} }
/** open func lowercaseAllKeys() {
* Calculates the hashcode of these attributes, by iterating all attributes and summing their hashcodes. for ix in attributes.indices {
* @return calculated hashcode attributes[ix].key = attributes[ix].key.lowercased()
*/ }
open func hashCode() -> Int {
return attributes.hashCode()
} }
public func copy(with zone: NSZone? = nil) -> Any { public func copy(with zone: NSZone? = nil) -> Any {
let clone = Attributes() let clone = Attributes()
clone.attributes = attributes.clone() clone.attributes = attributes
return clone return clone
} }
@ -255,10 +229,7 @@ open class Attributes: NSCopying {
} }
extension Attributes: Sequence { extension Attributes: Sequence {
public func makeIterator() -> AnyIterator<Attribute> { public func makeIterator() -> AnyIterator<Attribute> {
var list = attributes.orderedValues return AnyIterator(attributes.makeIterator())
return AnyIterator { }
return list.count > 0 ? list.removeFirst() : nil
}
}
} }

View File

@ -0,0 +1,95 @@
//
// BinarySearch.swift
// SwiftSoup-iOS
//
// Created by Garth Snyder on 2/28/19.
// Copyright © 2019 Nabil Chatbi. All rights reserved.
//
// Adapted from https://stackoverflow.com/questions/31904396/swift-binary-search-for-standard-array
//
import Foundation
extension Collection {

    /// Generalized binary search algorithm for ordered Collections.
    ///
    /// Behavior is undefined if the collection is not properly sorted, i.e.
    /// if the elements for which `predicate` is true do not all precede the
    /// elements for which it is false.
    ///
    /// This is only O(logN) for RandomAccessCollections; Collections in
    /// general may implement offsetting of indexes as an O(K) operation. (E.g.,
    /// Strings are like this).
    ///
    /// - Note: If you are using this for searching only (not insertion), you
    ///     must always test the element at the returned index to ensure that
    ///     it's a genuine match. If the element is not present in the
    ///     collection, you will still get an index back that represents the
    ///     location where it should be inserted. Also check to be sure the
    ///     returned index isn't `endIndex`, which cannot be subscripted.
    ///
    /// - Parameter predicate: Reports the ordering of a given Element relative
    ///     to the desired Element. Typically, this is <.
    ///
    /// - Returns: Index N such that the predicate is true for all elements up to
    ///     but not including N, and is false for all elements N and beyond.
    ///     For an empty collection this is `startIndex` (== `endIndex`).
    func binarySearch(predicate: (Element) -> Bool) -> Index {
        var low = startIndex
        var high = endIndex
        while low != high {
            // Midpoint is computed as an offset from `low`, so it is always a
            // subscriptable index strictly before `high`.
            let mid = index(low, offsetBy: distance(from: low, to: high) / 2)
            if predicate(self[mid]) {
                low = index(after: mid)
            } else {
                high = mid
            }
        }
        return low
    }

    /// Binary search lookup for ordered Collections using a KeyPath
    /// relative to Element.
    ///
    /// Behavior is undefined if the collection is not properly sorted
    /// (ascending by the value at `keyPath`).
    ///
    /// This is only O(logN) for RandomAccessCollections; Collections in
    /// general may implement offsetting of indexes as an O(K) operation. (E.g.,
    /// Strings are like this).
    ///
    /// Unlike `binarySearch(predicate:)`, the result needs no further
    /// validation by the caller: a non-nil index always refers to an element
    /// whose key equals `value`. When several elements share the key, the
    /// index of the first of them is returned.
    ///
    /// - Parameter value: The key value to look for.
    /// - Parameter keyPath: KeyPath that extracts the Element value on which
    ///     the Collection is presorted. Ordering is presumed to be <, however
    ///     that is defined for the type.
    ///
    /// - Returns: The index of a matching element, or nil if not found. If
    ///     the return value is non-nil, it is always a valid index.
    func indexOfElement<T>(withValue value: T, atKeyPath keyPath: KeyPath<Element, T>) -> Index? where T: Comparable {
        // Lower bound: first position whose key is not less than `value`.
        let ix = binarySearch { $0[keyPath: keyPath] < value }
        // `endIndex` means every key is smaller than `value`; it must not be subscripted.
        guard ix < endIndex else { return nil }
        // The lower bound may land on a larger key when `value` is absent.
        guard self[ix][keyPath: keyPath] == value else { return nil }
        return ix
    }

    /// Binary search lookup that returns the matching element itself.
    ///
    /// Same sorting requirements as `indexOfElement(withValue:atKeyPath:)`.
    ///
    /// - Parameter value: The key value to look for.
    /// - Parameter keyPath: KeyPath that extracts the Element value on which
    ///     the Collection is presorted.
    ///
    /// - Returns: The first element whose key equals `value`, or nil if there
    ///     is no such element.
    func element<T>(withValue value: T, atKeyPath keyPath: KeyPath<Element, T>) -> Element? where T: Comparable {
        return indexOfElement(withValue: value, atKeyPath: keyPath).map { self[$0] }
    }

    /// Binary search lookup that returns every element sharing a key.
    ///
    /// The first match is located by binary search; the end of the run of
    /// equal keys is then found by stepping forward one element at a time.
    /// Same sorting requirements as `indexOfElement(withValue:atKeyPath:)`.
    ///
    /// - Parameter value: The key value to look for.
    /// - Parameter keyPath: KeyPath that extracts the Element value on which
    ///     the Collection is presorted.
    ///
    /// - Returns: All consecutive elements whose key equals `value`, in
    ///     collection order; an empty array if there is no match.
    func elements<T>(withValue value: T, atKeyPath keyPath: KeyPath<Element, T>) -> [Element] where T: Comparable {
        guard let start = indexOfElement(withValue: value, atKeyPath: keyPath) else { return [] }
        var end = index(after: start)
        while end < endIndex && self[end][keyPath: keyPath] == value {
            end = index(after: end)
        }
        return Array(self[start..<end])
    }
}

View File

@ -14,43 +14,47 @@ import Foundation
public final class CharacterReader { public final class CharacterReader {
private static let empty = "" private static let empty = ""
public static let EOF: UnicodeScalar = "\u{FFFF}"//65535 public static let EOF: UnicodeScalar = "\u{FFFF}"//65535
private let input: [UnicodeScalar] private let input: String.UnicodeScalarView
private let length: Int private var pos: String.UnicodeScalarView.Index
private var pos: Int = 0 private var mark: String.UnicodeScalarView.Index
private var mark: Int = 0
//private let stringCache: Array<String?> // holds reused strings in this doc, to lessen garbage //private let stringCache: Array<String?> // holds reused strings in this doc, to lessen garbage
public init(_ input: String) { public init(_ input: String) {
self.input = Array(input.unicodeScalars) self.input = input.unicodeScalars
self.length = self.input.count self.pos = input.startIndex
//stringCache = Array(repeating:nil, count:512) self.mark = input.startIndex
} }
public func getPos() -> Int { public func getPos() -> Int {
return self.pos return input.distance(from: input.startIndex, to: pos)
} }
public func isEmpty() -> Bool { public func isEmpty() -> Bool {
return pos >= length return pos >= input.endIndex
} }
public func current() -> UnicodeScalar { public func current() -> UnicodeScalar {
return (pos >= length) ? CharacterReader.EOF : input[pos] return (pos >= input.endIndex) ? CharacterReader.EOF : input[pos]
} }
@discardableResult @discardableResult
public func consume() -> UnicodeScalar { public func consume() -> UnicodeScalar {
let val = (pos >= length) ? CharacterReader.EOF : input[pos] guard pos < input.endIndex else {
pos += 1 return CharacterReader.EOF
}
let val = input[pos]
pos = input.index(after: pos)
return val return val
} }
public func unconsume() { public func unconsume() {
pos -= 1 guard pos > input.startIndex else { return }
pos = input.index(before: pos)
} }
public func advance() { public func advance() {
pos += 1 guard pos < input.endIndex else { return }
pos = input.index(after: pos)
} }
public func markPos() { public func markPos() {
@ -62,221 +66,169 @@ public final class CharacterReader {
} }
public func consumeAsString() -> String { public func consumeAsString() -> String {
let p = pos guard pos < input.endIndex else { return "" }
pos+=1 let str = String(input[pos])
return String(input[p]) pos = input.index(after: pos)
//return String(input, pos+=1, 1) return str
} }
/** /**
* Returns the number of characters between the current position and the next instance of the input char * Locate the next occurrence of a Unicode scalar
* @param c scan target
* @return offset between current position and next instance of target. -1 if not found.
*/
public func nextIndexOf(_ c: UnicodeScalar) -> Int {
// doesn't handle scanning for surrogates
for i in pos..<length {
if (c == input[i]) {
return i - pos
}
}
return -1
}
/**
* Returns the number of characters between the current position and the next instance of the input sequence
* *
* @param seq scan target * - Parameter c: scan target
* @return offset between current position and next instance of target. -1 if not found. * - Returns: offset between current position and next instance of target. -1 if not found.
*/ */
public func nextIndexOf(_ seq: String) -> Int { public func nextIndexOf(_ c: UnicodeScalar) -> String.UnicodeScalarView.Index? {
// doesn't handle scanning for surrogates // doesn't handle scanning for surrogates
if(seq.isEmpty) {return -1} return input[pos...].firstIndex(of: c)
let startChar: UnicodeScalar = seq.unicodeScalar(0)
for var offset in pos..<length {
// scan to first instance of startchar:
if (startChar != input[offset]) {
offset+=1
while(offset < length && startChar != input[offset]) { offset+=1 }
}
var i = offset + 1
let last = i + seq.unicodeScalars.count-1
if (offset < length && last <= length) {
var j = 1
while i < last && seq.unicodeScalar(j) == input[i] {
j+=1
i+=1
}
// found full sequence
if (i == last) {
return offset - pos
}
}
}
return -1
} }
/**
* Locate the next occurrence of a target string
*
* - Parameter seq: scan target
* - Returns: index of next instance of target. nil if not found.
*/
public func nextIndexOf(_ seq: String) -> String.UnicodeScalarView.Index? {
// doesn't handle scanning for surrogates
var start = pos
let targetScalars = seq.unicodeScalars
guard let firstChar = targetScalars.first else { return pos } // search for "" -> current place
MATCH: while true {
// Match on first scalar
guard let firstCharIx = input[start...].firstIndex(of: firstChar) else { return nil }
var current = firstCharIx
// Then manually match subsequent scalars
for scalar in targetScalars.dropFirst() {
current = input.index(after: current)
guard current < input.endIndex else { return nil }
if input[current] != scalar {
start = input.index(after: firstCharIx)
continue MATCH
}
}
// full match; current is at position of last matching character
return firstCharIx
}
}
public func consumeTo(_ c: UnicodeScalar) -> String { public func consumeTo(_ c: UnicodeScalar) -> String {
let offset = nextIndexOf(c) guard let targetIx = nextIndexOf(c) else {
if (offset != -1) {
let consumed = cacheString(pos, offset)
pos += offset
return consumed
} else {
return consumeToEnd() return consumeToEnd()
} }
let consumed = cacheString(pos, targetIx)
pos = targetIx
return consumed
} }
public func consumeTo(_ seq: String) -> String { public func consumeTo(_ seq: String) -> String {
let offset = nextIndexOf(seq) guard let targetIx = nextIndexOf(seq) else {
if (offset != -1) {
let consumed = cacheString(pos, offset)
pos += offset
return consumed
} else {
return consumeToEnd() return consumeToEnd()
} }
let consumed = cacheString(pos, targetIx)
pos = targetIx
return consumed
} }
public func consumeToAny(_ chars: UnicodeScalar...) -> String { public func consumeToAny(_ chars: UnicodeScalar...) -> String {
return consumeToAny(chars) return consumeToAny(chars)
} }
public func consumeToAny(_ chars: [UnicodeScalar]) -> String { public func consumeToAny(_ chars: [UnicodeScalar]) -> String {
let start: Int = pos let start = pos
let remaining: Int = length while pos < input.endIndex {
let val = input if chars.contains(input[pos]) {
OUTER: while (pos < remaining) { break
if chars.contains(val[pos]) { }
break OUTER pos = input.index(after: pos)
}
// for c in chars {
// if (val[pos] == c){
// break OUTER
// }
// }
pos += 1
} }
return cacheString(start, pos)
return pos > start ? cacheString(start, pos-start) : CharacterReader.empty
} }
public func consumeToAnySorted(_ chars: UnicodeScalar...) -> String { public func consumeToAnySorted(_ chars: UnicodeScalar...) -> String {
return consumeToAnySorted(chars) return consumeToAny(chars)
} }
public func consumeToAnySorted(_ chars: [UnicodeScalar]) -> String { public func consumeToAnySorted(_ chars: [UnicodeScalar]) -> String {
let start = pos return consumeToAny(chars)
let remaining = length
let val = input
while (pos < remaining) {
if chars.contains(val[pos]) {
break
}
pos += 1
}
return pos > start ? cacheString(start, pos-start) : CharacterReader.empty
} }
static let dataTerminators: [UnicodeScalar] = [.Ampersand, .LessThan, TokeniserStateVars.nullScalr]
// read to &, <, or null
public func consumeData() -> String { public func consumeData() -> String {
// &, <, null return consumeToAny(CharacterReader.dataTerminators)
let start = pos
let remaining = length
let val = input
while (pos < remaining) {
let c: UnicodeScalar = val[pos]
if (c == UnicodeScalar.Ampersand || c == UnicodeScalar.LessThan || c == TokeniserStateVars.nullScalr) {
break
}
pos += 1
}
return pos > start ? cacheString(start, pos-start) : CharacterReader.empty
} }
static let tagNameTerminators: [UnicodeScalar] = [.BackslashT, .BackslashN, .BackslashR, .BackslashF, .Space, .Slash, .GreaterThan, TokeniserStateVars.nullScalr]
// read to '\t', '\n', '\r', '\f', ' ', '/', '>', or nullChar
public func consumeTagName() -> String { public func consumeTagName() -> String {
// '\t', '\n', '\r', '\f', ' ', '/', '>', nullChar return consumeToAny(CharacterReader.tagNameTerminators)
let start = pos
let remaining = length
let val = input
while (pos < remaining) {
let c: UnicodeScalar = val[pos]
if (c == UnicodeScalar.BackslashT || c == UnicodeScalar.BackslashN || c == UnicodeScalar.BackslashR || c == UnicodeScalar.BackslashF || c == UnicodeScalar.Space || c == UnicodeScalar.Slash || c == UnicodeScalar.GreaterThan || c == TokeniserStateVars.nullScalr) {
break
}
pos += 1
}
return pos > start ? cacheString(start, pos-start) : CharacterReader.empty
} }
public func consumeToEnd() -> String { public func consumeToEnd() -> String {
let data = cacheString(pos, length-pos) let consumed = cacheString(pos, input.endIndex)
pos = length pos = input.endIndex
return data return consumed
} }
public func consumeLetterSequence() -> String { public func consumeLetterSequence() -> String {
let start = pos let start = pos
while (pos < length) { while pos < input.endIndex {
let c: UnicodeScalar = input[pos] let c = input[pos]
if ((c >= "A" && c <= "Z") || (c >= "a" && c <= "z") || c.isMemberOfCharacterSet(CharacterSet.letters)) { if ((c >= "A" && c <= "Z") || (c >= "a" && c <= "z") || c.isMemberOfCharacterSet(CharacterSet.letters)) {
pos += 1 pos = input.index(after: pos)
} else { } else {
break break
} }
} }
return cacheString(start, pos - start) return cacheString(start, pos)
} }
public func consumeLetterThenDigitSequence() -> String { public func consumeLetterThenDigitSequence() -> String {
let start = pos let start = pos
while (pos < length) { while pos < input.endIndex {
let c = input[pos] let c = input[pos]
if ((c >= "A" && c <= "Z") || (c >= "a" && c <= "z") || c.isMemberOfCharacterSet(CharacterSet.letters)) { if ((c >= "A" && c <= "Z") || (c >= "a" && c <= "z") || c.isMemberOfCharacterSet(CharacterSet.letters)) {
pos += 1 pos = input.index(after: pos)
} else { } else {
break break
} }
} }
while (!isEmpty()) { while pos < input.endIndex {
let c = input[pos] let c = input[pos]
if (c >= "0" && c <= "9") { if (c >= "0" && c <= "9") {
pos += 1 pos = input.index(after: pos)
} else { } else {
break break
} }
} }
return cacheString(start, pos)
return cacheString(start, pos - start)
} }
public func consumeHexSequence() -> String { public func consumeHexSequence() -> String {
let start = pos let start = pos
while (pos < length) { while pos < input.endIndex {
let c = input[pos] let c = input[pos]
if ((c >= "0" && c <= "9") || (c >= "A" && c <= "F") || (c >= "a" && c <= "f")) { if ((c >= "0" && c <= "9") || (c >= "A" && c <= "F") || (c >= "a" && c <= "f")) {
pos+=1 pos = input.index(after: pos)
} else { } else {
break break
} }
} }
return cacheString(start, pos - start) return cacheString(start, pos)
} }
public func consumeDigitSequence() -> String { public func consumeDigitSequence() -> String {
let start = pos let start = pos
while (pos < length) { while pos < input.endIndex {
let c = input[pos] let c = input[pos]
if (c >= "0" && c <= "9") { if (c >= "0" && c <= "9") {
pos+=1 pos = input.index(after: pos)
} else { } else {
break break
} }
} }
return cacheString(start, pos - start) return cacheString(start, pos)
} }
public func matches(_ c: UnicodeScalar) -> Bool { public func matches(_ c: UnicodeScalar) -> Bool {
@ -284,180 +236,85 @@ public final class CharacterReader {
} }
public func matches(_ seq: String) -> Bool { public func matches(_ seq: String, ignoreCase: Bool = false, consume: Bool = false) -> Bool {
let scanLength = seq.unicodeScalars.count var current = pos
if (scanLength > length - pos) { let scalars = seq.unicodeScalars
return false for scalar in scalars {
} guard current < input.endIndex else { return false }
if ignoreCase {
for offset in 0..<scanLength { guard input[current].uppercase == scalar.uppercase else { return false }
if (seq.unicodeScalar(offset) != input[pos+offset]) { } else {
return false guard input[current] == scalar else { return false }
} }
current = input.index(after: current)
}
if consume {
pos = current
} }
return true return true
} }
public func matchesIgnoreCase(_ seq: String ) -> Bool { public func matchesIgnoreCase(_ seq: String ) -> Bool {
return matches(seq, ignoreCase: true)
let scanLength = seq.unicodeScalars.count
if(scanLength == 0) {
return false
}
if (scanLength > length - pos) {
return false
}
for offset in 0..<scanLength {
let upScan: UnicodeScalar = seq.unicodeScalar(offset).uppercase
let upTarget: UnicodeScalar = input[pos+offset].uppercase
if (upScan != upTarget) {
return false
}
}
return true
} }
public func matchesAny(_ seq: UnicodeScalar...) -> Bool { public func matchesAny(_ seq: UnicodeScalar...) -> Bool {
if (isEmpty()) { return matchesAny(seq)
return false }
}
public func matchesAny(_ seq: [UnicodeScalar]) -> Bool {
let c: UnicodeScalar = input[pos] guard pos < input.endIndex else { return false }
for seek in seq { return seq.contains(input[pos])
if (seek == c) {
return true
}
}
return false
} }
public func matchesAnySorted(_ seq: [UnicodeScalar]) -> Bool { public func matchesAnySorted(_ seq: [UnicodeScalar]) -> Bool {
return !isEmpty() && seq.contains(input[pos]) return matchesAny(seq)
} }
public func matchesLetter() -> Bool { public func matchesLetter() -> Bool {
if (isEmpty()) { guard pos < input.endIndex else { return false }
return false let c = input[pos]
}
let c = input[pos]
return (c >= "A" && c <= "Z") || (c >= "a" && c <= "z") || c.isMemberOfCharacterSet(CharacterSet.letters) return (c >= "A" && c <= "Z") || (c >= "a" && c <= "z") || c.isMemberOfCharacterSet(CharacterSet.letters)
} }
public func matchesDigit() -> Bool { public func matchesDigit() -> Bool {
if (isEmpty()) { guard pos < input.endIndex else { return false }
return false let c = input[pos]
} return c >= "0" && c <= "9"
let c = input[pos]
return (c >= "0" && c <= "9")
} }
@discardableResult @discardableResult
public func matchConsume(_ seq: String) -> Bool { public func matchConsume(_ seq: String) -> Bool {
if (matches(seq)) { return matches(seq, consume: true)
pos += seq.unicodeScalars.count
return true
} else {
return false
}
} }
@discardableResult @discardableResult
public func matchConsumeIgnoreCase(_ seq: String) -> Bool { public func matchConsumeIgnoreCase(_ seq: String) -> Bool {
if (matchesIgnoreCase(seq)) { return matches(seq, ignoreCase: true, consume: true)
pos += seq.unicodeScalars.count
return true
} else {
return false
}
} }
public func containsIgnoreCase(_ seq: String ) -> Bool { public func containsIgnoreCase(_ seq: String ) -> Bool {
// used to check presence of </title>, </style>. only finds consistent case. // used to check presence of </title>, </style>. only finds consistent case.
let loScan = seq.lowercased(with: Locale(identifier: "en")) let loScan = seq.lowercased(with: Locale(identifier: "en"))
let hiScan = seq.uppercased(with: Locale(identifier: "eng")) let hiScan = seq.uppercased(with: Locale(identifier: "eng"))
return (nextIndexOf(loScan) > -1) || (nextIndexOf(hiScan) > -1) return nextIndexOf(loScan) != nil || nextIndexOf(hiScan) != nil
} }
public func toString() -> String { public func toString() -> String {
return String(input[pos..<length]) return String(input[pos...])
//return String.unicodescalars(Array(input[pos..<length]))
//return input.string(pos, length - pos)
} }
/** /**
* Caches short strings, as a flywheel pattern, to reduce GC load. Just for this doc, to prevent leaks. * Originally intended as a caching mechanism for strings, but caching doesn't
* <p /> * seem to improve performance. Now just a stub.
* Simplistic, and on hash collisions just falls back to creating a new string, vs a full HashMap with Entry list.
* That saves both having to create objects as hash keys, and running through the entry list, at the expense of
* some more duplicates.
*/ */
private func cacheString(_ start: Int, _ count: Int) -> String { private func cacheString(_ start: String.UnicodeScalarView.Index, _ end: String.UnicodeScalarView.Index) -> String {
return String(input[start..<start+count]) return String(input[start..<end])
// Too Slow
// var cache: [String?] = stringCache
//
// // limit (no cache):
// if (count > CharacterReader.maxCacheLen) {
// return String(val[start..<start+count].flatMap { Character($0) })
// }
//
// // calculate hash:
// var hash: Int = 0
// var offset = start
// for _ in 0..<count {
// let ch = val[offset].value
// hash = Int.addWithOverflow(Int.multiplyWithOverflow(31, hash).0, Int(ch)).0
// offset+=1
// }
//
// // get from cache
// hash = abs(hash)
// let i = hash % cache.count
// let index: Int = abs(i) //Int(hash & Int(cache.count) - 1)
// var cached = cache[index]
//
// if (cached == nil) { // miss, add
// cached = String(val[start..<start+count].flatMap { Character($0) })
// //cached = val.string(start, count)
// cache[Int(index)] = cached
// } else { // hashcode hit, check equality
// if (rangeEquals(start, count, cached!)) { // hit
// return cached!
// } else { // hashcode conflict
// cached = String(val[start..<start+count].flatMap { Character($0) })
// //cached = val.string(start, count)
// cache[index] = cached // update the cache, as recently used strings are more likely to show up again
// }
// }
// return cached!
} }
// /**
// * Check if the value of the provided range equals the string.
// */
// public func rangeEquals(_ start: Int, _ count: Int, _ cached: String) -> Bool {
// if (count == cached.unicodeScalars.count) {
// var count = count
// let one = input
// var i = start
// var j = 0
// while (count != 0) {
// count -= 1
// if (one[i] != cached.unicodeScalar(j) ) {
// return false
// }
// j += 1
// i += 1
// }
// return true
// }
// return false
// }
} }
extension CharacterReader: CustomDebugStringConvertible { extension CharacterReader: CustomDebugStringConvertible {
public var debugDescription: String { public var debugDescription: String {
return self.toString() return toString()
} }
} }

View File

@ -925,9 +925,9 @@ open class Element: Node {
if let textNode = (node as? TextNode) { if let textNode = (node as? TextNode) {
Element.appendNormalisedText(accum, textNode) Element.appendNormalisedText(accum, textNode)
} else if let element = (node as? Element) { } else if let element = (node as? Element) {
if (accum.length > 0 && if !accum.isEmpty &&
(element.isBlock() || element._tag.getName() == "br") && (element.isBlock() || element._tag.getName() == "br") &&
!TextNode.lastCharIsWhitespace(accum)) { !TextNode.lastCharIsWhitespace(accum) {
accum.append(" ") accum.append(" ")
} }
} }
@ -1201,7 +1201,7 @@ open class Element: Node {
override func outerHtmlHead(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings)throws { override func outerHtmlHead(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings)throws {
if (out.prettyPrint() && (_tag.formatAsBlock() || (parent() != nil && parent()!.tag().formatAsBlock()) || out.outline())) { if (out.prettyPrint() && (_tag.formatAsBlock() || (parent() != nil && parent()!.tag().formatAsBlock()) || out.outline())) {
if (accum.length > 0) { if !accum.isEmpty {
indent(accum, depth, out) indent(accum, depth, out)
} }
} }

View File

@ -189,7 +189,7 @@ open class Elements: NSCopying {
open func text()throws->String { open func text()throws->String {
let sb: StringBuilder = StringBuilder() let sb: StringBuilder = StringBuilder()
for element: Element in this { for element: Element in this {
if (sb.length != 0) { if !sb.isEmpty {
sb.append(" ") sb.append(" ")
} }
sb.append(try element.text()) sb.append(try element.text())
@ -216,7 +216,7 @@ open class Elements: NSCopying {
open func html()throws->String { open func html()throws->String {
let sb: StringBuilder = StringBuilder() let sb: StringBuilder = StringBuilder()
for element: Element in this { for element: Element in this {
if (sb.length != 0) { if !sb.isEmpty {
sb.append("\n") sb.append("\n")
} }
sb.append(try element.html()) sb.append(try element.html())
@ -233,7 +233,7 @@ open class Elements: NSCopying {
open func outerHtml()throws->String { open func outerHtml()throws->String {
let sb: StringBuilder = StringBuilder() let sb: StringBuilder = StringBuilder()
for element in this { for element in this {
if (sb.length != 0) { if !sb.isEmpty {
sb.append("\n") sb.append("\n")
} }
sb.append(try element.outerHtml()) sb.append(try element.outerHtml())

View File

@ -18,7 +18,7 @@ public class Entities {
private static let emptyName = "" private static let emptyName = ""
private static let codepointRadix: Int = 36 private static let codepointRadix: Int = 36
public struct EscapeMode: Equatable { public class EscapeMode: Equatable {
/** Restricted entities suitable for XHTML output: lt, gt, amp, and quot only. */ /** Restricted entities suitable for XHTML output: lt, gt, amp, and quot only. */
public static let xhtml: EscapeMode = EscapeMode(string: Entities.xhtml, size: 4, id: 0) public static let xhtml: EscapeMode = EscapeMode(string: Entities.xhtml, size: 4, id: 0)
@ -29,13 +29,19 @@ public class Entities {
fileprivate let value: Int fileprivate let value: Int
// table of named references to their codepoints. sorted so we can binary search. built by BuildEntities. struct NamedCodepoint {
fileprivate var nameKeys: [String] let scalar: UnicodeScalar
fileprivate var codeVals: [Int] // limitation is the few references with multiple characters; those go into multipoints. let name: String
}
// Array of named references, sorted by name for binary search. built by BuildEntities.
// The few entities that map to a multi-codepoint sequence go into multipoints.
fileprivate var entitiesByName: [NamedCodepoint] = []
// table of codepoints to named entities. // Array of entities in first-codepoint order. We don't currently support
fileprivate var codeKeys: [Int] // we don' support multicodepoints to single named value currently // multicodepoints to single named value currently. Lazy because this index
fileprivate var nameVals: [String] // is used only when generating HTML text.
fileprivate lazy var entitiesByCodepoint = entitiesByName.sorted() { a, b in a.scalar < b.scalar }
public static func == (left: EscapeMode, right: EscapeMode) -> Bool { public static func == (left: EscapeMode, right: EscapeMode) -> Bool {
return left.value == right.value return left.value == right.value
@ -46,23 +52,14 @@ public class Entities {
} }
private static let codeDelims: [UnicodeScalar] = [",", ";"] private static let codeDelims: [UnicodeScalar] = [",", ";"]
init(string: String, size: Int, id: Int) { init(string: String, size: Int, id: Int) {
nameKeys = [String](repeating: "", count: size)
codeVals = [Int](repeating: 0, count: size) value = id
codeKeys = [Int](repeating: 0, count: size)
nameVals = [String](repeating: "", count: size)
value = id
//Load()
var i = 0
let reader: CharacterReader = CharacterReader(string) let reader: CharacterReader = CharacterReader(string)
while (!reader.isEmpty()) { entitiesByName.reserveCapacity(size)
// NotNestedLessLess=10913,824;1887 while !reader.isEmpty() {
let name: String = reader.consumeTo("=") let name: String = reader.consumeTo("=")
reader.advance() reader.advance()
let cp1: Int = Int(reader.consumeToAny(EscapeMode.codeDelims), radix: codepointRadix) ?? 0 let cp1: Int = Int(reader.consumeToAny(EscapeMode.codeDelims), radix: codepointRadix) ?? 0
@ -75,100 +72,46 @@ public class Entities {
} else { } else {
cp2 = empty cp2 = empty
} }
let index: Int = Int(reader.consumeTo("\n"), radix: codepointRadix) ?? 0 let _ = Int(reader.consumeTo("\n"), radix: codepointRadix) ?? 0
reader.advance() reader.advance()
nameKeys[i] = name entitiesByName.append(NamedCodepoint(scalar: UnicodeScalar(cp1)!, name: name))
codeVals[i] = cp1
codeKeys[index] = cp1
nameVals[index] = name
if (cp2 != empty) { if (cp2 != empty) {
var s = String() multipoints[name] = [UnicodeScalar(cp1)!, UnicodeScalar(cp2)!]
s.append(Character(UnicodeScalar(cp1)!))
s.append(Character(UnicodeScalar(cp2)!))
multipoints[name] = s
}
i = i + 1
}
}
// init(string: String, size: Int, id: Int) {
// nameKeys = [String](repeating: "", count: size)
// codeVals = [Int](repeating: 0, count: size)
// codeKeys = [Int](repeating: 0, count: size)
// nameVals = [String](repeating: "", count: size)
// value = id
//
// let components = string.components(separatedBy: "\n")
//
// var i = 0
// for entry in components {
// let match = Entities.entityPattern.matcher(in: entry)
// if (match.find()) {
// let name = match.group(1)!
// let cp1 = Int(match.group(2)!, radix: codepointRadix)
// //let cp2 = Int(Int.parseInt(s: match.group(3), radix: codepointRadix))
// let cp2 = match.group(3) != nil ? Int(match.group(3)!, radix: codepointRadix) : empty
// let index = Int(match.group(4)!, radix: codepointRadix)
//
// nameKeys[i] = name
// codeVals[i] = cp1!
// codeKeys[index!] = cp1!
// nameVals[index!] = name
//
// if (cp2 != empty) {
// var s = String()
// s.append(Character(UnicodeScalar(cp1!)!))
// s.append(Character(UnicodeScalar(cp2!)!))
// multipoints[name] = s
// }
// i += 1
// }
// }
// }
public func codepointForName(_ name: String) -> Int {
// for s in nameKeys {
// if s == name {
// return codeVals[nameKeys.index(of: s)!]
// }
// }
guard let index = nameKeys.firstIndex(of: name) else {
return empty
}
return codeVals[index]
}
public func nameForCodepoint(_ codepoint: Int ) -> String {
//let ss = codeKeys.index(of: codepoint)
var index = -1
for s in codeKeys {
if s == codepoint {
index = codeKeys.firstIndex(of: codepoint)!
} }
} }
// Entities should start in name order, but better safe than sorry...
entitiesByName.sort() { a, b in a.name < b.name }
}
if (index >= 0) { // Only returns the first of potentially multiple codepoints
// the results are ordered so lower case versions of same codepoint come after uppercase, and we prefer to emit lower public func codepointForName(_ name: String) -> UnicodeScalar? {
// (and binary search for same item with multi results is undefined let ix = entitiesByName.binarySearch { $0.name < name }
return (index < nameVals.count-1 && codeKeys[index+1] == codepoint) ? guard ix < entitiesByName.endIndex else { return nil }
nameVals[index+1] : nameVals[index] let entity = entitiesByName[ix]
guard entity.name == name else { return nil }
return entity.scalar
}
// Search by first codepoint only
public func nameForCodepoint(_ codepoint: UnicodeScalar ) -> String? {
var ix = entitiesByCodepoint.binarySearch { $0.scalar < codepoint }
var matches: [String] = []
while ix < entitiesByCodepoint.endIndex && entitiesByCodepoint[ix].scalar == codepoint {
matches.append(entitiesByCodepoint[ix].name)
ix = entitiesByCodepoint.index(after: ix)
} }
return emptyName return matches.isEmpty ? nil : matches.sorted().last!
} }
private func size() -> Int { private func size() -> Int {
return nameKeys.count return entitiesByName.count
} }
} }
private static var multipoints: Dictionary<String, String> = Dictionary<String, String>() // name -> multiple character references private static var multipoints: [String: [UnicodeScalar]] = [:] // name -> multiple character references
private init() {
}
/** /**
* Check if the input is a known named entity * Check if the input is a known named entity
@ -176,7 +119,7 @@ public class Entities {
* @return true if a known named entity * @return true if a known named entity
*/ */
public static func isNamedEntity(_ name: String ) -> Bool { public static func isNamedEntity(_ name: String ) -> Bool {
return (EscapeMode.extended.codepointForName(name) != empty) return (EscapeMode.extended.codepointForName(name) != nil)
} }
/** /**
@ -186,17 +129,7 @@ public class Entities {
* @see #isNamedEntity(String) * @see #isNamedEntity(String)
*/ */
public static func isBaseNamedEntity(_ name: String) -> Bool { public static func isBaseNamedEntity(_ name: String) -> Bool {
return EscapeMode.base.codepointForName(name) != empty return EscapeMode.base.codepointForName(name) != nil
}
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '{@literal <}' or '{@literal &}')
* @deprecated does not support characters outside the BMP or multiple character names
*/
public static func getCharacterByName(name: String) -> Character {
return Character.convertFromIntegerLiteral(value: EscapeMode.extended.codepointForName(name))
} }
/** /**
@ -204,30 +137,20 @@ public class Entities {
* @param name entity (e.g. "lt" or "amp") * @param name entity (e.g. "lt" or "amp")
* @return the string value of the character(s) represented by this entity, or "" if not defined * @return the string value of the character(s) represented by this entity, or "" if not defined
*/ */
public static func getByName(name: String) -> String { public static func getByName(name: String) -> String? {
let val = multipoints[name] if let scalars = codepointsForName(name) {
if (val != nil) {return val!} return String(String.UnicodeScalarView(scalars))
let codepoint = EscapeMode.extended.codepointForName(name)
if (codepoint != empty) {
return String(Character(UnicodeScalar(codepoint)!))
} }
return emptyName return nil
} }
public static func codepointsForName(_ name: String, codepoints: inout [UnicodeScalar]) -> Int { public static func codepointsForName(_ name: String) -> [UnicodeScalar]? {
if let scalars = multipoints[name] {
if let val: String = multipoints[name] { return scalars
codepoints[0] = val.unicodeScalar(0) } else if let scalar = EscapeMode.extended.codepointForName(name) {
codepoints[1] = val.unicodeScalar(1) return [scalar]
return 2
} }
return nil
let codepoint = EscapeMode.extended.codepointForName(name)
if (codepoint != empty) {
codepoints[0] = UnicodeScalar(codepoint)!
return 1
}
return 0
} }
public static func escape(_ string: String, _ encode: String.Encoding = .utf8 ) -> String { public static func escape(_ string: String, _ encode: String.Encoding = .utf8 ) -> String {
@ -326,9 +249,9 @@ public class Entities {
} }
private static func appendEncoded(accum: StringBuilder, escapeMode: EscapeMode, codePoint: UnicodeScalar) { private static func appendEncoded(accum: StringBuilder, escapeMode: EscapeMode, codePoint: UnicodeScalar) {
let name = escapeMode.nameForCodepoint(Int(codePoint.value)) if let name = escapeMode.nameForCodepoint(codePoint) {
if (name != emptyName) // ok for identity check // ok for identity check
{accum.append(UnicodeScalar.Ampersand).append(name).append(";") accum.append(UnicodeScalar.Ampersand).append(name).append(";")
} else { } else {
accum.append("&#x").append(String.toHexString(n: Int(codePoint.value)) ).append(";") accum.append("&#x").append(String.toHexString(n: Int(codePoint.value)) ).append(";")
} }

View File

@ -12,21 +12,27 @@ import Foundation
* HTML Tree Builder; creates a DOM from Tokens. * HTML Tree Builder; creates a DOM from Tokens.
*/ */
class HtmlTreeBuilder: TreeBuilder { class HtmlTreeBuilder: TreeBuilder {
// tag searches
public static let TagsSearchInScope: [String] = ["applet", "caption", "html", "table", "td", "th", "marquee", "object"] private enum TagSets {
private static let TagSearchList: [String] = ["ol", "ul"] // tag searches
private static let TagSearchButton: [String] = ["button"] static let inScope = ["applet", "caption", "html", "table", "td", "th", "marquee", "object"]
private static let TagSearchTableScope: [String] = ["html", "table"] static let list = ["ol", "ul"]
private static let TagSearchSelectScope: [String] = ["optgroup", "option"] static let button = ["button"]
private static let TagSearchEndTags: [String] = ["dd", "dt", "li", "option", "optgroup", "p", "rp", "rt"] static let tableScope = ["html", "table"]
private static let TagSearchSpecial: [String] = ["address", "applet", "area", "article", "aside", "base", "basefont", "bgsound", static let selectScope = ["optgroup", "option"]
"blockquote", "body", "br", "button", "caption", "center", "col", "colgroup", "command", "dd", static let endTags = ["dd", "dt", "li", "option", "optgroup", "p", "rp", "rt"]
"details", "dir", "div", "dl", "dt", "embed", "fieldset", "figcaption", "figure", "footer", "form", static let titleTextarea = ["title", "textarea"]
"frame", "frameset", "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html", static let frames = ["iframe", "noembed", "noframes", "style", "xmp"]
"iframe", "img", "input", "isindex", "li", "link", "listing", "marquee", "menu", "meta", "nav",
"noembed", "noframes", "noscript", "object", "ol", "p", "param", "plaintext", "pre", "script", static let special: Set<String> = ["address", "applet", "area", "article", "aside", "base", "basefont", "bgsound",
"section", "select", "style", "summary", "table", "tbody", "td", "textarea", "tfoot", "th", "thead", "blockquote", "body", "br", "button", "caption", "center", "col", "colgroup", "command", "dd",
"title", "tr", "ul", "wbr", "xmp"] "details", "dir", "div", "dl", "dt", "embed", "fieldset", "figcaption", "figure", "footer", "form",
"frame", "frameset", "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html",
"iframe", "img", "input", "isindex", "li", "link", "listing", "marquee", "menu", "meta", "nav",
"noembed", "noframes", "noscript", "object", "ol", "p", "param", "plaintext", "pre", "script",
"section", "select", "style", "summary", "table", "tbody", "td", "textarea", "tfoot", "th", "thead",
"title", "tr", "ul", "wbr", "xmp"]
}
private var _state: HtmlTreeBuilderState = HtmlTreeBuilderState.Initial // the current state private var _state: HtmlTreeBuilderState = HtmlTreeBuilderState.Initial // the current state
private var _originalState: HtmlTreeBuilderState = HtmlTreeBuilderState.Initial // original / marked state private var _originalState: HtmlTreeBuilderState = HtmlTreeBuilderState.Initial // original / marked state
@ -71,19 +77,19 @@ class HtmlTreeBuilder: TreeBuilder {
} }
// initialise the tokeniser state: // initialise the tokeniser state:
let contextTag: String = context.tagName() switch context.tagName() {
if (StringUtil.inString(contextTag, haystack: "title", "textarea")) { case TagSets.titleTextarea:
tokeniser.transition(TokeniserState.Rcdata) tokeniser.transition(TokeniserState.Rcdata)
} else if (StringUtil.inString(contextTag, haystack: "iframe", "noembed", "noframes", "style", "xmp")) { case TagSets.frames:
tokeniser.transition(TokeniserState.Rawtext) tokeniser.transition(TokeniserState.Rawtext)
} else if (contextTag=="script") { case "script":
tokeniser.transition(TokeniserState.ScriptData) tokeniser.transition(TokeniserState.ScriptData)
} else if (contextTag==("noscript")) { case "noscript":
tokeniser.transition(TokeniserState.Data) // if scripting enabled, rawtext tokeniser.transition(TokeniserState.Data) // if scripting enabled, rawtext
} else if (contextTag=="plaintext") { case "plaintext":
tokeniser.transition(TokeniserState.Data) tokeniser.transition(TokeniserState.Data)
} else { default:
tokeniser.transition(TokeniserState.Data) // default tokeniser.transition(TokeniserState.Data)
} }
root = try Element(Tag.valueOf("html", settings), baseUri) root = try Element(Tag.valueOf("html", settings), baseUri)
@ -346,7 +352,7 @@ class HtmlTreeBuilder: TreeBuilder {
for pos in (0..<stack.count).reversed() { for pos in (0..<stack.count).reversed() {
let next: Element = stack[pos] let next: Element = stack[pos]
stack.remove(at: pos) stack.remove(at: pos)
if (StringUtil.inString(next.nodeName(), elNames)) { if elNames.contains(next.nodeName()) {
break break
} }
} }
@ -381,7 +387,8 @@ class HtmlTreeBuilder: TreeBuilder {
private func clearStackToContext(_ nodeNames: [String]) { private func clearStackToContext(_ nodeNames: [String]) {
for pos in (0..<stack.count).reversed() { for pos in (0..<stack.count).reversed() {
let next: Element = stack[pos] let next: Element = stack[pos]
if (StringUtil.inString(next.nodeName(), nodeNames) || next.nodeName()=="html") { let nextName = next.nodeName()
if nodeNames.contains(nextName) || nextName == "html" {
break break
} else { } else {
stack.remove(at: pos) stack.remove(at: pos)
@ -482,25 +489,21 @@ class HtmlTreeBuilder: TreeBuilder {
} }
} }
// todo: tidy up in specific scope methods private func inSpecificScope(_ targetName: String, _ baseTypes: [String], _ extraTypes: [String]? = nil)throws->Bool {
private var specificScopeTarget: [String?] = [nil] return try inSpecificScope([targetName], baseTypes, extraTypes)
private func inSpecificScope(_ targetName: String, _ baseTypes: [String], _ extraTypes: [String]?)throws->Bool {
specificScopeTarget[0] = targetName
return try inSpecificScope(specificScopeTarget, baseTypes, extraTypes)
} }
private func inSpecificScope(_ targetNames: [String?], _ baseTypes: [String], _ extraTypes: [String]?)throws->Bool { private func inSpecificScope(_ targetNames: [String], _ baseTypes: [String], _ extraTypes: [String]? = nil)throws->Bool {
for pos in (0..<stack.count).reversed() { for pos in (0..<stack.count).reversed() {
let el: Element = stack[pos] let el = stack[pos]
let elName: String = el.nodeName() let elName = el.nodeName()
if (StringUtil.inString(elName, targetNames)) { if targetNames.contains(elName) {
return true return true
} }
if (StringUtil.inString(elName, baseTypes)) { if baseTypes.contains(elName) {
return false return false
} }
if (extraTypes != nil && StringUtil.inString(elName, extraTypes!)) { if let extraTypes = extraTypes, extraTypes.contains(elName) {
return false return false
} }
} }
@ -509,39 +512,34 @@ class HtmlTreeBuilder: TreeBuilder {
} }
func inScope(_ targetNames: [String])throws->Bool { func inScope(_ targetNames: [String])throws->Bool {
return try inSpecificScope(targetNames, HtmlTreeBuilder.TagsSearchInScope, nil) return try inSpecificScope(targetNames, TagSets.inScope)
} }
func inScope(_ targetName: String)throws->Bool { func inScope(_ targetName: String, _ extras: [String]? = nil)throws->Bool {
return try inScope(targetName, nil) return try inSpecificScope(targetName, TagSets.inScope, extras)
}
func inScope(_ targetName: String, _ extras: [String]?)throws->Bool {
return try inSpecificScope(targetName, HtmlTreeBuilder.TagsSearchInScope, extras)
// todo: in mathml namespace: mi, mo, mn, ms, mtext annotation-xml // todo: in mathml namespace: mi, mo, mn, ms, mtext annotation-xml
// todo: in svg namespace: forignOjbect, desc, title // todo: in svg namespace: forignOjbect, desc, title
} }
func inListItemScope(_ targetName: String)throws->Bool { func inListItemScope(_ targetName: String)throws->Bool {
return try inScope(targetName, HtmlTreeBuilder.TagSearchList) return try inScope(targetName, TagSets.list)
} }
func inButtonScope(_ targetName: String)throws->Bool { func inButtonScope(_ targetName: String)throws->Bool {
return try inScope(targetName, HtmlTreeBuilder.TagSearchButton) return try inScope(targetName, TagSets.button)
} }
func inTableScope(_ targetName: String)throws->Bool { func inTableScope(_ targetName: String)throws->Bool {
return try inSpecificScope(targetName, HtmlTreeBuilder.TagSearchTableScope, nil) return try inSpecificScope(targetName, TagSets.tableScope)
} }
func inSelectScope(_ targetName: String)throws->Bool { func inSelectScope(_ targetName: String)throws->Bool {
for pos in (0..<stack.count).reversed() { for pos in (0..<stack.count).reversed() {
let el: Element = stack[pos] let elName = stack[pos].nodeName()
let elName: String = el.nodeName() if elName == targetName {
if (elName.equals(targetName)) {
return true return true
} }
if (!StringUtil.inString(elName, HtmlTreeBuilder.TagSearchSelectScope)) { // all elements except if !TagSets.selectScope.contains(elName) {
return false return false
} }
} }
@ -595,23 +593,28 @@ class HtmlTreeBuilder: TreeBuilder {
process, then the UA must perform the above steps as if that element was not in the above list. process, then the UA must perform the above steps as if that element was not in the above list.
*/ */
func generateImpliedEndTags(_ excludeTag: String?) { func generateImpliedEndTags(_ excludeTag: String? = nil) {
// Is this correct? I get the sense that something is supposed to happen here
while ((excludeTag != nil && !currentElement()!.nodeName().equals(excludeTag!)) && // even if excludeTag == nil. But the original code doesn't seem to do that. -GS
StringUtil.inString(currentElement()!.nodeName(), HtmlTreeBuilder.TagSearchEndTags)) { //
pop() // while ((excludeTag != nil && !currentElement()!.nodeName().equals(excludeTag!)) &&
// StringUtil.inString(currentElement()!.nodeName(), HtmlTreeBuilder.TagSearchEndTags)) {
// pop()
// }
guard let excludeTag = excludeTag else { return }
while true {
let nodeName = currentElement()!.nodeName()
guard nodeName != excludeTag else { return }
guard TagSets.endTags.contains(nodeName) else { return }
pop()
} }
} }
func generateImpliedEndTags() {
generateImpliedEndTags(nil)
}
func isSpecial(_ el: Element) -> Bool { func isSpecial(_ el: Element) -> Bool {
// todo: mathml's mi, mo, mn // todo: mathml's mi, mo, mn
// todo: svg's foreigObject, desc, title // todo: svg's foreigObject, desc, title
let name: String = el.nodeName() let name: String = el.nodeName()
return StringUtil.inString(name, HtmlTreeBuilder.TagSearchSpecial) return TagSets.special.contains(name)
} }
func lastFormattingElement() -> Element? { func lastFormattingElement() -> Element? {
@ -769,3 +772,10 @@ class HtmlTreeBuilder: TreeBuilder {
} }
} }
} }
fileprivate func ~= (pattern: [String], value: String) -> Bool {
return pattern.contains(value)
}

View File

@ -36,6 +36,29 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
case AfterAfterBody case AfterAfterBody
case AfterAfterFrameset case AfterAfterFrameset
case ForeignContent case ForeignContent
private enum TagSets {
static let outer = ["head", "body", "html", "br"]
static let outer2 = ["body", "html", "br"]
static let outer3 = ["body", "html"]
static let baseEtc = ["base", "basefont", "bgsound", "command", "link"]
static let baseEtc2 = ["basefont", "bgsound", "link", "meta", "noframes", "style"]
static let baseEtc3 = ["base", "basefont", "bgsound", "link", "meta", "noframes", "script", "style", "title"]
static let headNoscript = ["head", "noscript"]
static let table = ["table", "tbody", "tfoot", "thead", "tr"]
static let tableSections = ["tbody", "tfoot", "thead"]
static let tableMix = ["body", "caption", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr"]
static let tableMix2 = ["body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr"]
static let tableMix3 = ["caption", "col", "colgroup", "tbody", "tfoot", "thead"]
static let tableMix4 = ["body", "caption", "col", "colgroup", "html", "td", "th", "tr"]
static let tableMix5 = ["caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr"]
static let tableMix6 = ["body", "caption", "col", "colgroup", "html", "td", "th"]
static let tableMix7 = ["body", "caption", "col", "colgroup", "html"]
static let tableMix8 = ["caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"]
static let tableRowsAndCols = ["caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr"]
static let thTd = ["th", "td"]
static let inputKeygenTextarea = ["input", "keygen", "textarea"]
}
private static let nullString: String = "\u{0000}" private static let nullString: String = "\u{0000}"
@ -83,10 +106,10 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
try tb.insert(t.asComment()) try tb.insert(t.asComment())
} else if (HtmlTreeBuilderState.isWhitespace(t)) { } else if (HtmlTreeBuilderState.isWhitespace(t)) {
return true // ignore whitespace return true // ignore whitespace
} else if (t.isStartTag() && (t.asStartTag().normalName()?.equals("html"))!) { } else if t.startTagNormalName() == "html" {
try tb.insert(t.asStartTag()) try tb.insert(t.asStartTag())
tb.transition(.BeforeHead) tb.transition(.BeforeHead)
} else if (t.isEndTag() && (StringUtil.inString(t.asEndTag().normalName()!, haystack: "head", "body", "html", "br"))) { } else if let nName = t.endTagNormalName(), TagSets.outer.contains(nName) {
return try anythingElse(t, tb) return try anythingElse(t, tb)
} else if (t.isEndTag()) { } else if (t.isEndTag()) {
tb.error(self) tb.error(self)
@ -103,13 +126,13 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
} else if (t.isDoctype()) { } else if (t.isDoctype()) {
tb.error(self) tb.error(self)
return false return false
} else if (t.isStartTag() && (t.asStartTag().normalName()?.equals("html"))!) { } else if t.startTagNormalName() == "html" {
return try HtmlTreeBuilderState.InBody.process(t, tb) // does not transition return try HtmlTreeBuilderState.InBody.process(t, tb) // does not transition
} else if (t.isStartTag() && (t.asStartTag().normalName()?.equals("head"))!) { } else if t.startTagNormalName() == "head" {
let head: Element = try tb.insert(t.asStartTag()) let head: Element = try tb.insert(t.asStartTag())
tb.setHeadElement(head) tb.setHeadElement(head)
tb.transition(.InHead) tb.transition(.InHead)
} else if (t.isEndTag() && (StringUtil.inString(t.asEndTag().normalName()!, haystack: "head", "body", "html", "br"))) { } else if let nName = t.endTagNormalName(), TagSets.outer.contains(nName) {
try tb.processStartTag("head") try tb.processStartTag("head")
return try tb.process(t) return try tb.process(t)
} else if (t.isEndTag()) { } else if (t.isEndTag()) {
@ -142,7 +165,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
var name: String = start.normalName()! var name: String = start.normalName()!
if (name.equals("html")) { if (name.equals("html")) {
return try HtmlTreeBuilderState.InBody.process(t, tb) return try HtmlTreeBuilderState.InBody.process(t, tb)
} else if (StringUtil.inString(name, haystack: "base", "basefont", "bgsound", "command", "link")) { } else if TagSets.baseEtc.contains(name) {
let el: Element = try tb.insertEmpty(start) let el: Element = try tb.insertEmpty(start)
// jsoup special: update base the frist time it is seen // jsoup special: update base the frist time it is seen
if (name.equals("base") && el.hasAttr("href")) { if (name.equals("base") && el.hasAttr("href")) {
@ -153,7 +176,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
// todo: charset switches // todo: charset switches
} else if (name.equals("title")) { } else if (name.equals("title")) {
try HtmlTreeBuilderState.handleRcData(start, tb) try HtmlTreeBuilderState.handleRcData(start, tb)
} else if (StringUtil.inString(name, haystack: "noframes", "style")) { } else if name == "noframes" || name == "style" {
try HtmlTreeBuilderState.handleRawtext(start, tb) try HtmlTreeBuilderState.handleRawtext(start, tb)
} else if (name.equals("noscript")) { } else if (name.equals("noscript")) {
// else if noscript && scripting flag = true: rawtext (jsoup doesn't run script, to handle as noscript) // else if noscript && scripting flag = true: rawtext (jsoup doesn't run script, to handle as noscript)
@ -179,7 +202,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
if (name?.equals("head"))! { if (name?.equals("head"))! {
tb.pop() tb.pop()
tb.transition(.AfterHead) tb.transition(.AfterHead)
} else if (name != nil && StringUtil.inString(name!, haystack: "body", "html", "br")) { } else if let name = name, TagSets.outer2.contains(name) {
return try anythingElse(t, tb) return try anythingElse(t, tb)
} else { } else {
tb.error(self) tb.error(self)
@ -198,17 +221,16 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
} }
if (t.isDoctype()) { if (t.isDoctype()) {
tb.error(self) tb.error(self)
} else if (t.isStartTag() && (t.asStartTag().normalName()?.equals("html"))!) { } else if t.startTagNormalName() == "html" {
return try tb.process(t, .InBody) return try tb.process(t, .InBody)
} else if (t.isEndTag() && (t.asEndTag().normalName()?.equals("noscript"))!) { } else if t.endTagNormalName() == "noscript" {
tb.pop() tb.pop()
tb.transition(.InHead) tb.transition(.InHead)
} else if (HtmlTreeBuilderState.isWhitespace(t) || t.isComment() || (t.isStartTag() && StringUtil.inString(t.asStartTag().normalName()!, } else if HtmlTreeBuilderState.isWhitespace(t) || t.isComment() || (t.isStartTag() && TagSets.baseEtc2.contains(t.asStartTag().normalName()!)) {
haystack: "basefont", "bgsound", "link", "meta", "noframes", "style"))) {
return try tb.process(t, .InHead) return try tb.process(t, .InHead)
} else if (t.isEndTag() && (t.asEndTag().normalName()?.equals("br"))!) { } else if t.endTagNormalName() == "br" {
return try anythingElse(t, tb) return try anythingElse(t, tb)
} else if ((t.isStartTag() && StringUtil.inString(t.asStartTag().normalName()!, haystack: "head", "noscript")) || t.isEndTag()) { } else if (t.isStartTag() && TagSets.headNoscript.contains(t.asStartTag().normalName()!)) || t.isEndTag() {
tb.error(self) tb.error(self)
return false return false
} else { } else {
@ -241,7 +263,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
} else if (name.equals("frameset")) { } else if (name.equals("frameset")) {
try tb.insert(startTag) try tb.insert(startTag)
tb.transition(.InFrameset) tb.transition(.InFrameset)
} else if (StringUtil.inString(name, haystack: "base", "basefont", "bgsound", "link", "meta", "noframes", "script", "style", "title")) { } else if TagSets.baseEtc3.contains(name) {
tb.error(self) tb.error(self)
let head: Element = tb.getHeadElement()! let head: Element = tb.getHeadElement()!
tb.push(head) tb.push(head)
@ -254,7 +276,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
try anythingElse(t, tb) try anythingElse(t, tb)
} }
} else if (t.isEndTag()) { } else if (t.isEndTag()) {
if (StringUtil.inString(t.asEndTag().normalName()!, haystack: "body", "html")) { if TagSets.outer3.contains(t.asEndTag().normalName()!) {
try anythingElse(t, tb) try anythingElse(t, tb)
} else { } else {
tb.error(self) tb.error(self)
@ -853,7 +875,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
func anythingElse(_ t: Token, _ tb: HtmlTreeBuilder)throws->Bool { func anythingElse(_ t: Token, _ tb: HtmlTreeBuilder)throws->Bool {
tb.error(self) tb.error(self)
var processed: Bool var processed: Bool
if (tb.currentElement() != nil && StringUtil.inString(tb.currentElement()!.nodeName(), haystack: "table", "tbody", "tfoot", "thead", "tr")) { if let cur = tb.currentElement(), TagSets.table.contains(cur.nodeName()) {
tb.setFosterInserts(true) tb.setFosterInserts(true)
processed = try tb.process(t, .InBody) processed = try tb.process(t, .InBody)
tb.setFosterInserts(false) tb.setFosterInserts(false)
@ -889,11 +911,11 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
} else if (name.equals("col")) { } else if (name.equals("col")) {
try tb.processStartTag("colgroup") try tb.processStartTag("colgroup")
return try tb.process(t) return try tb.process(t)
} else if (StringUtil.inString(name, haystack: "tbody", "tfoot", "thead")) { } else if TagSets.tableSections.contains(name) {
tb.clearStackToTableContext() tb.clearStackToTableContext()
try tb.insert(startTag) try tb.insert(startTag)
tb.transition(.InTableBody) tb.transition(.InTableBody)
} else if (StringUtil.inString(name, haystack: "td", "th", "tr")) { } else if ["td", "th", "tr"].contains(name) {
try tb.processStartTag("tbody") try tb.processStartTag("tbody")
return try tb.process(t) return try tb.process(t)
} else if (name.equals("table")) { } else if (name.equals("table")) {
@ -901,7 +923,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
let processed: Bool = try tb.processEndTag("table") let processed: Bool = try tb.processEndTag("table")
if (processed) // only ignored if in fragment if (processed) // only ignored if in fragment
{return try tb.process(t)} {return try tb.process(t)}
} else if (StringUtil.inString(name, haystack: "style", "script")) { } else if ["style", "script"].contains(name) {
return try tb.process(t, .InHead) return try tb.process(t, .InHead)
} else if (name.equals("input")) { } else if (name.equals("input")) {
if (!startTag._attributes.get(key: "type").equalsIgnoreCase(string: "hidden")) { if (!startTag._attributes.get(key: "type").equalsIgnoreCase(string: "hidden")) {
@ -932,8 +954,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
tb.popStackToClose("table") tb.popStackToClose("table")
} }
tb.resetInsertionMode() tb.resetInsertionMode()
} else if (StringUtil.inString(name, } else if TagSets.tableMix.contains(name) {
haystack: "body", "caption", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr")) {
tb.error(self) tb.error(self)
return false return false
} else { } else {
@ -970,7 +991,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
if (!HtmlTreeBuilderState.isWhitespace(character)) { if (!HtmlTreeBuilderState.isWhitespace(character)) {
// InTable anything else section: // InTable anything else section:
tb.error(self) tb.error(self)
if (tb.currentElement() != nil && StringUtil.inString(tb.currentElement()!.nodeName(), haystack: "table", "tbody", "tfoot", "thead", "tr")) { if tb.currentElement() != nil && TagSets.table.contains(tb.currentElement()!.nodeName()) {
tb.setFosterInserts(true) tb.setFosterInserts(true)
try tb.process(Token.Char().data(character), .InBody) try tb.process(Token.Char().data(character), .InBody)
tb.setFosterInserts(false) tb.setFosterInserts(false)
@ -988,7 +1009,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
} }
return true return true
case .InCaption: case .InCaption:
if (t.isEndTag() && t.asEndTag().normalName()!.equals("caption")) { if t.endTagNormalName() == "caption" {
let endTag: Token.EndTag = t.asEndTag() let endTag: Token.EndTag = t.asEndTag()
let name: String? = endTag.normalName() let name: String? = endTag.normalName()
if (try name != nil && !tb.inTableScope(name!)) { if (try name != nil && !tb.inTableScope(name!)) {
@ -1003,18 +1024,21 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
tb.clearFormattingElementsToLastMarker() tb.clearFormattingElementsToLastMarker()
tb.transition(.InTable) tb.transition(.InTable)
} }
} else if (( } else if (t.isStartTag() && TagSets.tableRowsAndCols.contains(t.asStartTag().normalName()!)) ||
t.isStartTag() && StringUtil.inString(t.asStartTag().normalName()!, (t.isEndTag() && t.asEndTag().normalName()!.equals("table"))
haystack: "caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr") || {
t.isEndTag() && t.asEndTag().normalName()!.equals("table")) // Note: original code relies on && precedence being higher than ||
) { //
// if ((t.isStartTag() && StringUtil.inString(t.asStartTag().normalName()!,
// haystack: "caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr") ||
// t.isEndTag() && t.asEndTag().normalName()!.equals("table"))) {
tb.error(self) tb.error(self)
let processed: Bool = try tb.processEndTag("caption") let processed: Bool = try tb.processEndTag("caption")
if (processed) { if (processed) {
return try tb.process(t) return try tb.process(t)
} }
} else if (t.isEndTag() && StringUtil.inString(t.asEndTag().normalName()!, } else if let nName = t.endTagNormalName(), TagSets.tableMix2.contains(nName) {
haystack: "body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr")) {
tb.error(self) tb.error(self)
return false return false
} else { } else {
@ -1102,11 +1126,11 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
tb.clearStackToTableBodyContext() tb.clearStackToTableBodyContext()
try tb.insert(startTag) try tb.insert(startTag)
tb.transition(.InRow) tb.transition(.InRow)
} else if (StringUtil.inString(name, haystack: "th", "td")) { } else if let name = name, TagSets.thTd.contains(name) {
tb.error(self) tb.error(self)
try tb.processStartTag("tr") try tb.processStartTag("tr")
return try tb.process(startTag) return try tb.process(startTag)
} else if (StringUtil.inString(name, haystack: "caption", "col", "colgroup", "tbody", "tfoot", "thead")) { } else if let name = name, TagSets.tableMix3.contains(name) {
return try exitTableBody(t, tb) return try exitTableBody(t, tb)
} else { } else {
return try anythingElse(t, tb) return try anythingElse(t, tb)
@ -1115,8 +1139,8 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
case .EndTag: case .EndTag:
let endTag: Token.EndTag = t.asEndTag() let endTag: Token.EndTag = t.asEndTag()
let name = endTag.normalName() let name = endTag.normalName()
if (StringUtil.inString(name, haystack: "tbody", "tfoot", "thead")) { if let name = name, TagSets.tableSections.contains(name) {
if (try !tb.inTableScope(name!)) { if (try !tb.inTableScope(name)) {
tb.error(self) tb.error(self)
return false return false
} else { } else {
@ -1126,7 +1150,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
} }
} else if ("table".equals(name)) { } else if ("table".equals(name)) {
return try exitTableBody(t, tb) return try exitTableBody(t, tb)
} else if (StringUtil.inString(name, haystack: "body", "caption", "col", "colgroup", "html", "td", "th", "tr")) { } else if let name = name, TagSets.tableMix4.contains(name) {
tb.error(self) tb.error(self)
return false return false
} else { } else {
@ -1155,12 +1179,12 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
let startTag: Token.StartTag = t.asStartTag() let startTag: Token.StartTag = t.asStartTag()
let name: String? = startTag.normalName() let name: String? = startTag.normalName()
if (StringUtil.inString(name, haystack: "th", "td")) { if let name = name, TagSets.thTd.contains(name) {
tb.clearStackToTableRowContext() tb.clearStackToTableRowContext()
try tb.insert(startTag) try tb.insert(startTag)
tb.transition(.InCell) tb.transition(.InCell)
tb.insertMarkerToFormattingElements() tb.insertMarkerToFormattingElements()
} else if (StringUtil.inString(name, haystack: "caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr")) { } else if let name = name, TagSets.tableMix5.contains(name) {
return try handleMissingTr(t, tb) return try handleMissingTr(t, tb)
} else { } else {
return try anythingElse(t, tb) return try anythingElse(t, tb)
@ -1179,14 +1203,14 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
tb.transition(.InTableBody) tb.transition(.InTableBody)
} else if ("table".equals(name)) { } else if ("table".equals(name)) {
return try handleMissingTr(t, tb) return try handleMissingTr(t, tb)
} else if (StringUtil.inString(name, haystack: "tbody", "tfoot", "thead")) { } else if let name = name, TagSets.tableSections.contains(name) {
if (try !tb.inTableScope(name!)) { if (try !tb.inTableScope(name)) {
tb.error(self) tb.error(self)
return false return false
} }
try tb.processEndTag("tr") try tb.processEndTag("tr")
return try tb.process(t) return try tb.process(t)
} else if (StringUtil.inString(name, haystack: "body", "caption", "col", "colgroup", "html", "td", "th")) { } else if let name = name, TagSets.tableMix6.contains(name) {
tb.error(self) tb.error(self)
return false return false
} else { } else {
@ -1213,24 +1237,24 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
let endTag: Token.EndTag = t.asEndTag() let endTag: Token.EndTag = t.asEndTag()
let name: String? = endTag.normalName() let name: String? = endTag.normalName()
if (StringUtil.inString(name, haystack: "td", "th")) { if let name = name, TagSets.thTd.contains(name) {
if (try !tb.inTableScope(name!)) { if (try !tb.inTableScope(name)) {
tb.error(self) tb.error(self)
tb.transition(.InRow) // might not be in scope if empty: <td /> and processing fake end tag tb.transition(.InRow) // might not be in scope if empty: <td /> and processing fake end tag
return false return false
} }
tb.generateImpliedEndTags() tb.generateImpliedEndTags()
if (!name!.equals(tb.currentElement()?.nodeName())) { if (!name.equals(tb.currentElement()?.nodeName())) {
tb.error(self) tb.error(self)
} }
tb.popStackToClose(name!) tb.popStackToClose(name)
tb.clearFormattingElementsToLastMarker() tb.clearFormattingElementsToLastMarker()
tb.transition(.InRow) tb.transition(.InRow)
} else if (StringUtil.inString(name, haystack: "body", "caption", "col", "colgroup", "html")) { } else if let name = name, TagSets.tableMix7.contains(name) {
tb.error(self) tb.error(self)
return false return false
} else if (StringUtil.inString(name, haystack: "table", "tbody", "tfoot", "thead", "tr")) { } else if let name = name, TagSets.table.contains(name) {
if (try !tb.inTableScope(name!)) { if (try !tb.inTableScope(name)) {
tb.error(self) tb.error(self)
return false return false
} }
@ -1239,9 +1263,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
} else { } else {
return try anythingElse(t, tb) return try anythingElse(t, tb)
} }
} else if (t.isStartTag() && } else if let nName = t.startTagNormalName(), TagSets.tableRowsAndCols.contains(nName) {
StringUtil.inString(t.asStartTag().normalName(),
haystack: "caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr")) {
if (try !(tb.inTableScope("td") || tb.inTableScope("th"))) { if (try !(tb.inTableScope("td") || tb.inTableScope("th"))) {
tb.error(self) tb.error(self)
return false return false
@ -1293,7 +1315,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
} else if ("select".equals(name)) { } else if ("select".equals(name)) {
tb.error(self) tb.error(self)
return try tb.processEndTag("select") return try tb.processEndTag("select")
} else if (StringUtil.inString(name, haystack: "input", "keygen", "textarea")) { } else if let name = name, TagSets.inputKeygenTextarea.contains(name) {
tb.error(self) tb.error(self)
if (try !tb.inSelectScope("select")) { if (try !tb.inSelectScope("select")) {
return false // frag return false // frag
@ -1346,17 +1368,17 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
} }
return true return true
case .InSelectInTable: case .InSelectInTable:
if (t.isStartTag() && StringUtil.inString(t.asStartTag().normalName(), haystack: "caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th")) { if let nName = t.startTagNormalName(), TagSets.tableMix8.contains(nName) {
tb.error(self) tb.error(self)
try tb.processEndTag("select") try tb.processEndTag("select")
return try tb.process(t) return try tb.process(t)
} else if (t.isEndTag() && StringUtil.inString(t.asEndTag().normalName(), haystack: "caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th")) { } else if let nName = t.endTagNormalName(), TagSets.tableMix8.contains(nName) {
tb.error(self) tb.error(self)
if (try t.asEndTag().normalName() != nil && tb.inTableScope(t.asEndTag().normalName()!)) { if try tb.inTableScope(nName) {
try tb.processEndTag("select") try tb.processEndTag("select")
return try (tb.process(t)) return try (tb.process(t))
} else { } else {
return false return false
} }
} else { } else {
return try tb.process(t, .InSelect) return try tb.process(t, .InSelect)
@ -1369,9 +1391,9 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
} else if (t.isDoctype()) { } else if (t.isDoctype()) {
tb.error(self) tb.error(self)
return false return false
} else if (t.isStartTag() && "html".equals(t.asStartTag().normalName())) { } else if t.startTagNormalName() == "html" {
return try tb.process(t, .InBody) return try tb.process(t, .InBody)
} else if (t.isEndTag() && "html".equals(t.asEndTag().normalName())) { } else if t.endTagNormalName() == "html" {
if (tb.isFragmentParsing()) { if (tb.isFragmentParsing()) {
tb.error(self) tb.error(self)
return false return false
@ -1410,7 +1432,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
tb.error(self) tb.error(self)
return false return false
} }
} else if (t.isEndTag() && "frameset".equals(t.asEndTag().normalName())) { } else if t.endTagNormalName() == "frameset" {
if ("html".equals(tb.currentElement()?.nodeName())) { // frag if ("html".equals(tb.currentElement()?.nodeName())) { // frag
tb.error(self) tb.error(self)
return false return false
@ -1439,11 +1461,11 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
} else if (t.isDoctype()) { } else if (t.isDoctype()) {
tb.error(self) tb.error(self)
return false return false
} else if (t.isStartTag() && "html".equals(t.asStartTag().normalName())) { } else if t.startTagNormalName() == "html" {
return try tb.process(t, .InBody) return try tb.process(t, .InBody)
} else if (t.isEndTag() && "html".equals(t.asEndTag().normalName())) { } else if t.endTagNormalName() == "html" {
tb.transition(.AfterAfterFrameset) tb.transition(.AfterAfterFrameset)
} else if (t.isStartTag() && "noframes".equals(t.asStartTag().normalName())) { } else if t.startTagNormalName() == "noframes" {
return try tb.process(t, .InHead) return try tb.process(t, .InHead)
} else if (t.isEOF()) { } else if (t.isEOF()) {
// cool your heels, we're complete // cool your heels, we're complete
@ -1470,11 +1492,11 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
if (t.isComment()) { if (t.isComment()) {
try tb.insert(t.asComment()) try tb.insert(t.asComment())
} else if (t.isDoctype() || HtmlTreeBuilderState.isWhitespace(t) || (t.isStartTag() && "html".equals(t.asStartTag().normalName()))) { } else if (t.isDoctype() || HtmlTreeBuilderState.isWhitespace(t) || (t.startTagNormalName() == "html")) {
return try tb.process(t, .InBody) return try tb.process(t, .InBody)
} else if (t.isEOF()) { } else if (t.isEOF()) {
// nice work chuck // nice work chuck
} else if (t.isStartTag() && "noframes".equals(t.asStartTag().normalName())) { } else if t.startTagNormalName() == "noframes" {
return try tb.process(t, .InHead) return try tb.process(t, .InHead)
} else { } else {
tb.error(self) tb.error(self)
@ -1547,3 +1569,17 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
fileprivate static let InBodyEndTableFosters: [String] = ["table", "tbody", "tfoot", "thead", "tr"] fileprivate static let InBodyEndTableFosters: [String] = ["table", "tbody", "tfoot", "thead", "tr"]
} }
} }
/// File-local helpers that fold the "is this a start/end tag?" check and the
/// normalized-name lookup into a single optional-returning call, so callers
/// can write `t.endTagNormalName() == "html"` or bind the name with `if let`.
fileprivate extension Token {
    /// The normalized name of this token when it is an end tag.
    ///
    /// - Returns: `asEndTag().normalName()` if `isEndTag()` is true;
    ///   otherwise `nil`. The token is only cast after the kind check passes.
    func endTagNormalName() -> String? {
        return isEndTag() ? asEndTag().normalName() : nil
    }

    /// The normalized name of this token when it is a start tag.
    ///
    /// - Returns: `asStartTag().normalName()` if `isStartTag()` is true;
    ///   otherwise `nil`. The token is only cast after the kind check passes.
    func startTagNormalName() -> String? {
        return isStartTag() ? asStartTag().normalName() : nil
    }
}

View File

@ -49,9 +49,7 @@ open class ParseSettings {
open func normalizeAttributes(_ attributes: Attributes)throws ->Attributes { open func normalizeAttributes(_ attributes: Attributes)throws ->Attributes {
if (!preserveAttributeCase) { if (!preserveAttributeCase) {
for attr in attributes { attributes.lowercaseAllKeys()
try attr.setKey(key: attr.getKey().lowercased())
}
} }
return attributes return attributes
} }

View File

@ -25,8 +25,9 @@ extension String {
} }
func unicodeScalar(_ i: Int) -> UnicodeScalar { func unicodeScalar(_ i: Int) -> UnicodeScalar {
return self.unicodeScalars.prefix(i+1).last! let ix = unicodeScalars.index(unicodeScalars.startIndex, offsetBy: i)
} return unicodeScalars[ix]
}
func string(_ offset: Int, _ count: Int) -> String { func string(_ offset: Int, _ count: Int) -> String {
let truncStart = self.unicodeScalars.count-offset let truncStart = self.unicodeScalars.count-offset
@ -55,7 +56,7 @@ extension String {
func startsWith(_ string: String) -> Bool { func startsWith(_ string: String) -> Bool {
return self.hasPrefix(string) return self.hasPrefix(string)
} }
func indexOf(_ substring: String, _ offset: Int ) -> Int { func indexOf(_ substring: String, _ offset: Int ) -> Int {
if(offset > count) {return -1} if(offset > count) {return -1}
@ -81,12 +82,22 @@ extension String {
} }
func trim() -> String { func trim() -> String {
return trimmingCharacters(in: NSCharacterSet.whitespacesAndNewlines) // trimmingCharacters() in the stdlib is not very efficiently
// implemented, perhaps because it always creates a new string.
// Avoid actually calling it if it's not needed.
guard count > 0 else { return self }
let (firstChar, lastChar) = (first!, last!)
if firstChar.isWhitespace || lastChar.isWhitespace || firstChar == "\n" || lastChar == "\n" {
return trimmingCharacters(in: .whitespacesAndNewlines)
}
return self
} }
func equalsIgnoreCase(string: String?) -> Bool { func equalsIgnoreCase(string: String?) -> Bool {
if(string == nil) {return false} if let string = string {
return string!.lowercased() == lowercased() return caseInsensitiveCompare(string) == .orderedSame
}
return false
} }
static func toHexString(n: Int) -> String { static func toHexString(n: Int) -> String {

View File

@ -1,9 +1,9 @@
/** /**
Supports creation of a String from pieces Supports creation of a String from pieces
https://gist.github.com/kristopherjohnson/1fc55e811d944a430289 Based on https://gist.github.com/kristopherjohnson/1fc55e811d944a430289
*/ */
open class StringBuilder { open class StringBuilder {
fileprivate var stringValue: Array<Character> fileprivate var buffer: [String] = []
/** /**
Construct with initial String contents Construct with initial String contents
@ -11,11 +11,13 @@ open class StringBuilder {
:param: string Initial value; defaults to empty string :param: string Initial value; defaults to empty string
*/ */
public init(string: String = "") { public init(string: String = "") {
self.stringValue = Array(string) if string != "" {
buffer.append(string)
}
} }
public init(_ size: Int) { public init(_ size: Int) {
self.stringValue = Array() self.buffer = Array()
} }
/** /**
@ -24,15 +26,18 @@ open class StringBuilder {
:return: String :return: String
*/ */
open func toString() -> String { open func toString() -> String {
return String(stringValue) return buffer.reduce("", +)
} }
/** /**
Return the current length of the String object Return the current length of the String object
*/ */
open var length: Int { open var xlength: Int {
return self.stringValue.count return buffer.map { $0.count }.reduce(0, +)
//return countElements(stringValue) }
open var isEmpty: Bool {
return buffer.isEmpty
} }
/** /**
@ -43,29 +48,27 @@ open class StringBuilder {
:return: reference to this StringBuilder instance :return: reference to this StringBuilder instance
*/ */
open func append(_ string: String) { open func append(_ string: String) {
stringValue.append(contentsOf: string) buffer.append(string)
} }
open func appendCodePoint(_ chr: Character) { open func appendCodePoint(_ chr: Character) {
stringValue.append(chr) buffer.append(String(chr))
} }
open func appendCodePoints(_ chr: [Character]) { open func appendCodePoints(_ chr: [Character]) {
stringValue.append(contentsOf: chr) buffer.append(String(chr))
} }
open func appendCodePoint(_ ch: Int) { open func appendCodePoint(_ ch: Int) {
stringValue.append(Character(UnicodeScalar(ch)!)) buffer.append(String(UnicodeScalar(ch)!))
} }
open func appendCodePoint(_ ch: UnicodeScalar) { open func appendCodePoint(_ ch: UnicodeScalar) {
stringValue.append(Character(ch)) buffer.append(String(ch))
} }
open func appendCodePoints(_ chr: [UnicodeScalar]) { open func appendCodePoints(_ chr: [UnicodeScalar]) {
for c in chr { buffer.append(String(String.UnicodeScalarView(chr)))
appendCodePoint(c)
}
} }
/** /**
@ -77,19 +80,13 @@ open class StringBuilder {
*/ */
@discardableResult @discardableResult
open func append<T: CustomStringConvertible>(_ value: T) -> StringBuilder { open func append<T: CustomStringConvertible>(_ value: T) -> StringBuilder {
stringValue.append(contentsOf: value.description) buffer.append(value.description)
return self return self
} }
@discardableResult @discardableResult
open func append(_ value: UnicodeScalar) -> StringBuilder { open func append(_ value: UnicodeScalar) -> StringBuilder {
stringValue.append(contentsOf: value.description) buffer.append(value.description)
return self
}
@discardableResult
open func insert<T: CustomStringConvertible>(_ offset: Int, _ value: T) -> StringBuilder {
stringValue.insert(contentsOf: value.description, at: offset)
return self return self
} }
@ -102,7 +99,8 @@ open class StringBuilder {
*/ */
@discardableResult @discardableResult
open func appendLine(_ string: String) -> StringBuilder { open func appendLine(_ string: String) -> StringBuilder {
stringValue.append(contentsOf: "\n") buffer.append(string)
buffer.append("\n")
return self return self
} }
@ -115,8 +113,8 @@ open class StringBuilder {
*/ */
@discardableResult @discardableResult
open func appendLine<T: CustomStringConvertible>(_ value: T) -> StringBuilder { open func appendLine<T: CustomStringConvertible>(_ value: T) -> StringBuilder {
stringValue.append(contentsOf: value.description) buffer.append(value.description)
stringValue.append(contentsOf: "\n") buffer.append("\n")
return self return self
} }
@ -127,7 +125,7 @@ open class StringBuilder {
*/ */
@discardableResult @discardableResult
open func clear() -> StringBuilder { open func clear() -> StringBuilder {
stringValue = Array() buffer.removeAll(keepingCapacity: true)
return self return self
} }
} }

View File

@ -165,19 +165,6 @@ open class StringUtil {
} }
} }
public static func inString(_ needle: String?, haystack: String...) -> Bool {
return inString(needle, haystack)
}
public static func inString(_ needle: String?, _ haystack: [String?]) -> Bool {
if(needle == nil) {return false}
for hay in haystack {
if(hay != nil && hay! == needle!) {
return true
}
}
return false
}
// open static func inSorted(_ needle: String, haystack: [String]) -> Bool { // open static func inSorted(_ needle: String, haystack: [String]) -> Bool {
// return binarySearch(haystack, searchItem: needle) >= 0 // return binarySearch(haystack, searchItem: needle) >= 0
// } // }

View File

@ -116,7 +116,7 @@ open class Token {
if (_pendingAttributeName != nil) { if (_pendingAttributeName != nil) {
var attribute: Attribute var attribute: Attribute
if (_hasPendingAttributeValue) { if (_hasPendingAttributeValue) {
attribute = try Attribute(key: _pendingAttributeName!, value: _pendingAttributeValue.length > 0 ? _pendingAttributeValue.toString() : _pendingAttributeValueS!) attribute = try Attribute(key: _pendingAttributeName!, value: !_pendingAttributeValue.isEmpty ? _pendingAttributeValue.toString() : _pendingAttributeValueS!)
} else if (_hasEmptyAttributeValue) { } else if (_hasEmptyAttributeValue) {
attribute = try Attribute(key: _pendingAttributeName!, value: "") attribute = try Attribute(key: _pendingAttributeName!, value: "")
} else { } else {
@ -183,7 +183,7 @@ open class Token {
func appendAttributeValue(_ append: String) { func appendAttributeValue(_ append: String) {
ensureAttributeValue() ensureAttributeValue()
if (_pendingAttributeValue.length == 0) { if _pendingAttributeValue.isEmpty {
_pendingAttributeValueS = append _pendingAttributeValueS = append
} else { } else {
_pendingAttributeValue.append(append) _pendingAttributeValue.append(append)

View File

@ -47,7 +47,7 @@ final class Tokeniser {
} }
// if emit is pending, a non-character token was found: return any chars in buffer, and leave token for next read: // if emit is pending, a non-character token was found: return any chars in buffer, and leave token for next read:
if (charsBuilder.length > 0) { if !charsBuilder.isEmpty {
let str: String = charsBuilder.toString() let str: String = charsBuilder.toString()
charsBuilder.clear() charsBuilder.clear()
charsString = nil charsString = nil
@ -88,7 +88,7 @@ final class Tokeniser {
if (charsString == nil) { if (charsString == nil) {
charsString = str charsString = str
} else { } else {
if (charsBuilder.length == 0) { // switching to string builder as more than one emit before read if charsBuilder.isEmpty { // switching to string builder as more than one emit before read
charsBuilder.append(charsString!) charsBuilder.append(charsString!)
} }
charsBuilder.append(str) charsBuilder.append(str)
@ -124,9 +124,6 @@ final class Tokeniser {
selfClosingFlagAcknowledged = true selfClosingFlagAcknowledged = true
} }
private var codepointHolder: [UnicodeScalar] = [UnicodeScalar(0)!] // holder to not have to keep creating arrays
private var multipointHolder: [UnicodeScalar] = [UnicodeScalar(0)!, UnicodeScalar(0)!]
func consumeCharacterReference(_ additionalAllowedCharacter: UnicodeScalar?, _ inAttribute: Bool)throws->[UnicodeScalar]? { func consumeCharacterReference(_ additionalAllowedCharacter: UnicodeScalar?, _ inAttribute: Bool)throws->[UnicodeScalar]? {
if (reader.isEmpty()) { if (reader.isEmpty()) {
return nil return nil
@ -138,7 +135,6 @@ final class Tokeniser {
return nil return nil
} }
var codeRef: [UnicodeScalar] = codepointHolder
reader.markPos() reader.markPos()
if (reader.matchConsume("#")) { // numbered if (reader.matchConsume("#")) { // numbered
let isHexMode: Bool = reader.matchConsumeIgnoreCase("X") let isHexMode: Bool = reader.matchConsumeIgnoreCase("X")
@ -160,13 +156,11 @@ final class Tokeniser {
if (charval == -1 || (charval >= 0xD800 && charval <= 0xDFFF) || charval > 0x10FFFF) { if (charval == -1 || (charval >= 0xD800 && charval <= 0xDFFF) || charval > 0x10FFFF) {
characterReferenceError("character outside of valid range") characterReferenceError("character outside of valid range")
codeRef[0] = Tokeniser.replacementChar return [Tokeniser.replacementChar]
return codeRef
} else { } else {
// todo: implement number replacement table // todo: implement number replacement table
// todo: check for extra illegal unicode points as parse errors // todo: check for extra illegal unicode points as parse errors
codeRef[0] = UnicodeScalar(charval)! return [UnicodeScalar(charval)!]
return codeRef
} }
} else { // named } else { // named
// get as many letters as possible, and look for matching entities. // get as many letters as possible, and look for matching entities.
@ -190,16 +184,14 @@ final class Tokeniser {
if (!reader.matchConsume(";")) { if (!reader.matchConsume(";")) {
characterReferenceError("missing semicolon") // missing semi characterReferenceError("missing semicolon") // missing semi
} }
let numChars: Int = Entities.codepointsForName(nameRef, codepoints: &multipointHolder) if let points = Entities.codepointsForName(nameRef) {
if (numChars == 1) { if points.count > 2 {
codeRef[0] = multipointHolder[0] try Validate.fail(msg: "Unexpected characters returned for \(nameRef) num: \(points.count)")
return codeRef }
} else if (numChars == 2) { return points
return multipointHolder
} else {
try Validate.fail(msg: "Unexpected characters returned for \(nameRef) num: \(numChars)")
return multipointHolder
} }
try Validate.fail(msg: "Entity name not found: \(nameRef)")
return []
} }
} }

View File

@ -84,12 +84,22 @@
8CE418721DAA568700240B42 /* SerializationException.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE4184E1DAA568600240B42 /* SerializationException.swift */; }; 8CE418721DAA568700240B42 /* SerializationException.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE4184E1DAA568600240B42 /* SerializationException.swift */; };
8CE418731DAA568700240B42 /* ArrayExt.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418501DAA568600240B42 /* ArrayExt.swift */; }; 8CE418731DAA568700240B42 /* ArrayExt.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418501DAA568600240B42 /* ArrayExt.swift */; };
8CE418741DAA568700240B42 /* CharacterExt.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418511DAA568600240B42 /* CharacterExt.swift */; }; 8CE418741DAA568700240B42 /* CharacterExt.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418511DAA568600240B42 /* CharacterExt.swift */; };
8CE418761DAA568700240B42 /* OrderedDictionary.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418531DAA568600240B42 /* OrderedDictionary.swift */; };
8CE418781DAA568700240B42 /* StreamReader.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418551DAA568600240B42 /* StreamReader.swift */; }; 8CE418781DAA568700240B42 /* StreamReader.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418551DAA568600240B42 /* StreamReader.swift */; };
8CE4187A1DAA568700240B42 /* SwiftSoup.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418571DAA568600240B42 /* SwiftSoup.swift */; }; 8CE4187A1DAA568700240B42 /* SwiftSoup.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418571DAA568600240B42 /* SwiftSoup.swift */; };
8CEA29591DAC112B0064A341 /* CharacterReader.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CEA29581DAC112B0064A341 /* CharacterReader.swift */; }; 8CEA29591DAC112B0064A341 /* CharacterReader.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CEA29581DAC112B0064A341 /* CharacterReader.swift */; };
8CEA295B1DAC23820064A341 /* String.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CEA295A1DAC23820064A341 /* String.swift */; }; 8CEA295B1DAC23820064A341 /* String.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CEA295A1DAC23820064A341 /* String.swift */; };
BD3B5B6A1FBED933001FDB3B /* OrderedDictionary.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418531DAA568600240B42 /* OrderedDictionary.swift */; }; BB57C2D3222CAF8E008933AA /* SwiftSoup.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = BD3B5BA91FBED933001FDB3B /* SwiftSoup.framework */; };
BB57C2DB222CB0C6008933AA /* ParserBenchmark.swift in Sources */ = {isa = PBXBuildFile; fileRef = BB57C2D9222CB0BE008933AA /* ParserBenchmark.swift */; };
BB57C2E2222CB0E3008933AA /* Google.html in Resources */ = {isa = PBXBuildFile; fileRef = BB57C2DC222CB0E2008933AA /* Google.html */; };
BB57C2E3222CB0E3008933AA /* Wikipedia.html in Resources */ = {isa = PBXBuildFile; fileRef = BB57C2DD222CB0E2008933AA /* Wikipedia.html */; };
BB57C2E4222CB0E3008933AA /* Reuters.html in Resources */ = {isa = PBXBuildFile; fileRef = BB57C2DE222CB0E2008933AA /* Reuters.html */; };
BB57C2E5222CB0E3008933AA /* Wirecutter.html in Resources */ = {isa = PBXBuildFile; fileRef = BB57C2DF222CB0E2008933AA /* Wirecutter.html */; };
BB57C2E6222CB0E3008933AA /* GitHub.html in Resources */ = {isa = PBXBuildFile; fileRef = BB57C2E0222CB0E3008933AA /* GitHub.html */; };
BB57C2E7222CB0E3008933AA /* Amazon.html in Resources */ = {isa = PBXBuildFile; fileRef = BB57C2E1222CB0E3008933AA /* Amazon.html */; };
BB57C2EA222CCCB6008933AA /* BinarySearch.swift in Sources */ = {isa = PBXBuildFile; fileRef = BB57C2E9222CCCB5008933AA /* BinarySearch.swift */; };
BB57C2EB222CCCC3008933AA /* BinarySearch.swift in Sources */ = {isa = PBXBuildFile; fileRef = BB57C2E9222CCCB5008933AA /* BinarySearch.swift */; };
BB57C2EC222CCCC5008933AA /* BinarySearch.swift in Sources */ = {isa = PBXBuildFile; fileRef = BB57C2E9222CCCB5008933AA /* BinarySearch.swift */; };
BB57C2ED222CCCC6008933AA /* BinarySearch.swift in Sources */ = {isa = PBXBuildFile; fileRef = BB57C2E9222CCCB5008933AA /* BinarySearch.swift */; };
BD3B5B6B1FBED933001FDB3B /* Whitelist.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CC2FD8C1DB12382002CB469 /* Whitelist.swift */; }; BD3B5B6B1FBED933001FDB3B /* Whitelist.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CC2FD8C1DB12382002CB469 /* Whitelist.swift */; };
BD3B5B6C1FBED933001FDB3B /* Tokeniser.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8C19C82E1DB7E5D200B8FC22 /* Tokeniser.swift */; }; BD3B5B6C1FBED933001FDB3B /* Tokeniser.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8C19C82E1DB7E5D200B8FC22 /* Tokeniser.swift */; };
BD3B5B6D1FBED933001FDB3B /* Pattern.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418541DAA568600240B42 /* Pattern.swift */; }; BD3B5B6D1FBED933001FDB3B /* Pattern.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418541DAA568600240B42 /* Pattern.swift */; };
@ -146,7 +156,6 @@
BD3B5BA01FBED933001FDB3B /* DataUtil.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418321DAA568600240B42 /* DataUtil.swift */; }; BD3B5BA01FBED933001FDB3B /* DataUtil.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418321DAA568600240B42 /* DataUtil.swift */; };
BD3B5BA11FBED933001FDB3B /* ParseError.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8C19C8301DB7E8CD00B8FC22 /* ParseError.swift */; }; BD3B5BA11FBED933001FDB3B /* ParseError.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8C19C8301DB7E8CD00B8FC22 /* ParseError.swift */; };
BD3B5BA41FBED933001FDB3B /* SwiftSoup.h in Headers */ = {isa = PBXBuildFile; fileRef = 8CE418191DAA54A900240B42 /* SwiftSoup.h */; settings = {ATTRIBUTES = (Public, ); }; }; BD3B5BA41FBED933001FDB3B /* SwiftSoup.h in Headers */ = {isa = PBXBuildFile; fileRef = 8CE418191DAA54A900240B42 /* SwiftSoup.h */; settings = {ATTRIBUTES = (Public, ); }; };
BD3B5BAD1FC063BD001FDB3B /* OrderedDictionary.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418531DAA568600240B42 /* OrderedDictionary.swift */; };
BD3B5BAE1FC063BD001FDB3B /* Whitelist.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CC2FD8C1DB12382002CB469 /* Whitelist.swift */; }; BD3B5BAE1FC063BD001FDB3B /* Whitelist.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CC2FD8C1DB12382002CB469 /* Whitelist.swift */; };
BD3B5BAF1FC063BD001FDB3B /* Tokeniser.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8C19C82E1DB7E5D200B8FC22 /* Tokeniser.swift */; }; BD3B5BAF1FC063BD001FDB3B /* Tokeniser.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8C19C82E1DB7E5D200B8FC22 /* Tokeniser.swift */; };
BD3B5BB01FC063BD001FDB3B /* Pattern.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418541DAA568600240B42 /* Pattern.swift */; }; BD3B5BB01FC063BD001FDB3B /* Pattern.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418541DAA568600240B42 /* Pattern.swift */; };
@ -203,7 +212,6 @@
BD3B5BE31FC063BD001FDB3B /* DataUtil.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418321DAA568600240B42 /* DataUtil.swift */; }; BD3B5BE31FC063BD001FDB3B /* DataUtil.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418321DAA568600240B42 /* DataUtil.swift */; };
BD3B5BE41FC063BD001FDB3B /* ParseError.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8C19C8301DB7E8CD00B8FC22 /* ParseError.swift */; }; BD3B5BE41FC063BD001FDB3B /* ParseError.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8C19C8301DB7E8CD00B8FC22 /* ParseError.swift */; };
BD3B5BE71FC063BD001FDB3B /* SwiftSoup.h in Headers */ = {isa = PBXBuildFile; fileRef = 8CE418191DAA54A900240B42 /* SwiftSoup.h */; settings = {ATTRIBUTES = (Public, ); }; }; BD3B5BE71FC063BD001FDB3B /* SwiftSoup.h in Headers */ = {isa = PBXBuildFile; fileRef = 8CE418191DAA54A900240B42 /* SwiftSoup.h */; settings = {ATTRIBUTES = (Public, ); }; };
BD3B5BF01FC06423001FDB3B /* OrderedDictionary.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418531DAA568600240B42 /* OrderedDictionary.swift */; };
BD3B5BF11FC06423001FDB3B /* Whitelist.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CC2FD8C1DB12382002CB469 /* Whitelist.swift */; }; BD3B5BF11FC06423001FDB3B /* Whitelist.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CC2FD8C1DB12382002CB469 /* Whitelist.swift */; };
BD3B5BF21FC06423001FDB3B /* Tokeniser.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8C19C82E1DB7E5D200B8FC22 /* Tokeniser.swift */; }; BD3B5BF21FC06423001FDB3B /* Tokeniser.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8C19C82E1DB7E5D200B8FC22 /* Tokeniser.swift */; };
BD3B5BF31FC06423001FDB3B /* Pattern.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418541DAA568600240B42 /* Pattern.swift */; }; BD3B5BF31FC06423001FDB3B /* Pattern.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8CE418541DAA568600240B42 /* Pattern.swift */; };
@ -270,6 +278,13 @@
remoteGlobalIDString = 8CE418151DAA54A900240B42; remoteGlobalIDString = 8CE418151DAA54A900240B42;
remoteInfo = SwiftSoup; remoteInfo = SwiftSoup;
}; };
BB57C2D4222CAF8E008933AA /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy;
containerPortal = 8CE4180D1DAA54A900240B42 /* Project object */;
proxyType = 1;
remoteGlobalIDString = BD3B5B681FBED933001FDB3B;
remoteInfo = "SwiftSoup-macOS";
};
/* End PBXContainerItemProxy section */ /* End PBXContainerItemProxy section */
/* Begin PBXFileReference section */ /* Begin PBXFileReference section */
@ -353,12 +368,21 @@
8CE4184E1DAA568600240B42 /* SerializationException.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = SerializationException.swift; sourceTree = "<group>"; }; 8CE4184E1DAA568600240B42 /* SerializationException.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = SerializationException.swift; sourceTree = "<group>"; };
8CE418501DAA568600240B42 /* ArrayExt.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ArrayExt.swift; sourceTree = "<group>"; }; 8CE418501DAA568600240B42 /* ArrayExt.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ArrayExt.swift; sourceTree = "<group>"; };
8CE418511DAA568600240B42 /* CharacterExt.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = CharacterExt.swift; sourceTree = "<group>"; }; 8CE418511DAA568600240B42 /* CharacterExt.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = CharacterExt.swift; sourceTree = "<group>"; };
8CE418531DAA568600240B42 /* OrderedDictionary.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = OrderedDictionary.swift; sourceTree = "<group>"; };
8CE418541DAA568600240B42 /* Pattern.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Pattern.swift; sourceTree = "<group>"; }; 8CE418541DAA568600240B42 /* Pattern.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Pattern.swift; sourceTree = "<group>"; };
8CE418551DAA568600240B42 /* StreamReader.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = StreamReader.swift; sourceTree = "<group>"; }; 8CE418551DAA568600240B42 /* StreamReader.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = StreamReader.swift; sourceTree = "<group>"; };
8CE418571DAA568600240B42 /* SwiftSoup.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = SwiftSoup.swift; sourceTree = "<group>"; }; 8CE418571DAA568600240B42 /* SwiftSoup.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = SwiftSoup.swift; sourceTree = "<group>"; };
8CEA29581DAC112B0064A341 /* CharacterReader.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = CharacterReader.swift; sourceTree = "<group>"; }; 8CEA29581DAC112B0064A341 /* CharacterReader.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = CharacterReader.swift; sourceTree = "<group>"; };
8CEA295A1DAC23820064A341 /* String.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = String.swift; sourceTree = "<group>"; }; 8CEA295A1DAC23820064A341 /* String.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = String.swift; sourceTree = "<group>"; };
BB57C2CE222CAF8E008933AA /* SwiftSoupTests-macOS.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = "SwiftSoupTests-macOS.xctest"; sourceTree = BUILT_PRODUCTS_DIR; };
BB57C2D2222CAF8E008933AA /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
BB57C2D9222CB0BE008933AA /* ParserBenchmark.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ParserBenchmark.swift; sourceTree = "<group>"; };
BB57C2DC222CB0E2008933AA /* Google.html */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.html; path = Google.html; sourceTree = "<group>"; };
BB57C2DD222CB0E2008933AA /* Wikipedia.html */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.html; path = Wikipedia.html; sourceTree = "<group>"; };
BB57C2DE222CB0E2008933AA /* Reuters.html */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.html; path = Reuters.html; sourceTree = "<group>"; };
BB57C2DF222CB0E2008933AA /* Wirecutter.html */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.html; path = Wirecutter.html; sourceTree = "<group>"; };
BB57C2E0222CB0E3008933AA /* GitHub.html */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.html; path = GitHub.html; sourceTree = "<group>"; };
BB57C2E1222CB0E3008933AA /* Amazon.html */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.html; path = Amazon.html; sourceTree = "<group>"; };
BB57C2E9222CCCB5008933AA /* BinarySearch.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BinarySearch.swift; sourceTree = "<group>"; };
BD36975B20135EBB00D8FAC6 /* SwiftSoup.podspec */ = {isa = PBXFileReference; lastKnownFileType = text; path = SwiftSoup.podspec; sourceTree = "<group>"; }; BD36975B20135EBB00D8FAC6 /* SwiftSoup.podspec */ = {isa = PBXFileReference; lastKnownFileType = text; path = SwiftSoup.podspec; sourceTree = "<group>"; };
BD3B5BA91FBED933001FDB3B /* SwiftSoup.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = SwiftSoup.framework; sourceTree = BUILT_PRODUCTS_DIR; }; BD3B5BA91FBED933001FDB3B /* SwiftSoup.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = SwiftSoup.framework; sourceTree = BUILT_PRODUCTS_DIR; };
BD3B5BAA1FBED934001FDB3B /* InfoMac.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; name = InfoMac.plist; path = /Users/nabil/Documents/nabil/SwiftSoup/Sources/InfoMac.plist; sourceTree = "<absolute>"; }; BD3B5BAA1FBED934001FDB3B /* InfoMac.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; name = InfoMac.plist; path = /Users/nabil/Documents/nabil/SwiftSoup/Sources/InfoMac.plist; sourceTree = "<absolute>"; };
@ -385,6 +409,14 @@
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
}; };
BB57C2CB222CAF8E008933AA /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
BB57C2D3222CAF8E008933AA /* SwiftSoup.framework in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
};
BD3B5BA21FBED933001FDB3B /* Frameworks */ = { BD3B5BA21FBED933001FDB3B /* Frameworks */ = {
isa = PBXFrameworksBuildPhase; isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647; buildActionMask = 2147483647;
@ -420,9 +452,9 @@
8C7ED6731E00B0690032A27C /* shared */ = { 8C7ED6731E00B0690032A27C /* shared */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
BB57C2E9222CCCB5008933AA /* BinarySearch.swift */,
8CE418501DAA568600240B42 /* ArrayExt.swift */, 8CE418501DAA568600240B42 /* ArrayExt.swift */,
8CE418511DAA568600240B42 /* CharacterExt.swift */, 8CE418511DAA568600240B42 /* CharacterExt.swift */,
8CE418531DAA568600240B42 /* OrderedDictionary.swift */,
8CE418541DAA568600240B42 /* Pattern.swift */, 8CE418541DAA568600240B42 /* Pattern.swift */,
8CE418551DAA568600240B42 /* StreamReader.swift */, 8CE418551DAA568600240B42 /* StreamReader.swift */,
8CEA295A1DAC23820064A341 /* String.swift */, 8CEA295A1DAC23820064A341 /* String.swift */,
@ -526,6 +558,7 @@
BD76883E206D8B6900B7F940 /* CHANGELOG.md */, BD76883E206D8B6900B7F940 /* CHANGELOG.md */,
8CE418181DAA54A900240B42 /* Sources */, 8CE418181DAA54A900240B42 /* Sources */,
8CE418231DAA54A900240B42 /* Tests */, 8CE418231DAA54A900240B42 /* Tests */,
BB57C2CF222CAF8E008933AA /* Tests-macOS */,
8CE418171DAA54A900240B42 /* Products */, 8CE418171DAA54A900240B42 /* Products */,
); );
indentWidth = 4; indentWidth = 4;
@ -540,6 +573,7 @@
BD3B5BA91FBED933001FDB3B /* SwiftSoup.framework */, BD3B5BA91FBED933001FDB3B /* SwiftSoup.framework */,
BD3B5BEC1FC063BD001FDB3B /* SwiftSoup.framework */, BD3B5BEC1FC063BD001FDB3B /* SwiftSoup.framework */,
BD3B5C2F1FC06423001FDB3B /* SwiftSoup.framework */, BD3B5C2F1FC06423001FDB3B /* SwiftSoup.framework */,
BB57C2CE222CAF8E008933AA /* SwiftSoupTests-macOS.xctest */,
); );
name = Products; name = Products;
sourceTree = "<group>"; sourceTree = "<group>";
@ -623,6 +657,29 @@
name = select; name = select;
sourceTree = "<group>"; sourceTree = "<group>";
}; };
BB57C2CF222CAF8E008933AA /* Tests-macOS */ = {
isa = PBXGroup;
children = (
BB57C2D9222CB0BE008933AA /* ParserBenchmark.swift */,
BB57C2E8222CB0EE008933AA /* corpus */,
BB57C2D2222CAF8E008933AA /* Info.plist */,
);
path = "Tests-macOS";
sourceTree = "<group>";
};
BB57C2E8222CB0EE008933AA /* corpus */ = {
isa = PBXGroup;
children = (
BB57C2E1222CB0E3008933AA /* Amazon.html */,
BB57C2E0222CB0E3008933AA /* GitHub.html */,
BB57C2DC222CB0E2008933AA /* Google.html */,
BB57C2DE222CB0E2008933AA /* Reuters.html */,
BB57C2DD222CB0E2008933AA /* Wikipedia.html */,
BB57C2DF222CB0E2008933AA /* Wirecutter.html */,
);
path = corpus;
sourceTree = "<group>";
};
/* End PBXGroup section */ /* End PBXGroup section */
/* Begin PBXHeadersBuildPhase section */ /* Begin PBXHeadersBuildPhase section */
@ -697,6 +754,24 @@
productReference = 8CE4181F1DAA54A900240B42 /* SwiftSoupTests.xctest */; productReference = 8CE4181F1DAA54A900240B42 /* SwiftSoupTests.xctest */;
productType = "com.apple.product-type.bundle.unit-test"; productType = "com.apple.product-type.bundle.unit-test";
}; };
BB57C2CD222CAF8E008933AA /* SwiftSoupTests-macOS */ = {
isa = PBXNativeTarget;
buildConfigurationList = BB57C2D6222CAF8E008933AA /* Build configuration list for PBXNativeTarget "SwiftSoupTests-macOS" */;
buildPhases = (
BB57C2CA222CAF8E008933AA /* Sources */,
BB57C2CB222CAF8E008933AA /* Frameworks */,
BB57C2CC222CAF8E008933AA /* Resources */,
);
buildRules = (
);
dependencies = (
BB57C2D5222CAF8E008933AA /* PBXTargetDependency */,
);
name = "SwiftSoupTests-macOS";
productName = "SwiftSoupTests-macOS";
productReference = BB57C2CE222CAF8E008933AA /* SwiftSoupTests-macOS.xctest */;
productType = "com.apple.product-type.bundle.unit-test";
};
BD3B5B681FBED933001FDB3B /* SwiftSoup-macOS */ = { BD3B5B681FBED933001FDB3B /* SwiftSoup-macOS */ = {
isa = PBXNativeTarget; isa = PBXNativeTarget;
buildConfigurationList = BD3B5BA61FBED933001FDB3B /* Build configuration list for PBXNativeTarget "SwiftSoup-macOS" */; buildConfigurationList = BD3B5BA61FBED933001FDB3B /* Build configuration list for PBXNativeTarget "SwiftSoup-macOS" */;
@ -757,8 +832,8 @@
8CE4180D1DAA54A900240B42 /* Project object */ = { 8CE4180D1DAA54A900240B42 /* Project object */ = {
isa = PBXProject; isa = PBXProject;
attributes = { attributes = {
LastSwiftUpdateCheck = 0800; LastSwiftUpdateCheck = 1020;
LastUpgradeCheck = 1020; LastUpgradeCheck = 0930;
ORGANIZATIONNAME = "Nabil Chatbi"; ORGANIZATIONNAME = "Nabil Chatbi";
TargetAttributes = { TargetAttributes = {
8CE418151DAA54A900240B42 = { 8CE418151DAA54A900240B42 = {
@ -771,6 +846,11 @@
LastSwiftMigration = 1020; LastSwiftMigration = 1020;
ProvisioningStyle = Manual; ProvisioningStyle = Manual;
}; };
BB57C2CD222CAF8E008933AA = {
CreatedOnToolsVersion = 10.2;
DevelopmentTeam = 5MC4PNHTX6;
ProvisioningStyle = Automatic;
};
BD3B5BAB1FC063BD001FDB3B = { BD3B5BAB1FC063BD001FDB3B = {
ProvisioningStyle = Manual; ProvisioningStyle = Manual;
}; };
@ -797,6 +877,7 @@
BD3B5BAB1FC063BD001FDB3B /* SwiftSoup-tvOS */, BD3B5BAB1FC063BD001FDB3B /* SwiftSoup-tvOS */,
BD3B5BEE1FC06423001FDB3B /* SwiftSoup-watchOS */, BD3B5BEE1FC06423001FDB3B /* SwiftSoup-watchOS */,
8CE4181E1DAA54A900240B42 /* SwiftSoupTests */, 8CE4181E1DAA54A900240B42 /* SwiftSoupTests */,
BB57C2CD222CAF8E008933AA /* SwiftSoupTests-macOS */,
); );
}; };
/* End PBXProject section */ /* End PBXProject section */
@ -816,6 +897,19 @@
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
}; };
BB57C2CC222CAF8E008933AA /* Resources */ = {
isa = PBXResourcesBuildPhase;
buildActionMask = 2147483647;
files = (
BB57C2E5222CB0E3008933AA /* Wirecutter.html in Resources */,
BB57C2E4222CB0E3008933AA /* Reuters.html in Resources */,
BB57C2E3222CB0E3008933AA /* Wikipedia.html in Resources */,
BB57C2E6222CB0E3008933AA /* GitHub.html in Resources */,
BB57C2E7222CB0E3008933AA /* Amazon.html in Resources */,
BB57C2E2222CB0E3008933AA /* Google.html in Resources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
BD3B5BA51FBED933001FDB3B /* Resources */ = { BD3B5BA51FBED933001FDB3B /* Resources */ = {
isa = PBXResourcesBuildPhase; isa = PBXResourcesBuildPhase;
buildActionMask = 2147483647; buildActionMask = 2147483647;
@ -844,7 +938,6 @@
isa = PBXSourcesBuildPhase; isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647; buildActionMask = 2147483647;
files = ( files = (
8CE418761DAA568700240B42 /* OrderedDictionary.swift in Sources */,
8CC2FD8D1DB12382002CB469 /* Whitelist.swift in Sources */, 8CC2FD8D1DB12382002CB469 /* Whitelist.swift in Sources */,
8C19C82F1DB7E5D200B8FC22 /* Tokeniser.swift in Sources */, 8C19C82F1DB7E5D200B8FC22 /* Tokeniser.swift in Sources */,
8CD4E8F01E12B0FF0039B951 /* Pattern.swift in Sources */, 8CD4E8F01E12B0FF0039B951 /* Pattern.swift in Sources */,
@ -877,6 +970,7 @@
8CE4186D1DAA568700240B42 /* FormElement.swift in Sources */, 8CE4186D1DAA568700240B42 /* FormElement.swift in Sources */,
8C73DB4B1DDA605900233A68 /* UnicodeScalar.swift in Sources */, 8C73DB4B1DDA605900233A68 /* UnicodeScalar.swift in Sources */,
8CE418601DAA568600240B42 /* Validate.swift in Sources */, 8CE418601DAA568600240B42 /* Validate.swift in Sources */,
BB57C2EA222CCCB6008933AA /* BinarySearch.swift in Sources */,
8C3617C11DBAC2AE00E00CFE /* Selector.swift in Sources */, 8C3617C11DBAC2AE00E00CFE /* Selector.swift in Sources */,
8CE418711DAA568700240B42 /* Parser.swift in Sources */, 8CE418711DAA568700240B42 /* Parser.swift in Sources */,
8CE418701DAA568700240B42 /* XmlDeclaration.swift in Sources */, 8CE418701DAA568700240B42 /* XmlDeclaration.swift in Sources */,
@ -934,11 +1028,18 @@
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
}; };
BB57C2CA222CAF8E008933AA /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
BB57C2DB222CB0C6008933AA /* ParserBenchmark.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
BD3B5B691FBED933001FDB3B /* Sources */ = { BD3B5B691FBED933001FDB3B /* Sources */ = {
isa = PBXSourcesBuildPhase; isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647; buildActionMask = 2147483647;
files = ( files = (
BD3B5B6A1FBED933001FDB3B /* OrderedDictionary.swift in Sources */,
BD3B5B6B1FBED933001FDB3B /* Whitelist.swift in Sources */, BD3B5B6B1FBED933001FDB3B /* Whitelist.swift in Sources */,
BD3B5B6C1FBED933001FDB3B /* Tokeniser.swift in Sources */, BD3B5B6C1FBED933001FDB3B /* Tokeniser.swift in Sources */,
BD3B5B6D1FBED933001FDB3B /* Pattern.swift in Sources */, BD3B5B6D1FBED933001FDB3B /* Pattern.swift in Sources */,
@ -971,6 +1072,7 @@
BD3B5B881FBED933001FDB3B /* FormElement.swift in Sources */, BD3B5B881FBED933001FDB3B /* FormElement.swift in Sources */,
BD3B5B891FBED933001FDB3B /* UnicodeScalar.swift in Sources */, BD3B5B891FBED933001FDB3B /* UnicodeScalar.swift in Sources */,
BD3B5B8A1FBED933001FDB3B /* Validate.swift in Sources */, BD3B5B8A1FBED933001FDB3B /* Validate.swift in Sources */,
BB57C2EB222CCCC3008933AA /* BinarySearch.swift in Sources */,
BD3B5B8B1FBED933001FDB3B /* Selector.swift in Sources */, BD3B5B8B1FBED933001FDB3B /* Selector.swift in Sources */,
BD3B5B8C1FBED933001FDB3B /* Parser.swift in Sources */, BD3B5B8C1FBED933001FDB3B /* Parser.swift in Sources */,
BD3B5B8D1FBED933001FDB3B /* XmlDeclaration.swift in Sources */, BD3B5B8D1FBED933001FDB3B /* XmlDeclaration.swift in Sources */,
@ -1001,7 +1103,6 @@
isa = PBXSourcesBuildPhase; isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647; buildActionMask = 2147483647;
files = ( files = (
BD3B5BAD1FC063BD001FDB3B /* OrderedDictionary.swift in Sources */,
BD3B5BAE1FC063BD001FDB3B /* Whitelist.swift in Sources */, BD3B5BAE1FC063BD001FDB3B /* Whitelist.swift in Sources */,
BD3B5BAF1FC063BD001FDB3B /* Tokeniser.swift in Sources */, BD3B5BAF1FC063BD001FDB3B /* Tokeniser.swift in Sources */,
BD3B5BB01FC063BD001FDB3B /* Pattern.swift in Sources */, BD3B5BB01FC063BD001FDB3B /* Pattern.swift in Sources */,
@ -1034,6 +1135,7 @@
BD3B5BCB1FC063BD001FDB3B /* FormElement.swift in Sources */, BD3B5BCB1FC063BD001FDB3B /* FormElement.swift in Sources */,
BD3B5BCC1FC063BD001FDB3B /* UnicodeScalar.swift in Sources */, BD3B5BCC1FC063BD001FDB3B /* UnicodeScalar.swift in Sources */,
BD3B5BCD1FC063BD001FDB3B /* Validate.swift in Sources */, BD3B5BCD1FC063BD001FDB3B /* Validate.swift in Sources */,
BB57C2EC222CCCC5008933AA /* BinarySearch.swift in Sources */,
BD3B5BCE1FC063BD001FDB3B /* Selector.swift in Sources */, BD3B5BCE1FC063BD001FDB3B /* Selector.swift in Sources */,
BD3B5BCF1FC063BD001FDB3B /* Parser.swift in Sources */, BD3B5BCF1FC063BD001FDB3B /* Parser.swift in Sources */,
BD3B5BD01FC063BD001FDB3B /* XmlDeclaration.swift in Sources */, BD3B5BD01FC063BD001FDB3B /* XmlDeclaration.swift in Sources */,
@ -1064,7 +1166,6 @@
isa = PBXSourcesBuildPhase; isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647; buildActionMask = 2147483647;
files = ( files = (
BD3B5BF01FC06423001FDB3B /* OrderedDictionary.swift in Sources */,
BD3B5BF11FC06423001FDB3B /* Whitelist.swift in Sources */, BD3B5BF11FC06423001FDB3B /* Whitelist.swift in Sources */,
BD3B5BF21FC06423001FDB3B /* Tokeniser.swift in Sources */, BD3B5BF21FC06423001FDB3B /* Tokeniser.swift in Sources */,
BD3B5BF31FC06423001FDB3B /* Pattern.swift in Sources */, BD3B5BF31FC06423001FDB3B /* Pattern.swift in Sources */,
@ -1097,6 +1198,7 @@
BD3B5C0E1FC06423001FDB3B /* FormElement.swift in Sources */, BD3B5C0E1FC06423001FDB3B /* FormElement.swift in Sources */,
BD3B5C0F1FC06423001FDB3B /* UnicodeScalar.swift in Sources */, BD3B5C0F1FC06423001FDB3B /* UnicodeScalar.swift in Sources */,
BD3B5C101FC06423001FDB3B /* Validate.swift in Sources */, BD3B5C101FC06423001FDB3B /* Validate.swift in Sources */,
BB57C2ED222CCCC6008933AA /* BinarySearch.swift in Sources */,
BD3B5C111FC06423001FDB3B /* Selector.swift in Sources */, BD3B5C111FC06423001FDB3B /* Selector.swift in Sources */,
BD3B5C121FC06423001FDB3B /* Parser.swift in Sources */, BD3B5C121FC06423001FDB3B /* Parser.swift in Sources */,
BD3B5C131FC06423001FDB3B /* XmlDeclaration.swift in Sources */, BD3B5C131FC06423001FDB3B /* XmlDeclaration.swift in Sources */,
@ -1131,6 +1233,11 @@
target = 8CE418151DAA54A900240B42 /* SwiftSoup-iOS */; target = 8CE418151DAA54A900240B42 /* SwiftSoup-iOS */;
targetProxy = 8CE418211DAA54A900240B42 /* PBXContainerItemProxy */; targetProxy = 8CE418211DAA54A900240B42 /* PBXContainerItemProxy */;
}; };
BB57C2D5222CAF8E008933AA /* PBXTargetDependency */ = {
isa = PBXTargetDependency;
target = BD3B5B681FBED933001FDB3B /* SwiftSoup-macOS */;
targetProxy = BB57C2D4222CAF8E008933AA /* PBXContainerItemProxy */;
};
/* End PBXTargetDependency section */ /* End PBXTargetDependency section */
/* Begin XCBuildConfiguration section */ /* Begin XCBuildConfiguration section */
@ -1361,6 +1468,53 @@
}; };
name = Release; name = Release;
}; };
BB57C2D7222CAF8E008933AA /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
CLANG_ENABLE_OBJC_WEAK = YES;
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
CODE_SIGN_IDENTITY = "-";
CODE_SIGN_STYLE = Automatic;
COMBINE_HIDPI_IMAGES = YES;
DEVELOPMENT_TEAM = 5MC4PNHTX6;
GCC_C_LANGUAGE_STANDARD = gnu11;
INFOPLIST_FILE = "Tests-macOS/Info.plist";
LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/../Frameworks @loader_path/../Frameworks";
MACOSX_DEPLOYMENT_TARGET = 10.14;
MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
MTL_FAST_MATH = YES;
PRODUCT_BUNDLE_IDENTIFIER = "com.scinfu.SwiftSoupTests-macOS";
PRODUCT_NAME = "$(TARGET_NAME)";
SDKROOT = macosx;
SWIFT_VERSION = 5.0;
};
name = Debug;
};
BB57C2D8222CAF8E008933AA /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
CLANG_ENABLE_OBJC_WEAK = YES;
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
CODE_SIGN_IDENTITY = "-";
CODE_SIGN_STYLE = Automatic;
COMBINE_HIDPI_IMAGES = YES;
DEVELOPMENT_TEAM = 5MC4PNHTX6;
GCC_C_LANGUAGE_STANDARD = gnu11;
INFOPLIST_FILE = "Tests-macOS/Info.plist";
LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/../Frameworks @loader_path/../Frameworks";
MACOSX_DEPLOYMENT_TARGET = 10.14;
MTL_FAST_MATH = YES;
PRODUCT_BUNDLE_IDENTIFIER = "com.scinfu.SwiftSoupTests-macOS";
PRODUCT_NAME = "$(TARGET_NAME)";
SDKROOT = macosx;
SWIFT_VERSION = 5.0;
};
name = Release;
};
BD3B5BA71FBED933001FDB3B /* Debug */ = { BD3B5BA71FBED933001FDB3B /* Debug */ = {
isa = XCBuildConfiguration; isa = XCBuildConfiguration;
buildSettings = { buildSettings = {
@ -1580,6 +1734,15 @@
defaultConfigurationIsVisible = 0; defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release; defaultConfigurationName = Release;
}; };
BB57C2D6222CAF8E008933AA /* Build configuration list for PBXNativeTarget "SwiftSoupTests-macOS" */ = {
isa = XCConfigurationList;
buildConfigurations = (
BB57C2D7222CAF8E008933AA /* Debug */,
BB57C2D8222CAF8E008933AA /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
BD3B5BA61FBED933001FDB3B /* Build configuration list for PBXNativeTarget "SwiftSoup-macOS" */ = { BD3B5BA61FBED933001FDB3B /* Build configuration list for PBXNativeTarget "SwiftSoup-macOS" */ = {
isa = XCConfigurationList; isa = XCConfigurationList;
buildConfigurations = ( buildConfigurations = (

View File

@ -0,0 +1,113 @@
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
LastUpgradeVersion = "1020"
version = "1.3">
<BuildAction
parallelizeBuildables = "YES"
buildImplicitDependencies = "YES">
<BuildActionEntries>
<BuildActionEntry
buildForTesting = "YES"
buildForRunning = "YES"
buildForProfiling = "YES"
buildForArchiving = "YES"
buildForAnalyzing = "YES">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "BD3B5B681FBED933001FDB3B"
BuildableName = "SwiftSoup.framework"
BlueprintName = "SwiftSoup-macOS"
ReferencedContainer = "container:SwiftSoup.xcodeproj">
</BuildableReference>
</BuildActionEntry>
<BuildActionEntry
buildForTesting = "YES"
buildForRunning = "NO"
buildForProfiling = "NO"
buildForArchiving = "NO"
buildForAnalyzing = "NO">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "BB57C2CD222CAF8E008933AA"
BuildableName = "SwiftSoupTests-macOS.xctest"
BlueprintName = "SwiftSoupTests-macOS"
ReferencedContainer = "container:SwiftSoup.xcodeproj">
</BuildableReference>
</BuildActionEntry>
</BuildActionEntries>
</BuildAction>
<TestAction
buildConfiguration = "Release"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
shouldUseLaunchSchemeArgsEnv = "YES">
<Testables>
<TestableReference
skipped = "NO">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "BB57C2CD222CAF8E008933AA"
BuildableName = "SwiftSoupTests-macOS.xctest"
BlueprintName = "SwiftSoupTests-macOS"
ReferencedContainer = "container:SwiftSoup.xcodeproj">
</BuildableReference>
</TestableReference>
</Testables>
<MacroExpansion>
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "BD3B5B681FBED933001FDB3B"
BuildableName = "SwiftSoup.framework"
BlueprintName = "SwiftSoup-macOS"
ReferencedContainer = "container:SwiftSoup.xcodeproj">
</BuildableReference>
</MacroExpansion>
<AdditionalOptions>
</AdditionalOptions>
</TestAction>
<LaunchAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
launchStyle = "0"
useCustomWorkingDirectory = "NO"
ignoresPersistentStateOnLaunch = "NO"
debugDocumentVersioning = "YES"
debugServiceExtension = "internal"
allowLocationSimulation = "YES">
<MacroExpansion>
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "BD3B5B681FBED933001FDB3B"
BuildableName = "SwiftSoup.framework"
BlueprintName = "SwiftSoup-macOS"
ReferencedContainer = "container:SwiftSoup.xcodeproj">
</BuildableReference>
</MacroExpansion>
<AdditionalOptions>
</AdditionalOptions>
</LaunchAction>
<ProfileAction
buildConfiguration = "Release"
shouldUseLaunchSchemeArgsEnv = "YES"
savedToolIdentifier = ""
useCustomWorkingDirectory = "NO"
debugDocumentVersioning = "YES">
<MacroExpansion>
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "BD3B5B681FBED933001FDB3B"
BuildableName = "SwiftSoup.framework"
BlueprintName = "SwiftSoup-macOS"
ReferencedContainer = "container:SwiftSoup.xcodeproj">
</BuildableReference>
</MacroExpansion>
</ProfileAction>
<AnalyzeAction
buildConfiguration = "Debug">
</AnalyzeAction>
<ArchiveAction
buildConfiguration = "Release"
revealArchiveInOrganizer = "YES">
</ArchiveAction>
</Scheme>

View File

@ -20,6 +20,20 @@
ReferencedContainer = "container:SwiftSoup.xcodeproj"> ReferencedContainer = "container:SwiftSoup.xcodeproj">
</BuildableReference> </BuildableReference>
</BuildActionEntry> </BuildActionEntry>
<BuildActionEntry
buildForTesting = "YES"
buildForRunning = "NO"
buildForProfiling = "NO"
buildForArchiving = "NO"
buildForAnalyzing = "NO">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "8CE4181E1DAA54A900240B42"
BuildableName = "SwiftSoupTests.xctest"
BlueprintName = "SwiftSoupTests"
ReferencedContainer = "container:SwiftSoup.xcodeproj">
</BuildableReference>
</BuildActionEntry>
</BuildActionEntries> </BuildActionEntries>
</BuildAction> </BuildAction>
<TestAction <TestAction
@ -29,9 +43,7 @@
shouldUseLaunchSchemeArgsEnv = "YES"> shouldUseLaunchSchemeArgsEnv = "YES">
<Testables> <Testables>
<TestableReference <TestableReference
skipped = "NO" skipped = "NO">
parallelizable = "YES"
testExecutionOrdering = "random">
<BuildableReference <BuildableReference
BuildableIdentifier = "primary" BuildableIdentifier = "primary"
BlueprintIdentifier = "8CE4181E1DAA54A900240B42" BlueprintIdentifier = "8CE4181E1DAA54A900240B42"

22
Tests-macOS/Info.plist Normal file
View File

@ -0,0 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleDevelopmentRegion</key>
<string>$(DEVELOPMENT_LANGUAGE)</string>
<key>CFBundleExecutable</key>
<string>$(EXECUTABLE_NAME)</string>
<key>CFBundleIdentifier</key>
<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleName</key>
<string>$(PRODUCT_NAME)</string>
<key>CFBundlePackageType</key>
<string>BNDL</string>
<key>CFBundleShortVersionString</key>
<string>1.0</string>
<key>CFBundleVersion</key>
<string>1</string>
</dict>
</plist>

View File

@ -0,0 +1,39 @@
//
// ParserBenchmark.swift
// SwiftSoupTests
//
// Created by garth on 2/26/19.
// Copyright © 2019 Nabil Chatbi. All rights reserved.
//
import XCTest
import SwiftSoup
/// Benchmarks `SwiftSoup.parse` over the HTML files bundled as resources
/// with this test target.
class ParserBenchmark: XCTestCase {

    enum Const {
        /// Decoded text of every corpus file found in the test bundle.
        /// Reassigned by `setUp()` before each test method runs.
        static var corpusHTMLData: [String] = []

        /// How many times each corpus document is parsed per measured pass.
        static let repetitions = 5
    }

    /// Loads the HTML corpus from the bundle that contains this test class.
    override func setUp() {
        super.setUp()

        let bundle = Bundle(for: type(of: self))
        // The lookup returns an optional; fall back to an empty list rather
        // than force-unwrapping so a missing corpus surfaces as a test
        // failure (see `testParserPerformance`) instead of a crash.
        let urls = bundle.urls(forResourcesWithExtension: ".html", subdirectory: nil) ?? []
        Const.corpusHTMLData = urls
            .compactMap { try? Data(contentsOf: $0) }
            .map { String(decoding: $0, as: UTF8.self) }
    }

    /// Measures the time needed to parse every corpus document
    /// `Const.repetitions` times.
    func testParserPerformance() throws {
        // Without any input the measured block would do no work and the
        // benchmark would report a meaningless timing.
        XCTAssertFalse(Const.corpusHTMLData.isEmpty, "No HTML corpus files found in the test bundle")

        measure {
            for htmlDoc in Const.corpusHTMLData {
                for _ in 1...Const.repetitions {
                    do {
                        _ = try SwiftSoup.parse(htmlDoc)
                    } catch {
                        // Include the underlying error so a failure is diagnosable.
                        XCTFail("Exception while parsing HTML: \(error)")
                    }
                }
            }
        }
    }
}

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -53,10 +53,12 @@ class CharacterReaderTest: XCTestCase {
XCTAssertEqual("e", r.consume()) XCTAssertEqual("e", r.consume())
XCTAssertTrue(r.isEmpty()) XCTAssertTrue(r.isEmpty())
XCTAssertEqual(CharacterReader.EOF, r.consume()) // Indexes beyond the end are not allowed in native indexing
r.unconsume() //
XCTAssertTrue(r.isEmpty()) // XCTAssertEqual(CharacterReader.EOF, r.consume())
XCTAssertEqual(CharacterReader.EOF, r.current()) // r.unconsume()
// XCTAssertTrue(r.isEmpty())
// XCTAssertEqual(CharacterReader.EOF, r.current())
} }
func testMark() { func testMark() {
@ -82,31 +84,31 @@ class CharacterReaderTest: XCTestCase {
let input = "blah blah" let input = "blah blah"
let r = CharacterReader(input) let r = CharacterReader(input)
XCTAssertEqual(-1, r.nextIndexOf("x")) XCTAssertEqual(nil, r.nextIndexOf("x"))
XCTAssertEqual(3, r.nextIndexOf("h")) XCTAssertEqual(input.index(input.startIndex, offsetBy: 3), r.nextIndexOf("h"))
let pull = r.consumeTo("h") let pull = r.consumeTo("h")
XCTAssertEqual("bla", pull) XCTAssertEqual("bla", pull)
XCTAssertEqual("h", r.consume()) XCTAssertEqual("h", r.consume())
XCTAssertEqual(2, r.nextIndexOf("l")) XCTAssertEqual(input.index(input.startIndex, offsetBy: 6), r.nextIndexOf("l"))
XCTAssertEqual(" blah", r.consumeToEnd()) XCTAssertEqual(" blah", r.consumeToEnd())
XCTAssertEqual(-1, r.nextIndexOf("x")) XCTAssertEqual(nil, r.nextIndexOf("x"))
} }
func testNextIndexOfString() { func testNextIndexOfString() {
let input = "One Two something Two Three Four" let input = "One Two something Two Three Four"
let r = CharacterReader(input) let r = CharacterReader(input)
XCTAssertEqual(-1, r.nextIndexOf("Foo")) XCTAssertEqual(nil, r.nextIndexOf("Foo"))
XCTAssertEqual(4, r.nextIndexOf("Two")) XCTAssertEqual(input.index(input.startIndex, offsetBy: 4), r.nextIndexOf("Two"))
XCTAssertEqual("One Two ", r.consumeTo("something")) XCTAssertEqual("One Two ", r.consumeTo("something"))
XCTAssertEqual(10, r.nextIndexOf("Two")) XCTAssertEqual(input.index(input.startIndex, offsetBy: 18), r.nextIndexOf("Two"))
XCTAssertEqual("something Two Three Four", r.consumeToEnd()) XCTAssertEqual("something Two Three Four", r.consumeToEnd())
XCTAssertEqual(-1, r.nextIndexOf("Two")) XCTAssertEqual(nil, r.nextIndexOf("Two"))
} }
func testNextIndexOfUnmatched() { func testNextIndexOfUnmatched() {
let r = CharacterReader("<[[one]]") let r = CharacterReader("<[[one]]")
XCTAssertEqual(-1, r.nextIndexOf("]]>")) XCTAssertEqual(nil, r.nextIndexOf("]]>"))
} }
func testConsumeToChar() { func testConsumeToChar() {

View File

@ -50,15 +50,15 @@ class EntitiesTest: XCTestCase {
func testXhtml() { func testXhtml() {
//let text = "&amp; &gt; &lt; &quot;"; //let text = "&amp; &gt; &lt; &quot;";
XCTAssertEqual(38, Entities.EscapeMode.xhtml.codepointForName("amp")) XCTAssertEqual(UnicodeScalar(38), Entities.EscapeMode.xhtml.codepointForName("amp"))
XCTAssertEqual(62, Entities.EscapeMode.xhtml.codepointForName("gt")) XCTAssertEqual(UnicodeScalar(62), Entities.EscapeMode.xhtml.codepointForName("gt"))
XCTAssertEqual(60, Entities.EscapeMode.xhtml.codepointForName("lt")) XCTAssertEqual(UnicodeScalar(60), Entities.EscapeMode.xhtml.codepointForName("lt"))
XCTAssertEqual(34, Entities.EscapeMode.xhtml.codepointForName("quot")) XCTAssertEqual(UnicodeScalar(34), Entities.EscapeMode.xhtml.codepointForName("quot"))
XCTAssertEqual("amp", Entities.EscapeMode.xhtml.nameForCodepoint(38)) XCTAssertEqual("amp", Entities.EscapeMode.xhtml.nameForCodepoint(UnicodeScalar(38)!))
XCTAssertEqual("gt", Entities.EscapeMode.xhtml.nameForCodepoint(62)) XCTAssertEqual("gt", Entities.EscapeMode.xhtml.nameForCodepoint(UnicodeScalar(62)!))
XCTAssertEqual("lt", Entities.EscapeMode.xhtml.nameForCodepoint(60)) XCTAssertEqual("lt", Entities.EscapeMode.xhtml.nameForCodepoint(UnicodeScalar(60)!))
XCTAssertEqual("quot", Entities.EscapeMode.xhtml.nameForCodepoint(34)) XCTAssertEqual("quot", Entities.EscapeMode.xhtml.nameForCodepoint(UnicodeScalar(34)!))
} }
func testGetByName() { func testGetByName() {