This commit is contained in:
Nabil Chatbi 2016-12-28 14:22:28 +01:00
parent 6272c01cd8
commit db2f0bafbc
82 changed files with 4557 additions and 5103 deletions

View File

@ -13,7 +13,6 @@ class AppDelegate: UIResponder, UIApplicationDelegate {
var window: UIWindow?
func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplicationLaunchOptionsKey: Any]?) -> Bool {
// Override point for customization after application launch.
return true
@ -41,6 +40,4 @@ class AppDelegate: UIResponder, UIApplicationDelegate {
// Called when the application is about to terminate. Save data if appropriate. See also applicationDidEnterBackground:.
}
}

View File

@ -22,15 +22,14 @@ class ViewController: UIViewController {
super.didReceiveMemoryWarning()
// Dispose of any resources that can be recreated.
}
/// Parses a small hard-coded HTML sample with SwiftSoup.
/// - Returns: the `Document` produced by `SwiftSoup.parse`.
/// - Throws: any error raised by `SwiftSoup.parse`.
func parseDocument() throws -> Document {
    let head = "<html><head><title>First parse</title></head>"
    let body = "<body><p>Parsed HTML into a doc.</p></body></html>"
    return try SwiftSoup.parse(head + body)
}
func testPerformanceDiv() {
let h: String = "<!doctype html>\n" +
"<html>\n" +
@ -69,18 +68,16 @@ class ViewController: UIViewController {
" <foo />bar\n" +
" </body>\n" +
"</html>"
let doc: Document = try! SwiftSoup.parse(h);
let doc: Document = try! SwiftSoup.parse(h)
do {
for _ in 0...100000{
_ = try doc.select("div");
for _ in 0...100000 {
_ = try doc.select("div")
}
}
catch {
} catch {
}
}
func testSite()
{
func testSite() {
let myURLString = "http://apple.com"
guard let myURL = URL(string: myURLString) else {
print("Error: \(myURLString) doesn't seem to be a valid URL")
@ -88,18 +85,14 @@ class ViewController: UIViewController {
}
let html = try! String(contentsOf: myURL, encoding: .utf8)
let doc: Document = try! SwiftSoup.parse(html)
do {
for _ in 0...100{
for _ in 0...100 {
_ = try doc.text()
}
}
catch {
} catch {
print("Error")
}
}
}

View File

@ -3,27 +3,27 @@ import XCTest
import SwiftSoup
/// Placeholder XCTest case; none of its tests exercise library behaviour.
class Tests: XCTestCase {

    /// Called before each test method in the class; no extra set-up is performed.
    override func setUp() {
        super.setUp()
    }

    /// Called after each test method in the class; no extra tear-down is performed.
    override func tearDown() {
        super.tearDown()
    }

    /// Example functional test that always passes.
    func testExample() {
        XCTAssert(true, "Pass")
    }

    /// Example performance test; the measured block is currently empty.
    func testPerformanceExample() {
        measure {
            // Code whose running time should be measured goes here.
        }
    }
}

View File

@ -1,5 +1,5 @@
import PackageDescription
let package = Package(
name: "SwiftSoup"
name: "SwiftSoup"
)

View File

@ -8,53 +8,47 @@
import Foundation
extension Array
{
extension Array {
/// Binary-searches `collection` for `target`.
///
/// The search runs over the `collection` argument, not over the receiver.
/// - Parameter collection: values to search; the halving logic in
///   `binaryMakeGuess` only gives meaningful answers for ascending order.
/// - Parameter target: value to look for.
/// - Returns: an index where `target` was found, or -1 if it is absent.
func binarySearch<T: Comparable>(_ collection: [T], _ target: T) -> Int {
    return binaryMakeGuess(min: 0, max: collection.count - 1, target: target, collection: collection)
}
/// Searches `collection[min...max]` for `target` by repeated halving.
/// - Parameter min: lowest index still under consideration.
/// - Parameter max: highest index still under consideration; a value below
///   `min` means the range is empty.
/// - Parameter target: value to look for.
/// - Parameter collection: values to search, expected in ascending order.
/// - Returns: an index holding `target`, or -1 when the range is exhausted.
func binaryMakeGuess<T: Comparable>(min: Int, max: Int, target: T, collection: [T]) -> Int {
    var low = min
    var high = max
    while low <= high {
        let middle = (low + high) / 2
        let candidate = collection[middle]
        if candidate == target {
            return middle
        }
        if candidate > target {
            // Guess is too high: continue in the lower half.
            high = middle - 1
        } else {
            // Guess is too low: continue in the upper half.
            low = middle + 1
        }
    }
    // Range is empty, so the target is not present.
    return -1
}
}
extension Array where Element : Equatable
{
func lastIndexOf(_ e: Element) -> Int
{
extension Array where Element : Equatable {
func lastIndexOf(_ e: Element) -> Int {
for pos in (0..<self.count).reversed() {
let next = self[pos]
if (next == e) {
@ -64,5 +58,3 @@ extension Array where Element : Equatable
return -1
}
}

View File

@ -8,34 +8,34 @@
import Foundation
open class Attribute {
open class Attribute {
/// The element type of a dictionary: a tuple containing an individual
/// key-value pair.
static let booleanAttributes: [String] = [
"allowfullscreen", "async", "autofocus", "checked", "compact", "declare", "default", "defer", "disabled",
"formnovalidate", "hidden", "inert", "ismap", "itemscope", "multiple", "muted", "nohref", "noresize",
"noshade", "novalidate", "nowrap", "open", "readonly", "required", "reversed", "seamless", "selected",
"sortable", "truespeed", "typemustmatch"
]
var key: String
var value: String
public init(key: String,value :String) throws {
public init(key: String, value: String) throws {
try Validate.notEmpty(string: key)
self.key = key.trim()
self.value = value
}
/**
Get the attribute key.
@return the attribute key
*/
open func getKey() -> String{
open func getKey() -> String {
return key
}
/**
Set the attribute key; case is preserved.
@param key the new key; must not be null
@ -44,7 +44,7 @@ open class Attribute {
try Validate.notEmpty(string: key)
self.key = key.trim()
}
/**
Get the attribute value.
@return the attribute value
@ -52,28 +52,28 @@ open class Attribute {
open func getValue() -> String {
return value
}
/**
Set the attribute value.
@param value the new attribute value; must not be null
*/
@discardableResult
open func setValue(value: String) -> String {
open func setValue(value: String) -> String {
let old = self.value
self.value = value
return old
}
/**
Get the HTML representation of this attribute; e.g. {@code href="index.html"}.
@return HTML
*/
public func html()-> String {
public func html() -> String {
    // Render with the output settings of a throw-away empty document.
    let settings = Document("").outputSettings()
    let buffer = StringBuilder()
    html(accum: buffer, out: settings)
    return buffer.toString()
}
public func html(accum: StringBuilder, out: OutputSettings ) {
accum.append(key)
if (!shouldCollapseAttribute(out: out)) {
@ -82,15 +82,15 @@ open class Attribute {
accum.append("\"")
}
}
/**
Get the string representation of this attribute, implemented as {@link #html()}.
@return string
*/
open func toString()-> String {
open func toString() -> String {
return html()
}
/**
* Create a new Attribute from an unencoded key and a HTML attribute encoded value.
* @param unencodedKey assumes the key is not encoded, as can be only run of simple \w chars.
@ -101,11 +101,11 @@ open class Attribute {
let value = try Entities.unescape(string: encodedValue, strict: true)
return try Attribute(key: unencodedKey, value: value)
}
/// Reports whether this attribute's key starts with `Attributes.dataPrefix`
/// and is longer than the prefix, i.e. something follows the prefix.
public func isDataAttribute() -> Bool {
    let prefix = Attributes.dataPrefix
    guard key.startsWith(prefix) else { return false }
    return key.characters.count > prefix.characters.count
}
/**
* Collapsible if it's a boolean attribute and value is empty or same as name
*
@ -117,39 +117,32 @@ open class Attribute {
&& out.syntax() == OutputSettings.Syntax.html
&& isBooleanAttribute()
}
public func isBooleanAttribute() -> Bool
{
return (Attribute.booleanAttributes.binarySearch(Attribute.booleanAttributes,key) != -1)
public func isBooleanAttribute() -> Bool {
    // `booleanAttributes` is kept in ascending order so it can be binary-searched.
    let known = Attribute.booleanAttributes
    let position = known.binarySearch(known, key)
    return position != -1
}
/// Combines the hashes of `key` and `value` as `31 * keyHash + valueHash`.
/// - Returns: the combined hash; the arithmetic wraps around on overflow.
public func hashCode() -> Int {
    // `hashValue` can be any Int, so the checked `*` / `+` operators would
    // trap on overflow at runtime. `&*` / `&+` wrap instead.
    return 31 &* key.hashValue &+ value.hashValue
}
public func clone() -> Attribute
{
public func clone() -> Attribute {
do {
return try Attribute(key: key,value: value)
} catch Exception.Error( _ , let msg){
return try Attribute(key: key, value: value)
} catch Exception.Error( _, let msg) {
print(msg)
}catch{
} catch {
}
return try! Attribute(key: "",value: "")
return try! Attribute(key: "", value: "")
}
}
extension Attribute : Equatable
{
static public func == (lhs: Attribute, rhs: Attribute) -> Bool
{
extension Attribute: Equatable {
    /// Two attributes are equal when both their keys and their values match.
    public static func == (lhs: Attribute, rhs: Attribute) -> Bool {
        guard lhs.key == rhs.key else { return false }
        return lhs.value == rhs.value
    }
}

View File

@ -21,75 +21,74 @@ import Foundation
*
* @author Jonathan Hedley, jonathan@hedley.net
*/
open class Attributes : NSCopying {
open static var dataPrefix : String = "data-"
fileprivate var attributes : OrderedDictionary<String, Attribute> = OrderedDictionary<String, Attribute>()
open class Attributes: NSCopying {
open static var dataPrefix: String = "data-"
fileprivate var attributes: OrderedDictionary<String, Attribute> = OrderedDictionary<String, Attribute>()
// linked hash map to preserve insertion order.
// null by default, as so many elements have no attributes -- saves a good chunk of memory
public init(){}
public init() {}
/**
Get an attribute value by key.
@param key the (case-sensitive) attribute key
@return the attribute value if set; or empty string if not set.
@see #hasKey(String)
*/
open func get(key : String)-> String {
let attr : Attribute? = attributes.get(key:key)
open func get(key: String) -> String {
    // Fall back to the empty string when no attribute is stored under `key`.
    return attributes.get(key: key)?.getValue() ?? ""
}
/**
* Get an attribute's value by case-insensitive key
* @param key the attribute name
* @return the first matching attribute value if set; or empty string if not set.
*/
open func getIgnoreCase(key : String )throws -> String {
open func getIgnoreCase(key: String )throws -> String {
try Validate.notEmpty(string: key)
for attrKey in (attributes.keySet())
{
if attrKey.equalsIgnoreCase(string: key){
for attrKey in (attributes.keySet()) {
if attrKey.equalsIgnoreCase(string: key) {
return attributes.get(key: attrKey)!.getValue()
}
}
return ""
}
/**
Set a new attribute, or replace an existing one by key.
@param key attribute key
@param value attribute value
*/
open func put(_ key : String , _ value : String) throws {
open func put(_ key: String, _ value: String) throws {
    // Building the Attribute validates the key and may throw.
    try put(attribute: Attribute(key: key, value: value))
}
/**
Set a new boolean attribute, remove attribute if value is false.
@param key attribute key
@param value attribute value
*/
open func put(_ key : String , _ value : Bool) throws {
if (value){
open func put(_ key: String, _ value: Bool) throws {
if (value) {
try put(attribute: BooleanAttribute(key: key))
}else{
} else {
try remove(key: key)
}
}
/**
Set a new attribute, or replace an existing one by key.
@param attribute attribute
*/
open func put(attribute : Attribute) {
open func put(attribute: Attribute) {
attributes.put(value: attribute, forKey:attribute.getKey())
}
/**
Remove an attribute by key. <b>Case sensitive.</b>
@param key attribute key to remove
@ -98,29 +97,29 @@ open class Attributes : NSCopying {
try Validate.notEmpty(string: key)
attributes.remove(key: key)
}
/**
Remove an attribute by key. <b>Case insensitive.</b>
@param key attribute key to remove
*/
open func removeIgnoreCase(key : String ) throws {
open func removeIgnoreCase(key: String ) throws {
try Validate.notEmpty(string: key)
for attrKey in attributes.keySet(){
if (attrKey.equalsIgnoreCase(string: key)){
for attrKey in attributes.keySet() {
if (attrKey.equalsIgnoreCase(string: key)) {
attributes.remove(key: attrKey)
}
}
}
/**
Tests if these attributes contain an attribute with this key.
@param key case-sensitive key to check for
@return true if key exists, false otherwise
*/
open func hasKey(key : String) -> Bool {
open func hasKey(key: String) -> Bool {
return attributes.containsKey(key: key)
}
/**
Tests if these attributes contain an attribute with this key.
@param key key to check for
@ -128,13 +127,13 @@ open class Attributes : NSCopying {
*/
open func hasKeyIgnoreCase(key: String) -> Bool {
for attrKey in attributes.keySet() {
if (attrKey.equalsIgnoreCase(string: key)){
if (attrKey.equalsIgnoreCase(string: key)) {
return true
}
}
return false
}
/**
Get the number of attributes in this set.
@return size
@ -142,7 +141,7 @@ open class Attributes : NSCopying {
open func size() -> Int {
return attributes.count// TODO: check that this returns the right size
}
/**
Add all the attributes from the incoming set to this set.
@param incoming attributes to add to these attributes.
@ -151,17 +150,16 @@ open class Attributes : NSCopying {
guard let incoming = incoming else {
return
}
if (incoming.size() == 0){
if (incoming.size() == 0) {
return
}
attributes.putAll(all: incoming.attributes)
}
open func iterator() -> IndexingIterator<Array<Attribute>> {
if (attributes.isEmpty)
{
let args : [Attribute] = []
if (attributes.isEmpty) {
let args: [Attribute] = []
return args.makeIterator()
}
return attributes.orderedValues.makeIterator()
@ -173,31 +171,31 @@ open class Attributes : NSCopying {
@return an view of the attributes as a List.
*/
open func asList() -> Array<Attribute> {
var list : Array<Attribute> = Array(/*attributes.size()*/)
var list: Array<Attribute> = Array(/*attributes.size()*/)
for entry in attributes.orderedValues {
list.append(entry)
}
return list
}
/**
* Retrieves a filtered view of attributes that are HTML5 custom data attributes; that is, attributes with keys
* starting with {@code data-}.
* @return map of custom data attributes.
*/
//Map<String, String>
open func dataset() -> Dictionary<String,String> {
var dataset = Dictionary<String,String>()
for attribute in attributes{
open func dataset() -> Dictionary<String, String> {
var dataset = Dictionary<String, String>()
for attribute in attributes {
let attr = attribute.1
if(attr.isDataAttribute()){
if(attr.isDataAttribute()) {
let key = attr.getKey().substring(Attributes.dataPrefix.characters.count)
dataset[key] = attribute.1.getValue()
}
}
return dataset
}
/**
Get the HTML representation of these attributes.
@return HTML
@ -208,30 +206,30 @@ open class Attributes : NSCopying {
try html(accum: accum, out: Document("").outputSettings()) // output settings a bit funky, but this html() seldom used
return accum.toString()
}
public func html(accum: StringBuilder,out: OutputSettings ) throws {
/// Appends every attribute, in stored order, to `accum`, each preceded by a single space.
/// - Parameter accum: builder that receives the rendered attributes.
/// - Parameter out: output settings passed through to each attribute's renderer.
public func html(accum: StringBuilder, out: OutputSettings ) throws {
    attributes.orderedValues.forEach { entry in
        accum.append(" ")
        entry.html(accum: accum, out: out)
    }
}
open func toString()throws -> String {
return try html()
}
/**
* Checks if these attributes are equal to another set of attributes, by comparing the two sets
* @param o attributes to compare with
* @return if both sets of attributes have the same content
*/
open func equals(o: AnyObject?) -> Bool {
if(o == nil){return false}
if(o == nil) {return false}
if (self === o.self) {return true}
guard let that : Attributes = o as? Attributes else {return false}
guard let that: Attributes = o as? Attributes else {return false}
return (attributes == that.attributes)
}
/**
* Calculates the hashcode of these attributes, by iterating all attributes and summing their hashcodes.
* @return calculated hashcode
@ -239,30 +237,28 @@ open class Attributes : NSCopying {
open func hashCode() -> Int {
return attributes.hashCode()
}
public func copy(with zone: NSZone? = nil) -> Any
{
/// `NSCopying` entry point: returns a new `Attributes` whose storage is a clone of this one's.
/// - Parameter zone: not used.
public func copy(with zone: NSZone? = nil) -> Any {
    let duplicate = Attributes()
    duplicate.attributes = attributes.clone()
    return duplicate
}
open func clone() -> Attributes {
return self.copy() as! Attributes
}
fileprivate static func dataKey(key: String) -> String {
return dataPrefix + key
}
}
extension Attributes: Sequence {
    /// Iterates the attributes in their stored order.
    ///
    /// The iterator walks a snapshot of `orderedValues` taken when it is
    /// created. It advances the array's own iterator rather than calling
    /// `removeFirst()`, which shifts the remaining elements on every step
    /// and makes a full pass quadratic.
    public func makeIterator() -> AnyIterator<Attribute> {
        var iterator = attributes.orderedValues.makeIterator()
        return AnyIterator {
            return iterator.next()
        }
    }
}

View File

@ -11,16 +11,15 @@ import Foundation
/**
* A boolean attribute that is written out without any value.
*/
open class BooleanAttribute : Attribute {
open class BooleanAttribute: Attribute {
/**
* Create a new boolean attribute from unencoded (raw) key.
* @param key attribute key
*/
init(key : String) throws {
init(key: String) throws {
try super.init(key: key, value: "")
}
override public func isBooleanAttribute() -> Bool {
return true
}

View File

@ -16,126 +16,126 @@ private let symbolSet = CharacterSet.symbols
private let digitSet = CharacterSet.decimalDigits
extension Character {
public static let BackslashF : Character = Character(UnicodeScalar(12))
public static let BackslashF: Character = Character(UnicodeScalar(12))
//http://www.unicode.org/glossary/#supplementary_code_point
public static let MIN_SUPPLEMENTARY_CODE_POINT : UInt32 = 0x010000
public static let MIN_SUPPLEMENTARY_CODE_POINT: UInt32 = 0x010000
/// The first `UnicodeScalar` of `self`.
var unicodeScalar: UnicodeScalar {
let unicodes = String(self).unicodeScalars
return unicodes[unicodes.startIndex]
}
/// True for the space character, the control characters \t, \n, \r, \f, \v,
/// and the combined "\r\n" character.
var isWhitespace: Bool {
    switch self {
    case " ", "\t", "\n", "\r", "\r\n", Character.BackslashF, "\u{000B}", "\u{000C}":
        // U+000B is vertical tab, U+000C is form feed.
        return true
    default:
        return false
    }
}
/// True for any Unicode space character, and the control characters \t, \n, \r, \f, \v.
var isUnicodeSpace: Bool {
switch self {
case " ", "\t", "\n", "\r", "\r\n",Character.BackslashF: return true
case " ", "\t", "\n", "\r", "\r\n", Character.BackslashF: return true
case "\u{000C}", "\u{000B}", "\u{0085}": return true // Form Feed, vertical tab, next line (nel)
case "\u{00A0}", "\u{1680}", "\u{180E}": return true // No-break space, ogham space mark, mongolian vowel
case "\u{2000}"..."\u{200D}": return true // En quad, em quad, en space, em space, three-per-em space, four-per-em space, six-per-em space, figure space, ponctuation space, thin space, hair space, zero width space, zero width non-joiner, zero width joiner.
case "\u{2028}", "\u{2029}": return true // Line separator, paragraph separator.
case "\u{202F}", "\u{205F}", "\u{2060}", "\u{3000}", "\u{FEFF}": return true // Narrow no-break space, medium mathematical space, word joiner, ideographic space, zero width no-break space.
default: return false
}
}
/// `true` if `self` normalized contains a single code unit that is in the categories of Uppercase and Titlecase Letters.
var isUppercase: Bool {
return isMemberOfCharacterSet(uppercaseSet)
}
/// `true` if `self` normalized contains a single code unit that is in the category of Lowercase Letters.
var isLowercase: Bool {
return isMemberOfCharacterSet(lowercaseSet)
}
/// `true` if `self` normalized contains a single code unit that is in the categories of Letters and Marks.
var isAlpha: Bool {
return isMemberOfCharacterSet(alphaSet)
}
/// `true` if `self` normalized contains a single code unit that is in the categories of Letters, Marks, and Numbers.
var isAlphaNumeric: Bool {
return isMemberOfCharacterSet(alphaNumericSet)
}
/// `true` if `self` normalized contains a single code unit that is in the category of Symbols. These characters include, for example, the dollar sign ($) and the plus (+) sign.
var isSymbol: Bool {
return isMemberOfCharacterSet(symbolSet)
}
/// `true` if `self` normalized contains a single code unit that is in the category of Decimal Numbers.
var isDigit: Bool {
return isMemberOfCharacterSet(digitSet)
}
/// `true` if `self` is an ASCII decimal digit, i.e. between "0" and "9".
var isDecimalDigit: Bool {
return "0123456789".characters.contains(self)
}
/// `true` if `self` is an ASCII hexadecimal digit, i.e. "0"..."9", "a"..."f", "A"..."F".
var isHexadecimalDigit: Bool {
    // Lookup table with each hexadecimal digit listed exactly once.
    return "0123456789abcdefABCDEF".characters.contains(self)
}
/// `true` if `self` is an ASCII octal digit, i.e. between '0' and '7'.
var isOctalDigit: Bool {
return "01234567".characters.contains(self)
}
/// Lowercase `self`.
var lowercase: Character {
let str = String(self).lowercased()
return str[str.startIndex]
}
func isChar(inSet set: CharacterSet) -> Bool {
var found = true
for ch in String(self).utf16 {
@ -143,58 +143,53 @@ extension Character {
}
return found
}
/// Uppercase `self`.
var uppercase: Character {
let str = String(self).uppercased()
return str[str.startIndex]
}
/// Tests whether `self`, after canonical precomposition, is exactly one
/// Unicode scalar and that scalar belongs to `set`.
///
/// - parameter set: The `CharacterSet` used to test for membership.
/// - returns: `false` when the normalized form has zero or several scalars;
///   otherwise whether the single scalar is contained in `set`.
func isMemberOfCharacterSet(_ set: CharacterSet) -> Bool {
    let scalars = String(self).precomposedStringWithCanonicalMapping.unicodeScalars
    if scalars.count != 1 {
        return false
    }
    let only = scalars[scalars.startIndex]
    return set.contains(only)
}
static func convertFromIntegerLiteral(value: IntegerLiteralType) -> Character {
return Character(UnicodeScalar(value)!)
}
func unicodeScalarCodePoint() -> UInt32
{
func unicodeScalarCodePoint() -> UInt32 {
return unicodeScalar.value
}
static func charCount(codePoint: UInt32) -> Int {
return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1
}
static func isLetter(_ char: Character)->Bool{
static func isLetter(_ char: Character) -> Bool {
return char.isLetter()
}
func isLetter()->Bool{
func isLetter() -> Bool {
return self.isMemberOfCharacterSet(CharacterSet.letters)
}
static func isLetterOrDigit(_ char: Character)->Bool
{
static func isLetterOrDigit(_ char: Character) -> Bool {
return char.isLetterOrDigit()
}
func isLetterOrDigit()->Bool
{
func isLetterOrDigit() -> Bool {
if(self.isLetter()) {return true}
return self.isDigit
}
}

View File

@ -11,80 +11,78 @@ import Foundation
/**
CharacterReader consumes tokens off a string. To replace the old TokenQueue.
*/
public final class CharacterReader
{
public static let EOF : UnicodeScalar = "\u{FFFF}"//65535
private static let maxCacheLen : Int = 12
private let input : [UnicodeScalar]
private let length : Int
private var pos : Int = 0
private var mark : Int = 0
private let stringCache : Array<String?> // holds reused strings in this doc, to lessen garbage
public init(_ input: String)
{
public final class CharacterReader {
public static let EOF: UnicodeScalar = "\u{FFFF}"//65535
private static let maxCacheLen: Int = 12
private let input: [UnicodeScalar]
private let length: Int
private var pos: Int = 0
private var mark: Int = 0
private let stringCache: Array<String?> // holds reused strings in this doc, to lessen garbage
public init(_ input: String) {
self.input = Array(input.unicodeScalars)
self.length = self.input.count
stringCache = Array(repeating:nil, count:512)
}
public func getPos() -> Int {
return self.pos
}
public func isEmpty() -> Bool {
return pos >= length
}
public func current() -> UnicodeScalar {
return (pos >= length) ? CharacterReader.EOF : input[pos]
}
/// Returns the scalar at the current position and moves one position forward.
/// At or past the end of input the result is `CharacterReader.EOF`; the
/// position is still advanced in that case.
@discardableResult
public func consume() -> UnicodeScalar {
    defer { pos += 1 }
    guard pos < length else {
        return CharacterReader.EOF
    }
    return input[pos]
}
public func unconsume() {
pos -= 1
}
public func advance() {
pos += 1
}
public func markPos() {
mark = pos
}
public func rewindToMark() {
pos = mark
}
public func consumeAsString() -> String {
let p = pos
pos+=1
return String(input[p])
//return String(input, pos+=1, 1)
}
/**
* Returns the number of characters between the current position and the next instance of the input char
* @param c scan target
* @return offset between current position and next instance of target. -1 if not found.
*/
public func nextIndexOf(_ c : UnicodeScalar) -> Int {
public func nextIndexOf(_ c: UnicodeScalar) -> Int {
// doesn't handle scanning for surrogates
for i in pos..<length {
if (c == input[i]){
if (c == input[i]) {
return i - pos
}
}
return -1
}
/**
* Returns the number of characters between the current position and the next instance of the input sequence
*
@ -93,33 +91,32 @@ public final class CharacterReader
*/
public func nextIndexOf(_ seq: String) -> Int {
// doesn't handle scanning for surrogates
if(seq.isEmpty){return -1}
let startChar : UnicodeScalar = seq.unicodeScalar(0)
if(seq.isEmpty) {return -1}
let startChar: UnicodeScalar = seq.unicodeScalar(0)
for var offset in pos..<length {
// scan to first instance of startchar:
if (startChar != input[offset]){
if (startChar != input[offset]) {
offset+=1
while(offset < length && startChar != input[offset]) { offset+=1 }
}
var i = offset + 1
let last = i + seq.unicodeScalars.count-1
if (offset < length && last <= length)
{
if (offset < length && last <= length) {
var j = 1
while i < last && seq.unicodeScalar(j) == input[i] {
j+=1
i+=1
}
// found full sequence
if (i == last){
if (i == last) {
return offset - pos
}
}
}
return -1
}
public func consumeTo(_ c : UnicodeScalar) -> String {
public func consumeTo(_ c: UnicodeScalar) -> String {
let offset = nextIndexOf(c)
if (offset != -1) {
let consumed = cacheString(pos, offset)
@ -129,7 +126,7 @@ public final class CharacterReader
return consumeToEnd()
}
}
public func consumeTo(_ seq: String) -> String {
let offset = nextIndexOf(seq)
if (offset != -1) {
@ -140,25 +137,24 @@ public final class CharacterReader
return consumeToEnd()
}
}
public func consumeToAny(_ chars: UnicodeScalar...)->String {
public func consumeToAny(_ chars: UnicodeScalar...) -> String {
return consumeToAny(chars)
}
public func consumeToAny(_ chars: [UnicodeScalar])->String {
let start : Int = pos
let remaining : Int = length
public func consumeToAny(_ chars: [UnicodeScalar]) -> String {
let start: Int = pos
let remaining: Int = length
let val = input
if(start == 2528){
if(start == 2528) {
let d = 1
print(d)
}
OUTER: while (pos < remaining)
{
if(pos == 41708){
OUTER: while (pos < remaining) {
if(pos == 41708) {
let d = 1
print(d)
}
if chars.contains(val[pos]){
if chars.contains(val[pos]) {
break OUTER
}
// for c in chars {
@ -168,207 +164,203 @@ public final class CharacterReader
// }
pos += 1
}
return pos > start ? cacheString(start, pos-start) : ""
}
public func consumeToAnySorted(_ chars: UnicodeScalar...)->String {
public func consumeToAnySorted(_ chars: UnicodeScalar...) -> String {
return consumeToAnySorted(chars)
}
public func consumeToAnySorted(_ chars: [UnicodeScalar])->String {
public func consumeToAnySorted(_ chars: [UnicodeScalar]) -> String {
let start = pos
let remaining = length
let val = input
while (pos < remaining) {
if (chars.binarySearch(chars, val[pos]) >= 0){
if (chars.binarySearch(chars, val[pos]) >= 0) {
break
}
pos += 1
}
return pos > start ? cacheString(start, pos-start) : ""
}
public func consumeData() -> String {
// &, <, null
let start = pos
let remaining = length
let val = input
while (pos < remaining) {
let c : UnicodeScalar = val[pos]
if (c == "&" || c == "<" || c == TokeniserStateVars.nullScalr){
let c: UnicodeScalar = val[pos]
if (c == "&" || c == "<" || c == TokeniserStateVars.nullScalr) {
break
}
pos += 1
}
return pos > start ? cacheString(start, pos-start) : ""
}
public func consumeTagName()-> String {
public func consumeTagName() -> String {
// '\t', '\n', '\r', '\f', ' ', '/', '>', nullChar
let start = pos
let remaining = length
let val = input
while (pos < remaining) {
let c : UnicodeScalar = val[pos]
if (c == "\t" || c == "\n" || c == "\r" || c == UnicodeScalar.BackslashF || c == " " || c == "/" || c == ">" || c == TokeniserStateVars.nullScalr){
let c: UnicodeScalar = val[pos]
if (c == "\t" || c == "\n" || c == "\r" || c == UnicodeScalar.BackslashF || c == " " || c == "/" || c == ">" || c == TokeniserStateVars.nullScalr) {
break
}
pos += 1
}
return pos > start ? cacheString(start, pos-start) : ""
}
public func consumeToEnd()-> String {
/// Returns everything from the current position to the end of input and
/// leaves the reader positioned at the end.
public func consumeToEnd() -> String {
    defer { pos = length }
    return cacheString(pos, length - pos)
}
public func consumeLetterSequence()-> String {
public func consumeLetterSequence() -> String {
let start = pos
while (pos < length) {
let c : UnicodeScalar = input[pos]
if ((c >= "A" && c <= "Z") || (c >= "a" && c <= "z") || c.isMemberOfCharacterSet(CharacterSet.letters)){
let c: UnicodeScalar = input[pos]
if ((c >= "A" && c <= "Z") || (c >= "a" && c <= "z") || c.isMemberOfCharacterSet(CharacterSet.letters)) {
pos += 1
}else{
} else {
break
}
}
return cacheString(start, pos - start)
}
public func consumeLetterThenDigitSequence()-> String {
public func consumeLetterThenDigitSequence() -> String {
let start = pos
while (pos < length) {
let c = input[pos]
if ((c >= "A" && c <= "Z") || (c >= "a" && c <= "z") || c.isMemberOfCharacterSet(CharacterSet.letters)){
if ((c >= "A" && c <= "Z") || (c >= "a" && c <= "z") || c.isMemberOfCharacterSet(CharacterSet.letters)) {
pos += 1
}else{
} else {
break
}
}
while (!isEmpty()) {
let c = input[pos]
if (c >= "0" && c <= "9"){
if (c >= "0" && c <= "9") {
pos += 1
}else{
} else {
break
}
}
return cacheString(start, pos - start)
}
public func consumeHexSequence()-> String {
public func consumeHexSequence() -> String {
let start = pos
while (pos < length) {
let c = input[pos]
if ((c >= "0" && c <= "9") || (c >= "A" && c <= "F") || (c >= "a" && c <= "f")){
if ((c >= "0" && c <= "9") || (c >= "A" && c <= "F") || (c >= "a" && c <= "f")) {
pos+=1
}else{
} else {
break
}
}
return cacheString(start, pos - start)
}
public func consumeDigitSequence() -> String {
let start = pos
while (pos < length) {
let c = input[pos]
if (c >= "0" && c <= "9"){
if (c >= "0" && c <= "9") {
pos+=1
}else{
} else {
break
}
}
return cacheString(start, pos - start)
}
public func matches(_ c: UnicodeScalar) -> Bool {
return !isEmpty() && input[pos] == c
}
public func matches(_ seq: String)-> Bool {
public func matches(_ seq: String) -> Bool {
let scanLength = seq.unicodeScalars.count
if (scanLength > length - pos){
if (scanLength > length - pos) {
return false
}
for offset in 0..<scanLength{
if (seq.unicodeScalar(offset) != input[pos+offset]){
for offset in 0..<scanLength {
if (seq.unicodeScalar(offset) != input[pos+offset]) {
return false
}
}
return true
}
public func matchesIgnoreCase(_ seq: String )->Bool {
public func matchesIgnoreCase(_ seq: String ) -> Bool {
let scanLength = seq.unicodeScalars.count
if(scanLength == 0){
if(scanLength == 0) {
return false
}
if (scanLength > length - pos){
if (scanLength > length - pos) {
return false
}
for offset in 0..<scanLength{
let upScan : UnicodeScalar = seq.unicodeScalar(offset).uppercase
let upTarget : UnicodeScalar = input[pos+offset].uppercase
if (upScan != upTarget){
for offset in 0..<scanLength {
let upScan: UnicodeScalar = seq.unicodeScalar(offset).uppercase
let upTarget: UnicodeScalar = input[pos+offset].uppercase
if (upScan != upTarget) {
return false
}
}
return true
}
public func matchesAny(_ seq: UnicodeScalar...)->Bool {
if (isEmpty()){
public func matchesAny(_ seq: UnicodeScalar...) -> Bool {
if (isEmpty()) {
return false
}
let c : UnicodeScalar = input[pos]
let c: UnicodeScalar = input[pos]
for seek in seq {
if (seek == c){
if (seek == c) {
return true
}
}
return false
}
public func matchesAnySorted(_ seq : [UnicodeScalar]) -> Bool {
public func matchesAnySorted(_ seq: [UnicodeScalar]) -> Bool {
return !isEmpty() && seq.binarySearch(seq, input[pos]) >= 0
}
public func matchesLetter()-> Bool {
if (isEmpty()){
/// Reports whether the scalar at the current position is a letter: ASCII
/// A-Z or a-z, or any member of `CharacterSet.letters`.
/// - Returns: `false` when the reader is at the end of input.
public func matchesLetter() -> Bool {
    guard !isEmpty() else { return false }
    let scalar = input[pos]
    let isAsciiUpper = scalar >= "A" && scalar <= "Z"
    let isAsciiLower = scalar >= "a" && scalar <= "z"
    return isAsciiUpper || isAsciiLower || scalar.isMemberOfCharacterSet(CharacterSet.letters)
}
public func matchesDigit()->Bool {
if (isEmpty()){
/// Reports whether the scalar at the current position is an ASCII digit 0-9.
/// - Returns: `false` when the reader is at the end of input.
public func matchesDigit() -> Bool {
    guard !isEmpty() else { return false }
    let scalar = input[pos]
    return scalar >= "0" && scalar <= "9"
}
@discardableResult
public func matchConsume(_ seq: String)->Bool {
public func matchConsume(_ seq: String) -> Bool {
if (matches(seq)) {
pos += seq.unicodeScalars.count
return true
@ -376,9 +368,9 @@ public final class CharacterReader
return false
}
}
@discardableResult
public func matchConsumeIgnoreCase(_ seq: String)->Bool {
public func matchConsumeIgnoreCase(_ seq: String) -> Bool {
if (matchesIgnoreCase(seq)) {
pos += seq.unicodeScalars.count
return true
@ -386,21 +378,19 @@ public final class CharacterReader
return false
}
}
public func containsIgnoreCase(_ seq: String )->Bool {
/// Checks whether the remaining input contains `seq` in all-lowercase or
/// all-uppercase form.
///
/// Used to check for the presence of </title>, </style>. Only finds
/// consistent case: a mixed-case occurrence is not detected.
/// - Parameter seq: the sequence to look for.
/// - Returns: `true` if either case-folded form occurs at or after the current position.
public func containsIgnoreCase(_ seq: String ) -> Bool {
    // Use one locale for both conversions so the two variants are derived the same way.
    let locale = Locale(identifier: "en")
    let loScan = seq.lowercased(with: locale)
    let hiScan = seq.uppercased(with: locale)
    return (nextIndexOf(loScan) > -1) || (nextIndexOf(hiScan) > -1)
}
public func toString()->String {
public func toString() -> String {
return String.unicodescalars(Array(input[pos..<length]))
//return input.string(pos, length - pos)
}
/**
* Caches short strings, as a flywheel pattern, to reduce GC load. Just for this doc, to prevent leaks.
* <p />
@ -410,30 +400,29 @@ public final class CharacterReader
*/
private func cacheString(_ start: Int, _ count: Int) -> String {
let val = input
var cache : [String?] = stringCache
var cache: [String?] = stringCache
// limit (no cache):
if (count > CharacterReader.maxCacheLen){
if (count > CharacterReader.maxCacheLen) {
return String.unicodescalars(Array(val[start..<start+count]))
}
// calculate hash:
var hash : Int = 0
var hash: Int = 0
var offset = start
for _ in 0..<count{
for _ in 0..<count {
let ch = val[offset].value
hash = Int.addWithOverflow(Int.multiplyWithOverflow(31, hash).0, Int(ch)).0
offset+=1
}
// get from cache
hash = abs(hash)
let i = hash % cache.count
let index : Int = abs(i) //Int(hash & Int(cache.count) - 1)
let index: Int = abs(i) //Int(hash & Int(cache.count) - 1)
var cached = cache[index]
if (cached == nil)
{ // miss, add
if (cached == nil) { // miss, add
cached = String.unicodescalars(Array(val[start..<start+count]))
//cached = val.string(start, count)
cache[Int(index)] = cached
@ -448,21 +437,19 @@ public final class CharacterReader
}
return cached!
}
/**
* Check if the value of the provided range equals the string.
*/
public func rangeEquals(_ start: Int, _ count: Int, _ cached: String) -> Bool {
if (count == cached.unicodeScalars.count)
{
if (count == cached.unicodeScalars.count) {
var count = count
let one = input
var i = start
var j = 0
while (count != 0) {
count -= 1
if (one[i] != cached.unicodeScalar(j) )
{
if (one[i] != cached.unicodeScalar(j) ) {
return false
}
j += 1

View File

@ -8,10 +8,9 @@
import Foundation
open class Cleaner
{
fileprivate let whitelist : Whitelist
open class Cleaner {
fileprivate let whitelist: Whitelist
/**
Create a new cleaner, that sanitizes documents using the supplied whitelist.
@param whitelist white-list to clean with
@ -19,7 +18,7 @@ open class Cleaner
public init(_ whitelist: Whitelist) {
self.whitelist = whitelist
}
/**
Creates a new, clean document, from the original dirty document, containing only elements allowed by the whitelist.
The original document is not modified. Only elements from the dirty document's <code>body</code> are used.
@ -35,7 +34,7 @@ open class Cleaner
}
return clean
}
/**
Determines if the input document is valid, against the whitelist. It is considered valid if all the tags and attributes
in the input HTML are allowed by the whitelist.
@ -53,63 +52,58 @@ open class Cleaner
let numDiscarded: Int = try copySafeNodes(dirtyDocument.body()!, clean.body()!)
return numDiscarded == 0
}
/// Walks `source` with a `CleaningVisitor` that writes into `dest`.
/// - Returns: the visitor's `numDiscarded` count once the traversal has finished.
/// - Throws: any error raised by the traversal.
@discardableResult
fileprivate func copySafeNodes(_ source: Element, _ dest: Element)throws->Int {
    let visitor = Cleaner.CleaningVisitor(source, dest, self)
    try NodeTraversor(visitor).traverse(source)
    return visitor.numDiscarded
}
/// Builds a counterpart of `sourceEl` that carries only whitelisted attributes.
///
/// The new element shares the source's tag name and base URI. Every source
/// attribute accepted by `whitelist.isSafeAttribute` is copied; every other one
/// is counted as discarded. The whitelist's enforced attributes for the tag are
/// added afterwards.
/// - Returns: the new element paired with the number of attributes dropped.
fileprivate func createSafeElement(_ sourceEl: Element)throws->ElementMeta {
    let tagName: String = sourceEl.tagName()
    let safeAttrs: Attributes = Attributes()
    let safeEl: Element = try Element(Tag.valueOf(tagName), sourceEl.getBaseUri(), safeAttrs)
    var dropped: Int = 0

    if let candidates = sourceEl.getAttributes() {
        for candidate: Attribute in candidates {
            guard whitelist.isSafeAttribute(tagName, sourceEl, candidate) else {
                dropped += 1
                continue
            }
            safeAttrs.put(attribute: candidate)
        }
    }

    let enforced: Attributes = try whitelist.getEnforcedAttributes(tagName)
    safeAttrs.addAll(incoming: enforced)
    return ElementMeta(safeEl, dropped)
}
}
extension Cleaner
{
fileprivate final class CleaningVisitor : NodeVisitor
{
extension Cleaner {
fileprivate final class CleaningVisitor: NodeVisitor {
var numDiscarded: Int = 0
let root: Element
var destination: Element? // current element to append nodes to
// Weak back-reference to the cleaner that created this visitor; its whitelist
// is consulted for every visited node.
private weak var cleaner: Cleaner?

/// Creates a visitor that copies permitted nodes from the tree under `root`
/// into `destination`.
/// - Parameters:
///   - root: Element the traversal starts from.
///   - destination: Element that receives the copied nodes.
///   - cleaner: Cleaner whose whitelist decides what is copied.
public init(_ root: Element, _ destination: Element, _ cleaner: Cleaner) {
    self.root = root
    self.destination = destination
    // The argument was previously dropped, leaving `cleaner` nil so the
    // `cleaner!` unwraps in this visitor trapped on first use.
    self.cleaner = cleaner
}
public func head(_ source: Node, _ depth: Int)throws {
if let sourceEl = (source as? Element) {
if (cleaner!.whitelist.isSafeTag(sourceEl.tagName())) { // safe, clone and copy safe attrs
let meta: Cleaner.ElementMeta = try cleaner!.createSafeElement(sourceEl)
let destChild: Element = meta.el
try destination?.appendChild(destChild)
numDiscarded += meta.numAttribsDiscarded
destination = destChild
} else if (source != root) { // not a safe tag, so don't add. don't count root against discarded.
@ -118,11 +112,8 @@ extension Cleaner
} else if let sourceText = (source as? TextNode) {
let destText: TextNode = TextNode(sourceText.getWholeText(), source.getBaseUri())
try destination?.appendChild(destText)
}
else if let sourceData = (source as? DataNode)
{
if sourceData.parent() != nil && cleaner!.whitelist.isSafeTag(sourceData.parent()!.nodeName())
{
} else if let sourceData = (source as? DataNode) {
if sourceData.parent() != nil && cleaner!.whitelist.isSafeTag(sourceData.parent()!.nodeName()) {
//let sourceData: DataNode = (DataNode) source
let destData: DataNode = DataNode(sourceData.getWholeData(), source.getBaseUri())
try destination?.appendChild(destData)
@ -131,13 +122,10 @@ extension Cleaner
numDiscarded+=1
}
}
public func tail(_ source: Node, _ depth: Int)throws
{
if let x = (source as? Element)
{
if cleaner!.whitelist.isSafeTag(x.nodeName())
{
public func tail(_ source: Node, _ depth: Int)throws {
if let x = (source as? Element) {
if cleaner!.whitelist.isSafeTag(x.nodeName()) {
// would have descended, so pop destination stack
destination = destination?.parent()
}
@ -146,12 +134,11 @@ extension Cleaner
}
}
extension Cleaner
{
extension Cleaner {
fileprivate struct ElementMeta {
let el: Element
let el: Element
let numAttribsDiscarded: Int
init(_ el: Element, _ numAttribsDiscarded: Int) {
self.el = el
self.numAttribsDiscarded = numAttribsDiscarded

View File

@ -14,10 +14,10 @@ import Foundation
* @author Jonathan Hedley
*/
open class Collector {
private init() {
}
/**
Build a list of elements, by visiting root and every descendant of root, and testing it against the evaluator.
@param eval Evaluator to test elements against
@ -25,36 +25,35 @@ open class Collector {
@return list of matches; empty if none
*/
open static func collect (_ eval: Evaluator, _ root: Element)throws->Elements {
    // The accumulator adds each matching element to `matches` while the
    // traversor walks the tree starting at `root`.
    let matches = Elements()
    let accumulator = Accumulator(root, matches, eval)
    let traversor = NodeTraversor(accumulator)
    try traversor.traverse(root)
    return matches
}
}
private final class Accumulator : NodeVisitor {
private final class Accumulator: NodeVisitor {
private let root: Element
private let elements: Elements
private let eval: Evaluator
init(_ root: Element, _ elements: Elements, _ eval: Evaluator) {
self.root = root
self.elements = elements
self.eval = eval
}
/// Adds `node` to the result list when it is an `Element` that the evaluator matches.
///
/// An error thrown by the evaluator is ignored and the node is treated as a non-match.
open func head(_ node: Node, _ depth: Int) {
    guard let candidate = node as? Element else {
        return
    }
    let matched = (try? eval.matches(root, candidate)) ?? false
    if matched {
        elements.add(candidate)
    }
}
open func tail(_ node: Node, _ depth: Int) {
// void
}

View File

@ -11,113 +11,111 @@ import Foundation
/**
* Base combining (and, or) evaluator.
*/
public class CombiningEvaluator: Evaluator {
    /// Child evaluators, in insertion order.
    public private(set) var evaluators: [Evaluator]
    /// Cached `evaluators.count`; refreshed only by `updateNumEvaluators()`.
    var num: Int = 0

    /// Creates a combiner with no child evaluators (`num` stays 0).
    public override init() {
        evaluators = []
        super.init()
    }

    /// Creates a combiner over the given child evaluators.
    public init(_ evaluators: Array<Evaluator>) {
        self.evaluators = evaluators
        super.init()
        updateNumEvaluators()
    }

    /// Returns the last child evaluator, or `nil` when there are none.
    func rightMostEvaluator() -> Evaluator? {
        guard num > 0, evaluators.count > 0 else {
            return nil
        }
        return evaluators[num - 1]
    }

    /// Overwrites the last child evaluator with `replacement`.
    func replaceRightMostEvaluator(_ replacement: Evaluator) {
        let lastIndex = num - 1
        evaluators[lastIndex] = replacement
    }

    func updateNumEvaluators() {
        // used so we don't need to bash on size() for every match test
        num = evaluators.count
    }

    /// Matches only when no child evaluator reports `false`.
    public final class And: CombiningEvaluator {
        public override init(_ evaluators: Array<Evaluator>) {
            super.init(evaluators)
        }

        public override init(_ evaluators: Evaluator...) {
            super.init(evaluators)
        }

        /// Returns `false` as soon as a child reports a non-match. A child that
        /// throws is skipped rather than treated as a failure.
        public override func matches(_ root: Element, _ node: Element) -> Bool {
            for index in 0..<num {
                let outcome = try? evaluators[index].matches(root, node)
                if outcome == false {
                    return false
                }
            }
            return true
        }

        /// Joins the children's string forms with single spaces.
        public override func toString() -> String {
            let parts: [String] = evaluators.map { String($0.toString()) }
            return StringUtil.join(parts, sep: " ")
        }
    }

    /// Matches when any child evaluator reports `true`.
    public final class Or: CombiningEvaluator {
        /**
         * Create a new Or evaluator. The initial evaluators are meant to be ANDed together and used as the
         * first clause of the OR.
         * NOTE(review): `num` is still 0 right after `super.init()`, so the `And` wrapping branch below is never
         * taken and the evaluators are always appended individually. Confirm the intent before changing it.
         * @param evaluators initial OR clause
         */
        public override init(_ evaluators: Array<Evaluator>) {
            super.init()
            if num <= 1 { // 0 or 1
                self.evaluators.append(contentsOf: evaluators)
            } else {
                self.evaluators.append(And(evaluators))
            }
            updateNumEvaluators()
        }

        // Variadic twin of the array initializer; same NOTE(review) about `num` applies.
        override init(_ evaluators: Evaluator...) {
            super.init()
            if num <= 1 { // 0 or 1
                self.evaluators.append(contentsOf: evaluators)
            } else {
                self.evaluators.append(And(evaluators))
            }
            updateNumEvaluators()
        }

        override init() {
            super.init()
        }

        /// Appends another alternative and refreshes the cached count.
        public func add(_ e: Evaluator) {
            evaluators.append(e)
            updateNumEvaluators()
        }

        /// Returns `true` as soon as a child reports a match. A child that
        /// throws is skipped.
        public override func matches(_ root: Element, _ node: Element) -> Bool {
            for index in 0..<num {
                let outcome = try? evaluators[index].matches(root, node)
                if outcome == true {
                    return true
                }
            }
            return false
        }

        /// Renders as `:or` followed by the array description of the children's string forms.
        public override func toString() -> String {
            let rendered = evaluators.map { String($0.toString()) }
            return ":or\(rendered)"
        }
    }
}

View File

@ -11,68 +11,65 @@ import Foundation
/**
A comment node.
*/
public class Comment: Node {
    // Attribute key under which the comment text is stored.
    private static let COMMENT_KEY: String = "comment"

    /**
     Create a new comment node.
     @param data The contents of the comment
     @param baseUri base URI
     */
    public init(_ data: String, _ baseUri: String) {
        super.init(baseUri)
        // Best effort: an error from `put` is ignored.
        _ = try? attributes?.put(Comment.COMMENT_KEY, data)
    }

    public override func nodeName() -> String {
        return "#comment"
    }

    /**
     Get the contents of the comment.
     @return comment content
     */
    public func getData() -> String {
        let store = attributes!
        return store.get(key: Comment.COMMENT_KEY)
    }

    /// Writes `<!--`, the comment data and `-->` to `accum`, indenting first when pretty printing is on.
    override func outerHtmlHead(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings) {
        if out.prettyPrint() {
            indent(accum, depth, out)
        }
        accum.append("<!--").append(getData()).append("-->")
    }

    /// A comment has no closing part, so nothing is written.
    override func outerHtmlTail(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings) {}

    /// Returns the outer HTML of the comment, or an empty string when rendering throws.
    public override func toString() -> String {
        guard let html = try? outerHtml() else {
            return ""
        }
        return html
    }

    public override func copy(with zone: NSZone? = nil) -> Any {
        return copy(clone: detachedCopy())
    }

    public override func copy(parent: Node?) -> Node {
        return copy(clone: detachedCopy(), parent: parent)
    }

    public override func copy(clone: Node, parent: Node?) -> Node {
        return super.copy(clone: clone, parent: parent)
    }

    // Fresh comment with this node's stored text and base URI; the seed for the copy methods.
    private func detachedCopy() -> Comment {
        return Comment(attributes!.get(key: Comment.COMMENT_KEY), baseUri!)
    }
}

View File

@ -8,6 +8,3 @@
import Foundation
//TODO:

View File

@ -11,9 +11,9 @@ import Foundation
/**
A data node, for contents of style, script tags etc, where contents should not show in text().
*/
open class DataNode : Node{
private static let DATA_KEY : String = "data"
open class DataNode: Node {
private static let DATA_KEY: String = "data"
/**
Create a new DataNode.
@param data data contents
@ -21,48 +21,47 @@ open class DataNode : Node{
*/
public init(_ data: String, _ baseUri: String) {
super.init(baseUri)
do{
do {
try attributes?.put(DataNode.DATA_KEY, data)
}catch{}
} catch {}
}
open override func nodeName()->String {
open override func nodeName() -> String {
return "#data"
}
/**
Get the data contents of this node. Will be unescaped and with original new lines, space etc.
@return data
*/
open func getWholeData()->String {
open func getWholeData() -> String {
return attributes!.get(key: DataNode.DATA_KEY)
}
/**
 * Replace the data stored in this node.
 * @param data unencoded data
 * @return this node, for chaining
 */
@discardableResult
open func setWholeData(_ data: String) -> DataNode {
    // Best effort: an error from `put` is ignored and the node is returned regardless.
    _ = try? attributes?.put(DataNode.DATA_KEY, data)
    return self
}
override func outerHtmlHead(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings)throws {
accum.append(getWholeData()) // data is not escaped in return from data nodes, so " in script, style is plain
}
override func outerHtmlTail(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings) {}
open override func toString()throws->String {
return try outerHtml()
}
/**
Create a new DataNode from HTML encoded data.
@param encodedData encoded data
@ -73,21 +72,18 @@ open class DataNode : Node{
let data = try Entities.unescape(encodedData)
return DataNode(data, baseUri)
}
/// Builds a new `DataNode` from this node's stored data and base URI, then hands it to `copy(clone:)`.
public override func copy(with zone: NSZone? = nil) -> Any {
    let data = attributes!.get(key: DataNode.DATA_KEY)
    return copy(clone: DataNode(data, baseUri!))
}
/// Builds a new `DataNode` from this node's stored data and base URI, then hands it to
/// `copy(clone:parent:)` together with `parent`.
public override func copy(parent: Node?) -> Node {
    let data = attributes!.get(key: DataNode.DATA_KEY)
    return copy(clone: DataNode(data, baseUri!), parent: parent)
}
public override func copy(clone: Node, parent: Node?)->Node
{
return super.copy(clone: clone,parent: parent)
public override func copy(clone: Node, parent: Node?) -> Node {
return super.copy(clone: clone, parent: parent)
}
}

View File

@ -13,13 +13,12 @@ import Foundation
*
*/
// Namespace for constants used when handling character sets and MIME boundaries.
class DataUtil {
// Case-insensitive regex: finds `charset=` at a word boundary, skips optional whitespace and one
// optional quote, and captures the following run of characters up to whitespace, `,`, `;` or a quote.
static let charsetPattern = "(?i)\\bcharset=\\s*(?:\"|')?([^\\s,;\"']*)"
static let defaultCharset = "UTF-8" // used if not found in header or meta charset
// 0x20000 = 131,072.
static let bufferSize = 0x20000 // ~130K.
// U+FEFF, the Unicode byte-order-mark code point.
static let UNICODE_BOM = 0xFEFF
// Alphabet of `-`, `_`, digits and ASCII letters; presumably the pool MIME boundaries are drawn from (TODO confirm).
static let mimeBoundaryChars = "-_1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".characters
// Presumably the number of characters in a generated boundary (TODO confirm against the caller).
static let boundaryLength = 32
}

View File

@ -8,67 +8,66 @@
import Foundation
open class Document : Element
{
open class Document: Element {
public enum QuirksMode {
case noQuirks, quirks, limitedQuirks
}
private var _outputSettings: OutputSettings = OutputSettings()
private var _quirksMode: Document.QuirksMode = QuirksMode.noQuirks
private let _location: String
private var updateMetaCharset: Bool = false
/**
Create a new, empty Document.
@param baseUri base URI of document
@see org.jsoup.Jsoup#parse
@see #createShell
*/
public init(_ baseUri: String){
public init(_ baseUri: String) {
self._location = baseUri
super.init(try! Tag.valueOf("#root", ParseSettings.htmlDefault), baseUri)
}
/**
Create a valid, empty shell of a document, suitable for adding more elements to.
@param baseUri baseUri of document
@return document with html, head, and body elements.
*/
static open func createShell(_ baseUri: String) -> Document {
    let shell = Document(baseUri)
    // <html> is appended to the document; <head> and then <body> are appended inside it.
    let htmlEl = try! shell.appendElement("html")
    try! htmlEl.appendElement("head")
    try! htmlEl.appendElement("body")
    return shell
}
/**
* Get the URL this Document was parsed from. If the starting URL is a redirect,
* this will return the final URL from which the document was served from.
* @return location
*/
public func location()->String {
public func location() -> String {
return _location
}
/**
Accessor to the document's {@code head} element.
@return {@code head}
*/
public func head()->Element? {
public func head() -> Element? {
return findFirstElementByTagName("head", self)
}
/**
Accessor to the document's {@code body} element.
@return {@code body}
*/
public func body()->Element? {
public func body() -> Element? {
return findFirstElementByTagName("body", self)
}
/**
Get the string contents of the document's {@code title} element.
@return Trimmed title, or empty string if none set.
@ -78,7 +77,7 @@ open class Document : Element
let titleEl: Element? = try getElementsByTag("title").first()
return titleEl != nil ? try StringUtil.normaliseWhitespace(titleEl!.text()).trim() : ""
}
/**
Set the document's {@code title} element. Updates the existing element, or adds {@code title} to {@code head} if
not present
@ -92,7 +91,7 @@ open class Document : Element
try titleEl?.text(title)
}
}
/**
Create a new Element, with this document's base uri. Does not make the new element a child of this document.
@param tagName element tag name (e.g. {@code a})
@ -101,7 +100,7 @@ open class Document : Element
public func createElement(_ tagName: String)throws->Element {
return try Element(Tag.valueOf(tagName, ParseSettings.preserveCase), self.getBaseUri())
}
/**
Normalise the document. This happens after the parse phase so generally does not need to be called.
Moves any text content that is not in the body element into the body.
@ -110,70 +109,66 @@ open class Document : Element
@discardableResult
public func normalise()throws->Document {
    // Make sure the html element exists, creating it on the document when missing.
    let htmlEl: Element
    if let existing = findFirstElementByTagName("html", self) {
        htmlEl = existing
    } else {
        htmlEl = try appendElement("html")
    }

    // Likewise for head (placed first) and body (placed last) inside html.
    if head() == nil {
        try htmlEl.prependElement("head")
    }
    if body() == nil {
        try htmlEl.appendElement("body")
    }

    // pull text nodes out of root, html, and head els, and push into body. non-text nodes are already taken care
    // of. do in inverse order to maintain text order.
    try normaliseTextNodes(head()!)
    try normaliseTextNodes(htmlEl)
    try normaliseTextNodes(self)

    for tag in ["head", "body"] {
        try normaliseStructure(tag, htmlEl)
    }

    try ensureMetaCharsetElement()
    return self
}
/// Moves the non-blank text nodes that are direct children of `element` to the front of the body.
///
/// Does not recurse. The collected nodes are handled last to first; each one is removed from
/// `element`, a single-space text node is prepended to the body, and then the node itself is
/// prepended, so the moved nodes keep their original relative order.
/// - Parameter element: Element whose direct text children are relocated.
/// - Throws: any error from removing or prepending a node.
private func normaliseTextNodes(_ element: Element)throws {
    var toMove: [Node] = []
    for node: Node in element.childNodes {
        if let tn = (node as? TextNode), !tn.isBlank() {
            toMove.append(tn)
        }
    }

    // Iterate back to front with `reversed()`. The previous `toMove.count-1...0` range trapped
    // for two or more nodes (lower bound above upper bound) and indexed -1 for an empty list.
    for node in toMove.reversed() {
        try element.removeChild(node)
        try body()?.prependChild(TextNode(" ", ""))
        try body()?.prependChild(node)
    }
}
// merge multiple <head> or <body> contents into one, delete the remainder, and ensure they are owned by <html>
private func normaliseStructure(_ tag: String, _ htmlEl: Element)throws {
let elements: Elements = try self.getElementsByTag(tag)
let master: Element? = elements.first() // will always be available as created above if not existent
if (elements.size() > 1) { // dupes, move contents to master
var toMove:Array<Node> = Array<Node>()
for i in 1..<elements.size()
{
var toMove: Array<Node> = Array<Node>()
for i in 1..<elements.size() {
let dupe: Node = elements.get(i)
for node:Node in dupe.childNodes
{
for node: Node in dupe.childNodes {
toMove.append(node)
}
try dupe.remove()
}
for dupe:Node in toMove{
for dupe: Node in toMove {
try master?.appendChild(dupe)
}
}
@ -182,27 +177,26 @@ open class Document : Element
try htmlEl.appendChild(master!) // includes remove()
}
}
/// Depth-first search for the first node whose `nodeName()` equals `tag`; used for the html, head
/// and body finders.
/// - Returns: the matching node cast to `Element` (`nil` if that cast fails), or `nil` when nothing
///   in the subtree matches.
private func findFirstElementByTagName(_ tag: String, _ node: Node) -> Element? {
    guard node.nodeName() != tag else {
        return node as? Element
    }
    for child: Node in node.childNodes {
        if let match = findFirstElementByTagName(tag, child) {
            return match
        }
    }
    return nil
}
open override func outerHtml()throws->String {
return try super.html() // no outer wrapper tag
}
/**
Set the text of the {@code body} of this document. Any existing nodes within the body will be cleared.
@param text unencoded text
@ -213,11 +207,11 @@ open class Document : Element
try body()?.text(text) // overridden to not nuke doc structure
return self
}
open override func nodeName()->String {
open override func nodeName() -> String {
return "#document"
}
/**
* Sets the charset used in this document. This method is equivalent
* to {@link OutputSettings#charset(java.nio.charset.Charset)
@ -247,7 +241,7 @@ open class Document : Element
_outputSettings.charset(charset)
try ensureMetaCharsetElement()
}
/**
* Returns the charset used in this document. This method is equivalent
* to {@link OutputSettings#charset()}.
@ -259,7 +253,7 @@ open class Document : Element
public func charset()->String.Encoding {
return _outputSettings.charset()
}
/**
* Sets whether the element with charset information in this document is
* updated on changes through {@link #charset(java.nio.charset.Charset)
@ -276,7 +270,7 @@ open class Document : Element
public func updateMetaCharsetElement(_ update: Bool) {
self.updateMetaCharset = update
}
/**
* Returns whether the element with charset information in this document is
* updated on changes through {@link #charset(java.nio.charset.Charset)
@ -285,10 +279,10 @@ open class Document : Element
* @return Returns <tt>true</tt> if the element is updated on charset
* changes, <tt>false</tt> if not
*/
public func updateMetaCharsetElement()->Bool {
public func updateMetaCharsetElement() -> Bool {
return updateMetaCharset
}
/**
* Ensures a meta charset (html) or xml declaration (xml) with the current
* encoding used. This only applies with
@ -311,32 +305,32 @@ open class Document : Element
private func ensureMetaCharsetElement()throws {
if (updateMetaCharset) {
let syntax: OutputSettings.Syntax = outputSettings().syntax()
if (syntax == OutputSettings.Syntax.html) {
let metaCharset: Element? = try select("meta[charset]").first()
if (metaCharset != nil) {
try metaCharset?.attr("charset", charset().displayName())
} else {
let head: Element? = self.head()
if (head != nil) {
try head?.appendElement("meta").attr("charset", charset().displayName())
}
}
// Remove obsolete elements
let s = try select("meta[name=charset]")
try s.remove()
} else if (syntax == OutputSettings.Syntax.xml) {
let node: Node = getChildNodes()[0]
if let decl = (node as? XmlDeclaration) {
if (decl.name()=="xml") {
try decl.attr("encoding", charset().displayName())
_ = try decl.attr("version")
try decl.attr("version", "1.0")
} else {
@ -344,7 +338,7 @@ open class Document : Element
let decl = XmlDeclaration("xml", baseUri!, false)
try decl.attr("version", "1.0")
try decl.attr("encoding", charset().displayName())
try prependChild(decl)
}
} else {
@ -352,64 +346,60 @@ open class Document : Element
let decl = XmlDeclaration("xml", baseUri!, false)
try decl.attr("version", "1.0")
try decl.attr("encoding", charset().displayName())
try prependChild(decl)
}
}
}
}
/**
* Get the document's current output settings.
* @return the document's current output settings.
*/
public func outputSettings()->OutputSettings {
public func outputSettings() -> OutputSettings {
return _outputSettings
}
/**
* Set the document's output settings.
* @param outputSettings new output settings.
* @return this document, for chaining.
*/
@discardableResult
public func outputSettings(_ outputSettings: OutputSettings)->Document {
public func outputSettings(_ outputSettings: OutputSettings) -> Document {
self._outputSettings = outputSettings
return self
}
public func quirksMode()->Document.QuirksMode {
return _quirksMode
}
@discardableResult
public func quirksMode(_ quirksMode: Document.QuirksMode)->Document {
public func quirksMode(_ quirksMode: Document.QuirksMode) -> Document {
self._quirksMode = quirksMode
return self
}
/// Creates an empty `Document` at the same location and passes it to `copy(clone:)`.
public override func copy(with zone: NSZone? = nil) -> Any {
    return copy(clone: Document(_location))
}
/// Creates an empty `Document` at the same location and passes it to `copy(clone:parent:)`.
public override func copy(parent: Node?) -> Node {
    return copy(clone: Document(_location), parent: parent)
}
/// Transfers this document's settings onto `clone` and then defers to `super`.
///
/// Copied state: the meta-charset update flag, the quirks mode, and a fresh copy of the output
/// settings. `clone` must be a `Document`; the forced cast traps otherwise.
public override func copy(clone: Node, parent: Node?) -> Node {
    let documentClone = clone as! Document
    documentClone.updateMetaCharset = updateMetaCharset
    documentClone._quirksMode = _quirksMode
    documentClone._outputSettings = _outputSettings.copy() as! OutputSettings
    return super.copy(clone: documentClone, parent: parent)
}
}
public class OutputSettings: NSCopying {
@ -417,16 +407,16 @@ public class OutputSettings: NSCopying {
* The output serialization syntax.
*/
public enum Syntax {case html, xml}
private var _escapeMode : Entities.EscapeMode = Entities.EscapeMode.base
private var _encoder : String.Encoding = String.Encoding.utf8 // Charset.forName("UTF-8")
private var _prettyPrint : Bool = true
private var _outline : Bool = false
private var _indentAmount : UInt = 1
private var _escapeMode: Entities.EscapeMode = Entities.EscapeMode.base
private var _encoder: String.Encoding = String.Encoding.utf8 // Charset.forName("UTF-8")
private var _prettyPrint: Bool = true
private var _outline: Bool = false
private var _indentAmount: UInt = 1
private var _syntax = Syntax.html
public init() {}
/**
* Get the document's current HTML escape mode: <code>base</code>, which provides a limited set of named HTML
* entities and escapes other characters as numbered entities for maximum compatibility; or <code>extended</code>,
@ -438,7 +428,7 @@ public class OutputSettings: NSCopying {
public func escapeMode() -> Entities.EscapeMode {
return _escapeMode
}
/**
* Set the document's escape mode, which determines how characters are escaped when the output character set
* does not support a given character:- using either a named or a numbered escape.
@ -450,7 +440,7 @@ public class OutputSettings: NSCopying {
self._escapeMode = escapeMode
return self
}
/**
* Get the document's current output charset, which is used to control which characters are escaped when
* generating HTML (via the <code>html()</code> methods), and which are kept intact.
@ -465,7 +455,7 @@ public class OutputSettings: NSCopying {
public func charset() -> String.Encoding {
return _encoder
}
/**
* Update the document's output charset.
* @param charset the new charset to use.
@ -476,22 +466,20 @@ public class OutputSettings: NSCopying {
self._encoder = encoder
return self
}
@discardableResult
public func charset(_ e: String.Encoding) -> OutputSettings {
return encoder(e)
}
/**
* Get the document's current output syntax.
* @return current syntax
*/
public func syntax()-> Syntax {
public func syntax() -> Syntax {
return _syntax
}
/**
* Set the document's output syntax. Either {@code html}, with empty tags and boolean attributes (etc), or
* {@code xml}, with self-closing tags.
@ -499,72 +487,71 @@ public class OutputSettings: NSCopying {
* @return the document's output settings, for chaining
*/
@discardableResult
public func syntax(syntax: Syntax)->OutputSettings {
public func syntax(syntax: Syntax) -> OutputSettings {
_syntax = syntax
return self
}
/**
* Get if pretty printing is enabled. Default is true. If disabled, the HTML output methods will not re-format
* the output, and the output will generally look like the input.
* @return if pretty printing is enabled.
*/
public func prettyPrint()->Bool {
public func prettyPrint() -> Bool {
return _prettyPrint
}
/**
* Enable or disable pretty printing.
* @param pretty new pretty print setting
* @return this, for chaining
*/
@discardableResult
public func prettyPrint(pretty: Bool)->OutputSettings {
public func prettyPrint(pretty: Bool) -> OutputSettings {
_prettyPrint = pretty
return self
}
/**
* Get if outline mode is enabled. Default is false. If enabled, the HTML output methods will consider
* all tags as block.
* @return if outline mode is enabled.
*/
public func outline()->Bool {
public func outline() -> Bool {
return _outline
}
/**
* Enable or disable HTML outline mode.
* @param outlineMode new outline setting
* @return this, for chaining
*/
@discardableResult
public func outline(outlineMode: Bool)->OutputSettings {
public func outline(outlineMode: Bool) -> OutputSettings {
_outline = outlineMode
return self
}
/**
* Get the current tag indent amount, used when pretty printing.
* @return the current indent amount
*/
public func indentAmount()-> UInt {
public func indentAmount() -> UInt {
return _indentAmount
}
/**
* Set the indent amount for pretty printing
* @param indentAmount number of spaces to use for indenting each level. Must be {@literal >=} 0.
* @return this, for chaining
*/
@discardableResult
public func indentAmount(indentAmount: UInt)-> OutputSettings {
public func indentAmount(indentAmount: UInt) -> OutputSettings {
_indentAmount = indentAmount
return self
}
public func copy(with zone: NSZone? = nil) -> Any{
public func copy(with zone: NSZone? = nil) -> Any {
let clone: OutputSettings = OutputSettings()
clone.charset(_encoder) // new charset and charset encoder
clone._escapeMode = _escapeMode//Entities.EscapeMode.valueOf(escapeMode.name())
@ -572,7 +559,4 @@ public class OutputSettings: NSCopying {
return clone
}
}

View File

@ -11,12 +11,12 @@ import Foundation
/**
* A {@code <!DOCTYPE>} node.
*/
public class DocumentType : Node {
private static let NAME: String = "name";
private static let PUBLIC_ID: String = "publicId";
private static let SYSTEM_ID: String = "systemId";
public class DocumentType: Node {
private static let NAME: String = "name"
private static let PUBLIC_ID: String = "publicId"
private static let SYSTEM_ID: String = "systemId"
// todo: quirk mode from publicId and systemId
/**
* Create a new doctype element.
* @param name the doctype's name
@ -25,76 +25,73 @@ public class DocumentType : Node {
* @param baseUri the doctype's base URI
*/
public init(_ name: String, _ publicId: String, _ systemId: String, _ baseUri: String) {
super.init(baseUri);
do{
try attr(DocumentType.NAME, name);
try attr(DocumentType.PUBLIC_ID, publicId);
try attr(DocumentType.SYSTEM_ID, systemId);
}catch{}
super.init(baseUri)
do {
try attr(DocumentType.NAME, name)
try attr(DocumentType.PUBLIC_ID, publicId)
try attr(DocumentType.SYSTEM_ID, systemId)
} catch {}
}
public override func nodeName()->String {
return "#doctype";
public override func nodeName() -> String {
return "#doctype"
}
/// Writes the doctype declaration to `accum`.
///
/// The lowercase `<!doctype` opener is used only for HTML syntax when neither a public id nor a
/// system id is present; otherwise `<!DOCTYPE` is written. Name, public id and system id follow
/// only when each is non-blank, and the declaration is closed with `>`.
override func outerHtmlHead(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings) {
    let htmlSyntax = out.syntax() == OutputSettings.Syntax.html
    // looks like a html5 doctype, go lowercase for aesthetics
    let html5Style = htmlSyntax && !has(DocumentType.PUBLIC_ID) && !has(DocumentType.SYSTEM_ID)
    let opener: String = html5Style ? "<!doctype" : "<!DOCTYPE"
    accum.append(opener)

    if has(DocumentType.NAME) {
        do {
            accum.append(" ").append(try attr(DocumentType.NAME))
        } catch {}
    }
    if has(DocumentType.PUBLIC_ID) {
        do {
            accum.append(" PUBLIC \"").append(try attr(DocumentType.PUBLIC_ID)).append("\"")
        } catch {}
    }
    if has(DocumentType.SYSTEM_ID) {
        do {
            accum.append(" \"").append(try attr(DocumentType.SYSTEM_ID)).append("\"")
        } catch {}
    }

    accum.append(">")
}
override func outerHtmlTail(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings) {
}
/**
 * Reports whether this node holds a non-blank value for `attribute`.
 * A lookup that throws is treated as "not present".
 */
private func has(_ attribute: String) -> Bool {
    do {
        let value = try attr(attribute)
        return !StringUtil.isBlank(value)
    } catch {
        return false
    }
}
public override func copy(with zone: NSZone? = nil) -> Any
{
public override func copy(with zone: NSZone? = nil) -> Any {
let clone = DocumentType(attributes!.get(key: DocumentType.NAME),
attributes!.get(key: DocumentType.PUBLIC_ID),
attributes!.get(key: DocumentType.SYSTEM_ID),
baseUri!)
return copy(clone: clone)
}
public override func copy(parent: Node?)->Node
{
public override func copy(parent: Node?) -> Node {
let clone = DocumentType(attributes!.get(key: DocumentType.NAME),
attributes!.get(key: DocumentType.PUBLIC_ID),
attributes!.get(key: DocumentType.SYSTEM_ID),
baseUri!)
return copy(clone: clone,parent: parent)
return copy(clone: clone, parent: parent)
}
public override func copy(clone: Node, parent: Node?)->Node
{
return super.copy(clone: clone,parent: parent)
public override func copy(clone: Node, parent: Node?) -> Node {
return super.copy(clone: clone, parent: parent)
}
}

File diff suppressed because it is too large Load Diff

View File

@ -16,32 +16,30 @@ import Foundation
//open typealias Elements = Array<Element>
//typealias E = Element
open class Elements: NSCopying
{
fileprivate var this : Array<Element> = Array<Element>()
open class Elements: NSCopying {
fileprivate var this: Array<Element> = Array<Element>()
public init() {
}
public init(_ a: Array<Element>){
public init(_ a: Array<Element>) {
this = a
}
public init(_ a: OrderedSet<Element>){
public init(_ a: OrderedSet<Element>) {
this.append(contentsOf: a)
}
/**
 * Builds a new `Elements` list whose entries are copies of the entries of this list.
 * The `zone` argument is not used.
 * @return the new list (typed `Any` to satisfy `NSCopying`)
 */
public func copy(with zone: NSZone? = nil) -> Any {
    let duplicates: [Element] = this.map { $0.copy() as! Element }
    return Elements(duplicates)
}
// attribute methods
/**
Get an attribute value from the first matched element that has the attribute.
@ -52,25 +50,25 @@ open class Elements: NSCopying
*/
open func attr(_ attributeKey: String)throws->String {
for element in this {
if (element.hasAttr(attributeKey)){
if (element.hasAttr(attributeKey)) {
return try element.attr(attributeKey)
}
}
return "";
return ""
}
/**
 Tests whether at least one element in this list has the given attribute.
 @param attributeKey attribute key
 @return true as soon as one element reports the attribute; false if none do (or the list is empty).
 */
open func hasAttr(_ attributeKey: String) -> Bool {
    return this.contains { $0.hasAttr(attributeKey) }
}
/**
* Set an attribute on all matched elements.
* @param attributeKey attribute key
@ -80,11 +78,11 @@ open class Elements: NSCopying
@discardableResult
open func attr(_ attributeKey: String, _ attributeValue: String)throws->Elements {
for element in this {
try element.attr(attributeKey, attributeValue);
try element.attr(attributeKey, attributeValue)
}
return self;
return self
}
/**
* Remove an attribute from every matched element.
* @param attributeKey The attribute to remove.
@ -93,11 +91,11 @@ open class Elements: NSCopying
@discardableResult
open func removeAttr(_ attributeKey: String)throws->Elements {
for element in this {
try element.removeAttr(attributeKey);
try element.removeAttr(attributeKey)
}
return self;
return self
}
/**
Add the class name to every matched element's {@code class} attribute.
@param className class name to add
@ -106,11 +104,11 @@ open class Elements: NSCopying
@discardableResult
open func addClass(_ className: String)throws->Elements {
for element in this {
try element.addClass(className);
try element.addClass(className)
}
return self;
return self
}
/**
Remove the class name from every matched element's {@code class} attribute, if present.
@param className class name to remove
@ -119,11 +117,11 @@ open class Elements: NSCopying
@discardableResult
open func removeClass(_ className: String)throws->Elements {
for element: Element in this {
try element.removeClass(className);
try element.removeClass(className)
}
return self;
return self
}
/**
Toggle the class name on every matched element's {@code class} attribute.
@param className class name to add if missing, or remove if present, from every element.
@ -132,38 +130,38 @@ open class Elements: NSCopying
@discardableResult
open func toggleClass(_ className: String)throws->Elements {
for element: Element in this {
try element.toggleClass(className);
try element.toggleClass(className)
}
return self;
return self
}
/**
 Tests whether at least one element in this list has the given class name.
 @param className class name to check for
 @return true as soon as one element reports the class; false if none do (or the list is empty).
 */
open func hasClass(_ className: String) -> Bool {
    return this.contains { $0.hasClass(className) }
}
/**
 * Reads the form value of the first element in this list.
 * @return the result of `val()` on the first element, or an empty string when the list has no elements.
 * @see Element#val()
 */
open func val()throws->String {
    guard size() > 0 else {
        return ""
    }
    // size() > 0 was checked just above, so first() yields an element here.
    let head: Element = first()!
    return try head.val()
}
/**
* Set the form element's value in each of the matched elements.
* @param value The value to set into each matched element
@ -171,12 +169,12 @@ open class Elements: NSCopying
*/
@discardableResult
open func val(_ value: String)throws->Elements {
for element: Element in this{
try element.val(value);
for element: Element in this {
try element.val(value)
}
return self;
return self
}
/**
* Get the combined text of all the matched elements.
* <p>
@ -186,27 +184,25 @@ open class Elements: NSCopying
* @see Element#text()
*/
open func text()throws->String {
    let buffer = StringBuilder()
    for element in this {
        // The separator is written only once the buffer already holds something,
        // so elements with empty text at the front do not produce a leading space.
        if buffer.length != 0 {
            buffer.append(" ")
        }
        let piece = try element.text()
        buffer.append(piece)
    }
    return buffer.toString()
}
/**
 * Tests whether at least one element in this list reports `hasText()`.
 * @return true as soon as one element does; false if none do (or the list is empty).
 */
open func hasText() -> Bool {
    return this.contains { $0.hasText() }
}
/**
* Get the combined inner HTML of all matched elements.
* @return string of all element's inner HTML.
@ -214,16 +210,16 @@ open class Elements: NSCopying
* @see #outerHtml()
*/
open func html()throws->String {
let sb: StringBuilder = StringBuilder();
let sb: StringBuilder = StringBuilder()
for element: Element in this {
if (sb.length != 0){
if (sb.length != 0) {
sb.append("\n")
}
sb.append(try element.html());
sb.append(try element.html())
}
return sb.toString();
return sb.toString()
}
/**
* Get the combined outer HTML of all matched elements.
* @return string of all element's outer HTML.
@ -231,28 +227,27 @@ open class Elements: NSCopying
* @see #html()
*/
open func outerHtml()throws->String {
let sb: StringBuilder = StringBuilder();
for element in this
{
if (sb.length != 0){
let sb: StringBuilder = StringBuilder()
for element in this {
if (sb.length != 0) {
sb.append("\n")
}
sb.append(try element.outerHtml());
sb.append(try element.outerHtml())
}
return sb.toString();
return sb.toString()
}
/**
* Get the combined outer HTML of all matched elements. Alias of {@link #outerHtml()}.
* @return string of all element's outer HTML.
* @see #text()
* @see #html()
*/
open func toString()throws->String {
return try outerHtml();
return try outerHtml()
}
/**
* Update the tag name of each matched element. For example, to change each {@code <i>} to a {@code <em>}, do
* {@code doc.select("i").tagName("em");}
@ -263,11 +258,11 @@ open class Elements: NSCopying
@discardableResult
open func tagName(_ tagName: String)throws->Elements {
for element: Element in this {
try element.tagName(tagName);
try element.tagName(tagName)
}
return self;
return self
}
/**
* Set the inner HTML of each matched element.
* @param html HTML to parse and set into each matched element.
@ -277,11 +272,11 @@ open class Elements: NSCopying
@discardableResult
open func html(_ html: String)throws->Elements {
for element: Element in this {
try element.html(html);
try element.html(html)
}
return self;
return self
}
/**
* Add the supplied HTML to the start of each matched element's inner HTML.
* @param html HTML to add inside each element, before the existing HTML
@ -291,11 +286,11 @@ open class Elements: NSCopying
@discardableResult
open func prepend(_ html: String)throws->Elements {
for element: Element in this {
try element.prepend(html);
try element.prepend(html)
}
return self;
return self
}
/**
* Add the supplied HTML to the end of each matched element's inner HTML.
* @param html HTML to add inside each element, after the existing HTML
@ -305,11 +300,11 @@ open class Elements: NSCopying
@discardableResult
open func append(_ html: String)throws->Elements {
for element: Element in this {
try element.append(html);
try element.append(html)
}
return self;
return self
}
/**
* Insert the supplied HTML before each matched element's outer HTML.
* @param html HTML to insert before each element
@ -319,11 +314,11 @@ open class Elements: NSCopying
@discardableResult
open func before(_ html: String)throws->Elements {
for element: Element in this {
try element.before(html);
try element.before(html)
}
return self;
return self
}
/**
* Insert the supplied HTML after each matched element's outer HTML.
* @param html HTML to insert after each element
@ -333,11 +328,11 @@ open class Elements: NSCopying
@discardableResult
open func after(_ html: String)throws->Elements {
for element: Element in this {
try element.after(html);
try element.after(html)
}
return self;
return self
}
/**
Wrap the supplied HTML around each matched elements. For example, with HTML
{@code <p><b>This</b> is <b>Jsoup</b></p>},
@ -349,13 +344,13 @@ open class Elements: NSCopying
*/
@discardableResult
open func wrap(_ html: String)throws->Elements {
try Validate.notEmpty(string: html);
try Validate.notEmpty(string: html)
for element: Element in this {
try element.wrap(html);
try element.wrap(html)
}
return self;
return self
}
/**
* Removes the matched elements from the DOM, and moves their children up into their parents. This has the effect of
* dropping the elements but keeping their children.
@ -373,11 +368,11 @@ open class Elements: NSCopying
@discardableResult
open func unwrap()throws->Elements {
for element: Element in this {
try element.unwrap();
try element.unwrap()
}
return self;
return self
}
/**
* Empty (remove all child nodes from) each matched element. This is similar to setting the inner HTML of each
* element to nothing.
@ -390,13 +385,13 @@ open class Elements: NSCopying
* @see #remove()
*/
@discardableResult
open func empty()->Elements {
open func empty() -> Elements {
for element: Element in this {
element.empty();
element.empty()
}
return self;
return self
}
/**
* Remove each matched element from the DOM. This is similar to setting the outer HTML of each element to nothing.
* <p>
@ -411,24 +406,23 @@ open class Elements: NSCopying
*/
@discardableResult
open func remove()throws->Elements {
for element in this
{
try element.remove();
for element in this {
try element.remove()
}
return self;
return self
}
// filters
/**
* Find matching elements within this element list.
* @param query A {@link Selector} query
* @return the filtered list of elements, or an empty list if none match.
*/
open func select(_ query: String)throws->Elements {
return try Selector.select(query, this);
return try Selector.select(query, this)
}
/**
* Remove elements from this list that match the {@link Selector} query.
* <p>
@ -440,10 +434,10 @@ open class Elements: NSCopying
* @return a new elements list that contains only the filtered results
*/
open func not(_ query: String)throws->Elements {
let out: Elements = try Selector.select(query, this);
return Selector.filterOut(this, out.this);
let out: Elements = try Selector.select(query, this)
return Selector.filterOut(this, out.this)
}
/**
* Get the <i>nth</i> matched element as an Elements object.
* <p>
@ -451,60 +445,58 @@ open class Elements: NSCopying
* @param index the (zero-based) index of the element in the list to retain
* @return Elements containing only the specified element, or, if that element did not exist, an empty list.
*/
open func eq(_ index: Int) -> Elements {
    // Any index outside 0..<size() yields an empty list. The lower bound matters:
    // a negative index would otherwise pass the size check and trap in the array subscript.
    guard index >= 0 && index < size() else {
        return Elements()
    }
    return Elements([get(index)])
}
/**
* Test if any of the matched elements match the supplied query.
* @param query A selector
* @return true if at least one element in the list matches the query.
*/
open func `is`(_ query: String)throws->Bool {
let children: Elements = try select(query);
return !children.isEmpty();
let children: Elements = try select(query)
return !children.isEmpty()
}
/**
* Get all of the parents and ancestor elements of the matched elements.
* @return all of the parents and ancestor elements of the matched elements
*/
open func parents()->Elements {
let combo: OrderedSet<Element> = OrderedSet<Element>();
for e:Element in this
{
combo.append(contentsOf: e.parents().array());
open func parents() -> Elements {
let combo: OrderedSet<Element> = OrderedSet<Element>()
for e: Element in this {
combo.append(contentsOf: e.parents().array())
}
return Elements(combo);
return Elements(combo)
}
// list-like methods
/**
Get the first matched element.
@return The first matched element, or <code>null</code> if contents is empty.
*/
open func first()->Element? {
return isEmpty() ? nil : get(0);
open func first() -> Element? {
return isEmpty() ? nil : get(0)
}
open func isEmpty()->Bool{
open func isEmpty() -> Bool {
return array().count == 0
}
open func size()->Int{
open func size() -> Int {
return array().count
}
/**
Get the last matched element.
@return The last matched element, or <code>null</code> if contents is empty.
*/
open func last()->Element? {
return isEmpty() ? nil : get(size() - 1);
open func last() -> Element? {
return isEmpty() ? nil : get(size() - 1)
}
/**
* Perform a depth-first traversal on each of the selected elements.
* @param nodeVisitor the visitor callbacks to perform on each node
@ -513,29 +505,27 @@ open class Elements: NSCopying
@discardableResult
open func traverse(_ nodeVisitor: NodeVisitor)throws->Elements {
let traversor: NodeTraversor = NodeTraversor(nodeVisitor)
for el:Element in this {
try traversor.traverse(el);
for el: Element in this {
try traversor.traverse(el)
}
return self;
return self
}
/**
 * Collects the elements of this list that are `FormElement` instances, in list order.
 * @return the matching {@link FormElement}s; empty when no element of the list is a form.
 */
open func forms()->Array<FormElement> {
    var found: Array<FormElement> = []
    for case let form as FormElement in this {
        found.append(form)
    }
    return found
}
/**
* Appends the specified element to the end of this list.
*
@ -545,23 +535,21 @@ open class Elements: NSCopying
open func add(_ e: Element) {
this.append(e)
}
open func add(_ index: Int, _ element: Element) {
this.insert(element, at: index)
}
open func get(_ i :Int)->Element{
open func get(_ i: Int) -> Element {
return this[i]
}
open func array()->Array<Element>{
open func array()->Array<Element> {
return this
}
}
extension Elements: Equatable
{
extension Elements: Equatable {
/// Returns a Boolean value indicating whether two values are equal.
///
/// Equality is the inverse of inequality. For any values `a` and `b`,
@ -570,9 +558,7 @@ extension Elements: Equatable
/// - Parameters:
/// - lhs: A value to compare.
/// - rhs: Another value to compare.
public static func ==(lhs: Elements, rhs: Elements) -> Bool
{
public static func ==(lhs: Elements, rhs: Elements) -> Bool {
return lhs.this == rhs.this
}
}

File diff suppressed because one or more lines are too long

View File

@ -13,7 +13,7 @@ import Foundation
*/
public class Evaluator {
init () {}
/**
* Test if the element meets the evaluator's requirements.
*
@ -22,412 +22,403 @@ public class Evaluator {
* @return Returns <tt>true</tt> if the requirements are met or
* <tt>false</tt> otherwise
*/
open func matches(_ root: Element, _ element: Element)throws->Bool{
open func matches(_ root: Element, _ element: Element)throws->Bool {
preconditionFailure("self method must be overridden")
}
open func toString()->String
{
open func toString() -> String {
preconditionFailure("self method must be overridden")
}
/**
* Evaluator for tag name
*/
public class Tag : Evaluator {
private let tagName : String
public class Tag: Evaluator {
private let tagName: String
public init(_ tagName: String) {
self.tagName = tagName
}
open override func matches(_ root: Element, _ element: Element)throws->Bool {
return (element.tagName().equalsIgnoreCase(string: tagName))
}
open override func toString()->String {
open override func toString() -> String {
return String(tagName)
}
}
/**
* Evaluator for tag name that ends with
*/
public final class TagEndsWith : Evaluator {
private let tagName : String
public final class TagEndsWith: Evaluator {
private let tagName: String
public init(_ tagName: String) {
self.tagName = tagName
}
open override func matches(_ root: Element, _ element: Element)throws->Bool {
return (element.tagName().hasSuffix(tagName))
}
open override func toString()->String {
open override func toString() -> String {
return String(tagName)
}
}
/**
* Evaluator for element id
*/
public final class Id : Evaluator {
private let id : String
public final class Id: Evaluator {
private let id: String
public init(_ id: String) {
self.id = id
}
open override func matches(_ root: Element, _ element: Element)throws->Bool {
return (id == element.id())
}
open override func toString()->String {
open override func toString() -> String {
return "#\(id)"
}
}
/**
* Evaluator for element class
*/
public final class Class : Evaluator {
private let className : String
public final class Class: Evaluator {
private let className: String
public init(_ className: String) {
self.className = className
}
open override func matches(_ root: Element, _ element: Element)->Bool {
open override func matches(_ root: Element, _ element: Element) -> Bool {
return (element.hasClass(className))
}
open override func toString()->String {
open override func toString() -> String {
return ".\(className)"
}
}
/**
* Evaluator for attribute name matching
*/
public final class Attribute : Evaluator {
private let key : String
public final class Attribute: Evaluator {
private let key: String
public init(_ key: String) {
self.key = key
}
open override func matches(_ root: Element, _ element: Element)throws->Bool {
return element.hasAttr(key)
}
open override func toString()->String {
open override func toString() -> String {
return "[\(key)]"
}
}
/**
 * Evaluator for attribute name prefix matching: an element matches when at least one of
 * its attribute keys, lowercased, starts with the (lowercased) prefix.
 */
public final class AttributeStarting: Evaluator {
    private let keyPrefix: String

    /// Stores the lowercased prefix; `Validate.notEmpty` is applied to the prefix first and may throw.
    public init(_ keyPrefix: String)throws {
        try Validate.notEmpty(string: keyPrefix)
        self.keyPrefix = keyPrefix.lowercased()
    }

    open override func matches(_ root: Element, _ element: Element)throws->Bool {
        // An element without an attribute collection cannot match.
        guard let attributes = element.getAttributes() else {
            return false
        }
        for attribute in attributes.iterator() where attribute.getKey().lowercased().hasPrefix(keyPrefix) {
            return true
        }
        return false
    }

    open override func toString() -> String {
        return "[^\(keyPrefix)]"
    }
}
/**
 * Evaluator for attribute name/value matching: the element must have the attribute, and its
 * trimmed value must equal the expected value ignoring case.
 */
public final class AttributeWithValue: AttributeKeyPair {
    public override init(_ key: String, _ value: String)throws {
        try super.init(key, value)
    }

    open override func matches(_ root: Element, _ element: Element)throws->Bool {
        guard element.hasAttr(key) else {
            return false
        }
        let actual = try element.attr(key)
        return value.equalsIgnoreCase(string: actual.trim())
    }

    open override func toString() -> String {
        return "[\(key)=\(value)]"
    }
}
/**
* Evaluator for attribute name != value matching
*/
public final class AttributeWithValueNot : AttributeKeyPair {
public final class AttributeWithValueNot: AttributeKeyPair {
public override init(_ key: String, _ value: String)throws {
try super.init(key, value)
}
open override func matches(_ root: Element, _ element: Element)throws->Bool {
let s = try element.attr(key)
return !value.equalsIgnoreCase(string: s)
}
open override func toString()->String {
open override func toString() -> String {
return "[\(key)!=\(value)]"
}
}
/**
* Evaluator for attribute name/value matching (value prefix)
*/
public final class AttributeWithValueStarting : AttributeKeyPair {
public final class AttributeWithValueStarting: AttributeKeyPair {
public override init(_ key: String, _ value: String)throws {
try super.init(key, value)
}
open override func matches(_ root: Element, _ element: Element)throws->Bool {
if(element.hasAttr(key)){
if(element.hasAttr(key)) {
return try element.attr(key).lowercased().hasPrefix(value) // value is lower case already
}
return false
}
open override func toString()->String {
open override func toString() -> String {
return "[\(key)^=\(value)]"
}
}
/**
* Evaluator for attribute name/value matching (value ending)
*/
public final class AttributeWithValueEnding : AttributeKeyPair {
public final class AttributeWithValueEnding: AttributeKeyPair {
public override init(_ key: String, _ value: String)throws {
try super.init(key, value)
}
open override func matches(_ root: Element, _ element: Element)throws->Bool {
if(element.hasAttr(key)){
if(element.hasAttr(key)) {
return try element.attr(key).lowercased().hasSuffix(value) // value is lower case
}
return false
}
open override func toString()->String {
open override func toString() -> String {
return "[\(key)$=\(value)]"
}
}
/**
* Evaluator for attribute name/value matching (value containing)
*/
public final class AttributeWithValueContaining : AttributeKeyPair {
public final class AttributeWithValueContaining: AttributeKeyPair {
public override init(_ key: String, _ value: String)throws {
try super.init(key, value)
}
open override func matches(_ root: Element, _ element: Element)throws->Bool {
if(element.hasAttr(key)){
if(element.hasAttr(key)) {
return try element.attr(key).lowercased().contains(value) // value is lower case
}
return false
}
open override func toString()->String {
open override func toString() -> String {
return "[\(key)*=\(value)]"
}
}
/**
* Evaluator for attribute name/value matching (value regex matching)
*/
public final class AttributeWithValueMatching : Evaluator {
let key : String
let pattern : Pattern
public final class AttributeWithValueMatching: Evaluator {
let key: String
let pattern: Pattern
public init(_ key: String, _ pattern: Pattern) {
self.key = key.trim().lowercased()
self.pattern = pattern
super.init()
}
open override func matches(_ root: Element, _ element: Element)throws->Bool {
if(element.hasAttr(key)){
if(element.hasAttr(key)) {
let s = try element.attr(key)
return pattern.matcher(in:s).find()
}
return false
}
open override func toString()->String {
open override func toString() -> String {
return "[\(key)~=\(pattern.toString())]"
}
}
/**
* Abstract evaluator for attribute name/value matching
*/
public class AttributeKeyPair : Evaluator {
let key : String
var value : String
public class AttributeKeyPair: Evaluator {
let key: String
var value: String
public init(_ key: String, _ value2: String)throws {
var value2 = value2
try Validate.notEmpty(string: key)
try Validate.notEmpty(string: value2)
self.key = key.trim().lowercased()
if (value2.startsWith("\"") && value2.hasSuffix("\"") || value2.startsWith("'") && value2.hasSuffix("'"))
{
if (value2.startsWith("\"") && value2.hasSuffix("\"") || value2.startsWith("'") && value2.hasSuffix("'")) {
value2 = value2.substring(1, value2.characters.count-2)
}
self.value = value2.trim().lowercased()
}
open override func matches(_ root: Element, _ element: Element)throws->Bool{
open override func matches(_ root: Element, _ element: Element)throws->Bool {
preconditionFailure("self method must be overridden")
}
}
/**
* Evaluator for any / all element matching
*/
public final class AllElements : Evaluator {
public final class AllElements: Evaluator {
open override func matches(_ root: Element, _ element: Element)throws->Bool {
return true
}
open override func toString()->String {
open override func toString() -> String {
return "*"
}
}
/**
* Evaluator for matching by sibling index number (e {@literal <} idx)
*/
public final class IndexLessThan : IndexEvaluator {
public final class IndexLessThan: IndexEvaluator {
public override init(_ index: Int) {
super.init(index)
}
open override func matches(_ root: Element, _ element: Element)throws->Bool {
return try element.elementSiblingIndex() < index
}
open override func toString()->String {
open override func toString() -> String {
return ":lt(\(index))"
}
}
/**
* Evaluator for matching by sibling index number (e {@literal >} idx)
*/
public final class IndexGreaterThan : IndexEvaluator {
public final class IndexGreaterThan: IndexEvaluator {
public override init(_ index: Int) {
super.init(index)
}
open override func matches(_ root: Element, _ element: Element)throws->Bool {
return try element.elementSiblingIndex() > index
}
open override func toString()->String {
open override func toString() -> String {
return ":gt(\(index))"
}
}
/**
* Evaluator for matching by sibling index number (e = idx)
*/
public final class IndexEquals : IndexEvaluator {
public final class IndexEquals: IndexEvaluator {
public override init(_ index: Int) {
super.init(index)
}
open override func matches(_ root: Element, _ element: Element)throws->Bool {
return try element.elementSiblingIndex() == index
}
open override func toString()->String {
open override func toString() -> String {
return ":eq(\(index))"
}
}
/**
 * Evaluator for matching the last sibling (css :last-child)
 */
public final class IsLastChild: Evaluator {
    /**
     * Matches when the element has a parent that is not a `Document` and the element's
     * sibling index equals the index of the parent's last child element.
     */
    open override func matches(_ root: Element, _ element: Element)throws->Bool {
        guard let parent = element.parent() else {
            return false
        }
        let index = try element.elementSiblingIndex()
        // Compare against the number of child *elements*, the same collection
        // IsNthLastChild counts. Counting every child node would let trailing text or
        // comment nodes prevent the last element from ever matching.
        let lastElementIndex = parent.children().array().count - 1
        return !(parent is Document) && index == lastElementIndex
    }

    open override func toString() -> String {
        return ":last-child"
    }
}
public final class IsFirstOfType : IsNthOfType {
public final class IsFirstOfType: IsNthOfType {
public init() {
super.init(0,1)
super.init(0, 1)
}
open override func toString()->String {
open override func toString() -> String {
return ":first-of-type"
}
}
public final class IsLastOfType : IsNthLastOfType {
public final class IsLastOfType: IsNthLastOfType {
public init() {
super.init(0,1)
super.init(0, 1)
}
open override func toString()->String {
open override func toString() -> String {
return ":last-of-type"
}
}
public class CssNthEvaluator : Evaluator {
public let a : Int
public let b : Int
public class CssNthEvaluator: Evaluator {
public let a: Int
public let b: Int
public init(_ a: Int, _ b: Int) {
self.a = a
self.b = b
@ -436,297 +427,291 @@ public class Evaluator {
self.a = 0
self.b = b
}
/**
 * Tests the element's position (as computed by `calculatePosition`) against `a` and `b`.
 * Elements with no parent, or whose parent is a `Document`, never match.
 * With `a == 0` the position must equal `b`; otherwise `position - b` must be a multiple
 * of `a` and `(position - b) * a` must not be negative.
 */
open override func matches(_ root: Element, _ element: Element)throws->Bool {
    guard let parent = element.parent(), !(parent is Document) else {
        return false
    }
    let position: Int = try calculatePosition(root, element)
    guard a != 0 else {
        return position == b
    }
    let offset = position - b
    return offset * a >= 0 && offset % a == 0
}
/**
 * Renders this evaluator as a CSS pseudo-class in an+b notation:
 * `:name(b)` when `a == 0`, `:name(an)` when `b == 0`, otherwise `:name(an+b)` / `:name(an-b)`.
 */
open override func toString() -> String {
    // Call the method. Interpolating the bare method reference prints a function
    // description instead of the pseudo-class name.
    let name: String = getPseudoClass()
    if a == 0 {
        return ":\(name)(\(b))"
    }
    if b == 0 {
        return ":\(name)(\(a)n)"
    }
    // b is non-zero here; a negative b already carries its own sign.
    let signedOffset: String = b > 0 ? "+\(b)" : "\(b)"
    return ":\(name)(\(a)n\(signedOffset))"
}
open func getPseudoClass()->String{
open func getPseudoClass() -> String {
preconditionFailure("self method must be overridden")
}
open func calculatePosition(_ root: Element, _ element: Element)throws->Int{
open func calculatePosition(_ root: Element, _ element: Element)throws->Int {
preconditionFailure("self method must be overridden")
}
}
/**
* css-compatible Evaluator for :eq (css :nth-child)
*
* @see IndexEquals
*/
public final class IsNthChild : CssNthEvaluator {
public final class IsNthChild: CssNthEvaluator {
public override init(_ a: Int, _ b: Int) {
super.init(a,b)
super.init(a, b)
}
open override func calculatePosition(_ root: Element, _ element: Element)throws->Int {
return try element.elementSiblingIndex()+1
}
open override func getPseudoClass()->String {
open override func getPseudoClass() -> String {
return "nth-child"
}
}
/**
 * css pseudo class :nth-last-child
 *
 * @see IndexEquals
 */
public final class IsNthLastChild: CssNthEvaluator {
    public override init(_ a: Int, _ b: Int) {
        super.init(a, b)
    }

    /// Number of child elements of the parent (0 when there is no parent) minus the element's sibling index.
    open override func calculatePosition(_ root: Element, _ element: Element)throws->Int {
        let siblingCount: Int = element.parent()?.children().array().count ?? 0
        let index = try element.elementSiblingIndex()
        return siblingCount - index
    }

    open override func getPseudoClass() -> String {
        return "nth-last-child"
    }
}
/**
 * css pseudo class nth-of-type
 *
 */
public class IsNthOfType: CssNthEvaluator {
    public override init(_ a: Int, _ b: Int) {
        super.init(a, b)
    }

    /// Counts the parent's child elements with the same tag as `element`, up to and including
    /// `element` itself. Returns 0 when the element has no parent.
    open override func calculatePosition(_ root: Element, _ element: Element) -> Int {
        guard let siblings = element.parent()?.children().array() else {
            return 0
        }
        var sameTagCount = 0
        for sibling in siblings {
            if sibling.tag() == element.tag() {
                sameTagCount += 1
            }
            // Stop once the element itself (identity, not equality) has been counted.
            if sibling === element {
                break
            }
        }
        return sameTagCount
    }

    open override func getPseudoClass() -> String {
        return "nth-of-type"
    }
}
/**
 * css pseudo class nth-last-of-type
 */
public class IsNthLastOfType: CssNthEvaluator {
    public override init(_ a: Int, _ b: Int) {
        super.init(a, b)
    }

    /// Counts the parent's child elements, from the element's own sibling index to the end,
    /// that share the element's tag. Returns 0 when the element has no parent.
    open override func calculatePosition(_ root: Element, _ element: Element)throws->Int {
        guard let family = element.parent()?.children() else {
            return 0
        }
        let start = try element.elementSiblingIndex()
        var sameTagCount = 0
        for index in start..<family.array().count where family.get(index).tag() == element.tag() {
            sameTagCount += 1
        }
        return sameTagCount
    }

    open override func getPseudoClass() -> String {
        return "nth-last-of-type"
    }
}
/**
 * Evaluator for matching the first sibling (css :first-child)
 */
public final class IsFirstChild: Evaluator {
    /**
     * Matches when the element has a parent that is not a Document and the
     * element's sibling index is 0.
     */
    open override func matches(_ root: Element, _ element: Element) throws -> Bool {
        guard let parent = element.parent(), !(parent is Document) else {
            return false
        }
        let siblingIndex = try element.elementSiblingIndex()
        return siblingIndex == 0
    }

    open override func toString() -> String {
        return ":first-child"
    }
}
/**
 * css3 pseudo-class :root
 * @see <a href="http://www.w3.org/TR/selectors/#root-pseudo">:root selector</a>
 *
 */
public final class IsRoot: Evaluator {
    /**
     * Matches when `element` is the same instance as the effective root: the
     * first child of `root` when `root` is a Document, otherwise `root` itself.
     */
    open override func matches(_ root: Element, _ element: Element) throws -> Bool {
        let effectiveRoot: Element
        if root is Document {
            effectiveRoot = root.child(0)
        } else {
            effectiveRoot = root
        }
        return element === effectiveRoot
    }

    open override func toString() -> String {
        return ":root"
    }
}
/**
 * css pseudo class :only-child
 */
public final class IsOnlyChild: Evaluator {
    /**
     * Matches when the element has a non-Document parent and no sibling elements.
     */
    open override func matches(_ root: Element, _ element: Element) throws -> Bool {
        guard let parent = element.parent(), !(parent is Document) else {
            return false
        }
        return element.siblingElements().array().isEmpty
    }

    open override func toString() -> String {
        return ":only-child"
    }
}
/**
 * css pseudo class :only-of-type
 */
public final class IsOnlyOfType: Evaluator {
    /**
     * Matches when the element has a non-Document parent and exactly one of
     * that parent's children (the element itself) carries the element's tag.
     */
    open override func matches(_ root: Element, _ element: Element) throws -> Bool {
        guard let parent = element.parent(), !(parent is Document) else {
            return false
        }
        let sameTagCount = parent.children().array().filter { $0.tag() == element.tag() }.count
        return sameTagCount == 1
    }

    open override func toString() -> String {
        return ":only-of-type"
    }
}
/**
 * css pseudo class :empty
 */
public final class IsEmpty: Evaluator {
    /**
     * Matches when every child node of the element is a Comment, an
     * XmlDeclaration or a DocumentType (an element with no child nodes matches).
     */
    open override func matches(_ root: Element, _ element: Element) throws -> Bool {
        for child in element.getChildNodes() {
            let ignorable = child is Comment || child is XmlDeclaration || child is DocumentType
            if !ignorable {
                return false
            }
        }
        return true
    }

    open override func toString() -> String {
        return ":empty"
    }
}
/**
 * Abstract evaluator for sibling index matching.
 * Stores the index that subclasses compare against.
 *
 * @author ant
 */
public class IndexEvaluator: Evaluator {
    // Index supplied at construction time; immutable afterwards.
    let index: Int

    public init(_ index: Int) {
        self.index = index
    }
}
/**
 * Evaluator for matching Element (and its descendants) text
 */
public final class ContainsText: Evaluator {
    // Stored lower-cased so that matching is case-insensitive.
    private let searchText: String

    public init(_ searchText: String) {
        self.searchText = searchText.lowercased()
    }

    /**
     * Matches when the lower-cased text of `element` contains the search text.
     */
    open override func matches(_ root: Element, _ element: Element) throws -> Bool {
        return try element.text().lowercased().contains(searchText)
    }

    open override func toString() -> String {
        // The closing parenthesis was missing, producing ":contains(foo".
        return ":contains(\(searchText))"
    }
}
/**
 * Evaluator for matching Element's own text
 */
public final class ContainsOwnText: Evaluator {
    // Stored lower-cased so that matching is case-insensitive.
    private let searchText: String

    public init(_ searchText: String) {
        self.searchText = searchText.lowercased()
    }

    /**
     * Matches when the lower-cased own text of `element` contains the search text.
     */
    open override func matches(_ root: Element, _ element: Element) throws -> Bool {
        return element.ownText().lowercased().contains(searchText)
    }

    open override func toString() -> String {
        // The closing parenthesis was missing, producing ":containsOwn(foo".
        return ":containsOwn(\(searchText))"
    }
}
/**
 * Evaluator for matching Element (and its descendants) text with regex
 */
public final class Matches: Evaluator {
    private let pattern: Pattern

    public init(_ pattern: Pattern) {
        self.pattern = pattern
    }

    /**
     * Matches when the pattern is found in the text of `element`.
     */
    open override func matches(_ root: Element, _ element: Element) throws -> Bool {
        let matcher = try pattern.matcher(in: element.text())
        return matcher.find()
    }

    open override func toString() -> String {
        // Use pattern.toString() (as MatchesOwn does) instead of interpolating the
        // Pattern object itself, and close the parenthesis that was missing.
        return ":matches(\(pattern.toString()))"
    }
}
/**
 * Evaluator for matching Element's own text with regex
 */
public final class MatchesOwn: Evaluator {
    private let pattern: Pattern

    public init(_ pattern: Pattern) {
        self.pattern = pattern
    }

    /**
     * Matches when the pattern is found in the own text of `element`.
     */
    open override func matches(_ root: Element, _ element: Element) throws -> Bool {
        let matcher = pattern.matcher(in: element.ownText())
        return matcher.find()
    }

    open override func toString() -> String {
        // The closing parenthesis was missing, producing ":matchesOwn(regex".
        return ":matchesOwn(\(pattern.toString()))"
    }
}

View File

@ -17,8 +17,6 @@ public enum ExceptionType {
case SelectorParseException
}
public enum Exception : Error {
case Error(type:ExceptionType ,Message: String)
public enum Exception: Error {
case Error(type:ExceptionType, Message: String)
}

View File

@ -12,9 +12,9 @@ import Foundation
* A HTML Form Element provides ready access to the form fields/controls that are associated with it. It also allows a
* form to easily be submitted.
*/
public class FormElement : Element {
public class FormElement: Element {
private let _elements: Elements = Elements()
/**
* Create a new, standalone form element.
*
@ -25,26 +25,26 @@ public class FormElement : Element {
public override init(_ tag: Tag, _ baseUri: String, _ attributes: Attributes) {
super.init(tag, baseUri, attributes)
}
/**
* Get the list of form control elements associated with this form.
* @return form controls associated with this element.
*/
public func elements()->Elements {
public func elements() -> Elements {
return _elements
}
/**
* Add a form control element to this form.
* @param element form control to add
* @return this form element, for chaining
*/
@discardableResult
public func addElement(_ element: Element)->FormElement {
public func addElement(_ element: Element) -> FormElement {
_elements.add(element)
return self
}
//todo:
/**
* Prepare to submit this form. A Connection object is created with the request set up from the form values. You
@ -63,7 +63,7 @@ public class FormElement : Element {
// .data(formData())
// .method(method)
// }
//todo:
/**
* Get the data that this form submits. The returned list is a copy of the data, and changes to the contents of the
@ -105,24 +105,21 @@ public class FormElement : Element {
// }
// return data;
// }
public override func copy(with zone: NSZone? = nil) -> Any
{
let clone = FormElement(_tag,baseUri!,attributes!)
public override func copy(with zone: NSZone? = nil) -> Any {
let clone = FormElement(_tag, baseUri!, attributes!)
return copy(clone: clone)
}
public override func copy(parent: Node?)->Node
{
let clone = FormElement(_tag,baseUri!,attributes!)
return copy(clone: clone,parent: parent)
public override func copy(parent: Node?) -> Node {
let clone = FormElement(_tag, baseUri!, attributes!)
return copy(clone: clone, parent: parent)
}
/**
 * Copies this form's associated control list into `clone` (which must be a
 * FormElement; the forced cast traps otherwise) and then delegates the rest
 * of the copy to the superclass.
 */
public override func copy(clone: Node, parent: Node?) -> Node {
    let formClone = clone as! FormElement
    for control in _elements.array() {
        formClone._elements.add(control)
    }
    return super.copy(clone: formClone, parent: parent)
}
}

View File

@ -27,10 +27,10 @@ class HtmlTreeBuilder: TreeBuilder {
"noembed", "noframes", "noscript", "object", "ol", "p", "param", "plaintext", "pre", "script",
"section", "select", "style", "summary", "table", "tbody", "td", "textarea", "tfoot", "th", "thead",
"title", "tr", "ul", "wbr", "xmp"]
private var _state: HtmlTreeBuilderState = HtmlTreeBuilderState.Initial // the current state
private var _originalState: HtmlTreeBuilderState = HtmlTreeBuilderState.Initial // original / marked state
private var baseUriSetFromDoc: Bool = false
private var headElement: Element? // the current head element
private var formElement: FormElement? // the current form element
@ -38,27 +38,25 @@ class HtmlTreeBuilder: TreeBuilder {
private var formattingElements: Array<Element?> = Array<Element?>() // active (open) formatting elements
private var pendingTableCharacters: Array<String> = Array<String>() // chars in table to be shifted out
private var emptyEnd: Token.EndTag = Token.EndTag() // reused empty end tag
private var _framesetOk: Bool = true // if ok to go into frameset
private var fosterInserts: Bool = false // if next inserts should be fostered
private var fragmentParsing: Bool = false // if parsing a fragment of html
public override init() {
super.init()
}
public override func defaultSettings()->ParseSettings {
public override func defaultSettings() -> ParseSettings {
return ParseSettings.htmlDefault
}
override func parse(_ input: String, _ baseUri: String, _ errors: ParseErrorList, _ settings: ParseSettings)throws->Document {
_state = HtmlTreeBuilderState.Initial
baseUriSetFromDoc = false
return try super.parse(input, baseUri, errors, settings)
}
func parseFragment(_ inputFragment: String, _ context: Element?, _ baseUri: String, _ errors: ParseErrorList, _ settings: ParseSettings)throws->Array<Node> {
// context may be null
_state = HtmlTreeBuilderState.Initial
@ -66,103 +64,103 @@ class HtmlTreeBuilder: TreeBuilder {
contextElement = context
fragmentParsing = true
var root: Element? = nil
if let context = context {
if let d = context.ownerDocument() { // quirks setup:
doc.quirksMode(d.quirksMode())
}
// initialise the tokeniser state:
let contextTag: String = context.tagName()
if (StringUtil.inString(contextTag, haystack: "title", "textarea")){
if (StringUtil.inString(contextTag, haystack: "title", "textarea")) {
tokeniser.transition(TokeniserState.Rcdata)
}else if (StringUtil.inString(contextTag, haystack: "iframe", "noembed", "noframes", "style", "xmp")){
} else if (StringUtil.inString(contextTag, haystack: "iframe", "noembed", "noframes", "style", "xmp")) {
tokeniser.transition(TokeniserState.Rawtext)
}else if (contextTag=="script"){
} else if (contextTag=="script") {
tokeniser.transition(TokeniserState.ScriptData)
}else if (contextTag==("noscript")){
} else if (contextTag==("noscript")) {
tokeniser.transition(TokeniserState.Data) // if scripting enabled, rawtext
}else if (contextTag=="plaintext"){
} else if (contextTag=="plaintext") {
tokeniser.transition(TokeniserState.Data)
}else{
} else {
tokeniser.transition(TokeniserState.Data) // default
}
root = try Element(Tag.valueOf("html", settings), baseUri)
try Validate.notNull(obj:root)
try doc.appendChild(root!)
stack.append(root!)
resetInsertionMode()
// setup form element to nearest form on context (up ancestor chain). ensures form controls are associated
// with form correctly
let contextChain: Elements = context.parents()
contextChain.add(0, context)
for parent:Element in contextChain.array() {
for parent: Element in contextChain.array() {
if let x = (parent as? FormElement) {
formElement = x
break
}
}
}
try runParser()
if (context != nil && root != nil){
if (context != nil && root != nil) {
return root!.getChildNodes()
}else{
} else {
return doc.getChildNodes()
}
}
@discardableResult
public override func process(_ token: Token)throws->Bool{
public override func process(_ token: Token)throws->Bool {
currentToken = token
return try self._state.process(token, self)
}
@discardableResult
func process(_ token: Token, _ state: HtmlTreeBuilderState)throws->Bool {
currentToken = token
return try state.process(token, self)
}
func transition(_ state: HtmlTreeBuilderState) {
self._state = state
}
func state()->HtmlTreeBuilderState {
func state() -> HtmlTreeBuilderState {
return _state
}
func markInsertionMode() {
_originalState = _state
}
func originalState()->HtmlTreeBuilderState {
func originalState() -> HtmlTreeBuilderState {
return _originalState
}
func framesetOk(_ framesetOk: Bool) {
self._framesetOk = framesetOk
}
func framesetOk()->Bool {
func framesetOk() -> Bool {
return _framesetOk
}
func getDocument()->Document {
func getDocument() -> Document {
return doc
}
func getBaseUri()->String {
func getBaseUri() -> String {
return baseUri
}
func maybeSetBaseUri(_ base: Element)throws {
if (baseUriSetFromDoc){ // only listen to the first <base href> in parse
if (baseUriSetFromDoc) { // only listen to the first <base href> in parse
return
}
let href: String = try base.absUrl("href")
if (href.characters.count != 0) { // ignore <base target> etc
baseUri = href
@ -170,17 +168,17 @@ class HtmlTreeBuilder: TreeBuilder {
try doc.setBaseUri(href) // set on the doc so doc.createElement(Tag) will get updated base, and to update all descendants
}
}
func isFragmentParsing()->Bool {
func isFragmentParsing() -> Bool {
return fragmentParsing
}
// Records a parse error for the current token in the given state, provided the
// error list still accepts errors and a current token is set.
func error(_ state: HtmlTreeBuilderState) {
    guard errors.canAddError(), let token = currentToken else {
        return
    }
    let position = reader.getPos()
    errors.add(ParseError(position, "Unexpected token [\(token.tokenType())] when in state [\(state.rawValue)]"))
}
@discardableResult
func insert(_ startTag: Token.StartTag)throws->Element {
// handle empty unknown tags
@ -197,19 +195,19 @@ class HtmlTreeBuilder: TreeBuilder {
try insert(el)
return el
}
@discardableResult
func insertStartTag(_ startTagName: String)throws->Element {
let el: Element = try Element(Tag.valueOf(startTagName, settings), baseUri)
try insert(el)
return el
}
func insert(_ el: Element)throws {
try insertNode(el)
stack.append(el)
}
@discardableResult
func insertEmpty(_ startTag: Token.StartTag)throws->Element {
let tag: Tag = try Tag.valueOf(startTag.name(), settings)
@ -227,7 +225,7 @@ class HtmlTreeBuilder: TreeBuilder {
}
return el
}
@discardableResult
func insertForm(_ startTag: Token.StartTag, _ onStack: Bool)throws->FormElement {
let tag: Tag = try Tag.valueOf(startTag.name(), settings)
@ -235,71 +233,71 @@ class HtmlTreeBuilder: TreeBuilder {
let el: FormElement = FormElement(tag, baseUri, startTag._attributes)
setFormElement(el)
try insertNode(el)
if (onStack){
if (onStack) {
stack.append(el)
}
return el
}
func insert(_ commentToken: Token.Comment)throws {
let comment: Comment = Comment(commentToken.getData(), baseUri)
try insertNode(comment)
}
// Appends the token's character data to the current element. Inside <script>
// and <style> the data is stored as a DataNode; everywhere else as a TextNode.
func insert(_ characterToken: Token.Char) throws {
    let tagName: String? = currentElement()?.tagName()
    let isRawData = (tagName == "script" || tagName == "style")

    // Throws when the token carries no data, so the unwrap below is safe.
    try Validate.notNull(obj: characterToken.getData())
    let node: Node
    if isRawData {
        node = DataNode(characterToken.getData()!, baseUri)
    } else {
        node = TextNode(characterToken.getData()!, baseUri)
    }
    // doesn't use insertNode, because we don't foster these; and will always have a stack.
    try currentElement()?.appendChild(node)
}
// Inserts `node` at the appropriate place: into the document when the stack is
// still empty (doctype, comments), into the foster parent when foster inserts
// are enabled, otherwise into the current element.
private func insertNode(_ node: Node) throws {
    if stack.isEmpty {
        try doc.appendChild(node)
    } else if isFosterInserts() {
        try insertInFosterParent(node)
    } else {
        try currentElement()?.appendChild(node)
    }

    // connect form controls to their form element
    if let control = node as? Element, control.tag().isFormListed() {
        formElement?.addElement(control)
    }
}
// Removes and returns the element on top of the stack.
// Traps when the stack is empty, as before.
@discardableResult
func pop() -> Element {
    return stack.removeLast()
}
func push(_ element: Element) {
stack.append(element)
}
func getStack()->Array<Element> {
return stack
}
@discardableResult
func onStack(_ el: Element)->Bool {
func onStack(_ el: Element) -> Bool {
return isElementInQueue(stack, el)
}
private func isElementInQueue(_ queue: Array<Element?>, _ element: Element?)->Bool {
private func isElementInQueue(_ queue: Array<Element?>, _ element: Element?) -> Bool {
for pos in (0..<queue.count).reversed() {
let next: Element? = queue[pos]
if (next == element) {
@ -308,8 +306,8 @@ class HtmlTreeBuilder: TreeBuilder {
}
return false
}
func getFromStack(_ elName: String)->Element? {
func getFromStack(_ elName: String) -> Element? {
for pos in (0..<stack.count).reversed() {
let next: Element = stack[pos]
if next.nodeName() == elName {
@ -318,9 +316,9 @@ class HtmlTreeBuilder: TreeBuilder {
}
return nil
}
@discardableResult
func removeFromStack(_ el: Element)->Bool {
func removeFromStack(_ el: Element) -> Bool {
for pos in (0..<stack.count).reversed() {
let next: Element = stack[pos]
if (next == el) {
@ -330,17 +328,17 @@ class HtmlTreeBuilder: TreeBuilder {
}
return false
}
// Pops elements off the top of the stack up to and including the first one
// whose node name equals `elName`; empties the stack if none matches.
func popStackToClose(_ elName: String) {
    while let removed = stack.popLast() {
        if removed.nodeName() == elName {
            return
        }
    }
}
func popStackToClose(_ elNames: String...) {
popStackToClose(elNames)
}
@ -348,12 +346,12 @@ class HtmlTreeBuilder: TreeBuilder {
for pos in (0..<stack.count).reversed() {
let next: Element = stack[pos]
stack.remove(at: pos)
if (StringUtil.inString(next.nodeName(),elNames)){
if (StringUtil.inString(next.nodeName(), elNames)) {
break
}
}
}
func popStackToBefore(_ elName: String) {
for pos in (0..<stack.count).reversed() {
let next: Element = stack[pos]
@ -364,34 +362,34 @@ class HtmlTreeBuilder: TreeBuilder {
}
}
}
func clearStackToTableContext() {
clearStackToContext("table")
}
func clearStackToTableBodyContext() {
clearStackToContext("tbody", "tfoot", "thead")
}
func clearStackToTableRowContext() {
clearStackToContext("tr")
}
private func clearStackToContext(_ nodeNames: String...) {
clearStackToContext(nodeNames)
}
// Pops elements off the top of the stack until the top element is one of
// `nodeNames` or "html" (that element stays on the stack).
private func clearStackToContext(_ nodeNames: [String]) {
    while let top = stack.last {
        if StringUtil.inString(top.nodeName(), nodeNames) || top.nodeName() == "html" {
            return
        }
        stack.removeLast()
    }
}
func aboveOnStack(_ el: Element)->Element? {
func aboveOnStack(_ el: Element) -> Element? {
//assert(onStack(el), "Invalid parameter")
onStack(el)
for pos in (0..<stack.count).reversed() {
@ -402,17 +400,17 @@ class HtmlTreeBuilder: TreeBuilder {
}
return nil
}
func insertOnStackAfter(_ after: Element, _ input: Element)throws {
let i: Int = stack.lastIndexOf(after)
try Validate.isTrue(val: i != -1)
stack.insert(input, at: i + 1 )
}
func replaceOnStack(_ out: Element, _ input: Element)throws {
try stack = replaceInQueue(stack, out, input)
}
private func replaceInQueue(_ queue: Array<Element>, _ out: Element, _ input: Element)throws->Array<Element> {
var queue = queue
let i: Int = queue.lastIndexOf(out)
@ -420,7 +418,7 @@ class HtmlTreeBuilder: TreeBuilder {
queue[i] = input
return queue
}
func resetInsertionMode() {
var last = false
for pos in (0..<stack.count).reversed() {
@ -470,110 +468,110 @@ class HtmlTreeBuilder: TreeBuilder {
}
}
}
// todo: tidy up in specific scope methods
private var specificScopeTarget: [String?] = [nil]
private func inSpecificScope(_ targetName: String, _ baseTypes: [String], _ extraTypes: [String]?)throws->Bool {
specificScopeTarget[0] = targetName
return try inSpecificScope(specificScopeTarget, baseTypes, extraTypes)
}
// Walks the stack from the top down. Returns true at the first element named in
// `targetNames`, false at the first element named in `baseTypes` or `extraTypes`.
// Falling off the bottom of the stack is reported through Validate.fail.
private func inSpecificScope(_ targetNames: [String?], _ baseTypes: [String], _ extraTypes: [String]?) throws -> Bool {
    for el in stack.reversed() {
        let name: String = el.nodeName()
        if StringUtil.inString(name, targetNames) {
            return true
        }
        if StringUtil.inString(name, baseTypes) {
            return false
        }
        if let extras = extraTypes, StringUtil.inString(name, extras) {
            return false
        }
    }
    try Validate.fail(msg: "Should not be reachable")
    return false
}
func inScope(_ targetNames: [String])throws->Bool {
return try inSpecificScope(targetNames, HtmlTreeBuilder.TagsSearchInScope, nil)
}
func inScope(_ targetName: String)throws->Bool {
return try inScope(targetName, nil)
}
func inScope(_ targetName: String, _ extras: [String]?)throws->Bool {
return try inSpecificScope(targetName, HtmlTreeBuilder.TagsSearchInScope, extras)
// todo: in mathml namespace: mi, mo, mn, ms, mtext annotation-xml
// todo: in svg namespace: foreignObject, desc, title
}
func inListItemScope(_ targetName: String)throws->Bool {
return try inScope(targetName, HtmlTreeBuilder.TagSearchList)
}
func inButtonScope(_ targetName: String)throws->Bool {
return try inScope(targetName, HtmlTreeBuilder.TagSearchButton)
}
func inTableScope(_ targetName: String)throws->Bool {
return try inSpecificScope(targetName, HtmlTreeBuilder.TagSearchTableScope, nil)
}
// Walks the stack from the top down. Returns true at the first element named
// `targetName`, and false at the first element whose name is not listed in
// TagSearchSelectScope. Falling off the bottom is reported through Validate.fail.
func inSelectScope(_ targetName: String) throws -> Bool {
    for el in stack.reversed() {
        let name: String = el.nodeName()
        if name.equals(targetName) {
            return true
        }
        // all elements except those in TagSearchSelectScope end the scope
        if !StringUtil.inString(name, HtmlTreeBuilder.TagSearchSelectScope) {
            return false
        }
    }
    try Validate.fail(msg: "Should not be reachable")
    return false
}
func setHeadElement(_ headElement: Element) {
self.headElement = headElement
}
func getHeadElement()->Element? {
func getHeadElement() -> Element? {
return headElement
}
func isFosterInserts()->Bool {
func isFosterInserts() -> Bool {
return fosterInserts
}
func setFosterInserts(_ fosterInserts: Bool) {
self.fosterInserts = fosterInserts
}
func getFormElement()->FormElement? {
func getFormElement() -> FormElement? {
return formElement
}
func setFormElement(_ formElement: FormElement?) {
self.formElement = formElement
}
func newPendingTableCharacters() {
pendingTableCharacters = Array<String>()
}
func getPendingTableCharacters()->Array<String> {
return pendingTableCharacters
}
func setPendingTableCharacters(_ pendingTableCharacters: Array<String>) {
self.pendingTableCharacters = pendingTableCharacters
}
/**
11.2.5.2 Closing elements that have implied end tags<p/>
When the steps below require the UA to generate implied end tags, then, while the current node is a dd element, a
@ -585,50 +583,50 @@ class HtmlTreeBuilder: TreeBuilder {
*/
func generateImpliedEndTags(_ excludeTag: String?) {
    // Pop while the current node is one of the implied-end-tag elements, stopping
    // early at `excludeTag` when one is given.
    //
    // The previous condition required `excludeTag != nil`, so the no-argument
    // overload (which passes nil) never popped anything. It also force-unwrapped
    // currentElement(), which traps on an empty stack; the loop now simply ends.
    while let current = currentElement() {
        let name: String = current.nodeName()
        if let excluded = excludeTag, name.equals(excluded) {
            break
        }
        if !StringUtil.inString(name, HtmlTreeBuilder.TagSearchEndTags) {
            break
        }
        pop()
    }
}
func generateImpliedEndTags() {
generateImpliedEndTags(nil)
}
// True when the element's node name is listed in TagSearchSpecial.
func isSpecial(_ el: Element) -> Bool {
    // todo: mathml's mi, mo, mn
    // todo: svg's foreignObject, desc, title
    return StringUtil.inString(el.nodeName(), HtmlTreeBuilder.TagSearchSpecial)
}
// Returns the newest entry of the active formatting elements list, or nil when
// the list is empty or its newest entry is a (nil) marker.
func lastFormattingElement() -> Element? {
    guard let newest = formattingElements.last else {
        return nil
    }
    return newest
}
// Removes and returns the newest entry of the active formatting elements list.
// Returns nil when the list is empty or when the removed entry was a marker.
func removeLastFormattingElement() -> Element? {
    guard let removed = formattingElements.popLast() else {
        return nil
    }
    return removed
}
// active formatting elements
func pushActiveFormattingElements(_ input: Element) {
var numSeen: Int = 0
for pos in (0..<formattingElements.count).reversed() {
let el: Element? = formattingElements[pos]
if (el == nil){ // marker
if (el == nil) { // marker
break
}
if (isSameFormattingElement(input, el!)){
if (isSameFormattingElement(input, el!)) {
numSeen += 1
}
if (numSeen == 3) {
formattingElements.remove(at: pos)
break
@ -636,29 +634,28 @@ class HtmlTreeBuilder: TreeBuilder {
}
formattingElements.append(input)
}
// Two formatting elements are the same when their node names and attributes are
// equal. Element.equals only checks tag, might in future check children.
// todo: namespaces (a.namespace().equals(b.namespace()))
private func isSameFormattingElement(_ a: Element, _ b: Element) -> Bool {
    guard a.attributes != nil else {
        return false
    }
    let sameName = a.nodeName().equals(b.nodeName())
    return sameName && a.getAttributes()!.equals(o: b.getAttributes())
}
func reconstructFormattingElements()throws {
let last: Element? = lastFormattingElement()
if (last == nil || onStack(last!)){
if (last == nil || onStack(last!)) {
return
}
var entry:Element? = last
let size:Int = formattingElements.count
var pos:Int = size - 1
var entry: Element? = last
let size: Int = formattingElements.count
var pos: Int = size - 1
var skip: Bool = false
while (true) {
if (pos == 0) { // step 4. if none before, skip to 8
@ -677,31 +674,31 @@ class HtmlTreeBuilder: TreeBuilder {
entry = formattingElements[pos]
}
try Validate.notNull(obj: entry) // should not occur, as we break at last element
// 8. create new element from element, 9 insert into current node, onto stack
skip = false // can only skip increment from 4.
let newEl: Element = try insertStartTag(entry!.nodeName()) // todo: avoid fostering here?
// newEl.namespace(entry.namespace()) // todo: namespaces
newEl.getAttributes()?.addAll(incoming: entry!.getAttributes())
// 10. replace entry with new entry
formattingElements[pos] = newEl
// 11
if (pos == size-1) // if not last entry in list, jump to 7
{break}
}
}
// Removes entries from the end of the active formatting elements list up to
// and including the last (nil) marker, or until the list is empty.
func clearFormattingElementsToLastMarker() {
    while let removed = formattingElements.popLast() {
        if removed == nil {
            return
        }
    }
}
func removeFromActiveFormattingElements(_ el: Element?) {
for pos in (0..<formattingElements.count).reversed() {
let next: Element? = formattingElements[pos]
@ -711,36 +708,31 @@ class HtmlTreeBuilder: TreeBuilder {
}
}
}
func isInActiveFormattingElements(_ el: Element)->Bool {
func isInActiveFormattingElements(_ el: Element) -> Bool {
return isElementInQueue(formattingElements, el)
}
// Searches the active formatting elements from newest to oldest for an element
// named `nodeName`, giving up at the first (nil) scope marker.
func getActiveFormattingElement(_ nodeName: String) -> Element? {
    for entry in formattingElements.reversed() {
        guard let candidate = entry else {
            return nil // scope marker
        }
        if candidate.nodeName().equals(nodeName) {
            return candidate
        }
    }
    return nil
}
// Replaces the newest occurrence of `out` in the active formatting elements
// list with `input`; throws (via Validate.isTrue) when `out` is not in the list.
//
// The list may contain nil scope markers, so it must not be force-cast to
// Array<Element> (that cast traps as soon as a marker is present). The search
// is done in place and markers are skipped.
// NOTE(review): uses `==` like isElementInQueue; assumed to match what
// Array.lastIndexOf did in replaceInQueue — confirm.
func replaceActiveFormattingElement(_ out: Element, _ input: Element) throws {
    var index: Int = -1
    for pos in (0..<formattingElements.count).reversed() {
        if let candidate = formattingElements[pos], candidate == out {
            index = pos
            break
        }
    }
    try Validate.isTrue(val: index != -1)
    formattingElements[index] = input
}
func insertMarkerToFormattingElements() {
formattingElements.append(nil)
}
func insertInFosterParent(_ input: Node)throws {
let fosterParent: Element?
let lastTable: Element? = getFromStack("table")
@ -749,18 +741,17 @@ class HtmlTreeBuilder: TreeBuilder {
if (lastTable.parent() != nil) {
fosterParent = lastTable.parent()!
isLastTableParent = true
} else{
} else {
fosterParent = aboveOnStack(lastTable)
}
} else { // no table == frag
fosterParent = stack[0]
}
if (isLastTableParent) {
try Validate.notNull(obj: lastTable) // last table cannot be null by this point.
try lastTable!.before(input)
}
else{
} else {
try fosterParent?.appendChild(input)
}
}

View File

@ -12,8 +12,7 @@ protocol HtmlTreeBuilderStateProtocol {
func process(_ t: Token, _ tb: HtmlTreeBuilder)throws->Bool
}
enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
{
enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
case Initial
case BeforeHtml
case BeforeHead
@ -37,16 +36,14 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
case AfterAfterBody
case AfterAfterFrameset
case ForeignContent
private static let nullString: String = "\u{0000}"
// True when `s` is the same state as this one.
// Compares the enum values directly: equal hash values do not guarantee equal
// values, so hashValue is not a substitute for ==.
public func equals(_ s: HtmlTreeBuilderState) -> Bool {
    return self == s
}
func process(_ t: Token, _ tb: HtmlTreeBuilder)throws->Bool
{
func process(_ t: Token, _ tb: HtmlTreeBuilder)throws->Bool {
switch self {
case .Initial:
if (HtmlTreeBuilderState.isWhitespace(t)) {
@ -60,7 +57,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
let doctype: DocumentType = DocumentType(
tb.settings.normalizeTag(d.getName()), d.getPublicIdentifier(), d.getSystemIdentifier(), tb.getBaseUri())
try tb.getDocument().appendChild(doctype)
if (d.isForceQuirks()){
if (d.isForceQuirks()) {
tb.getDocument().quirksMode(Document.QuirksMode.quirks)
}
tb.transition(.BeforeHtml)
@ -71,13 +68,13 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
}
return true
case .BeforeHtml:
func anythingElse(_ t: Token, _ tb: HtmlTreeBuilder)throws->Bool {
try tb.insertStartTag("html")
tb.transition(.BeforeHead)
return try tb.process(t)
}
if (t.isDoctype()) {
tb.error(self)
return false
@ -127,7 +124,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
try tb.processEndTag("head")
return try tb.process(t)
}
if (HtmlTreeBuilderState.isWhitespace(t)) {
try tb.insert(t.asCharacter())
return true
@ -147,7 +144,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
} else if (StringUtil.inString(name, haystack: "base", "basefont", "bgsound", "command", "link")) {
let el: Element = try tb.insertEmpty(start)
// jsoup special: update base the first time it is seen
if (name.equals("base") && el.hasAttr("href")){
if (name.equals("base") && el.hasAttr("href")) {
try tb.maybeSetBaseUri(el)
}
} else if (name.equals("meta")) {
@ -163,7 +160,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
tb.transition(.InHeadNoscript)
} else if (name.equals("script")) {
// skips some script rules as won't execute them
tb.tokeniser.transition(TokeniserState.ScriptData)
tb.markInsertionMode()
tb.transition(.Text)
@ -224,7 +221,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
tb.framesetOk(true)
return try tb.process(t)
}
if (HtmlTreeBuilderState.isWhitespace(t)) {
try tb.insert(t.asCharacter())
} else if (t.isComment()) {
@ -267,15 +264,14 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
}
return true
case .InBody:
func anyOtherEndTag(_ t: Token, _ tb: HtmlTreeBuilder)->Bool
{
func anyOtherEndTag(_ t: Token, _ tb: HtmlTreeBuilder) -> Bool {
let name: String? = t.asEndTag().normalName()
let stack: Array<Element> = tb.getStack()
for pos in (0..<stack.count).reversed(){
for pos in (0..<stack.count).reversed() {
let node: Element = stack[pos]
if (name != nil && node.nodeName().equals(name!)) {
tb.generateImpliedEndTags(name)
if (!name!.equals((tb.currentElement()?.nodeName())!)){
if (!name!.equals((tb.currentElement()?.nodeName())!)) {
tb.error(self)
}
tb.popStackToClose(name!)
@ -289,9 +285,8 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
}
return true
}
switch (t.type)
{
switch (t.type) {
case Token.TokenType.Char:
let c: Token.Char = t.asCharacter()
if (c.getData() != nil && c.getData()!.equals(HtmlTreeBuilderState.nullString)) {
@ -315,13 +310,12 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
return false
case Token.TokenType.StartTag:
let startTag: Token.StartTag = t.asStartTag()
if let name: String = startTag.normalName()
{
if let name: String = startTag.normalName() {
if (name.equals("a")) {
if (tb.getActiveFormattingElement("a") != nil) {
tb.error(self)
try tb.processEndTag("a")
// still on stack?
let remainingA: Element? = tb.getFromStack("a")
if (remainingA != nil) {
@ -354,7 +348,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
try tb.processEndTag("li")
break
}
if (tb.isSpecial(el) && !StringUtil.inSorted(el.nodeName(), haystack: Constants.InBodyStartLiBreakers)){
if (tb.isSpecial(el) && !StringUtil.inSorted(el.nodeName(), haystack: Constants.InBodyStartLiBreakers)) {
break
}
}
@ -367,7 +361,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
// merge attributes onto real html
let html: Element = tb.getStack()[0]
for attribute in startTag.getAttributes().iterator() {
if (!html.hasAttr(attribute.getKey())){
if (!html.hasAttr(attribute.getKey())) {
html.getAttributes()?.put(attribute: attribute)
}
}
@ -375,7 +369,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
return try tb.process(t, .InHead)
} else if (name.equals("body")) {
tb.error(self)
let stack : Array<Element> = tb.getStack()
let stack: Array<Element> = tb.getStack()
if (stack.count == 1 || (stack.count > 2 && !stack[1].nodeName().equals("body"))) {
// only in fragment case
return false // ignore
@ -383,7 +377,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
tb.framesetOk(false)
let body: Element = stack[1]
for attribute: Attribute in startTag.getAttributes().iterator() {
if (!body.hasAttr(attribute.getKey())){
if (!body.hasAttr(attribute.getKey())) {
body.getAttributes()?.put(attribute: attribute)
}
}
@ -398,11 +392,11 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
return false // ignore frameset
} else {
let second: Element = stack[1]
if (second.parent() != nil){
if (second.parent() != nil) {
try second.remove()
}
// pop up to html element
while (stack.count > 1){
while (stack.count > 1) {
stack.remove(at: stack.count-1)
}
try tb.insert(startTag)
@ -435,14 +429,14 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
try tb.insertForm(startTag, true)
} else if (StringUtil.inSorted(name, haystack: Constants.DdDt)) {
tb.framesetOk(false)
let stack:Array<Element> = tb.getStack()
let stack: Array<Element> = tb.getStack()
for i in (1..<stack.count).reversed() {
let el: Element = stack[i]
if (StringUtil.inSorted(el.nodeName(), haystack: Constants.DdDt)) {
try tb.processEndTag(el.nodeName())
break
}
if (tb.isSpecial(el) && !StringUtil.inSorted(el.nodeName(), haystack: Constants.InBodyStartLiBreakers)){
if (tb.isSpecial(el) && !StringUtil.inSorted(el.nodeName(), haystack: Constants.InBodyStartLiBreakers)) {
break
}
}
@ -495,7 +489,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
} else if (name.equals("input")) {
try tb.reconstructFormattingElements()
let el: Element = try tb.insertEmpty(startTag)
if (try !el.attr("type").equalsIgnoreCase(string: "hidden")){
if (try !el.attr("type").equalsIgnoreCase(string: "hidden")) {
tb.framesetOk(false)
}
} else if (StringUtil.inSorted(name, haystack: Constants.InBodyStartMedia)) {
@ -507,22 +501,22 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
try tb.insertEmpty(startTag)
tb.framesetOk(false)
} else if (name.equals("image")) {
if (tb.getFromStack("svg") == nil){
if (tb.getFromStack("svg") == nil) {
return try tb.process(startTag.name("img")) // change <image> to <img>, unless in svg
}else{
} else {
try tb.insert(startTag)
}
} else if (name.equals("isindex")) {
// how much do we care about the early 90s?
tb.error(self)
if (tb.getFormElement() != nil){
if (tb.getFormElement() != nil) {
return false
}
tb.tokeniser.acknowledgeSelfClosingFlag()
try tb.processStartTag("form")
if (startTag._attributes.hasKey(key: "action")) {
if let form: Element = tb.getFormElement(){
if let form: Element = tb.getFormElement() {
try form.attr("action", startTag._attributes.get(key: "action"))
}
}
@ -532,13 +526,13 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
let prompt: String = startTag._attributes.hasKey(key: "prompt") ?
startTag._attributes.get(key: "prompt") :
"self is a searchable index. Enter search keywords: "
try tb.process(Token.Char().data(prompt))
// input
let inputAttribs: Attributes = Attributes()
for attr: Attribute in startTag._attributes.iterator() {
if (!StringUtil.inSorted(attr.getKey(), haystack: Constants.InBodyStartInputAttribs)){
if (!StringUtil.inSorted(attr.getKey(), haystack: Constants.InBodyStartInputAttribs)) {
inputAttribs.put(attribute: attr)
}
}
@ -571,15 +565,15 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
try tb.reconstructFormattingElements()
try tb.insert(startTag)
tb.framesetOk(false)
let state: HtmlTreeBuilderState = tb.state()
if (state.equals(.InTable) || state.equals(.InCaption) || state.equals(.InTableBody) || state.equals(.InRow) || state.equals(.InCell)){
if (state.equals(.InTable) || state.equals(.InCaption) || state.equals(.InTableBody) || state.equals(.InRow) || state.equals(.InCell)) {
tb.transition(.InSelectInTable)
}else{
} else {
tb.transition(.InSelect)
}
} else if (StringUtil.inSorted(name, haystack: Constants.InBodyStartOptions)) {
if (tb.currentElement() != nil && tb.currentElement()!.nodeName().equals("option")){
if (tb.currentElement() != nil && tb.currentElement()!.nodeName().equals("option")) {
try tb.processEndTag("option")
}
try tb.reconstructFormattingElements()
@ -610,33 +604,32 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
try tb.reconstructFormattingElements()
try tb.insert(startTag)
}
}else{
} else {
try tb.reconstructFormattingElements()
try tb.insert(startTag)
}
break
case .EndTag:
let endTag: Token.EndTag = t.asEndTag()
if let name = endTag.normalName(){
if let name = endTag.normalName() {
if (StringUtil.inSorted(name, haystack: Constants.InBodyEndAdoptionFormatters)) {
// Adoption Agency Algorithm.
for i in 0..<8
{
for i in 0..<8 {
let formatEl: Element? = tb.getActiveFormattingElement(name)
if (formatEl == nil){
if (formatEl == nil) {
return anyOtherEndTag(t, tb)
}else if (!tb.onStack(formatEl!)) {
} else if (!tb.onStack(formatEl!)) {
tb.error(self)
tb.removeFromActiveFormattingElements(formatEl!)
return true
} else if (try !tb.inScope(formatEl!.nodeName())) {
tb.error(self)
return false
} else if (tb.currentElement() != formatEl!){
} else if (tb.currentElement() != formatEl!) {
tb.error(self)
}
var furthestBlock: Element? = nil
var commonAncestor: Element? = nil
var seenFormattingElement: Bool = false
@ -644,9 +637,8 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
// the spec doesn't limit to < 64, but in degenerate cases (9000+ stack depth) self prevents
// run-aways
var stackSize = stack.count
if(stackSize > 64){stackSize = 64}
for si in 0..<stackSize
{
if(stackSize > 64) {stackSize = 64}
for si in 0..<stackSize {
let el: Element = stack[si]
if (el == formatEl) {
commonAncestor = stack[si - 1]
@ -661,60 +653,57 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
tb.removeFromActiveFormattingElements(formatEl)
return true
}
// todo: Let a bookmark note the position of the formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
// does that mean: int pos of format el in list?
var node: Element? = furthestBlock
var lastNode: Element? = furthestBlock
for j in 0..<3
{
if (node != nil && tb.onStack(node!)){
for j in 0..<3 {
if (node != nil && tb.onStack(node!)) {
node = tb.aboveOnStack(node!)
}
// note no bookmark check
if (node != nil && !tb.isInActiveFormattingElements(node!))
{
if (node != nil && !tb.isInActiveFormattingElements(node!)) {
tb.removeFromStack(node!)
continue
} else if (node == formatEl){
} else if (node == formatEl) {
break
}
let replacement: Element = try Element(Tag.valueOf(node!.nodeName(), ParseSettings.preserveCase), tb.getBaseUri())
// case will follow the original node (so honours ParseSettings)
try tb.replaceActiveFormattingElement(node!, replacement)
try tb.replaceOnStack(node!, replacement)
node = replacement
if (lastNode == furthestBlock) {
// todo: move the aforementioned bookmark to be immediately after the node in the list of active formatting elements.
// not getting how self bookmark both straddles the element above, but is inbetween here...
}
if (lastNode!.parent() != nil){
if (lastNode!.parent() != nil) {
try lastNode?.remove()
}
try node!.appendChild(lastNode!)
lastNode = node
}
if (StringUtil.inSorted(commonAncestor!.nodeName(), haystack: Constants.InBodyEndTableFosters)) {
if (lastNode!.parent() != nil){
if (lastNode!.parent() != nil) {
try lastNode!.remove()
}
try tb.insertInFosterParent(lastNode!)
} else {
if (lastNode!.parent() != nil){
if (lastNode!.parent() != nil) {
try lastNode!.remove()
}
try commonAncestor!.appendChild(lastNode!)
}
let adopter: Element = Element(formatEl!.tag(), tb.getBaseUri())
adopter.getAttributes()?.addAll(incoming: formatEl!.getAttributes())
var childNodes: [Node] = furthestBlock!.getChildNodes()
for childNode: Node in childNodes
{
for childNode: Node in childNodes {
try adopter.appendChild(childNode) // append will reparent. thus the clone to avoid concurrent mod.
}
try furthestBlock?.appendChild(adopter)
@ -730,7 +719,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
return false
} else {
tb.generateImpliedEndTags()
if (!tb.currentElement()!.nodeName().equals(name)){
if (!tb.currentElement()!.nodeName().equals(name)) {
tb.error(self)
}
tb.popStackToClose(name)
@ -744,7 +733,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
return false
} else {
tb.generateImpliedEndTags(name)
if (tb.currentElement() != nil && !tb.currentElement()!.nodeName().equals(name)){
if (tb.currentElement() != nil && !tb.currentElement()!.nodeName().equals(name)) {
tb.error(self)
}
tb.popStackToClose(name)
@ -759,7 +748,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
}
} else if (name.equals("html")) {
let notIgnored: Bool = try tb.processEndTag("body")
if (notIgnored){
if (notIgnored) {
return try tb.process(endTag)
}
} else if (name.equals("form")) {
@ -770,7 +759,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
return false
} else {
tb.generateImpliedEndTags()
if (tb.currentElement() != nil && !tb.currentElement()!.nodeName().equals(name)){
if (tb.currentElement() != nil && !tb.currentElement()!.nodeName().equals(name)) {
tb.error(self)
}
// remove currentForm from stack. will shift anything under up.
@ -783,7 +772,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
return try tb.process(endTag)
} else {
tb.generateImpliedEndTags(name)
if (tb.currentElement() != nil && !tb.currentElement()!.nodeName().equals(name)){
if (tb.currentElement() != nil && !tb.currentElement()!.nodeName().equals(name)) {
tb.error(self)
}
tb.popStackToClose(name)
@ -794,7 +783,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
return false
} else {
tb.generateImpliedEndTags(name)
if (tb.currentElement() != nil && !tb.currentElement()!.nodeName().equals(name)){
if (tb.currentElement() != nil && !tb.currentElement()!.nodeName().equals(name)) {
tb.error(self)
}
tb.popStackToClose(name)
@ -805,7 +794,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
return false
} else {
tb.generateImpliedEndTags(name)
if (tb.currentElement() != nil && !tb.currentElement()!.nodeName().equals(name)){
if (tb.currentElement() != nil && !tb.currentElement()!.nodeName().equals(name)) {
tb.error(self)
}
tb.popStackToClose(Constants.Headings)
@ -820,7 +809,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
return false
}
tb.generateImpliedEndTags()
if (tb.currentElement() != nil && !tb.currentElement()!.nodeName().equals(name)){
if (tb.currentElement() != nil && !tb.currentElement()!.nodeName().equals(name)) {
tb.error(self)
}
tb.popStackToClose(name)
@ -833,10 +822,10 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
} else {
return anyOtherEndTag(t, tb)
}
}else{
} else {
return anyOtherEndTag(t, tb)
}
break
case .EOF:
// todo: error if stack contains something not dd, dt, li, p, tbody, td, tfoot, th, thead, tr, body, html
@ -872,7 +861,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
}
return processed
}
if (t.isCharacter()) {
tb.newPendingTableCharacters()
tb.markInsertionMode()
@ -886,8 +875,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
return false
} else if (t.isStartTag()) {
let startTag: Token.StartTag = t.asStartTag()
if let name: String = startTag.normalName()
{
if let name: String = startTag.normalName() {
if (name.equals("caption")) {
tb.clearStackToTableContext()
tb.insertMarkerToFormattingElements()
@ -922,9 +910,9 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
}
} else if (name.equals("form")) {
tb.error(self)
if (tb.getFormElement() != nil){
if (tb.getFormElement() != nil) {
return false
}else {
} else {
try tb.insertForm(startTag, false)
}
} else {
@ -934,8 +922,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
return true // todo: check if should return processed http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-intable
} else if (t.isEndTag()) {
let endTag: Token.EndTag = t.asEndTag()
if let name: String = endTag.normalName()
{
if let name: String = endTag.normalName() {
if (name.equals("table")) {
if (try !tb.inTableScope(name)) {
tb.error(self)
@ -951,12 +938,12 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
} else {
return try anythingElse(t, tb)
}
}else{
} else {
return try anythingElse(t, tb)
}
return true // todo: as above todo
} else if (t.isEOF()) {
if (tb.currentElement() != nil && tb.currentElement()!.nodeName().equals("html")){
if (tb.currentElement() != nil && tb.currentElement()!.nodeName().equals("html")) {
tb.error(self)
}
return true // stops parsing
@ -978,8 +965,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
default:
// todo - don't really like the way these table character data lists are built
if (tb.getPendingTableCharacters().count > 0) {
for character:String in tb.getPendingTableCharacters()
{
for character: String in tb.getPendingTableCharacters() {
if (!HtmlTreeBuilderState.isWhitespace(character)) {
// InTable anything else section:
tb.error(self)
@ -990,7 +976,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
} else {
try tb.process(Token.Char().data(character), .InBody)
}
} else{
} else {
try tb.insert(Token.Char().data(character))
}
}
@ -1001,8 +987,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
}
return true
case .InCaption:
if (t.isEndTag() && t.asEndTag().normalName()!.equals("caption"))
{
if (t.isEndTag() && t.asEndTag().normalName()!.equals("caption")) {
let endTag: Token.EndTag = t.asEndTag()
let name: String? = endTag.normalName()
if (try name != nil && !tb.inTableScope(name!)) {
@ -1010,7 +995,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
return false
} else {
tb.generateImpliedEndTags()
if (!tb.currentElement()!.nodeName().equals("caption")){
if (!tb.currentElement()!.nodeName().equals("caption")) {
tb.error(self)
}
tb.popStackToClose("caption")
@ -1024,7 +1009,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
) {
tb.error(self)
let processed: Bool = try tb.processEndTag("caption")
if (processed){
if (processed) {
return try tb.process(t)
}
} else if (t.isEndTag() && StringUtil.inString(t.asEndTag().normalName()!,
@ -1038,14 +1023,12 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
case .InColumnGroup:
/// Fallback for tokens the column-group state does not handle itself:
/// closes the open `colgroup` and, when that succeeds, reprocesses `t`.
/// - Returns: the result of reprocessing `t`, or `true` when the end tag was ignored.
func anythingElse(_ t: Token, _ tb: TreeBuilder)throws->Bool {
    let processed: Bool = try tb.processEndTag("colgroup")
    if processed { // only ignored in frag case
        return try tb.process(t)
    }
    return true
}
if (HtmlTreeBuilderState.isWhitespace(t)) {
try tb.insert(t.asCharacter())
return true
@ -1060,11 +1043,11 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
case .StartTag:
let startTag: Token.StartTag = t.asStartTag()
let name: String? = startTag.normalName()
if ("html".equals(name)){
if ("html".equals(name)) {
return try tb.process(t, .InBody)
}else if ("col".equals(name)){
} else if ("col".equals(name)) {
try tb.insertEmpty(startTag)
}else{
} else {
return try anythingElse(t, tb)
}
break
@ -1079,14 +1062,14 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
tb.pop()
tb.transition(.InTable)
}
} else{
} else {
return try anythingElse(t, tb)
}
break
case .EOF:
if ("html".equals(tb.currentElement()?.nodeName())){
if ("html".equals(tb.currentElement()?.nodeName())) {
return true // stop parsing; frag case
}else{
} else {
return try anythingElse(t, tb)
}
default:
@ -1095,7 +1078,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
return true
case .InTableBody:
@discardableResult
func exitTableBody(_ t:Token, _ tb: HtmlTreeBuilder)throws->Bool {
func exitTableBody(_ t: Token, _ tb: HtmlTreeBuilder)throws->Bool {
if (try !(tb.inTableScope("tbody") || tb.inTableScope("thead") || tb.inScope("tfoot"))) {
// frag case
tb.error(self)
@ -1105,12 +1088,11 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
try tb.processEndTag(tb.currentElement()!.nodeName()) // tbody, tfoot, thead
return try tb.process(t)
}
/// Fallback for tokens this state does not handle itself: hands `t` to the
/// tree builder to be processed with the `.InTable` rules and returns that result.
func anythingElse(_ t: Token, _ tb: HtmlTreeBuilder)throws->Bool {
return try tb.process(t, .InTable)
}
switch (t.type) {
case .StartTag:
let startTag: Token.StartTag = t.asStartTag()
@ -1125,7 +1107,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
return try tb.process(startTag)
} else if (StringUtil.inString(name, haystack: "caption", "col", "colgroup", "tbody", "tfoot", "thead")) {
return try exitTableBody(t, tb)
} else{
} else {
return try anythingElse(t, tb)
}
break
@ -1146,7 +1128,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
} else if (StringUtil.inString(name, haystack: "body", "caption", "col", "colgroup", "html", "td", "th", "tr")) {
tb.error(self)
return false
} else{
} else {
return try anythingElse(t, tb)
}
break
@ -1158,20 +1140,20 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
/// Fallback for tokens this state does not handle itself: hands `t` to the
/// tree builder to be processed with the `.InTable` rules and returns that result.
func anythingElse(_ t: Token, _ tb: HtmlTreeBuilder)throws->Bool {
return try tb.process(t, .InTable)
}
/// Closes the current row by processing an implied `</tr>`, then reprocesses `t`.
/// - Returns: `false` when the `tr` end tag was not processed, otherwise the
///   result of reprocessing `t`.
func handleMissingTr(_ t: Token, _ tb: TreeBuilder)throws->Bool {
    guard try tb.processEndTag("tr") else {
        return false
    }
    return try tb.process(t)
}
if (t.isStartTag()) {
let startTag: Token.StartTag = t.asStartTag()
let name: String? = startTag.normalName()
if (StringUtil.inString(name, haystack: "th", "td")) {
tb.clearStackToTableRowContext()
try tb.insert(startTag)
@ -1185,7 +1167,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
} else if (t.isEndTag()) {
let endTag: Token.EndTag = t.asEndTag()
let name: String? = endTag.normalName()
if ("tr".equals(name)) {
if (try !tb.inTableScope(name!)) {
tb.error(self) // frag
@ -1217,20 +1199,19 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
/// Fallback for tokens this state does not handle itself: hands `t` to the
/// tree builder to be processed with the `.InBody` rules and returns that result.
func anythingElse(_ t: Token, _ tb: HtmlTreeBuilder)throws->Bool {
return try tb.process(t, .InBody)
}
/// Closes the currently open table cell by processing the matching end tag:
/// `td` when a `td` is in table scope, otherwise `th`.
func closeCell(_ tb: HtmlTreeBuilder)throws {
    if try tb.inTableScope("td") {
        try tb.processEndTag("td")
    } else {
        try tb.processEndTag("th") // only here if th or td in scope
    }
}
if (t.isEndTag()) {
let endTag: Token.EndTag = t.asEndTag()
let name: String? = endTag.normalName()
if (StringUtil.inString(name, haystack: "td", "th")) {
if (try !tb.inTableScope(name!)) {
tb.error(self)
@ -1238,7 +1219,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
return false
}
tb.generateImpliedEndTags()
if (!name!.equals(tb.currentElement()?.nodeName())){
if (!name!.equals(tb.currentElement()?.nodeName())) {
tb.error(self)
}
tb.popStackToClose(name!)
@ -1271,13 +1252,12 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
}
return true
case .InSelect:
/// Fallback for tokens the select state does not accept: reports a parse
/// error on the tree builder and signals that the token was not processed.
func anythingElse(_ t: Token, _ tb: HtmlTreeBuilder) -> Bool {
    tb.error(self)
    return false
}
switch (t.type) {
case .Char:
let c: Token.Char = t.asCharacter()
@ -1297,15 +1277,15 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
case .StartTag:
let start: Token.StartTag = t.asStartTag()
let name: String? = start.normalName()
if ("html".equals(name)){
if ("html".equals(name)) {
return try tb.process(start, .InBody)
}else if ("option".equals(name)) {
} else if ("option".equals(name)) {
try tb.processEndTag("option")
try tb.insert(start)
} else if ("optgroup".equals(name)) {
if ("option".equals(tb.currentElement()?.nodeName())){
if ("option".equals(tb.currentElement()?.nodeName())) {
try tb.processEndTag("option")
}else if ("optgroup".equals(tb.currentElement()?.nodeName())){
} else if ("optgroup".equals(tb.currentElement()?.nodeName())) {
try tb.processEndTag("optgroup")
}
try tb.insert(start)
@ -1314,7 +1294,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
return try tb.processEndTag("select")
} else if (StringUtil.inString(name, haystack: "input", "keygen", "textarea")) {
tb.error(self)
if (try !tb.inSelectScope("select")){
if (try !tb.inSelectScope("select")) {
return false // frag
}
try tb.processEndTag("select")
@ -1329,18 +1309,18 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
let end: Token.EndTag = t.asEndTag()
let name = end.normalName()
if ("optgroup".equals(name)) {
if ("option".equals(tb.currentElement()?.nodeName()) && tb.currentElement() != nil && tb.aboveOnStack(tb.currentElement()!) != nil && "optgroup".equals(tb.aboveOnStack(tb.currentElement()!)?.nodeName())){
if ("option".equals(tb.currentElement()?.nodeName()) && tb.currentElement() != nil && tb.aboveOnStack(tb.currentElement()!) != nil && "optgroup".equals(tb.aboveOnStack(tb.currentElement()!)?.nodeName())) {
try tb.processEndTag("option")
}
if ("optgroup".equals(tb.currentElement()?.nodeName())){
if ("optgroup".equals(tb.currentElement()?.nodeName())) {
tb.pop()
}else{
} else {
tb.error(self)
}
} else if ("option".equals(name)) {
if ("option".equals(tb.currentElement()?.nodeName())){
if ("option".equals(tb.currentElement()?.nodeName())) {
tb.pop()
}else{
} else {
tb.error(self)
}
} else if ("select".equals(name)) {
@ -1351,12 +1331,12 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
tb.popStackToClose(name!)
tb.resetInsertionMode()
}
} else{
} else {
return anythingElse(t, tb)
}
break
case .EOF:
if (!"html".equals(tb.currentElement()?.nodeName())){
if (!"html".equals(tb.currentElement()?.nodeName())) {
tb.error(self)
}
break
@ -1374,7 +1354,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
if (try t.asEndTag().normalName() != nil && tb.inTableScope(t.asEndTag().normalName()!)) {
try tb.processEndTag("select")
return try (tb.process(t))
} else{
} else {
return false
}
} else {
@ -1406,7 +1386,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
}
return true
case .InFrameset:
if (HtmlTreeBuilderState.isWhitespace(t)) {
try tb.insert(t.asCharacter())
} else if (t.isComment()) {
@ -1450,7 +1430,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
}
return true
case .AfterFrameset:
if (HtmlTreeBuilderState.isWhitespace(t)) {
try tb.insert(t.asCharacter())
} else if (t.isComment()) {
@ -1472,7 +1452,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
}
return true
case .AfterAfterBody:
if (t.isComment()) {
try tb.insert(t.asComment())
} else if (t.isDoctype() || HtmlTreeBuilderState.isWhitespace(t) || (t.isStartTag() && "html".equals(t.asStartTag().normalName()))) {
@ -1486,7 +1466,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
}
return true
case .AfterAfterFrameset:
if (t.isComment()) {
try tb.insert(t.asComment())
} else if (t.isDoctype() || HtmlTreeBuilderState.isWhitespace(t) || (t.isStartTag() && "html".equals(t.asStartTag().normalName()))) {
@ -1506,47 +1486,40 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol
}
}
/// Tests whether `t` is a character token consisting solely of whitespace.
///
/// Non-character tokens are never whitespace. For a character token the
/// decision is delegated to `isWhitespace(_ data: String?)` on the token's data.
private static func isWhitespace(_ t: Token) -> Bool {
    guard t.isCharacter() else {
        return false
    }
    return isWhitespace(t.asCharacter().getData())
}
/// Tests whether every character of `data` is whitespace according to
/// `StringUtil.isWhitespace`.
///
/// `nil` and the empty string both yield `true`, because there is no
/// non-whitespace character to find.
private static func isWhitespace(_ data: String?) -> Bool {
    // todo: this checks more than spec - "\t", "\n", "\f", "\r", " "
    guard let data = data else {
        return true
    }
    // Stop at the first non-whitespace character.
    for c in data.characters where !StringUtil.isWhitespace(c) {
        return false
    }
    return true
}
/// Inserts `startTag` and switches parsing over to RCDATA text handling.
///
/// After the insert, the tokeniser is moved to `TokeniserState.Rcdata`, the tree
/// builder marks its insertion mode, and the builder transitions to `.Text`.
/// NOTE(review): `markInsertionMode()` presumably records the current state so it
/// can be restored once the text content ends — confirm in HtmlTreeBuilder.
private static func handleRcData(_ startTag: Token.StartTag, _ tb: HtmlTreeBuilder)throws {
try tb.insert(startTag)
tb.tokeniser.transition(TokeniserState.Rcdata)
// Must run before the transition below, while the builder is still in the calling state.
tb.markInsertionMode()
tb.transition(.Text)
}
/// Inserts `startTag` and switches parsing over to raw-text handling.
///
/// After the insert, the tokeniser is moved to `TokeniserState.Rawtext`, the tree
/// builder marks its insertion mode, and the builder transitions to `.Text`.
/// NOTE(review): `markInsertionMode()` presumably records the current state so it
/// can be restored once the text content ends — confirm in HtmlTreeBuilder.
private static func handleRawtext(_ startTag: Token.StartTag, _ tb: HtmlTreeBuilder)throws {
try tb.insert(startTag)
tb.tokeniser.transition(TokeniserState.Rawtext)
// Must run before the transition below, while the builder is still in the calling state.
tb.markInsertionMode()
tb.transition(.Text)
}
// lists of tags to search through. A little harder to read here, but causes less GC than dynamic varargs.
// was contributing around 10% of parse GC load.
fileprivate final class Constants {

View File

@ -8,22 +8,21 @@
import Foundation
open class Node : Equatable, Hashable
{
private static let EMPTY_NODES : Array<Node> = Array<Node>()
var parentNode : Node?
var childNodes : Array <Node>
var attributes : Attributes?
var baseUri : String?
open class Node: Equatable, Hashable {
private static let EMPTY_NODES: Array<Node> = Array<Node>()
var parentNode: Node?
var childNodes: Array <Node>
var attributes: Attributes?
var baseUri: String?
/**
* Get the list index of this node in its node sibling list. I.e. if this is the first node
* sibling, returns 0.
* @return position in node sibling list
* @see org.jsoup.nodes.Element#elementSiblingIndex()
*/
public private(set) var siblingIndex : Int = 0
public private(set) var siblingIndex: Int = 0
/**
Create a new Node.
@param baseUri base URI
@ -34,13 +33,13 @@ open class Node : Equatable, Hashable
self.baseUri = baseUri.trim()
self.attributes = attributes
}
/// Creates a childless node with a fresh, empty attribute set.
/// - Parameter baseUri: base URI for the node; stored after `trim()`.
public init(_ baseUri: String) {
    self.attributes = Attributes()
    self.baseUri = baseUri.trim()
    self.childNodes = Node.EMPTY_NODES
}
/**
* Default constructor. Doesn't setup base uri, children, or attributes; use with caution.
*/
@ -49,15 +48,15 @@ open class Node : Equatable, Hashable
self.attributes = nil
self.baseUri = nil
}
/**
 Get the node name of this node. Use for debugging purposes and not logic switching
 (for that, use `is` / `as?` type checks).
 The base implementation always traps; subclasses must override it.
 @return node name
 */
public func nodeName() -> String {
    preconditionFailure("This method must be overridden")
}
/**
* Get an attribute's value by its key. <b>Case insensitive</b>
* <p>
@ -74,22 +73,22 @@ open class Node : Equatable, Hashable
* @see #absUrl(String)
*/
open func attr(_ attributeKey: String)throws ->String {
    // A node created by the default constructor has no attribute set. Treat that
    // as "no value" rather than trapping on a force unwrap; hasAttr guards the
    // same way.
    let val: String = try attributes?.getIgnoreCase(key: attributeKey) ?? ""
    if !val.isEmpty {
        return val
    }
    // No stored value: an "abs:"-prefixed key asks for the absolute form of the
    // attribute named by the remainder of the key.
    if attributeKey.lowercased().startsWith("abs:") {
        return try absUrl(attributeKey.substring("abs:".characters.count))
    }
    return ""
}
/**
 * Get all of the element's attributes.
 * @return attributes (which implements iterable, in same order as presented in original HTML),
 * or nil when this node has no attribute set.
 */
open func getAttributes() -> Attributes? {
    return attributes
}
/**
* Set an attribute (key=value). If the attribute already exists, it is replaced.
* @param attributeKey The attribute key.
@ -98,34 +97,34 @@ open class Node : Equatable, Hashable
*/
@discardableResult
open func attr(_ attributeKey: String, _ attributeValue: String)throws->Node {
    // Optional chaining: a node without an attribute set silently ignores the write.
    try attributes?.put(attributeKey, attributeValue)
    return self
}
/**
 * Test if this element has an attribute. <b>Case insensitive</b>
 * A key prefixed with "abs:" also matches when the attribute named by the rest of the key
 * exists and resolves to a non-empty absolute URL.
 * @param attributeKey The attribute key to check.
 * @return true if the attribute exists, false if not (or if this node has no attribute set).
 */
open func hasAttr(_ attributeKey: String) -> Bool {
    guard let attributes = attributes else {
        return false
    }

    if attributeKey.startsWith("abs:") {
        let key: String = attributeKey.substring("abs:".characters.count)
        do {
            let abs = try absUrl(key)
            if attributes.hasKeyIgnoreCase(key: key) && !abs.isEmpty {
                return true
            }
        } catch {
            // absUrl rejected the key (e.g. nothing after "abs:"): no such attribute.
            return false
        }
    }
    // Fall back to a lookup of the key exactly as given, prefix included.
    return attributes.hasKeyIgnoreCase(key: attributeKey)
}
/**
* Remove an attribute from this element.
* @param attributeKey The attribute to remove.
@ -136,39 +135,36 @@ open class Node : Equatable, Hashable
try attributes?.removeIgnoreCase(key: attributeKey)
return self
}
/**
 Get the base URI of this node.
 @return base URI, or an empty string when no base URI has been set
 (the default constructor leaves it nil).
 */
open func getBaseUri() -> String {
    return baseUri ?? ""
}
/**
 Update the base URI of this node and all of its descendants.
 @param baseUri base URI to set
 */
open func setBaseUri(_ baseUri: String)throws {
    // Visitor that stamps the new base URI on each node as the traversal enters it.
    final class BaseUriSetter: NodeVisitor {
        private let baseUri: String

        init(_ baseUri: String) {
            self.baseUri = baseUri
        }

        func head(_ node: Node, _ depth: Int)throws {
            node.baseUri = baseUri
        }

        func tail(_ node: Node, _ depth: Int)throws {
            // Nothing to do when leaving a node.
        }
    }
    try traverse(BaseUriSetter(baseUri))
}
/**
* Get an absolute URL from a URL attribute that may be relative (i.e. an <code>&lt;a href&gt;</code> or
* <code>&lt;img src&gt;</code>).
@ -194,23 +190,23 @@ open class Node : Equatable, Hashable
*/
open func absUrl(_ attributeKey: String)throws->String {
    try Validate.notEmpty(string: attributeKey)

    guard hasAttr(attributeKey) else {
        return "" // nothing to make absolute with
    }
    // baseUri is nil for nodes built by the default constructor; resolve against
    // an empty base instead of trapping on a force unwrap.
    // NOTE(review): assumes StringUtil.resolve tolerates an empty base — confirm.
    return StringUtil.resolve(baseUri ?? "", relUrl: try attr(attributeKey))
}
/**
 Get a child node by its 0-based index.
 @param index index of child node
 @return the child node at this index. An out-of-bounds index is a programmer error and
 traps at runtime; Swift array subscripting does not throw.
 */
open func childNode(_ index: Int) -> Node {
    return childNodes[index]
}
/**
Get this node's children. Presented as an unmodifiable list: new children can not be added, but the child nodes
themselves can be manipulated.
@ -219,33 +215,32 @@ open class Node : Equatable, Hashable
open func getChildNodes()->Array<Node> {
// Array is a value type, so the caller gets its own copy of the list;
// the Node elements in it are shared references.
return childNodes
}
/**
 * Returns a deep copy of this node's children. Changes made to these nodes will not be reflected in the original
 * nodes
 * @return a deep copy of this node's children
 */
open func childNodesCopy()->Array<Node> {
    // copy() is untyped, hence the forced cast back to Node (as in the original loop).
    return childNodes.map { $0.copy() as! Node }
}
/**
 * Get the number of child nodes that this node holds.
 * @return the number of child nodes that this node holds.
 */
public func childNodeSize() -> Int {
    return childNodes.count
}
/// Returns this node's children as an array (a value copy of the stored list).
final func childNodesAsArray() -> [Node] {
    return childNodes
}
/**
Gets this node's parent node.
@return parent node or null if no parent.
@ -253,36 +248,36 @@ open class Node : Equatable, Hashable
open func parent() -> Node? {
// Declared `open`, so subclasses may override it; returns the stored parent or nil.
return parentNode
}
/**
 Gets this node's parent node. Not overridable by extending classes, so useful if you really just need the Node type.
 @return parent node or nil if no parent.
 */
final func getParentNode() -> Node? {
    return parentNode
}
/**
* Gets the Document associated with this Node.
* @return the Document associated with this Node, or null if there is no such Document.
*/
open func ownerDocument() -> Document? {
    // A Document is its own owner.
    if let document = self as? Document {
        return document
    }
    // A detached node has no owning document.
    guard let parentNode = parentNode else {
        return nil
    }
    // Otherwise defer to the parent, walking up the tree.
    return parentNode.ownerDocument()
}
/**
* Remove (delete) this node from the DOM tree. If this node has children, they are also removed.
*/
open func remove()throws {
try parentNode?.removeChild(self)
}
/**
* Insert the specified HTML into the DOM before this node (i.e. as a preceding sibling).
* @param html HTML to add before this node
@ -294,7 +289,7 @@ open class Node : Equatable, Hashable
try addSiblingHtml(siblingIndex, html)
return self
}
/**
* Insert the specified node into the DOM before this node (i.e. as a preceding sibling).
* @param node to add before this node
@ -305,11 +300,11 @@ open class Node : Equatable, Hashable
open func before(_ node: Node) throws -> Node {
    try Validate.notNull(obj: node)
    try Validate.notNull(obj: parentNode)
    // Inserting at this node's own index places `node` immediately ahead of it.
    let insertionIndex = siblingIndex
    try parentNode?.addChildren(insertionIndex, node)
    return self
}
/**
* Insert the specified HTML into the DOM after this node (i.e. as a following sibling).
* @param html HTML to add after this node
@ -321,7 +316,7 @@ open class Node : Equatable, Hashable
try addSiblingHtml(siblingIndex + 1, html)
return self
}
/**
* Insert the specified node into the DOM after this node (i.e. as a following sibling).
* @param node to add after this node
@ -332,20 +327,19 @@ open class Node : Equatable, Hashable
open func after(_ node: Node) throws -> Node {
    try Validate.notNull(obj: node)
    try Validate.notNull(obj: parentNode)
    // One slot past this node's own index, i.e. directly behind it.
    let insertionIndex = siblingIndex + 1
    try parentNode?.addChildren(insertionIndex, node)
    return self
}
private func addSiblingHtml(_ index: Int, _ html: String) throws {
    try Validate.notNull(obj: parentNode)
    // The parent is used as parsing context only when it is an Element.
    let contextElement = parent() as? Element
    let parsedNodes: Array<Node> = try Parser.parseFragment(html, contextElement, getBaseUri())
    try parentNode?.addChildren(index, parsedNodes)
}
/**
* Insert the specified HTML into the DOM after this node (i.e. as a following sibling).
@ -358,7 +352,7 @@ open class Node : Equatable, Hashable
try addSiblingHtml(siblingIndex + 1, html)
return self
}
/**
* Insert the specified node into the DOM after this node (i.e. as a following sibling).
* @param node to add after this node
@ -369,20 +363,20 @@ open class Node : Equatable, Hashable
open func after(node: Node) throws -> Node {
    try Validate.notNull(obj: node)
    try Validate.notNull(obj: parentNode)
    // One slot past this node's own index, i.e. directly behind it.
    let insertionIndex = siblingIndex + 1
    try parentNode?.addChildren(insertionIndex, node)
    return self
}
open func addSiblingHtml(index: Int, _ html: String) throws {
    try Validate.notNull(obj: html)
    try Validate.notNull(obj: parentNode)
    // The parent is used as parsing context only when it is an Element.
    let contextElement = parent() as? Element
    let parsedNodes: Array<Node> = try Parser.parseFragment(html, contextElement, getBaseUri())
    try parentNode?.addChildren(index, parsedNodes)
}
/**
Wrap the supplied HTML around this node.
@param html HTML to wrap around this element, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep.
@ -391,33 +385,31 @@ open class Node : Equatable, Hashable
@discardableResult
open func wrap(_ html: String) throws -> Node? {
    try Validate.notEmpty(string: html)

    let context = parent() as? Element
    let fragment: Array<Node> = try Parser.parseFragment(html, context, getBaseUri())

    // Only an Element at the head of the parsed fragment can act as wrapper;
    // anything else (or an empty fragment) makes this a no-op.
    guard let wrapper = fragment.first as? Element else {
        return nil
    }

    let innermost = getDeepChild(el: wrapper)
    try parentNode?.replaceChild(self, wrapper)
    try innermost.addChildren(self)

    // remainder (unbalanced wrap, like <div></div><p></p> -- The <p> is remainder
    for remainder in fragment where remainder != wrapper {
        try remainder.parentNode?.removeChild(remainder)
        try wrapper.appendChild(remainder)
    }
    return self
}
/**
* Removes this node from the DOM, and moves its children up into the node's parent. This has the effect of dropping
* the node but keeping its children.
@ -436,23 +428,23 @@ open class Node : Equatable, Hashable
@discardableResult
open func unwrap() throws -> Node? {
    try Validate.notNull(obj: parentNode)
    // Remember the first child before the children are moved to the parent.
    let formerFirstChild: Node? = childNodes.first
    try parentNode?.addChildren(siblingIndex, self.childNodesAsArray())
    try self.remove()
    return formerFirstChild
}
private func getDeepChild(el: Element) -> Element {
    // Follow the first child element at each level until an element without
    // child elements is reached.
    var deepest = el
    var children = deepest.children()
    while children.size() > 0 {
        deepest = children.get(0)
        children = deepest.children()
    }
    return deepest
}
/**
* Replace this node in the DOM with the supplied node.
* @param in the node that will replace the existing node.
@ -462,151 +454,143 @@ open class Node : Equatable, Hashable
try Validate.notNull(obj: parentNode)
try parentNode?.replaceChild(self, input)
}
public func setParentNode(_ parentNode: Node) throws {
    // Detach from the current parent, if any, before adopting the new one.
    if let currentParent = self.parentNode {
        try currentParent.removeChild(self)
    }
    self.parentNode = parentNode
}
public func replaceChild(_ out: Node, _ input: Node) throws {
    try Validate.isTrue(val: out.parentNode === self)
    try Validate.notNull(obj: input)

    if let previousParent = input.parentNode {
        try previousParent.removeChild(input)
    }

    // Read the slot only after the detach above: removing `input` from this
    // same parent reindexes the remaining children, `out` included.
    let slot = out.siblingIndex
    childNodes[slot] = input
    input.parentNode = self
    input.setSiblingIndex(slot)
    out.parentNode = nil
}
public func removeChild(_ out: Node) throws {
    try Validate.isTrue(val: out.parentNode === self)
    let vacatedSlot = out.siblingIndex
    childNodes.remove(at: vacatedSlot)
    // Children after the removed one moved down by one; refresh their indices.
    reindexChildren(vacatedSlot)
    out.parentNode = nil
}
public func addChildren(_ children: Node...)throws {
//most used. short circuit addChildren(int), which hits reindex children and array copy
try addChildren(children)
}
public func addChildren(_ children: [Node]) throws {
    //most used. short circuit addChildren(int), which hits reindex children and array copy
    for child in children {
        try reparentChild(child)
        ensureChildNodes()
        // Taken after reparenting, which may have removed `child` from this very list.
        let appendedAt = childNodes.count
        childNodes.append(child)
        child.setSiblingIndex(appendedAt)
    }
}
public func addChildren(_ index: Int,_ children: Node...)throws {
try addChildren(index,children)
public func addChildren(_ index: Int, _ children: Node...)throws {
try addChildren(index, children)
}
public func addChildren(_ index: Int, _ children: [Node]) throws {
    ensureChildNodes()
    // Walk the input back to front: inserting each node at the same index
    // leaves the nodes in their original order.
    for input in children.reversed() {
        try reparentChild(input)
        childNodes.insert(input, at: index)
        reindexChildren(index)
    }
}
public func ensureChildNodes()
{
public func ensureChildNodes() {
// if (childNodes === Node.EMPTY_NODES) {
// childNodes = Array<Node>()
// }
}
public func reparentChild(_ child: Node) throws {
    // Unhook the child from wherever it currently lives, then adopt it.
    if let formerParent = child.parentNode {
        try formerParent.removeChild(child)
    }
    try child.setParentNode(self)
}
private func reindexChildren(_ start: Int) {
    // Re-stamp every child from `start` onwards with its current array position.
    for (offset, child) in childNodes[start..<childNodes.count].enumerated() {
        child.setSiblingIndex(start + offset)
    }
}
/**
Retrieves this node's sibling nodes. Similar to {@link #childNodes() node.parent.childNodes()}, but does not
include this node (a node is not a sibling of itself).
@return node siblings. If the node has no parent, returns an empty list.
*/
open func siblingNodes() -> Array<Node> {
    guard let parentNode = parentNode else {
        return Array<Node>()
    }
    // Identity comparison: everything under the parent except this node itself.
    return parentNode.childNodes.filter { $0 !== self }
}
/**
Get this node's next sibling.
@return next sibling, or null if this is the last sibling
*/
open func nextSibling() -> Node? {
    guard let siblings = parentNode?.childNodes else {
        return nil // root
    }
    let following = siblingIndex + 1
    return following < siblings.count ? siblings[following] : nil
}
/**
Get this node's previous sibling.
@return the previous sibling, or null if this is the first sibling
*/
open func previousSibling() -> Node? {
    // No parent (root) or already the first child: there is nothing before us.
    guard let parentNode = parentNode, siblingIndex > 0 else {
        return nil
    }
    return parentNode.childNodes[siblingIndex - 1]
}
public func setSiblingIndex(_ siblingIndex: Int) {
self.siblingIndex = siblingIndex
}
/**
* Perform a depth-first traversal through this node and its descendants.
* @param nodeVisitor the visitor callbacks to perform on each node
@ -614,46 +598,43 @@ open class Node : Equatable, Hashable
*/
@discardableResult
open func traverse(_ nodeVisitor: NodeVisitor) throws -> Node {
    // Hand the walk to a NodeTraversor rooted at this node; return self for chaining.
    try NodeTraversor(nodeVisitor).traverse(self)
    return self
}
/**
Get the outer HTML of this node.
@return HTML
*/
open func outerHtml() throws -> String {
    // 128 is the initial size hint passed to the builder.
    let buffer: StringBuilder = StringBuilder(128)
    try outerHtml(buffer)
    return buffer.toString()
}
public func outerHtml(_ accum: StringBuilder)throws {
try NodeTraversor(OuterHtmlVisitor(accum, getOutputSettings())).traverse(self)
}
// if this node has no document (or parent), retrieve the default output settings
func getOutputSettings() -> OutputSettings {
    if let document = ownerDocument() {
        return document.outputSettings()
    }
    // No owning document: fall back to the settings of a fresh, empty Document.
    return Document("").outputSettings()
}
/**
Get the outer HTML of this node.
@param accum accumulator to place HTML into
@throws IOException if appending to the given accumulator fails.
*/
func outerHtmlHead(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings) throws
{
func outerHtmlHead(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings) throws {
preconditionFailure("This method must be overridden")
}
func outerHtmlTail(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings) throws
{
func outerHtmlTail(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings) throws {
preconditionFailure("This method must be overridden")
}
/**
* Write this node and its children to the given {@link Appendable}.
*
@ -664,44 +645,44 @@ open class Node : Equatable, Hashable
try outerHtml(appendable)
return appendable
}
open func toString()throws->String {
return try outerHtml()
}
public func indent(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings) {
public func indent(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings) {
accum.append("\n").append(StringUtil.padding(depth * Int(out.indentAmount())))
}
/**
* Check if this node is the same instance of another (object identity test).
* @param o other object to compare to
* @return true if the content of this node is the same as the other
* @see Node#hasSameValue(Object) to compare nodes by their value
*/
open func equals(_ o: Node)->Bool {
open func equals(_ o: Node) -> Bool {
// implemented just so that javadoc is clear this is an identity test
return self === o
}
/**
* Check if this node has the same content as another node. A node is considered the same if its name, attributes and content match the
* other node; particularly its position in the tree does not influence its similarity.
* @param o other object to compare to
* @return true if the content of this node is the same as the other
*/
open func hasSameValue(_ o: Node) throws -> Bool {
    // Same instance: trivially the same value, no serialization needed.
    if self === o {
        return true
    }
    // if (type(of:self) != type(of: o))
    // {
    //     return false
    // }
    // Otherwise two nodes have the same value when they serialize identically.
    let ownHtml = try self.outerHtml()
    let otherHtml = try o.outerHtml()
    return ownHtml == otherHtml
}
/**
* Create a stand-alone, deep copy of this node, and all of its children. The cloned node will have no siblings or
* parent node. As a stand-alone object, any changes made to the clone or any of its children will not impact the
@ -710,84 +691,73 @@ open class Node : Equatable, Hashable
* The cloned node may be adopted into another Document or node structure using {@link Element#appendChild(Node)}.
* @return stand-alone cloned node
*/
public func copy(with zone: NSZone? = nil) -> Any
{
public func copy(with zone: NSZone? = nil) -> Any {
return copy(clone: Node())
}
public func copy(parent: Node?)->Node
{
public func copy(parent: Node?) -> Node {
let clone = Node()
return copy(clone: clone,parent: parent)
return copy(clone: clone, parent: parent)
}
public func copy(clone: Node) -> Node {
    let root: Node = copy(clone: clone, parent: nil) // splits for orphan

    // Queue up nodes that need their children cloned (BFS).
    var pending: Array<Node> = [root]
    while !pending.isEmpty {
        let owner: Node = pending.removeFirst()
        // Each shallow-copied child reference is swapped for a clone parented to `owner`.
        for (position, original) in owner.childNodes.enumerated() {
            let duplicate: Node = original.copy(parent: owner)
            owner.childNodes[position] = duplicate
            pending.append(duplicate)
        }
    }
    return root
}
/*
* Return a clone of the node using the given parent (which can be null).
* Not a deep copy of children.
*/
public func copy(clone: Node, parent: Node?) -> Node {
    clone.parentNode = parent // can be null, to create an orphan split
    if parent == nil {
        clone.siblingIndex = 0
    } else {
        clone.siblingIndex = siblingIndex
    }
    clone.attributes = attributes?.clone()
    clone.baseUri = baseUri
    // Shallow: the clone starts out referencing the very same child nodes.
    clone.childNodes = Array<Node>()
    clone.childNodes.append(contentsOf: childNodes)
    return clone
}
// Visitor that serializes each visited node into a shared accumulator.
private class OuterHtmlVisitor: NodeVisitor {
    private var accum: StringBuilder
    private var out: OutputSettings

    init(_ accum: StringBuilder, _ out: OutputSettings) {
        self.accum = accum
        self.out = out
    }

    open func head(_ node: Node, _ depth: Int) throws {
        try node.outerHtmlHead(accum, depth, out)
    }

    open func tail(_ node: Node, _ depth: Int) throws {
        // saves a void hit.
        guard node.nodeName() != "#text" else {
            return
        }
        try node.outerHtmlTail(accum, depth, out)
    }
}
/// Returns a Boolean value indicating whether two values are equal.
///
/// Equality is the inverse of inequality. For any values `a` and `b`,
@ -796,46 +766,40 @@ open class Node : Equatable, Hashable
/// - Parameters:
/// - lhs: A value to compare.
/// - rhs: Another value to compare.
public static func ==(lhs: Node, rhs: Node) -> Bool{
public static func ==(lhs: Node, rhs: Node) -> Bool {
return lhs === rhs
}
/// The hash value.
///
/// Hash values are not guaranteed to be equal across different executions of
/// your program. Do not save hash values to use during a future execution.
public var hashValue: Int {
    // `==` on Node is an identity test (lhs === rhs), so the hash is derived from
    // object identity as well. The previous implementation hashed the serialized
    // HTML plus the base URI: that re-rendered the whole subtree on every hash and
    // changed whenever the node was mutated, which breaks Set/Dictionary lookups
    // for nodes already stored in such a collection.
    return ObjectIdentifier(self).hashValue
}
}
extension Node : CustomStringConvertible {
    public var description: String {
        // A serialization failure is deliberately reported as an empty string.
        return (try? toString()) ?? ""
    }
}
extension Node : CustomDebugStringConvertible {
    public var debugDescription: String {
        let typeName = String(describing: type(of: self))
        // When serialization fails only the dynamic type name is reported.
        guard let html = try? toString() else {
            return typeName
        }
        return typeName + " " + html
    }
}

View File

@ -8,10 +8,9 @@
import Foundation
class NodeTraversor
{
private let visitor : NodeVisitor
class NodeTraversor {
private let visitor: NodeVisitor
/**
* Create a new traversor.
* @param visitor a class implementing the {@link NodeVisitor} interface, to be called when visiting each node.
@ -19,15 +18,15 @@ class NodeTraversor
public init(_ visitor: NodeVisitor) {
self.visitor = visitor
}
/**
* Start a depth-first traverse of the root and all of its descendants.
* @param root the root node point to traverse.
*/
open func traverse(_ root: Node?)throws {
var node : Node? = root
var depth : Int = 0
var node: Node? = root
var depth: Int = 0
while (node != nil) {
try visitor.head(node!, depth)
if (node!.childNodeSize() > 0) {
@ -40,13 +39,12 @@ class NodeTraversor
depth-=1
}
try visitor.tail(node!, depth)
if (node === root){
if (node === root) {
break
}
node = node!.nextSibling()
}
}
}
}

View File

@ -25,7 +25,7 @@ public protocol NodeVisitor {
* of that will have depth 1.
*/
func head(_ node: Node, _ depth: Int)throws
/**
* Callback for when a node is last visited, after all of its descendants have been visited.
*

View File

@ -8,8 +8,8 @@
import Foundation
public class OrderedDictionary<Key: Hashable, Value: Equatable>: MutableCollection ,Hashable {
public class OrderedDictionary<Key: Hashable, Value: Equatable>: MutableCollection, Hashable {
/// Returns the position immediately after the given index.
///
/// - Parameter i: A valid index of the collection. `i` must be less than
@ -22,38 +22,36 @@ public class OrderedDictionary<Key: Hashable, Value: Equatable>: MutableCollecti
// ======================================================= //
// MARK: - Type Aliases
// ======================================================= //
public typealias Element = (Key, Value)
public typealias Index = Int
// ======================================================= //
// MARK: - Initialization
// ======================================================= //
public init() {}
public init(count:Int) {}
public init(count: Int) {}
public init(elements: [Element]) {
    // Fed through the key subscript, one pair at a time, in the given order.
    for element in elements {
        self[element.0] = element.1
    }
}
public func copy() -> Any {
return copy(with: nil)
}
public func mutableCopy(with zone: NSZone? = nil) -> Any
{
public func mutableCopy(with zone: NSZone? = nil) -> Any {
return copy()
}
public func copy(with zone: NSZone? = nil) -> Any {
let copy = OrderedDictionary<Key,Value>()
let copy = OrderedDictionary<Key, Value>()
//let copy = type(of:self).init()
for element in orderedKeys
{
for element in orderedKeys {
copy.put(value: valueForKey(key: element)!, forKey: element)
}
return copy
@ -62,27 +60,27 @@ public class OrderedDictionary<Key: Hashable, Value: Equatable>: MutableCollecti
func clone() -> OrderedDictionary<Key, Value> {
return copy() as! OrderedDictionary<Key, Value>
}
// ======================================================= //
// MARK: - Accessing Keys & Values
// ======================================================= //
public var orderedKeys: [Key] {
return _orderedKeys
}
public func keySet() -> [Key] {
return _orderedKeys
}
public var orderedValues: [Value] {
return _orderedKeys.flatMap { _keysToValues[$0] }
}
// ======================================================= //
// MARK: - Managing Content Using Keys
// ======================================================= //
public subscript(key: Key) -> Value? {
get {
return valueForKey(key: key)
@ -95,202 +93,197 @@ public class OrderedDictionary<Key: Hashable, Value: Equatable>: MutableCollecti
}
}
}
public func containsKey(key: Key) -> Bool {
return _orderedKeys.contains(key)
}
public func valueForKey(key: Key) -> Value? {
return _keysToValues[key]
}
public func get(key: Key) -> Value? {
return valueForKey(key: key)
}
// required var for the Hashable protocol
public var hashValue: Int {
    // Combine the key hashes in insertion order. `==` compares `_orderedKeys`
    // (among other things), so equal dictionaries still produce equal hashes,
    // while distinct key sequences no longer all collide on a constant 0.
    // Wrapping operators keep the arithmetic from trapping on overflow.
    var result = 17
    for key in _orderedKeys {
        result = 31 &* result &+ key.hashValue
    }
    return result
}
public func hashCode() -> Int {
return hashValue
}
@discardableResult
private func updateValue(value: Value, forKey key: Key) -> Value? {
    // Unknown key: append it to the order and store the value.
    guard _orderedKeys.contains(key) else {
        _orderedKeys.append(key)
        _keysToValues[key] = value
        return nil
    }

    // Known key: it keeps its position; only the value is swapped.
    guard let previousValue = _keysToValues[key] else {
        fatalError("Inconsistency error occured in OrderedDictionary")
    }
    _keysToValues[key] = value
    return previousValue
}
public func put(value: Value, forKey key: Key) {
self[key] = value
}
public func putAll(all: OrderedDictionary<Key, Value>) {
    // Copy entries in the other dictionary's key order.
    for key in all.orderedKeys {
        put(value: all.valueForKey(key: key)!, forKey: key)
    }
}
@discardableResult
public func removeValueForKey(key: Key) -> Value? {
    guard let position = _orderedKeys.index(of: key) else {
        return nil
    }
    guard let removedValue = _keysToValues[key] else {
        fatalError("Inconsistency error occured in OrderedDictionary")
    }
    // Drop the key from both backing stores.
    _orderedKeys.remove(at: position)
    _keysToValues[key] = nil
    return removedValue
}
@discardableResult
public func remove(key: Key) -> Value? {
return removeValueForKey(key:key)
}
public func removeAll(keepCapacity: Bool = true) {
_orderedKeys.removeAll(keepingCapacity: keepCapacity)
_keysToValues.removeAll(keepingCapacity: keepCapacity)
}
// ======================================================= //
// MARK: - Managing Content Using Indexes
// ======================================================= //
public subscript(index: Index) -> Element {
    get {
        if let element = elementAtIndex(index: index) {
            return element
        }
        fatalError("OrderedDictionary index out of range")
    }
    set(replacement) {
        updateElement(element: replacement, atIndex: index)
    }
}
public func indexForKey(key: Key) -> Index? {
return _orderedKeys.index(of: key)
}
public func elementAtIndex(index: Index) -> Element? {
    // Out-of-range indices yield nil rather than trapping.
    guard index >= 0 && index < _orderedKeys.count else {
        return nil
    }
    let key = _orderedKeys[index]
    if let value = self._keysToValues[key] {
        return (key, value)
    }
    fatalError("Inconsistency error occured in OrderedDictionary")
}
public func insertElementWithKey(key: Key, value: Value, atIndex index: Index) -> Value? {
return insertElement(newElement: (key, value), atIndex: index)
}
public func insertElement(newElement: Element, atIndex index: Index) -> Value? {
    guard index >= 0 else {
        fatalError("Negative OrderedDictionary index is out of range")
    }
    guard index <= count else {
        fatalError("OrderedDictionary index out of range")
    }

    let (key, value) = newElement
    var adjustedIndex = index
    let currentValue: Value?

    if let currentIndex = _orderedKeys.index(of: key) {
        // The key already exists: it is moved, so remove the old entry first.
        currentValue = _keysToValues[key]
        if currentIndex < index - 1 {
            adjustedIndex = index - 1
        }
        _orderedKeys.remove(at: currentIndex)
        _keysToValues[key] = nil
        // The removal shortened the key list by one. Re-inserting the last key at
        // `index == count` used to insert one past the end and trap, so clamp the
        // target to the new end of the list.
        adjustedIndex = min(adjustedIndex, _orderedKeys.count)
    } else {
        currentValue = nil
    }

    _orderedKeys.insert(key, at: adjustedIndex)
    _keysToValues[key] = value

    // The value previously stored under the key, or nil for a new key.
    return currentValue
}
@discardableResult
public func updateElement(element: Element, atIndex index: Index) -> Element? {
    guard let currentElement = elementAtIndex(index: index) else {
        fatalError("OrderedDictionary index out of range")
    }

    let (newKey, newValue) = element
    let replacedKey = currentElement.0

    _orderedKeys[index] = newKey

    // If the replaced key no longer occurs anywhere in the key order, its value
    // would otherwise stay behind in `_keysToValues` and make `==` (which compares
    // that dictionary) report a difference. The check is done against the key
    // list rather than unconditionally, so that swap-style updates -- where the
    // old key temporarily sits at another index -- keep their value.
    if replacedKey != newKey && !_orderedKeys.contains(replacedKey) {
        _keysToValues[replacedKey] = nil
    }

    _keysToValues[newKey] = newValue

    // The element that occupied `index` before the update.
    return currentElement
}
public func removeAtIndex(index: Index) -> Element? {
    // An invalid index removes nothing and yields nil.
    guard let element = elementAtIndex(index: index) else {
        return nil
    }
    _orderedKeys.remove(at: index)
    _keysToValues.removeValue(forKey: element.0)
    return element
}
// ======================================================= //
// MARK: - CollectionType Conformance
// ======================================================= //
public var startIndex: Index {
return _orderedKeys.startIndex
}
public var endIndex: Index {
return _orderedKeys.endIndex
}
// ======================================================= //
// MARK: - Internal Backing Store
// ======================================================= //
/// The backing store for the ordered keys.
internal var _orderedKeys = [Key]()
/// The backing store for the mapping of keys to values.
internal var _keysToValues = [Key: Value]()
}
// ======================================================= //
@ -313,7 +306,7 @@ public class OrderedDictionary<Key: Hashable, Value: Equatable>: MutableCollecti
//}
extension OrderedDictionary: LazySequenceProtocol {
func generate() -> AnyIterator<Value> {
var i = 0
return AnyIterator {
@ -324,7 +317,7 @@ extension OrderedDictionary: LazySequenceProtocol {
return self.orderedValues[i-1]
}
}
}
// ======================================================= //
@ -332,46 +325,45 @@ extension OrderedDictionary: LazySequenceProtocol {
// ======================================================= //
extension OrderedDictionary: CustomStringConvertible, CustomDebugStringConvertible {

    public var description: String {
        return constructDescription(debug: false)
    }

    public var debugDescription: String {
        return constructDescription(debug: true)
    }

    private func constructDescription(debug: Bool) -> String {
        // The implementation of the description is inspired by zwaldowski's implementation of the ordered dictionary.
        // See http://bit.ly/1VL4JUR
        guard !isEmpty else {
            return "[:]"
        }

        // Renders a single key or value, in debug or plain form.
        func render(_ item: Any) -> String {
            return debug ? String(reflecting: item) : String(describing: item)
        }

        var bodyComponents = [String]()
        for (key, value) in self {
            bodyComponents.append(render(key) + ": " + render(value))
        }

        return "[" + bodyComponents.joined(separator: ", ") + "]"
    }

}
extension OrderedDictionary: Equatable
{
extension OrderedDictionary: Equatable {
/// Returns a Boolean value indicating whether two values are equal.
///
/// Equality is the inverse of inequality. For any values `a` and `b`,
@ -381,12 +373,11 @@ extension OrderedDictionary: Equatable
/// - lhs: A value to compare.
/// - rhs: Another value to compare.
public static func ==(lhs: OrderedDictionary<Key, Value>, rhs: OrderedDictionary<Key, Value>) -> Bool {
    // Cheap size check first, then key order and key/value mapping.
    guard lhs.count == rhs.count else {
        return false
    }
    let sameOrder = lhs._orderedKeys == rhs._orderedKeys
    return sameOrder && (lhs._keysToValues == rhs._keysToValues)
}
}
//public func == <Key: Equatable, Value: Equatable>(lhs: OrderedDictionary<Key, Value>, rhs: OrderedDictionary<Key, Value>) -> Bool {
// return lhs._orderedKeys == rhs._orderedKeys && lhs._keysToValues == rhs._keysToValues
//}

View File

@ -12,18 +12,17 @@ public class OrderedSet<T: Hashable> {
public typealias Index = Int
fileprivate var contents = [T: Index]() // Needs to have a value of Index instead of Void for fast removals
fileprivate var sequencedContents = Array<UnsafeMutablePointer<T>>()
/**
Initializes an empty ordered set.
- returns: An empty ordered set.
*/
public init() { }
deinit{
deinit {
removeAllObjects()
}
/**
Initializes a new ordered set with the order and contents
of sequence.
@ -36,26 +35,26 @@ public class OrderedSet<T: Hashable> {
for object in sequence {
if contents[object] == nil {
contents[object] = contents.count
let pointer = UnsafeMutablePointer<T>.allocate(capacity: 1)
pointer.initialize(to: object)
sequencedContents.append(pointer)
}
}
}
public required init(arrayLiteral elements: T...) {
    // Duplicates are skipped: only the first occurrence of an object is stored.
    for object in elements where contents[object] == nil {
        contents[object] = contents.count

        let storage = UnsafeMutablePointer<T>.allocate(capacity: 1)
        storage.initialize(to: object)
        sequencedContents.append(storage)
    }
}
/**
Locate the index of an object in the ordered set.
It is preferable to use this method over the global find() for performance reasons.
@ -66,16 +65,16 @@ public class OrderedSet<T: Hashable> {
if let index = contents[object] {
return index
}
return nil
}
/**
Appends an object to the end of the ordered set.
- parameter object: The object to be appended.
*/
public func append(_ object: T) {
if let lastIndex = index(of: object) {
remove(object)
insert(object, at: lastIndex)
@ -86,7 +85,7 @@ public class OrderedSet<T: Hashable> {
sequencedContents.append(pointer)
}
}
/**
Appends a sequence of objects to the end of the ordered set.
- parameter sequence: The sequence of objects to be appended.
@ -97,7 +96,7 @@ public class OrderedSet<T: Hashable> {
append(object)
}
}
/**
Removes an object from the ordered set.
If the object exists in the ordered set, it will be removed.
@ -110,17 +109,17 @@ public class OrderedSet<T: Hashable> {
contents[object] = nil
sequencedContents[index].deallocate(capacity: 1)
sequencedContents.remove(at: index)
for (object, i) in contents {
if i < index {
continue
}
contents[object] = i - 1
}
}
}
/**
Removes the given objects from the ordered set.
- parameter objects: The objects to be removed.
@ -131,7 +130,7 @@ public class OrderedSet<T: Hashable> {
remove(object)
}
}
/**
Removes an object at a given index.
This method will cause a fatal error if you attempt to move an object to an index that is out of bounds.
@ -141,23 +140,23 @@ public class OrderedSet<T: Hashable> {
if index < 0 || index >= count {
fatalError("Attempting to remove an object at an index that does not exist")
}
remove(sequencedContents[index].pointee)
}
/**
Removes all objects in the ordered set.
*/
public func removeAllObjects() {
    contents.removeAll()

    for sequencedContent in sequencedContents {
        // Each pointer was set up with initialize(to:), so the pointee must be
        // deinitialized before its memory is returned; deallocating alone never
        // releases what a non-trivial T (strings, class references, ...) holds.
        sequencedContent.deinitialize(count: 1)
        sequencedContent.deallocate(capacity: 1)
    }

    sequencedContents.removeAll()
}
/**
Swaps two objects contained within the ordered set.
Both objects must exist within the set, or the swap will not occur.
@ -169,13 +168,13 @@ public class OrderedSet<T: Hashable> {
if let secondPosition = contents[second] {
contents[first] = secondPosition
contents[second] = firstPosition
sequencedContents[firstPosition].pointee = second
sequencedContents[secondPosition].pointee = first
}
}
}
/**
Tests if the ordered set contains any objects within a sequence.
- parameter other: The sequence to look for the intersection in.
@ -188,10 +187,10 @@ public class OrderedSet<T: Hashable> {
return true
}
}
return false
}
/**
Tests if the ordered set is a subset of another sequence.
- parameter sequence: The sequence to check.
@ -203,10 +202,10 @@ public class OrderedSet<T: Hashable> {
return false
}
}
return true
}
/**
Moves an object to a different index, shifting all objects in between the movement.
This method is a no-op if the object doesn't exist in the set or the index is the
@ -219,33 +218,33 @@ public class OrderedSet<T: Hashable> {
if index < 0 || index >= count {
fatalError("Attempting to move an object at an index that does not exist")
}
if let position = contents[object] {
// Return if the client attempted to move to the current index
if position == index {
return
}
let adjustment = position > index ? -1 : 1
var currentIndex = position
while currentIndex != index {
let nextIndex = currentIndex + adjustment
let firstObject = sequencedContents[currentIndex].pointee
let secondObject = sequencedContents[nextIndex].pointee
sequencedContents[currentIndex].pointee = secondObject
sequencedContents[nextIndex].pointee = firstObject
contents[firstObject] = nextIndex
contents[secondObject] = currentIndex
currentIndex += adjustment
}
}
}
/**
Moves an object from one index to a different index, shifting all objects in between the movement.
This method is a no-op if the index is the same that the object is currently at.
@ -258,10 +257,10 @@ public class OrderedSet<T: Hashable> {
if ((index < 0 || index >= count) || (toIndex < 0 || toIndex >= count)) {
fatalError("Attempting to move an object at or to an index that does not exist")
}
moveObject(self[index], toIndex: toIndex)
}
/**
Inserts an object at a given index, shifting all objects above it up one.
This method will cause a fatal error if you attempt to insert the object out of bounds.
@ -273,19 +272,19 @@ public class OrderedSet<T: Hashable> {
if index > count || index < 0 {
fatalError("Attempting to insert an object at an index that does not exist")
}
if contents[object] != nil {
return
}
// Append our object, then swap them until its at the end.
append(object)
for i in (index..<count-1).reversed() {
swapObject(self[i], with: self[i+1])
}
}
/**
Inserts objects at a given index, shifting all objects above it up one.
This method will cause a fatal error if you attempt to insert the objects out of bounds.
@ -298,9 +297,9 @@ public class OrderedSet<T: Hashable> {
if index > count || index < 0 {
fatalError("Attempting to insert an object at an index that does not exist")
}
var addedObjectCount = 0
for object in objects {
if contents[object] == nil {
let seqIdx = index + addedObjectCount
@ -311,14 +310,14 @@ public class OrderedSet<T: Hashable> {
addedObjectCount += 1
}
}
// Now we'll remove duplicates and update the shifted objects position in the contents
// dictionary.
for i in index + addedObjectCount..<count {
contents[sequencedContents[i].pointee] = i
}
}
/// Returns the last object in the set, or `nil` if the set is empty.
public var last: T? {
return sequencedContents.last?.pointee
@ -330,42 +329,42 @@ extension OrderedSet: ExpressibleByArrayLiteral { }
extension OrderedSet where T: Comparable {}
extension OrderedSet {
/// Number of objects in the set, taken from the `contents` lookup table.
public var count: Int {
return contents.count
}
/// `true` when the set holds no objects.
public var isEmpty: Bool {
    return contents.isEmpty
}
/// The object stored at position 0, or `nil` when the set has no objects.
public var first: T? {
    if count > 0 {
        return sequencedContents[0].pointee
    }
    return nil
}
/// Returns the position immediately after `i`, as computed by the backing
/// `sequencedContents` array.
public func index(after i: Int) -> Int {
return sequencedContents.index(after: i)
}
/// Position of the first object; always 0.
public var startIndex: Int {
return 0
}
/// The "past the end" position, equal to the number of entries in `contents`.
public var endIndex: Int {
return contents.count
}
public subscript(index: Index) -> T {
get {
return sequencedContents[index].pointee
}
set {
let previousCount = contents.count
contents[sequencedContents[index].pointee] = nil
contents[newValue] = index
// If the count is reduced we used an existing value, and need to sync up sequencedContents
if contents.count == previousCount {
sequencedContents[index].pointee = newValue
@ -374,12 +373,12 @@ extension OrderedSet {
}
}
}
}
extension OrderedSet: Sequence {
public typealias Iterator = OrderedSetGenerator<T>
public func makeIterator() -> Iterator {
return OrderedSetGenerator(set: self)
}
@ -388,11 +387,11 @@ extension OrderedSet: Sequence {
public struct OrderedSetGenerator<T: Hashable>: IteratorProtocol {
public typealias Element = T
private var generator: IndexingIterator<Array<UnsafeMutablePointer<T>>>
public init(set: OrderedSet<T>) {
generator = set.sequencedContents.makeIterator()
}
/// Advances the wrapped array iterator and returns the object behind the next
/// pointer, or `nil` once the pointers are exhausted.
public mutating func next() -> Element? {
return generator.next()?.pointee
}
@ -403,7 +402,7 @@ extension OrderedSetGenerator where T: Comparable {}
/// Returns a new ordered set built by appending the objects of `lhs` and then
/// the objects of `rhs`. Neither operand is modified.
public func +<T: Hashable, S: Sequence> (lhs: OrderedSet<T>, rhs: S) -> OrderedSet<T> where S.Iterator.Element == T {
    // `OrderedSet` is a class: `let joinedSet = lhs` would merely alias the left
    // operand, and the append below would then mutate the caller's set. Build an
    // independent set instead.
    let joinedSet: OrderedSet<T> = []
    joinedSet.append(contentsOf: lhs)
    joinedSet.append(contentsOf: rhs)
    return joinedSet
}
@ -414,7 +413,7 @@ public func +=<T: Hashable, S: Sequence> (lhs: inout OrderedSet<T>, rhs: S) wher
/// Returns a new ordered set holding the objects of `lhs` with the objects of
/// `rhs` removed. Neither operand is modified.
public func -<T: Hashable, S: Sequence> (lhs: OrderedSet<T>, rhs: S) -> OrderedSet<T> where S.Iterator.Element == T {
    // `OrderedSet` is a class: `let purgedSet = lhs` would alias the left operand,
    // so removing from it would strip objects out of the caller's set. Work on an
    // independent copy instead.
    let purgedSet: OrderedSet<T> = []
    purgedSet.append(contentsOf: lhs)
    purgedSet.remove(rhs)
    return purgedSet
}
@ -428,13 +427,13 @@ public func ==<T: Hashable> (lhs: OrderedSet<T>, rhs: OrderedSet<T>) -> Bool {
if lhs.count != rhs.count {
return false
}
for object in lhs {
if lhs.contents[object] != rhs.contents[object] {
return false
}
}
return true
}

View File

@ -12,32 +12,31 @@ import Foundation
* A Parse Error records an error in the input HTML that occurs in either the tokenisation or the tree building phase.
*/
open class ParseError {
private let pos : Int
private let errorMsg : String
private let pos: Int
private let errorMsg: String
init(_ pos: Int, _ errorMsg: String) {
self.pos = pos
self.errorMsg = errorMsg
}
/**
* Retrieve the error message.
* @return the error message.
*/
open func getErrorMessage()->String {
open func getErrorMessage() -> String {
return errorMsg
}
/**
* Retrieves the offset of the error.
* @return error offset within input
*/
open func getPosition()->Int {
open func getPosition() -> Int {
return pos
}
open func toString()->String {
open func toString() -> String {
return "\(pos): " + errorMsg
}
}

View File

@ -8,46 +8,45 @@
import Foundation
public class ParseErrorList
{
private static let INITIAL_CAPACITY : Int = 16
private let maxSize : Int
private let initialCapacity : Int
private var array : Array<ParseError?> = Array<ParseError>()
public class ParseErrorList {
private static let INITIAL_CAPACITY: Int = 16
private let maxSize: Int
private let initialCapacity: Int
private var array: Array<ParseError?> = Array<ParseError>()
/**
 * Creates an empty error list.
 * @param initialCapacity number of slots to reserve up front
 * @param maxSize upper bound checked by canAddError()
 */
init(_ initialCapacity: Int, _ maxSize: Int) {
    self.maxSize = maxSize
    self.initialCapacity = initialCapacity
    // Start empty. Pre-filling with `maxSize` nil placeholders made `array.count`
    // equal `maxSize` from the start, so `canAddError()` could never return true
    // and appended errors landed after the placeholders.
    array = []
    array.reserveCapacity(max(initialCapacity, 0))
}
func canAddError()->Bool {
func canAddError() -> Bool {
return array.count < maxSize
}
func getMaxSize()->Int {
func getMaxSize() -> Int {
return maxSize
}
static func noTracking()->ParseErrorList {
static func noTracking() -> ParseErrorList {
return ParseErrorList(0, 0)
}
static func tracking(_ maxSize: Int)->ParseErrorList {
static func tracking(_ maxSize: Int) -> ParseErrorList {
return ParseErrorList(INITIAL_CAPACITY, maxSize)
}
// // you need to provide the Equatable functionality
// static func ==(leftFoo: Foo, rightFoo: Foo) -> Bool {
// return ObjectIdentifier(leftFoo) == ObjectIdentifier(rightFoo)
// }
open func add(_ e: ParseError) {
array.append(e)
}
open func add(_ index: Int, _ element: ParseError){
open func add(_ index: Int, _ element: ParseError) {
array.insert(element, at: index)
}
}

View File

@ -8,22 +8,19 @@
import Foundation
open class ParseSettings
{
open class ParseSettings {
/**
* HTML default settings: both tag and attribute names are lower-cased during parsing.
*/
public static let htmlDefault : ParseSettings = ParseSettings(false, false)
public static let htmlDefault: ParseSettings = ParseSettings(false, false)
/**
* Preserve both tag and attribute case.
*/
public static let preserveCase : ParseSettings = ParseSettings(true, true)
private let preserveTagCase : Bool
private let preserveAttributeCase : Bool
public static let preserveCase: ParseSettings = ParseSettings(true, true)
private let preserveTagCase: Bool
private let preserveAttributeCase: Bool
/**
* Define parse settings.
* @param tag preserve tag case?
@ -33,32 +30,30 @@ open class ParseSettings
preserveTagCase = tag
preserveAttributeCase = attribute
}
open func normalizeTag(_ name: String)->String {
open func normalizeTag(_ name: String) -> String {
var name = name.trim()
if (!preserveTagCase){
if (!preserveTagCase) {
name = name.lowercased()
}
return name
}
open func normalizeAttribute(_ name: String)->String {
open func normalizeAttribute(_ name: String) -> String {
var name = name.trim()
if (!preserveAttributeCase){
if (!preserveAttributeCase) {
name = name.lowercased()
}
return name
}
open func normalizeAttributes(_ attributes: Attributes)throws ->Attributes {
if (!preserveAttributeCase) {
for attr in attributes.iterator()
{
for attr in attributes.iterator() {
try attr.setKey(key: attr.getKey().lowercased())
}
}
return attributes
}
}

View File

@ -8,20 +8,18 @@
import Foundation
/**
* Parses HTML into a {@link org.jsoup.nodes.Document}. Generally best to use one of the more convenient parse methods
* in {@link org.jsoup.Jsoup}.
*/
public class Parser
{
public class Parser {
private static let DEFAULT_MAX_ERRORS: Int = 0 // by default, error tracking is disabled.
private var _treeBuilder: TreeBuilder
private var _maxErrors: Int = DEFAULT_MAX_ERRORS
private var _errors: ParseErrorList = ParseErrorList(16,16)
private var _errors: ParseErrorList = ParseErrorList(16, 16)
private var _settings: ParseSettings
/**
* Create a new Parser, using the specified TreeBuilder
* @param treeBuilder TreeBuilder to use to parse input into Documents.
@ -30,69 +28,69 @@ public class Parser
self._treeBuilder = treeBuilder
_settings = treeBuilder.defaultSettings()
}
/**
 * Parses the HTML with the current tree builder and settings.
 * The error list is replaced before parsing: a tracking list when error
 * tracking is enabled, a non-tracking list otherwise.
 */
public func parseInput(_ html: String, _ baseUri: String) throws -> Document {
    if isTrackErrors() {
        _errors = ParseErrorList.tracking(_maxErrors)
    } else {
        _errors = ParseErrorList.noTracking()
    }
    return try _treeBuilder.parse(html, baseUri, _errors, _settings)
}
// gets & sets
/**
* Get the TreeBuilder currently in use.
* @return current TreeBuilder.
*/
public func getTreeBuilder()->TreeBuilder {
public func getTreeBuilder() -> TreeBuilder {
return _treeBuilder
}
/**
* Update the TreeBuilder used when parsing content.
* @param treeBuilder current TreeBuilder
* @return this, for chaining
*/
@discardableResult
public func setTreeBuilder(_ treeBuilder: TreeBuilder)->Parser {
public func setTreeBuilder(_ treeBuilder: TreeBuilder) -> Parser {
self._treeBuilder = treeBuilder
return self
}
/**
* Check if parse error tracking is enabled.
* @return current track error state.
*/
public func isTrackErrors()->Bool {
public func isTrackErrors() -> Bool {
return _maxErrors > 0
}
/**
* Enable or disable parse error tracking for the next parse.
* @param maxErrors the maximum number of errors to track. Set to 0 to disable.
* @return this, for chaining
*/
@discardableResult
public func setTrackErrors(_ maxErrors: Int)->Parser {
public func setTrackErrors(_ maxErrors: Int) -> Parser {
self._maxErrors = maxErrors
return self
}
/**
* Retrieve the parse errors, if any, from the last parse.
* @return list of parse errors, up to the size of the maximum errors tracked.
*/
public func getErrors()->ParseErrorList {
public func getErrors() -> ParseErrorList {
return _errors
}
@discardableResult
public func settings(_ settings: ParseSettings)->Parser {
public func settings(_ settings: ParseSettings) -> Parser {
self._settings = settings
return self
}
public func settings()->ParseSettings {
public func settings() -> ParseSettings {
return _settings
}
// static parse functions below
/**
* Parse HTML into a Document.
@ -106,7 +104,7 @@ public class Parser
let treeBuilder: TreeBuilder = HtmlTreeBuilder()
return try treeBuilder.parse(html, baseUri, ParseErrorList.noTracking(), treeBuilder.defaultSettings())
}
/**
* Parse a fragment of HTML into a list of nodes. The context element, if supplied, supplies parsing context.
*
@ -121,7 +119,7 @@ public class Parser
let treeBuilder = HtmlTreeBuilder()
return try treeBuilder.parseFragment(fragmentHtml, context, baseUri, ParseErrorList.noTracking(), treeBuilder.defaultSettings())
}
/**
* Parse a fragment of XML into a list of nodes.
*
@ -133,7 +131,7 @@ public class Parser
let treeBuilder: XmlTreeBuilder = XmlTreeBuilder()
return try treeBuilder.parseFragment(fragmentXml, baseUri, ParseErrorList.noTracking(), treeBuilder.defaultSettings())
}
/**
* Parse a fragment of HTML into the {@code body} of a Document.
*
@ -144,21 +142,19 @@ public class Parser
*/
public static func parseBodyFragment(_ bodyHtml: String, _ baseUri: String) throws -> Document {
    let doc: Document = Document.createShell(baseUri)
    if let body: Element = doc.body() {
        // Swift arrays are values, so this list is a stable snapshot even though
        // the nodes get re-parented below.
        let nodeList: Array<Node> = try parseFragment(bodyHtml, body, baseUri)
        // Detach every node except the first, walking from the back.
        // The former `(count - 1)...1` range only worked for exactly two nodes:
        // it trapped for three or more (upper bound below lower bound) and
        // indexed out of bounds for zero or one.
        for node in nodeList.dropFirst().reversed() {
            try node.remove()
        }
        for node in nodeList {
            try body.appendChild(node)
        }
    }
    return doc
}
/**
* Utility method to unescape HTML entities from a string
* @param string HTML escaped string
@ -169,7 +165,7 @@ public class Parser
let tokeniser: Tokeniser = Tokeniser(CharacterReader(string), ParseErrorList.noTracking())
return try tokeniser.unescapeEntities(inAttribute)
}
/**
* @param bodyHtml HTML to parse
* @param baseUri baseUri base URI of document (i.e. original fetch location), for resolving relative URLs.
@ -180,24 +176,24 @@ public class Parser
public static func parseBodyFragmentRelaxed(_ bodyHtml: String, _ baseUri: String)throws->Document {
return try parse(bodyHtml, baseUri)
}
// builders
/**
* Create a new HTML parser. This parser treats input as HTML5, and enforces the creation of a normalised document,
* based on a knowledge of the semantics of the incoming tags.
* @return a new HTML parser.
*/
public static func htmlParser()->Parser {
public static func htmlParser() -> Parser {
return Parser(HtmlTreeBuilder())
}
/**
* Create a new XML parser. This parser assumes no knowledge of the incoming tags and does not treat it as HTML,
* rather creates a simple tree directly from the input.
* @return a new simple XML parser.
*/
public static func xmlParser()->Parser {
public static func xmlParser() -> Parser {
return Parser(XmlTreeBuilder())
}
}

View File

@ -19,78 +19,69 @@ import Foundation
public struct Pattern {
public static let CASE_INSENSITIVE: Int = 0x02
let pattern: String
init(_ pattern: String) {
self.pattern = pattern
}
static public func compile(_ s: String)->Pattern
{
static public func compile(_ s: String) -> Pattern {
return Pattern(s)
}
static public func compile(_ s: String, _ op: Int)->Pattern
{
static public func compile(_ s: String, _ op: Int) -> Pattern {
return Pattern(s)
}
func validate()throws
{
func validate()throws {
_ = try NCRegularExpression(pattern: self.pattern, options:[])
}
func matcher(in text: String) -> Matcher {
do {
let regex = try NCRegularExpression(pattern: self.pattern, options:[])
let nsString = NSString(string: text)
let results = regex.matches(in: text,options:[], range: NSRange(location: 0, length: nsString.length))
return Matcher(results,text)
let results = regex.matches(in: text, options:[], range: NSRange(location: 0, length: nsString.length))
return Matcher(results, text)
} catch let error {
print("invalid regex: \(error.localizedDescription)")
return Matcher([],text)
return Matcher([], text)
}
}
public func toString()->String{
public func toString() -> String {
return pattern
}
}
public class Matcher
{
let matches :[NCTextCheckingResult]
let string : String
var index : Int = -1
public var count : Int { return matches.count}
init(_ m:[NCTextCheckingResult],_ s: String)
{
public class Matcher {
let matches: [NCTextCheckingResult]
let string: String
var index: Int = -1
public var count: Int { return matches.count}
init(_ m: [NCTextCheckingResult], _ s: String) {
matches = m
string = s
}
@discardableResult
public func find() -> Bool
{
public func find() -> Bool {
index += 1
if(index < matches.count)
{
if(index < matches.count) {
return true
}
return false
}
/// Returns the text captured by group `i` of the current match.
///
/// Yields `nil` when there is no current match (`find()` has not been called,
/// or it has already run past the last match) and when the group's range
/// location is `NSNotFound`.
public func group(_ i: Int) -> String? {
    // `index` starts at -1 and keeps growing with every find(); guard both ends
    // instead of trapping on an out-of-range subscript.
    guard index >= 0 && index < matches.count else { return nil }
    let match = matches[index]
    let range = match.range(at: i)
    if range.location == NSNotFound { return nil }
    return string.substring(range.location, range.length)
}
public func group() -> String?
{
public func group() -> String? {
return group(0)
}
}

View File

@ -12,13 +12,13 @@ import Foundation
* Parses a CSS selector into an Evaluator tree.
*/
public class QueryParser {
private static let combinators : [String] = [",", ">", "+", "~", " "]
private static let AttributeEvals : [String] = ["=", "!=", "^=", "$=", "*=", "~="]
private static let combinators: [String] = [",", ">", "+", "~", " "]
private static let AttributeEvals: [String] = ["=", "!=", "^=", "$=", "*=", "~="]
private var tq: TokenQueue
private var query: String
private var evals: Array<Evaluator> = Array<Evaluator>()
/**
* Create a new QueryParser.
* @param query CSS query
@ -27,7 +27,7 @@ public class QueryParser {
self.query = query
self.tq = TokenQueue(query)
}
/**
* Parse a CSS query into an Evaluator.
* @param query CSS query
@ -37,25 +37,25 @@ public class QueryParser {
let p = QueryParser(query)
return try p.parse()
}
/**
* Parse the query
* @return Evaluator
*/
public func parse()throws->Evaluator {
tq.consumeWhitespace()
if (tq.matchesAny(QueryParser.combinators)) { // if starts with a combinator, use root as elements
evals.append( StructuralEvaluator.Root())
try combinator(tq.consume())
} else {
try findElements()
}
while (!tq.isEmpty()) {
// hierarchy and extras
let seenWhite: Bool = tq.consumeWhitespace()
if (tq.matchesAny(QueryParser.combinators)) {
try combinator(tq.consume())
} else if (seenWhite) {
@ -64,22 +64,22 @@ public class QueryParser {
try findElements() // take next el, #. etc off queue
}
}
if (evals.count == 1){
if (evals.count == 1) {
return evals[0]
}
return CombiningEvaluator.And(evals)
}
private func combinator(_ combinator: Character)throws {
tq.consumeWhitespace()
let subQuery: String = consumeSubQuery() // support multi > childs
var rootEval: Evaluator? // the new topmost evaluator
var currentEval: Evaluator? // the evaluator the new eval will be combined to. could be root, or rightmost or.
let newEval: Evaluator = try QueryParser.parse(subQuery) // the evaluator to add into target evaluator
var replaceRightMost: Bool = false
if (evals.count == 1) {
currentEval = evals[0]
rootEval = currentEval
@ -88,24 +88,15 @@ public class QueryParser {
currentEval = (currentEval as! CombiningEvaluator.Or).rightMostEvaluator()
replaceRightMost = true
}
}
else {
} else {
currentEval = CombiningEvaluator.And(evals)
rootEval = currentEval
}
evals.removeAll()
// for most combinators: change the current eval into an AND of the current eval and the new eval
if (combinator == ">")
{currentEval = CombiningEvaluator.And(newEval, StructuralEvaluator.ImmediateParent(currentEval!))}
else if (combinator == " ")
{currentEval = CombiningEvaluator.And(newEval, StructuralEvaluator.Parent(currentEval!))}
else if (combinator == "+")
{currentEval = CombiningEvaluator.And(newEval, StructuralEvaluator.ImmediatePreviousSibling(currentEval!))}
else if (combinator == "~")
{currentEval = CombiningEvaluator.And(newEval, StructuralEvaluator.PreviousSibling(currentEval!))}
else if (combinator == ",") { // group or.
let or : CombiningEvaluator.Or
if (combinator == ">") {currentEval = CombiningEvaluator.And(newEval, StructuralEvaluator.ImmediateParent(currentEval!))} else if (combinator == " ") {currentEval = CombiningEvaluator.And(newEval, StructuralEvaluator.Parent(currentEval!))} else if (combinator == "+") {currentEval = CombiningEvaluator.And(newEval, StructuralEvaluator.ImmediatePreviousSibling(currentEval!))} else if (combinator == "~") {currentEval = CombiningEvaluator.And(newEval, StructuralEvaluator.PreviousSibling(currentEval!))} else if (combinator == ",") { // group or.
let or: CombiningEvaluator.Or
if ((currentEval as? CombiningEvaluator.Or) != nil) {
or = currentEval as! CombiningEvaluator.Or
or.add(newEval)
@ -115,117 +106,61 @@ public class QueryParser {
or.add(newEval)
}
currentEval = or
}
else{
} else {
throw Exception.Error(type: ExceptionType.SelectorParseException, Message: "Unknown combinator: \(String(combinator))")
}
if (replaceRightMost)
{
if (replaceRightMost) {
(rootEval as! CombiningEvaluator.Or).replaceRightMostEvaluator(currentEval!)
}
else {
} else {
rootEval = currentEval
}
evals.append(rootEval!)
}
private func consumeSubQuery()->String {
let sq : StringBuilder = StringBuilder()
private func consumeSubQuery() -> String {
let sq: StringBuilder = StringBuilder()
while (!tq.isEmpty()) {
if (tq.matches("(")){
if (tq.matches("(")) {
sq.append("(").append(tq.chompBalanced("(", ")")).append(")")
}else if (tq.matches("[")){
} else if (tq.matches("[")) {
sq.append("[").append(tq.chompBalanced("[", "]")).append("]")
}else if (tq.matchesAny(QueryParser.combinators)){
} else if (tq.matchesAny(QueryParser.combinators)) {
break
}else{
} else {
sq.append(tq.consume())
}
}
return sq.toString()
}
/// Reads the next simple selector from the token queue and hands it to the
/// matching handler. Branch order is significant: the `matchChomp` checks
/// consume their token when they succeed.
private func findElements() throws {
    if tq.matchChomp("#") {
        try byId()
    } else if tq.matchChomp(".") {
        try byClass()
    } else if tq.matchesWord() || tq.matches("*|") {
        try byTag()
    } else if tq.matches("[") {
        try byAttribute()
    } else if tq.matchChomp("*") {
        allElements()
    } else if tq.matchChomp(":lt(") {
        try indexLessThan()
    } else if tq.matchChomp(":gt(") {
        try indexGreaterThan()
    } else if tq.matchChomp(":eq(") {
        try indexEquals()
    } else if tq.matches(":has(") {
        try has()
    } else if tq.matches(":contains(") {
        try contains(false)
    } else if tq.matches(":containsOwn(") {
        try contains(true)
    } else if tq.matches(":matches(") {
        try matches(false)
    } else if tq.matches(":matchesOwn(") {
        try matches(true)
    } else if tq.matches(":not(") {
        try not()
    } else if tq.matchChomp(":nth-child(") {
        try cssNthChild(false, false)
    } else if tq.matchChomp(":nth-last-child(") {
        try cssNthChild(true, false)
    } else if tq.matchChomp(":nth-of-type(") {
        try cssNthChild(false, true)
    } else if tq.matchChomp(":nth-last-of-type(") {
        try cssNthChild(true, true)
    } else if tq.matchChomp(":first-child") {
        evals.append(Evaluator.IsFirstChild())
    } else if tq.matchChomp(":last-child") {
        evals.append(Evaluator.IsLastChild())
    } else if tq.matchChomp(":first-of-type") {
        evals.append(Evaluator.IsFirstOfType())
    } else if tq.matchChomp(":last-of-type") {
        evals.append(Evaluator.IsLastOfType())
    } else if tq.matchChomp(":only-child") {
        evals.append(Evaluator.IsOnlyChild())
    } else if tq.matchChomp(":only-of-type") {
        evals.append(Evaluator.IsOnlyOfType())
    } else if tq.matchChomp(":empty") {
        evals.append(Evaluator.IsEmpty())
    } else if tq.matchChomp(":root") {
        evals.append(Evaluator.IsRoot())
    } else {
        // unhandled
        throw Exception.Error(type: ExceptionType.SelectorParseException, Message:"Could not parse query \(query): unexpected token at \(tq.remainder())")
    }
}
/// Consumes a CSS identifier from the queue, rejects an empty one, and adds an
/// `Evaluator.Id` for it.
private func byId() throws {
    let identifier: String = tq.consumeCssIdentifier()
    try Validate.notEmpty(string: identifier)
    evals.append(Evaluator.Id(identifier))
}
/// Consumes a CSS identifier from the queue, rejects an empty one, and adds an
/// `Evaluator.Class` for its trimmed form.
private func byClass() throws {
    let name: String = tq.consumeCssIdentifier()
    try Validate.notEmpty(string: name)
    let trimmedName = name.trim()
    evals.append(Evaluator.Class(trimmedName))
}
private func byTag()throws {
var tagName = tq.consumeElementSelector()
try Validate.notEmpty(string: tagName)
// namespaces: wildcard match equals(tagName) or ending in ":"+tagName
if (tagName.startsWith("*|")) {
evals.append(
@ -234,76 +169,66 @@ public class QueryParser {
Evaluator.TagEndsWith(tagName.replacingOccurrences(of: "*|", with: ":").trim().lowercased())))
} else {
// namespaces: if element name is "abc:def", selector must be "abc|def", so flip:
if (tagName.contains("|")){
if (tagName.contains("|")) {
tagName = tagName.replacingOccurrences(of: "|", with: ":")
}
evals.append(Evaluator.Tag(tagName.trim()))
}
}
private func byAttribute()throws {
let cq: TokenQueue = TokenQueue(tq.chompBalanced("[", "]")) // content queue
let key: String = cq.consumeToAny(QueryParser.AttributeEvals) // eq, not, start, end, contain, match, (no val)
try Validate.notEmpty(string: key)
cq.consumeWhitespace()
if (cq.isEmpty()) {
if (key.startsWith("^")){
if (key.startsWith("^")) {
evals.append(try Evaluator.AttributeStarting(key.substring(1)))
}else{
} else {
evals.append(Evaluator.Attribute(key))
}
} else {
if (cq.matchChomp("=")){
if (cq.matchChomp("=")) {
evals.append(try Evaluator.AttributeWithValue(key, cq.remainder()))
}
else if (cq.matchChomp("!=")){
} else if (cq.matchChomp("!=")) {
evals.append(try Evaluator.AttributeWithValueNot(key, cq.remainder()))
}
else if (cq.matchChomp("^=")){
} else if (cq.matchChomp("^=")) {
evals.append(try Evaluator.AttributeWithValueStarting(key, cq.remainder()))
}
else if (cq.matchChomp("$=")){
} else if (cq.matchChomp("$=")) {
evals.append(try Evaluator.AttributeWithValueEnding(key, cq.remainder()))
}
else if (cq.matchChomp("*=")){
} else if (cq.matchChomp("*=")) {
evals.append(try Evaluator.AttributeWithValueContaining(key, cq.remainder()))
}
else if (cq.matchChomp("~=")){
} else if (cq.matchChomp("~=")) {
evals.append( Evaluator.AttributeWithValueMatching(key, Pattern.compile(cq.remainder())))
}else{
} else {
throw Exception.Error(type: ExceptionType.SelectorParseException, Message:"Could not parse attribute query '\(query)': unexpected token at '\(cq.remainder())'")
}
}
}
private func allElements() {
evals.append(Evaluator.AllElements())
}
// pseudo selectors :lt, :gt, :eq
private func indexLessThan()throws {
evals.append(Evaluator.IndexLessThan(try consumeIndex()))
}
private func indexGreaterThan()throws {
evals.append(Evaluator.IndexGreaterThan(try consumeIndex()))
}
private func indexEquals()throws {
evals.append(Evaluator.IndexEquals(try consumeIndex()))
}
//pseudo selectors :first-child, :last-child, :nth-child, ...
private static let NTH_AB: Pattern = Pattern.compile("((\\+|-)?(\\d+)?)n(\\s*(\\+|-)?\\s*\\d+)?", Pattern.CASE_INSENSITIVE)
private static let NTH_B: Pattern = Pattern.compile("(\\+|-)?(\\d+)")
private func cssNthChild(_ backwards: Bool, _ ofType: Bool)throws {
let argS: String = tq.chompTo(")").trim().lowercased()
let mAB: Matcher = QueryParser.NTH_AB.matcher(in: argS)
@ -327,32 +252,27 @@ public class QueryParser {
} else {
throw Exception.Error(type: ExceptionType.SelectorParseException, Message:"Could not parse nth-index '\(argS)': unexpected format")
}
if (ofType){
if (backwards){
if (ofType) {
if (backwards) {
evals.append(Evaluator.IsNthLastOfType(a, b))
}else{
} else {
evals.append(Evaluator.IsNthOfType(a, b))
}
}else {
if (backwards){
} else {
if (backwards) {
evals.append(Evaluator.IsNthLastChild(a, b))
}else{
} else {
evals.append(Evaluator.IsNthChild(a, b))
}
}
}
/// Reads the text up to the next `)` and converts it to an index.
/// Throws when the text fails the numeric check or cannot be represented as an `Int`.
private func consumeIndex() throws -> Int {
    let indexS: String = tq.chompTo(")").trim()
    try Validate.isTrue(val: StringUtil.isNumeric(indexS), msg: "Index must be numeric")
    // Passing the numeric check does not guarantee the conversion succeeds: a long
    // digit run overflows Int and makes Int(_:) return nil. Report that as a parse
    // error rather than trapping on a force unwrap.
    guard let index = Int(indexS) else {
        throw Exception.Error(type: ExceptionType.SelectorParseException, Message: "Could not parse index '\(indexS)': value out of range")
    }
    return index
}
// pseudo selector :has(el)
private func has()throws {
try tq.consume(":has")
@ -360,39 +280,39 @@ public class QueryParser {
try Validate.notEmpty(string: subQuery, msg: ":has(el) subselect must not be empty")
evals.append(StructuralEvaluator.Has(try QueryParser.parse(subQuery)))
}
// pseudo selector :contains(text), containsOwn(text)
private func contains(_ own: Bool)throws {
try tq.consume(own ? ":containsOwn" : ":contains")
let searchText: String = TokenQueue.unescape(tq.chompBalanced("(", ")"))
try Validate.notEmpty(string: searchText, msg: ":contains(text) query must not be empty")
if (own){
if (own) {
evals.append(Evaluator.ContainsOwnText(searchText))
}else{
} else {
evals.append(Evaluator.ContainsText(searchText))
}
}
// :matches(regex), matchesOwn(regex)
private func matches(_ own: Bool)throws {
try tq.consume(own ? ":matchesOwn" : ":matches")
let regex: String = tq.chompBalanced("(", ")") // don't unescape, as regex bits will be escaped
try Validate.notEmpty(string: regex, msg: ":matches(regex) query must not be empty")
if (own){
if (own) {
evals.append(Evaluator.MatchesOwn(Pattern.compile(regex)))
}else{
} else {
evals.append(Evaluator.Matches(Pattern.compile(regex)))
}
}
// :not(selector)
/// Consumes `:not`, takes the balanced `( … )` body, rejects an empty body, and
/// adds a `StructuralEvaluator.Not` wrapping the parsed sub-selector.
private func not() throws {
    try tq.consume(":not")
    let inner: String = tq.chompBalanced("(", ")")
    try Validate.notEmpty(string: inner, msg: ":not(selector) subselect must not be empty")
    let excluded: Evaluator = try QueryParser.parse(inner)
    evals.append(StructuralEvaluator.Not(excluded))
}
}

View File

@ -73,23 +73,23 @@ import Foundation
* @see Element#select(String)
*/
open class Selector {
private let evaluator : Evaluator
private let root : Element
private let evaluator: Evaluator
private let root: Element
/// Builds a selector from a CSS query string: the query is trimmed, must not be
/// empty, and is parsed into an evaluator that is stored together with `root`.
private init(_ query: String, _ root: Element) throws {
    let trimmedQuery = query.trim()
    try Validate.notEmpty(string: trimmedQuery)
    self.evaluator = try QueryParser.parse(trimmedQuery)
    self.root = root
}
private init(_ evaluator: Evaluator, _ root: Element) {
self.evaluator = evaluator
self.root = root
}
/**
* Find elements matching selector.
*
@ -101,7 +101,7 @@ open class Selector {
public static func select(_ query: String, _ root: Element)throws->Elements {
return try Selector(query, root).select()
}
/**
* Find elements matching selector.
*
@ -112,7 +112,7 @@ open class Selector {
public static func select(_ evaluator: Evaluator, _ root: Element)throws->Elements {
return try Selector(evaluator, root).select()
}
/**
* Find elements matching selector.
*
@ -126,12 +126,10 @@ open class Selector {
var elements: Array<Element> = Array<Element>()
var seenElements: Array<Element> = Array<Element>()
// dedupe elements by identity, not equality
for root: Element in roots
{
for root: Element in roots {
let found: Elements = try select(evaluator, root)
for el: Element in found.array()
{
for el: Element in found.array() {
if (!seenElements.contains(el)) {
elements.append(el)
seenElements.append(el)
@ -140,13 +138,13 @@ open class Selector {
}
return Elements(elements)
}
private func select()throws->Elements {
return try Collector.collect(evaluator, root)
}
// exclude set. package open so that Elements can implement .not() selector.
static func filterOut(_ elements: Array<Element>, _ outs: Array<Element>)->Elements {
static func filterOut(_ elements: Array<Element>, _ outs: Array<Element>) -> Elements {
let output: Elements = Elements()
for el: Element in elements {
var found: Bool = false
@ -156,7 +154,7 @@ open class Selector {
break
}
}
if (!found){
if (!found) {
output.add(el)
}
}

View File

@ -8,35 +8,32 @@
import Foundation
/// A small reference-type wrapper around a Swift dictionary.
public class SimpleDictionary<KeyType: Hashable, ValueType> {

    public typealias DictionaryType = [KeyType: ValueType]

    /// Backing storage: readable from outside, mutated only through this class.
    public private(set) var values: DictionaryType = [:]

    public init() {}

    /// Number of key/value pairs currently stored.
    public var count: Int {
        return values.count
    }

    /// Removes the entry stored under `key`, if there is one.
    public func remove(_ key: KeyType) {
        _ = values.removeValue(forKey: key)
    }

    /// Whether an entry is stored under `key`.
    public func contains(_ key: KeyType) -> Bool {
        return values.index(forKey: key) != nil
    }

    /// Stores `value` under `key`, replacing any previous entry.
    public func put(_ value: ValueType, forKey key: KeyType) {
        _ = values.updateValue(value, forKey: key)
    }

    /// Returns the value stored under `key`, or `nil` when there is none.
    public func get(_ key: KeyType) -> ValueType? {
        return values[key]
    }
}

View File

@ -8,18 +8,18 @@
import Foundation
class StreamReader {
let encoding : String.Encoding
let chunkSize : Int
var fileHandle : FileHandle!
let delimData : Data
var buffer : Data
var atEof : Bool
class StreamReader {
let encoding: String.Encoding
let chunkSize: Int
var fileHandle: FileHandle!
let delimData: Data
var buffer: Data
var atEof: Bool
init?(path: String, delimiter: String = "\n", encoding: String.Encoding = .utf8,
chunkSize: Int = 4096) {
guard let fileHandle = FileHandle(forReadingAtPath: path),
let delimData = delimiter.data(using: encoding) else {
return nil
@ -31,15 +31,15 @@ class StreamReader {
self.buffer = Data(capacity: chunkSize)
self.atEof = false
}
deinit {
self.close()
}
/// Return next line, or nil on EOF.
func nextLine() -> String? {
precondition(fileHandle != nil, "Attempt to read from closed file")
// Read data chunks from file until a line delimiter is found:
while !atEof {
if let range = buffer.range(of: delimData) {
@ -65,14 +65,14 @@ class StreamReader {
}
return nil
}
/// Start reading from the beginning of file.
func rewind() -> Void {
fileHandle.seek(toFileOffset: 0)
buffer.count = 0
atEof = false
}
/// Close the underlying file. No reading must be done after calling this method.
func close() -> Void {
fileHandle?.closeFile()

View File

@ -9,54 +9,48 @@
import Foundation
extension String {
subscript (i: Int) -> Character {
return self[self.characters.index(self.startIndex, offsetBy: i)]
}
subscript (i: Int) -> String {
return String(self[i] as Character)
}
func unicodeScalar(_ i: Int)->UnicodeScalar{
func unicodeScalar(_ i: Int) -> UnicodeScalar {
return self.unicodeScalars.prefix(i+1).last!
}
/// Returns up to `count` Unicode scalars of the receiver, starting at scalar index `offset`.
///
/// Out-of-range arguments are clamped instead of trapping: an `offset` at or past the
/// end, or a non-positive `count`, yields an empty string.
func string(_ offset: Int, _ count: Int) -> String {
    let scalars = self.unicodeScalars
    // `suffix(_:)` and `prefix(_:)` both trap on a negative length, so clamp first.
    let remaining = max(0, scalars.count - offset)
    let length = max(0, count)
    return String(scalars.suffix(remaining).prefix(length))
}
static func split(_ value: String,_ offset : Int, _ count: Int) -> String
{
static func split(_ value: String, _ offset: Int, _ count: Int) -> String {
let start = value.index(value.startIndex, offsetBy: offset)
let end = value.index(value.startIndex, offsetBy: count+offset)
let range = start..<end
return value.substring(with: range)
}
/// Whether the string is empty or consists only of characters from `CharacterSet.whitespaces`.
func isEmptyOrWhitespace() -> Bool {
    guard !isEmpty else {
        return true
    }
    let stripped = trimmingCharacters(in: .whitespaces)
    return stripped.isEmpty
}
func startsWith(_ string:String) -> Bool
{
func startsWith(_ string: String) -> Bool {
return self.hasPrefix(string)
}
func indexOf(_ substring: String, _ offset: Int ) -> Int {
if(offset > characters.count){return -1}
if(offset > characters.count) {return -1}
let maxIndex = self.characters.count - substring.characters.count
if(maxIndex >= 0)
{
if(maxIndex >= 0) {
for index in offset...maxIndex {
let rangeSubstring = self.characters.index(self.startIndex, offsetBy: index)..<self.characters.index(self.startIndex, offsetBy: index + substring.characters.count)
if self.substring(with: rangeSubstring) == substring {
@ -66,125 +60,116 @@ extension String {
}
return -1
}
func indexOf(_ substring: String) -> Int {
return self.indexOf(substring, 0)
}
func trim() -> String {
return trimmingCharacters(in: NSCharacterSet.whitespacesAndNewlines)
}
/// Case-insensitive comparison against an optional string; `nil` never matches.
func equalsIgnoreCase(string: String?) -> Bool {
    guard let other = string else {
        return false
    }
    return caseInsensitiveCompare(other) == .orderedSame
}
/// Formats `n` as lowercase hexadecimal digits with no padding.
static func toHexString(n: Int) -> String {
    // "%2x" right-aligns to a minimum width of two by padding with a space, so
    // values below 0x10 came back as e.g. " a". Plain "%x" emits the digits only.
    return String(format: "%x", n)
}
func toCharArray() -> [Int] {
return characters.flatMap{Int($0.unicodeScalar.value)}
return characters.flatMap {Int($0.unicodeScalar.value)}
}
func insert(string:String,ind:Int) -> String {
func insert(string: String, ind: Int) -> String {
return String(self.characters.prefix(ind)) + string + String(self.characters.suffix(self.characters.count-ind))
}
func charAt(_ i:Int) -> Character {
func charAt(_ i: Int) -> Character {
return self[i] as Character
}
func substring(_ beginIndex: Int) -> String {
return String.split(self, beginIndex, self.characters.count-beginIndex)
}
func substring(_ beginIndex: Int, _ count: Int) -> String {
return String.split(self, beginIndex, count)
}
func regionMatches(_ ignoreCase: Bool, _ selfOffset: Int, _ other: String, _ otherOffset: Int, _ length: Int )->Bool{
func regionMatches(_ ignoreCase: Bool, _ selfOffset: Int, _ other: String, _ otherOffset: Int, _ length: Int ) -> Bool {
if ((otherOffset < 0) || (selfOffset < 0)
|| (selfOffset > self.characters.count - length)
|| (otherOffset > other.characters.count - length)) {
return false
}
for i in 0..<length
{
let charSelf : Character = self[i+selfOffset]
let charOther : Character = other[i+otherOffset]
if(ignoreCase){
if(charSelf.lowercase != charOther.lowercase){
for i in 0..<length {
let charSelf: Character = self[i+selfOffset]
let charOther: Character = other[i+otherOffset]
if(ignoreCase) {
if(charSelf.lowercase != charOther.lowercase) {
return false
}
}else{
if(charSelf != charOther){
} else {
if(charSelf != charOther) {
return false
}
}
}
return true
}
func startsWith(_ input: String , _ offset: Int)->Bool
{
func startsWith(_ input: String, _ offset: Int) -> Bool {
if ((offset < 0) || (offset > characters.count - input.characters.count)) {
return false
}
for i in 0..<input.characters.count
{
let charSelf : Character = self[i+offset]
let charOther : Character = input[i]
if(charSelf != charOther){return false}
for i in 0..<input.characters.count {
let charSelf: Character = self[i+offset]
let charOther: Character = input[i]
if(charSelf != charOther) {return false}
}
return true
}
/// Replaces the first literal occurrence of `pattern` with `replacement`.
/// Returns the receiver unchanged when `pattern` does not occur.
func replaceFirst(of pattern: String, with replacement: String) -> String {
    guard let firstMatch = range(of: pattern) else {
        return self
    }
    return replacingCharacters(in: firstMatch, with: replacement)
}
/// Replaces every match of the regular expression `pattern` with the template `replacement`.
/// - Parameter options: options used when compiling `pattern`.
/// - Returns: the substituted string, or the receiver unchanged when `pattern`
///   fails to compile (the error is logged via `NSLog`).
func replaceAll(of pattern: String, with replacement: String, options: NCRegularExpression.Options = []) -> String {
    do {
        // Forward the caller's options; they used to be dropped in favour of `[]`.
        let regex = try NCRegularExpression(pattern: pattern, options: options)
        let range = NSRange(0..<self.utf16.count)
        return regex.stringByReplacingMatches(in: self, options: [],
                                              range: range, withTemplate: replacement)
    } catch {
        NSLog("replaceAll error: \(error)")
        return self
    }
}
/// Equality against an optional string; `nil` never matches.
func equals(_ s: String?) -> Bool {
    guard let other = s else {
        return false
    }
    return self == other
}
/// Assembles a string from an array of Unicode scalars, in order.
static func unicodescalars(_ scalars: [UnicodeScalar]) -> String {
    var text = ""
    for scalar in scalars {
        text.append(Character(scalar))
    }
    return text
}
}
extension String.Encoding
{
func canEncode(_ string: String) -> Bool
{
extension String.Encoding {
func canEncode(_ string: String) -> Bool {
return string.cString(using: self) != nil
}
public func displayName()->String{
public func displayName() -> String {
switch self {
case String.Encoding.ascii: return "US-ASCII"
case String.Encoding.nextstep: return "nextstep"
@ -214,9 +199,3 @@ extension String.Encoding
}
}
}

View File

@ -4,7 +4,7 @@
*/
open class StringBuilder {
fileprivate var stringValue: String
/**
Construct with initial String contents
@ -13,11 +13,11 @@ open class StringBuilder {
public init(string: String = "") {
self.stringValue = string
}
public init(_ size: Int) {
self.stringValue = ""
}
/**
Return the String object
@ -26,7 +26,7 @@ open class StringBuilder {
open func toString() -> String {
return stringValue
}
/**
Return the current length of the String object
*/
@ -34,7 +34,7 @@ open class StringBuilder {
return self.stringValue.characters.count
//return countElements(stringValue)
}
/**
Append a String to the object
@ -45,33 +45,31 @@ open class StringBuilder {
open func append(_ string: String) {
stringValue += string
}
open func appendCodePoint(_ chr: Character) {
stringValue = stringValue + String(chr)
}
open func appendCodePoints(_ chr: [Character]) {
for c in chr{
for c in chr {
stringValue = stringValue + String(c)
}
}
/// Appends the character for the Unicode code point `ch`.
///
/// A value for which `UnicodeScalar(_:)` returns nil is appended as
/// U+FFFD REPLACEMENT CHARACTER instead of trapping.
open func appendCodePoint(_ ch: Int) {
    // `UnicodeScalar(_:)` is failable; force-unwrapping it crashed on invalid input.
    let scalar: UnicodeScalar = UnicodeScalar(ch) ?? "\u{FFFD}"
    stringValue = stringValue + String(Character(scalar))
}
open func appendCodePoint(_ ch: UnicodeScalar) {
stringValue = stringValue + String(ch)
}
open func appendCodePoints(_ chr: [UnicodeScalar]) {
for c in chr{
for c in chr {
stringValue = stringValue + String(c)
}
}
/**
Append a Printable to the object
@ -84,9 +82,9 @@ open class StringBuilder {
stringValue += value.description
return self
}
@discardableResult
open func insert<T: CustomStringConvertible>(_ offset: Int ,_ value: T) -> StringBuilder {
open func insert<T: CustomStringConvertible>(_ offset: Int, _ value: T) -> StringBuilder {
stringValue = stringValue.insert(string: value.description, ind: offset)
return self
}
@ -116,7 +114,7 @@ open class StringBuilder {
stringValue += value.description + "\n"
return self
}
/**
Reset the object to an empty string
@ -160,4 +158,3 @@ public func += <T: CustomStringConvertible>(lhs: StringBuilder, rhs: T) {
public func +(lhs: StringBuilder, rhs: StringBuilder) -> StringBuilder {
return StringBuilder(string: lhs.toString() + rhs.toString())
}

View File

@ -11,35 +11,33 @@ import Foundation
/**
* A minimal String utility class. Designed for internal jsoup use only.
*/
open class StringUtil
{
open class StringUtil {
enum StringError: Error {
case empty
case short
case error(String)
}
// memoised padding up to 10
fileprivate static var padding : [String] = ["", " ", " ", " ", " ", " ", " ", " ", " ", " ", " "]
fileprivate static var padding: [String] = ["", " ", " ", " ", " ", " ", " ", " ", " ", " ", " "]
/**
* Join a collection of strings by a separator
* @param strings collection of string objects
* @param sep string to place between strings
* @return joined string
*/
open static func join(_ strings:[String], sep:String) -> String {
open static func join(_ strings: [String], sep: String) -> String {
return strings.joined(separator: sep)
}
open static func join(_ strings:Set<String>, sep:String) -> String {
open static func join(_ strings: Set<String>, sep: String) -> String {
return strings.joined(separator: sep)
}
open static func join(_ strings:OrderedSet<String>, sep:String) -> String {
open static func join(_ strings: OrderedSet<String>, sep: String) -> String {
return strings.joined(separator: sep)
}
// /**
// * Join a collection of strings by a separator
// * @param strings iterator of string objects
@ -66,138 +64,128 @@ open class StringUtil
* @param width amount of padding desired
* @return string of spaces * width
*/
open static func padding(_ width: Int) -> String {
    guard width > 0 else {
        return ""
    }
    // Build the run directly instead of indexing the memo table: the result is then
    // exactly `width` spaces, whatever the table entries happen to contain.
    return String(repeating: " ", count: width)
}
/**
* Tests if a string is blank: null, empty, or only whitespace (" ", \r\n, \t, etc)
* @param string string to test
* @return if string is blank
*/
open static func isBlank(_ string:String) -> Bool {
if (string.characters.count == 0){
open static func isBlank(_ string: String) -> Bool {
if (string.characters.count == 0) {
return true
}
for chr in string.characters
{
if (!StringUtil.isWhitespace(chr)){
for chr in string.characters {
if (!StringUtil.isWhitespace(chr)) {
return false
}
}
return true
}
/**
* Tests if a string is numeric, i.e. contains only digit characters
* @param string string to test
* @return true if only digit chars, false if empty or null or contains non-digit chrs
*/
open static func isNumeric(_ string: String) -> Bool {
    let chars = string.characters
    guard !chars.isEmpty else {
        return false
    }
    // Numeric means no character falls outside the closed range "0"..."9".
    return !chars.contains { !("0"..."9" ~= $0) }
}
/**
* Tests if a code point is "whitespace" as defined in the HTML spec.
* @param c code point to test
* @return true if code point is whitespace, false otherwise
*/
open static func isWhitespace(_ c:Character) -> Bool
{
open static func isWhitespace(_ c: Character) -> Bool {
//(c == " " || c == "\t" || c == "\n" || (c == "\f" ) || c == "\r")
return c.isWhitespace
}
/**
* Normalise the whitespace within this string; multiple spaces collapse to a single, and all whitespace characters
* (e.g. newline, tab) convert to a simple space
* @param string content to normalise
* @return normalised string
*/
open static func normaliseWhitespace(_ string:String) -> String {
let sb : StringBuilder = StringBuilder.init()
open static func normaliseWhitespace(_ string: String) -> String {
let sb: StringBuilder = StringBuilder.init()
appendNormalisedWhitespace(sb, string: string, stripLeading: false)
return sb.toString()
}
/**
* After normalizing the whitespace within a string, appends it to a string builder.
* @param accum builder to append to
* @param string string to normalize whitespace within
* @param stripLeading set to true if you wish to remove any leading whitespace
*/
open static func appendNormalisedWhitespace(_ accum:StringBuilder, string: String , stripLeading:Bool ) {
var lastWasWhite : Bool = false
open static func appendNormalisedWhitespace(_ accum: StringBuilder, string: String, stripLeading: Bool ) {
var lastWasWhite: Bool = false
var reachedNonWhite: Bool = false
for c in string.characters
{
if (isWhitespace(c))
{
if ((stripLeading && !reachedNonWhite) || lastWasWhite){
for c in string.characters {
if (isWhitespace(c)) {
if ((stripLeading && !reachedNonWhite) || lastWasWhite) {
continue
}
accum.append(" ")
lastWasWhite = true
}
else
{
} else {
accum.appendCodePoint(c)
lastWasWhite = false
reachedNonWhite = true
}
}
}
open static func inString(_ needle:String? , haystack:String...) -> Bool {
return inString(needle,haystack)
open static func inString(_ needle: String?, haystack: String...) -> Bool {
return inString(needle, haystack)
}
/**
 * Tests whether needle equals any non-nil entry of haystack, using String.compare.
 * A nil needle never matches, and nil haystack entries are skipped.
 */
open static func inString(_ needle: String?, _ haystack: [String?]) -> Bool {
    guard let wanted = needle else {
        return false
    }
    return haystack.contains { entry in
        guard let candidate = entry else {
            return false
        }
        return candidate.compare(wanted) == ComparisonResult.orderedSame
    }
}
open static func inSorted(_ needle:String, haystack:[String]) -> Bool {
open static func inSorted(_ needle: String, haystack: [String]) -> Bool {
return binarySearch(haystack, searchItem: needle) >= 0
}
open static func binarySearch<T:Comparable>(_ inputArr:Array<T>, searchItem: T)->Int{
open static func binarySearch<T: Comparable>(_ inputArr: Array<T>, searchItem: T) -> Int {
var lowerIndex = 0
var upperIndex = inputArr.count - 1
while (true) {
let currentIndex = (lowerIndex + upperIndex)/2
if(inputArr[currentIndex] == searchItem) {
@ -213,7 +201,7 @@ open class StringUtil
}
}
}
/**
* Create a new absolute URL, from a provided existing absolute URL and a relative URL component.
* @param base the existing absolute base URL
@ -222,45 +210,43 @@ open class StringUtil
* @throws MalformedURLException if an error occurred generating the URL
*/
//NOTE: not sure this works
open static func resolve(_ base:URL, relUrl: String ) -> URL? {
open static func resolve(_ base: URL, relUrl: String ) -> URL? {
var base = base
if(base.pathComponents.count == 0 && base.absoluteString.characters.last != "/" && !base.isFileURL)
{
if(base.pathComponents.count == 0 && base.absoluteString.characters.last != "/" && !base.isFileURL) {
base = base.appendingPathComponent("/", isDirectory: false)
}
let u = URL(string: relUrl, relativeTo : base)
return u
}
/**
* Create a new absolute URL, from a provided existing absolute URL and a relative URL component.
* @param baseUrl the existing absolute base URL
* @param relUrl the relative URL to resolve. (If it's already absolute, it will be returned)
* @return an absolute URL if one was able to be generated, or the empty string if not
*/
open static func resolve(_ baseUrl : String , relUrl : String ) -> String {
open static func resolve(_ baseUrl: String, relUrl: String ) -> String {
let base = URL(string: baseUrl)
if(base == nil || base?.scheme == nil)
{
if(base == nil || base?.scheme == nil) {
let abs = URL(string: relUrl)
return abs != nil && abs?.scheme != nil ? abs!.absoluteURL.absoluteString : ""
}else{
} else {
let url = resolve(base!, relUrl: relUrl)
if(url != nil){
if(url != nil) {
let ext = url!.absoluteURL.absoluteString
return ext
}
if(base != nil && base?.scheme != nil){
if(base != nil && base?.scheme != nil) {
let ext = base!.absoluteString
return ext
}
return ""
}
// try {
// try {
// base = new URL(baseUrl)
@ -273,19 +259,7 @@ open class StringUtil
// } catch (MalformedURLException e) {
// return ""
// }
}
}

View File

@ -11,167 +11,165 @@ import Foundation
/**
* Base structural evaluator.
*/
public class StructuralEvaluator : Evaluator {
public class StructuralEvaluator: Evaluator {
let evaluator: Evaluator
public init(_ evaluator: Evaluator) {
self.evaluator = evaluator
}
public class Root : Evaluator {
public override func matches(_ root: Element, _ element: Element)->Bool {
public class Root: Evaluator {
public override func matches(_ root: Element, _ element: Element) -> Bool {
return root === element
}
}
public class Has : StructuralEvaluator {
public class Has: StructuralEvaluator {
public override init(_ evaluator: Evaluator) {
super.init(evaluator)
}
public override func matches(_ root: Element, _ element: Element)throws->Bool {
for e in try element.getAllElements().array() {
do{
if(e != element){
if ((try evaluator.matches(root, e)))
{
do {
if(e != element) {
if ((try evaluator.matches(root, e))) {
return true
}
}
}catch{}
} catch {}
}
return false
}
public override func toString()->String {
public override func toString() -> String {
return ":has(\(evaluator.toString()))"
}
}
public class Not : StructuralEvaluator {
public class Not: StructuralEvaluator {
public override init(_ evaluator: Evaluator) {
super.init(evaluator)
}
public override func matches(_ root: Element, _ node: Element)->Bool {
do{
public override func matches(_ root: Element, _ node: Element) -> Bool {
do {
return try !evaluator.matches(root, node)
}catch{}
} catch {}
return false
}
public override func toString()->String {
public override func toString() -> String {
return ":not\(evaluator.toString())"
}
}
/**
 * Matches an element when one of its ancestors, up to and including the root,
 * matches the wrapped evaluator.
 */
public class Parent: StructuralEvaluator {
    public override init(_ evaluator: Evaluator) {
        super.init(evaluator)
    }

    public override func matches(_ root: Element, _ element: Element) -> Bool {
        if (root == element) {
            return false
        }

        // Walk up the ancestor chain and stop when it runs out. A nil parent never
        // compares equal to `root`, so the former `while (true)` loop spun forever
        // for an element that does not sit underneath `root`.
        var ancestor = element.parent()
        while let current = ancestor {
            do {
                if (try evaluator.matches(root, current)) {
                    return true
                }
            } catch {
                // Best effort, as before: an evaluator error counts as "no match here".
            }
            if (current == root) {
                break
            }
            ancestor = current.parent()
        }
        return false
    }

    public override func toString() -> String {
        return ":parent\(evaluator.toString())"
    }
}
public class ImmediateParent : StructuralEvaluator {
public class ImmediateParent: StructuralEvaluator {
public override init(_ evaluator: Evaluator) {
super.init(evaluator)
}
public override func matches(_ root: Element, _ element: Element)->Bool {
if (root == element){
public override func matches(_ root: Element, _ element: Element) -> Bool {
if (root == element) {
return false
}
if let parent = element.parent(){
do{
if let parent = element.parent() {
do {
return try evaluator.matches(root, parent)
}catch{}
} catch {}
}
return false
}
public override func toString()->String {
public override func toString() -> String {
return ":ImmediateParent\(evaluator.toString())"
}
}
public class PreviousSibling : StructuralEvaluator {
public class PreviousSibling: StructuralEvaluator {
public override init(_ evaluator: Evaluator) {
super.init(evaluator)
}
public override func matches(_ root: Element, _ element: Element)throws->Bool {
if (root == element){
if (root == element) {
return false
}
var prev = try element.previousElementSibling()
while (prev != nil) {
do{
if (try evaluator.matches(root, prev!)){
do {
if (try evaluator.matches(root, prev!)) {
return true
}
}catch{}
} catch {}
prev = try prev!.previousElementSibling()
}
return false
}
public override func toString()->String {
public override func toString() -> String {
return ":prev*\(evaluator.toString())"
}
}
class ImmediatePreviousSibling : StructuralEvaluator {
class ImmediatePreviousSibling: StructuralEvaluator {
public override init(_ evaluator: Evaluator) {
super.init(evaluator)
}
public override func matches(_ root: Element, _ element: Element)throws->Bool {
if (root == element){
if (root == element) {
return false
}
if let prev = try element.previousElementSibling(){
do{
if let prev = try element.previousElementSibling() {
do {
return try evaluator.matches(root, prev)
}catch{}
} catch {}
}
return false
}
public override func toString()->String {
public override func toString() -> String {
return ":prev\(evaluator.toString())"
}
}

View File

@ -8,13 +8,12 @@
import Foundation
/**
The core public access point to the jsoup functionality.
*/
open class SwiftSoup {
private init(){}
private init() {}
/**
Parse HTML into a Document. The parser will make a sensible, balanced document tree out of any HTML.
@ -26,7 +25,7 @@ open class SwiftSoup {
public static func parse(_ html: String, _ baseUri: String)throws->Document {
return try Parser.parse(html, baseUri)
}
/**
Parse HTML into a Document, using the provided Parser. You can provide an alternate parser, such as a simple XML
(non-HTML) parser.
@ -40,7 +39,7 @@ open class SwiftSoup {
public static func parse(_ html: String, _ baseUri: String, _ parser: Parser)throws->Document {
return try parser.parseInput(html, baseUri)
}
/**
Parse HTML into a Document. As no base URI is specified, absolute URL detection relies on the HTML including a
{@code <base href>} tag.
@ -69,7 +68,7 @@ open class SwiftSoup {
// public static Connection connect(String url) {
// return HttpConnection.connect(url);
// }
//todo:
// /**
// Parse the contents of a file as HTML.
@ -85,7 +84,7 @@ open class SwiftSoup {
// public static Document parse(File in, String charsetName, String baseUri) throws IOException {
// return DataUtil.load(in, charsetName, baseUri);
// }
//todo:
// /**
// Parse the contents of a file as HTML. The location of the file is used as the base URI to qualify relative URLs.
@ -101,7 +100,7 @@ open class SwiftSoup {
// public static Document parse(File in, String charsetName) throws IOException {
// return DataUtil.load(in, charsetName, in.getAbsolutePath());
// }
// /**
// Read an input stream, and parse it to a Document.
//
@ -116,7 +115,7 @@ open class SwiftSoup {
// public static Document parse(InputStream in, String charsetName, String baseUri) throws IOException {
// return DataUtil.load(in, charsetName, baseUri);
// }
// /**
// Read an input stream, and parse it to a Document. You can provide an alternate parser, such as a simple XML
// (non-HTML) parser.
@ -133,7 +132,7 @@ open class SwiftSoup {
// public static Document parse(InputStream in, String charsetName, String baseUri, Parser parser) throws IOException {
// return DataUtil.load(in, charsetName, baseUri, parser);
// }
/**
Parse a fragment of HTML, with the assumption that it forms the {@code body} of the HTML.
@ -146,7 +145,7 @@ open class SwiftSoup {
public static func parseBodyFragment(_ bodyHtml: String, _ baseUri: String)throws->Document {
return try Parser.parseBodyFragment(bodyHtml, baseUri)
}
/**
Parse a fragment of HTML, with the assumption that it forms the {@code body} of the HTML.
@ -158,7 +157,7 @@ open class SwiftSoup {
public static func parseBodyFragment(_ bodyHtml: String)throws->Document {
return try Parser.parseBodyFragment(bodyHtml, "")
}
// /**
// Fetch a URL, and parse it as HTML. Provided for compatibility; in most cases use {@link #connect(String)} instead.
// <p>
@ -181,7 +180,7 @@ open class SwiftSoup {
// con.timeout(timeoutMillis);
// return con.get();
// }
/**
Get safe HTML from untrusted input HTML, by parsing input HTML and filtering it through a white-list of permitted
tags and attributes.
@ -199,7 +198,7 @@ open class SwiftSoup {
let clean: Document = try cleaner.clean(dirty)
return try clean.body()?.html()
}
/**
Get safe HTML from untrusted input HTML, by parsing input HTML and filtering it through a white-list of permitted
tags and attributes.
@ -213,7 +212,7 @@ open class SwiftSoup {
public static func clean(_ bodyHtml: String, _ whitelist: Whitelist)throws->String? {
return try SwiftSoup.clean(bodyHtml, "", whitelist)
}
/**
* Get safe HTML from untrusted input HTML, by parsing input HTML and filtering it through a white-list of
* permitted
@ -233,6 +232,5 @@ open class SwiftSoup {
clean.outputSettings(outputSettings)
return try clean.body()?.html()
}
}

View File

@ -8,42 +8,41 @@
import Foundation
open class Tag : Hashable
{
open class Tag: Hashable {
// map of known tags
static var tags: Dictionary<String, Tag> = {
    do {
        return try Tag.initializeMaps()
    } catch {
        // `initializeMaps()` threw while building this table, so report that error.
        // The old message talked about an overridden method, and the trailing
        // `return` after `preconditionFailure` could never run.
        preconditionFailure("Tag.initializeMaps() failed: \(error)")
    }
}()
fileprivate var _tagName : String
fileprivate var _isBlock : Bool = true // block or inline
fileprivate var _formatAsBlock : Bool = true // should be formatted as a block
fileprivate var _canContainBlock : Bool = true // Can this tag hold block level tags?
fileprivate var _canContainInline : Bool = true // only pcdata if not
fileprivate var _empty : Bool = false // can hold nothing e.g. img
fileprivate var _selfClosing : Bool = false // can self close (<foo />). used for unknown tags that self close, without forcing them as empty.
fileprivate var _preserveWhitespace : Bool = false // for pre, textarea, script etc
fileprivate var _formList : Bool = false // a control that appears in forms: input, textarea, output etc
fileprivate var _formSubmit : Bool = false // a control that can be submitted in a form: input etc
fileprivate var _tagName: String
fileprivate var _isBlock: Bool = true // block or inline
fileprivate var _formatAsBlock: Bool = true // should be formatted as a block
fileprivate var _canContainBlock: Bool = true // Can this tag hold block level tags?
fileprivate var _canContainInline: Bool = true // only pcdata if not
fileprivate var _empty: Bool = false // can hold nothing e.g. img
fileprivate var _selfClosing: Bool = false // can self close (<foo />). used for unknown tags that self close, without forcing them as empty.
fileprivate var _preserveWhitespace: Bool = false // for pre, textarea, script etc
fileprivate var _formList: Bool = false // a control that appears in forms: input, textarea, output etc
fileprivate var _formSubmit: Bool = false // a control that can be submitted in a form: input etc
public init(_ tagName: String) {
self._tagName = tagName
}
/**
* Get this tag's name.
*
* @return the tag's name
*/
open func getName()->String {
open func getName() -> String {
return self._tagName
}
/**
* Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
* <p>
@ -56,13 +55,13 @@ open class Tag : Hashable
*/
open static func valueOf(_ tagName: String, _ settings: ParseSettings)throws->Tag {
var tagName = tagName
var tag : Tag? = Tag.tags[tagName]
var tag: Tag? = Tag.tags[tagName]
if (tag == nil) {
tagName = settings.normalizeTag(tagName)
try Validate.notEmpty(string: tagName)
tag = Tag.tags[tagName]
if (tag == nil) {
// not defined: create default; go anywhere, do anything! (incl be inside a <p>)
tag = Tag(tagName)
@ -72,7 +71,7 @@ open class Tag : Hashable
}
return tag!
}
/**
* Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
* <p>
@ -85,120 +84,120 @@ open class Tag : Hashable
open static func valueOf(_ tagName: String)throws->Tag {
return try valueOf(tagName, ParseSettings.preserveCase)
}
/**
* Gets if this is a block tag.
*
* @return if block tag
*/
open func isBlock()->Bool {
open func isBlock() -> Bool {
return _isBlock
}
/**
* Gets if this tag should be formatted as a block (or as inline)
*
* @return if should be formatted as block or inline
*/
open func formatAsBlock()->Bool {
open func formatAsBlock() -> Bool {
return _formatAsBlock
}
/**
* Gets if this tag can contain block tags.
*
* @return if tag can contain block tags
*/
open func canContainBlock()->Bool {
open func canContainBlock() -> Bool {
return _canContainBlock
}
/**
* Gets if this tag is an inline tag.
*
* @return if this tag is an inline tag.
*/
open func isInline()->Bool {
open func isInline() -> Bool {
return !_isBlock
}
/**
* Gets if this tag is a data only tag.
*
* @return if this tag is a data only tag
*/
open func isData()->Bool {
open func isData() -> Bool {
return !_canContainInline && !isEmpty()
}
/**
* Get if this is an empty tag
*
* @return if this is an empty tag
*/
open func isEmpty()->Bool {
open func isEmpty() -> Bool {
return _empty
}
/**
* Get if this tag is self closing.
*
* @return if this tag should be output as self closing.
*/
open func isSelfClosing()->Bool {
open func isSelfClosing() -> Bool {
return _empty || _selfClosing
}
/**
* Get if this is a pre-defined tag, or was auto created on parsing.
*
* @return if a known tag
*/
open func isKnownTag()->Bool {
open func isKnownTag() -> Bool {
return Tag.tags[_tagName] != nil
}
/**
* Check if this tagname is a known tag.
*
* @param tagName name of tag
* @return if known HTML tag
*/
open static func isKnownTag(_ tagName: String)->Bool {
open static func isKnownTag(_ tagName: String) -> Bool {
return Tag.tags[tagName] != nil
}
/**
* Get if this tag should preserve whitespace within child text nodes.
*
* @return if preserve whitespace
*/
public func preserveWhitespace()->Bool {
public func preserveWhitespace() -> Bool {
return _preserveWhitespace
}
/**
* Get if this tag represents a control associated with a form. E.g. input, textarea, output
* @return if associated with a form
*/
public func isFormListed()->Bool {
public func isFormListed() -> Bool {
return _formList
}
/**
* Get if this tag represents an element that should be submitted with a form. E.g. input, option
* @return if submittable with a form
*/
public func isFormSubmittable()->Bool {
public func isFormSubmittable() -> Bool {
return _formSubmit
}
@discardableResult
func setSelfClosing() ->Tag{
func setSelfClosing() -> Tag {
_selfClosing = true
return self
}
/// Returns a Boolean value indicating whether two values are equal.
///
/// Equality is the inverse of inequality. For any values `a` and `b`,
@ -207,15 +206,14 @@ open class Tag : Hashable
/// - Parameters:
/// - lhs: A value to compare.
/// - rhs: Another value to compare.
static public func ==(lhs: Tag, rhs: Tag) -> Bool
{
static public func ==(lhs: Tag, rhs: Tag) -> Bool {
let this = lhs
let o = rhs
if (this === o) {return true}
if (type(of:this) != type(of:o)) {return false}
let tag : Tag = o
let tag: Tag = o
if (lhs._tagName != tag._tagName) {return false}
if (lhs._canContainBlock != tag._canContainBlock) {return false}
if (lhs._canContainInline != tag._canContainInline) {return false}
@ -227,19 +225,17 @@ open class Tag : Hashable
if (lhs._formList != tag._formList) {return false}
return lhs._formSubmit == tag._formSubmit
}
/// Method-style equality check; the comparison itself is done by the `==` operator.
public func equals(_ tag: Tag) -> Bool {
    let same = (self == tag)
    return same
}
/// The hash value.
///
/// Hash values are not guaranteed to be equal across different executions of
/// your program. Do not save hash values to use during a future execution.
public var hashValue: Int
{
var result : Int = _tagName.hashValue
public var hashValue: Int {
var result: Int = _tagName.hashValue
result = Int.addWithOverflow(Int.multiplyWithOverflow(31, result).0, _isBlock ? 1 : 0).0
result = Int.addWithOverflow(Int.multiplyWithOverflow(31, result).0, _formatAsBlock ? 1 : 0).0
result = Int.addWithOverflow(Int.multiplyWithOverflow(31, result).0, _canContainBlock ? 1 : 0).0
@ -251,15 +247,14 @@ open class Tag : Hashable
result = Int.addWithOverflow(Int.multiplyWithOverflow(31, result).0, _formSubmit ? 1 : 0).0
return result
}
/// The string form of a tag is its tag name.
open func toString() -> String {
    return _tagName
}
// internal static initialisers:
// prepped from http://www.w3.org/TR/REC-html40/sgml/dtd.html and other sources
private static let blockTags : [String] = [
private static let blockTags: [String] = [
"html", "head", "body", "frameset", "script", "noscript", "style", "meta", "link", "title", "frame",
"noframes", "section", "nav", "aside", "hgroup", "header", "footer", "p", "h1", "h2", "h3", "h4", "h5", "h6",
"ul", "ol", "pre", "div", "blockquote", "hr", "address", "figure", "figcaption", "form", "fieldset", "ins",
@ -267,7 +262,7 @@ open class Tag : Hashable
"td", "video", "audio", "canvas", "details", "menu", "plaintext", "template", "article", "main",
"svg", "math"
]
private static let inlineTags : [String] = [
private static let inlineTags: [String] = [
"object", "base", "font", "tt", "i", "b", "u", "big", "small", "em", "strong", "dfn", "code", "samp", "kbd",
"var", "cite", "abbr", "time", "acronym", "mark", "ruby", "rt", "rp", "a", "img", "br", "wbr", "map", "q",
"sub", "sup", "bdo", "iframe", "embed", "span", "input", "select", "textarea", "label", "button", "optgroup",
@ -275,31 +270,29 @@ open class Tag : Hashable
"summary", "command", "device", "area", "basefont", "bgsound", "menuitem", "param", "source", "track",
"data", "bdi"
]
// Names whose table entries get the empty flag set (and inline content disallowed).
private static let emptyTags: [String] = [
    "meta", "link", "base", "frame", "img", "br", "wbr",
    "embed", "hr", "input", "keygen", "col", "command",
    "device", "area", "basefont", "bgsound", "menuitem",
    "param", "source", "track"
]

// Names whose table entries have block formatting switched off.
private static let formatAsInlineTags: [String] = [
    "title", "a", "p", "h1", "h2", "h3", "h4", "h5", "h6",
    "pre", "address", "li", "th", "td", "script", "style",
    "ins", "del", "s"
]

// Names whose table entries preserve whitespace.
// script is not here as it is a data node, which always preserve whitespace
private static let preserveWhitespaceTags: [String] = [
    "pre", "plaintext", "title", "textarea"
]

// todo: I think we just need submit tags, and can scrub listed
private static let formListedTags: [String] = [
    "button", "fieldset", "input", "keygen",
    "object", "output", "select", "textarea"
]

// Names whose table entries are flagged as submittable with a form.
private static let formSubmitTags: [String] = [
    "input", "keygen", "object", "select", "textarea"
]
static private func initializeMaps()throws->Dictionary<String, Tag>
{
static private func initializeMaps()throws->Dictionary<String, Tag> {
var dict = Dictionary<String, Tag>()
// creates
for tagName in blockTags {
let tag = Tag(tagName)
@ -312,7 +305,7 @@ open class Tag : Hashable
tag._formatAsBlock = false
dict[tag._tagName] = tag
}
// mods:
for tagName in emptyTags {
let tag = dict[tagName]
@ -321,25 +314,25 @@ open class Tag : Hashable
tag?._canContainInline = false
tag?._empty = true
}
for tagName in formatAsInlineTags {
let tag = dict[tagName]
try Validate.notNull(obj: tag)
tag?._formatAsBlock = false
}
for tagName in preserveWhitespaceTags {
let tag = dict[tagName]
try Validate.notNull(obj: tag)
tag?._preserveWhitespace = true
}
for tagName in formListedTags {
let tag = dict[tagName]
try Validate.notNull(obj: tag)
tag?._formList = true
}
for tagName in formSubmitTags {
let tag = dict[tagName]
try Validate.notNull(obj: tag)
@ -348,9 +341,3 @@ open class Tag : Hashable
return dict
}
}

View File

@ -11,15 +11,15 @@ import Foundation
/**
A text node.
*/
open class TextNode : Node {
open class TextNode: Node {
/*
TextNode is a node, and so by default comes with attributes and children. The attributes are seldom used, but use
memory, and the child nodes are never used. So we don't have them, and override accessors to attributes to create
them as needed on the fly.
*/
private static let TEXT_KEY : String = "text"
var _text : String
private static let TEXT_KEY: String = "text"
var _text: String
/**
Create a new TextNode representing the supplied (unencoded) text.
@ -31,57 +31,57 @@ open class TextNode : Node {
self._text = text
super.init()
self.baseUri = baseUri
}
/// Every text node reports the fixed node name "#text".
open override func nodeName() -> String {
    return "#text"
}
/**
 * Get the text content of this text node.
 * @return Unencoded, normalised text.
 * @see TextNode#getWholeText()
 */
open func text() -> String {
    let wholeText = getWholeText()
    return TextNode.normaliseWhitespace(wholeText)
}
/**
 * Set the text content of this text node.
 * @param text unencoded text
 * @return this, for chaining
 */
@discardableResult
public func text(_ text: String) -> TextNode {
    _text = text
    // Mirror the value into the attribute map only when one has already been created.
    if let existing = attributes {
        // Best effort: a failure to store the attribute is deliberately ignored.
        _ = try? existing.put(TextNode.TEXT_KEY, text)
    }
    return self
}
/**
 Get the (unencoded) text of this text node, including any newlines and spaces present in the original.
 Reads the stored string while no attribute map exists, otherwise the "text" attribute.
 @return text
 */
open func getWholeText() -> String {
    guard let existing = attributes else {
        return _text
    }
    return existing.get(key: TextNode.TEXT_KEY)
}
/**
 Test if this text node is blank -- that is, empty or only whitespace (including newlines).
 @return true if this node's whole text is empty or only whitespace, false if it contains any text content.
 */
open func isBlank() -> Bool {
    let wholeText = getWholeText()
    return StringUtil.isBlank(wholeText)
}
/**
* Split this text node into two nodes at the specified string offset. After splitting, this node will contain the
* original text up to the offset, and will have a new text node sibling containing the text after the offset.
@ -91,39 +91,37 @@ open class TextNode : Node {
open func splitText(_ offset: Int) throws -> TextNode {
    // Read the text once and validate against the very string that is split below.
    // Previously the upper bound was checked against `_text` while the split sliced
    // getWholeText(); those differ if the "text" attribute is changed without going
    // through text(_:), in which case the check and the slice disagreed.
    let wholeText: String = getWholeText()
    try Validate.isTrue(val: offset >= 0, msg: "Split offset must be not be negative")
    try Validate.isTrue(val: offset < wholeText.characters.count, msg: "Split offset must not be greater than current text length")

    let head: String = wholeText.substring(0, offset)
    let tail: String = wholeText.substring(offset)
    // This node keeps the head; the tail moves into a new node with the same base URI.
    text(head)
    let tailNode: TextNode = TextNode(tail, self.getBaseUri())
    // When this node has a parent, insert the tail as the sibling right after it.
    if let owner = parent() {
        try owner.addChildren(siblingIndex + 1, tailNode)
    }
    return tailNode
}
override func outerHtmlHead(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings) throws {
    if out.prettyPrint() {
        var needsIndent = false
        if siblingIndex == 0, let element = parentNode as? Element, element.tag().formatAsBlock(), !isBlank() {
            // Non-blank first child of an element whose tag formats as a block.
            needsIndent = true
        } else if out.outline() && siblingNodes().count > 0 && !isBlank() {
            // Outline mode: non-blank text that has siblings.
            needsIndent = true
        }
        if needsIndent {
            indent(accum, depth, out)
        }
    }

    // Whitespace is normalised only when pretty printing inside an element
    // that does not preserve whitespace.
    let owner = parent() as? Element
    let normaliseWhite: Bool
    if out.prettyPrint(), let element = owner {
        normaliseWhite = !Element.preserveWhitespace(element)
    } else {
        normaliseWhite = false
    }
    Entities.escape(accum, getWholeText(), out, false, normaliseWhite, false)
}
// Intentionally empty: this override appends nothing after the text written by outerHtmlHead.
override func outerHtmlTail(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings) {
}
/// The string form of a text node is its outer HTML.
open override func toString() throws -> String {
    let html = try outerHtml()
    return html
}
/**
* Create a new TextNode from HTML encoded (aka escaped) data.
* @param encodedText Text containing encoded HTML (e.g. &amp;lt;)
@ -131,82 +129,75 @@ open class TextNode : Node {
* @return TextNode containing unencoded data (e.g. &lt;)
*/
open static func createFromEncoded(_ encodedText: String, _ baseUri: String) throws -> TextNode {
    // Unescape the entities first, then wrap the plain text in a node.
    return TextNode(try Entities.unescape(encodedText), baseUri)
}
/// Thin wrapper that forwards to StringUtil.normaliseWhitespace.
static open func normaliseWhitespace(_ text: String) -> String {
    return StringUtil.normaliseWhitespace(text)
}
/// Removes the first match of the pattern `^\s+` (a leading run of whitespace) from `text`.
static open func stripLeadingWhitespace(_ text: String) -> String {
    let leadingWhitespace = "^\\s+"
    return text.replaceFirst(of: leadingWhitespace, with: "")
}
/// True when the builder's content ends with a plain space character; false when it is empty.
static open func lastCharIsWhitespace(_ sb: StringBuilder) -> Bool {
    guard let lastCharacter = sb.toString().characters.last else {
        return false
    }
    return lastCharacter == " "
}
// attribute fiddling. create on first access.
private func ensureAttributes() {
    guard attributes == nil else {
        return
    }
    attributes = Attributes()
    // Seed the new map with the current text; a failure to store it is deliberately ignored.
    _ = try? attributes?.put(TextNode.TEXT_KEY, _text)
}
/// Creates the attribute map if needed, then defers to the inherited lookup.
open override func attr(_ attributeKey: String) throws -> String {
    ensureAttributes()
    let value = try super.attr(attributeKey)
    return value
}
/// Creates the attribute map if needed, then returns it.
open override func getAttributes() -> Attributes {
    ensureAttributes()
    // ensureAttributes() has just assigned `attributes`, which the force unwrap relies on.
    let existing = super.getAttributes()
    return existing!
}
/// Creates the attribute map if needed, then defers to the inherited setter.
open override func attr(_ attributeKey: String, _ attributeValue: String) throws -> Node {
    ensureAttributes()
    let node = try super.attr(attributeKey, attributeValue)
    return node
}
/// Creates the attribute map if needed, then defers to the inherited check.
open override func hasAttr(_ attributeKey: String) -> Bool {
    ensureAttributes()
    let present = super.hasAttr(attributeKey)
    return present
}
/// Creates the attribute map if needed, then defers to the inherited removal.
open override func removeAttr(_ attributeKey: String) throws -> Node {
    ensureAttributes()
    let node = try super.removeAttr(attributeKey)
    return node
}
/// Creates the attribute map if needed, then defers to the inherited URL resolution.
open override func absUrl(_ attributeKey: String) throws -> String {
    ensureAttributes()
    let url = try super.absUrl(attributeKey)
    return url
}
/// Builds a fresh TextNode from the stored text and base URI and hands it to the inherited copy routine.
public override func copy(with zone: NSZone? = nil) -> Any {
    return super.copy(clone: TextNode(_text, baseUri))
}
/// Same as copy(with:), but passes `parent` along to the inherited copy routine.
public override func copy(parent: Node?) -> Node {
    return super.copy(clone: TextNode(_text, baseUri), parent: parent)
}
/// Adds nothing of its own; forwards both arguments to the inherited implementation.
public override func copy(clone: Node, parent: Node?) -> Node {
    let copied = super.copy(clone: clone, parent: parent)
    return copied
}
}

View File

@ -8,91 +8,88 @@
import Foundation
open class Token
{
var type : TokenType = TokenType.Doctype
open class Token {
var type: TokenType = TokenType.Doctype
// Declared private; the token classes nested in Token reach it through super.init().
private init() {
}
/// Name of the runtime class of this token, as produced by String(describing:).
func tokenType() -> String {
    let runtimeClass = type(of: self)
    return String(describing: runtimeClass)
}
/**
 * Reset the data represented by this token, for reuse. Prevents the need to create transfer objects for every
 * piece of data.
 *
 * This base implementation always traps; each token class supplies its own override.
 */
@discardableResult
public func reset() -> Token {
    preconditionFailure("This method must be overridden")
}
/// Empties the given builder; shared by the token classes' own reset() overrides.
static func reset(_ sb: StringBuilder) {
    sb.clear()
}
/// Default string form: the name of the token's runtime class.
open func toString() throws -> String {
    let runtimeClass = type(of: self)
    return String(describing: runtimeClass)
}
/// Token holding a doctype's name, public identifier, system identifier and force-quirks flag.
final class Doctype: Token {
    let name = StringBuilder()
    let publicIdentifier = StringBuilder()
    let systemIdentifier = StringBuilder()
    var forceQuirks = false

    override init() {
        super.init()
        type = .Doctype
    }

    /// Clears the three builders and the force-quirks flag so the instance can be reused.
    @discardableResult
    override func reset() -> Token {
        for builder in [name, publicIdentifier, systemIdentifier] {
            Token.reset(builder)
        }
        forceQuirks = false
        return self
    }

    func getName() -> String {
        return name.toString()
    }

    func getPublicIdentifier() -> String {
        return publicIdentifier.toString()
    }

    open func getSystemIdentifier() -> String {
        return systemIdentifier.toString()
    }

    open func isForceQuirks() -> Bool {
        return forceQuirks
    }
}
class Tag : Token {
class Tag: Token {
public var _tagName: String?
public var _normalName: String? // lc version of tag name, for case insensitive tree build
private var _pendingAttributeName: String? // attribute names are generally caught in one hop, not accumulated
private let _pendingAttributeValue : StringBuilder = StringBuilder() // but values are accumulated, from e.g. & in hrefs
private let _pendingAttributeValue: StringBuilder = StringBuilder() // but values are accumulated, from e.g. & in hrefs
private var _pendingAttributeValueS: String? // try to get attr vals in one shot, vs Builder
private var _hasEmptyAttributeValue : Bool = false // distinguish boolean attribute from empty string value
private var _hasEmptyAttributeValue: Bool = false // distinguish boolean attribute from empty string value
private var _hasPendingAttributeValue: Bool = false
public var _selfClosing : Bool = false
public var _selfClosing: Bool = false
// start tags get attributes on construction. End tags get attributes on first new attribute (but only for parser convenience, not used).
public var _attributes : Attributes = Attributes()
public var _attributes: Attributes = Attributes()
// Adds no state of its own; simply forwards to Token's initialiser.
override init() {
super.init()
}
@discardableResult
override func reset()->Tag {
override func reset() -> Tag {
_tagName = nil
_normalName = nil
_pendingAttributeName = nil
@ -104,19 +101,19 @@ open class Token
_attributes = Attributes()
return self
}
func newAttribute()throws {
// if (_attributes == nil){
// _attributes = Attributes()
// }
if (_pendingAttributeName != nil) {
var attribute : Attribute
if (_hasPendingAttributeValue){
attribute = try Attribute(key: _pendingAttributeName!,value: _pendingAttributeValue.length > 0 ? _pendingAttributeValue.toString() : _pendingAttributeValueS!)
}else if (_hasEmptyAttributeValue){
var attribute: Attribute
if (_hasPendingAttributeValue) {
attribute = try Attribute(key: _pendingAttributeName!, value: _pendingAttributeValue.length > 0 ? _pendingAttributeValue.toString() : _pendingAttributeValueS!)
} else if (_hasEmptyAttributeValue) {
attribute = try Attribute(key: _pendingAttributeName!, value: "")
}else{
} else {
attribute = try BooleanAttribute(key: _pendingAttributeName!)
}
_attributes.put(attribute: attribute)
@ -127,7 +124,7 @@ open class Token
Token.reset(_pendingAttributeValue)
_pendingAttributeValueS = nil
}
func finaliseTag()throws {
// finalises for emit
if (_pendingAttributeName != nil) {
@ -135,49 +132,49 @@ open class Token
try newAttribute()
}
}
func name() throws -> String { // preserves case, for input into Tag.valueOf (which may drop case)
    // A missing name is treated like an empty one: both fail the validation below.
    let current = _tagName ?? ""
    try Validate.isFalse(val: current.isEmpty)
    return current
}
/// Lower-cased tag name, or nil while no name has been set.
func normalName() -> String? { // loses case, used in tree building for working out where in tree it should go
    return _normalName
}
/// Stores `name` as the tag name together with its lower-cased form; returns self for chaining.
@discardableResult
func name(_ name: String) -> Tag {
    _normalName = name.lowercased()
    _tagName = name
    return self
}
/// Current value of the self-closing flag.
func isSelfClosing() -> Bool {
    return _selfClosing
}
/// The attribute collection gathered for this tag so far.
func getAttributes() -> Attributes {
    return _attributes
}
// these appenders are rarely hit in not null state-- caused by null chars.
func appendTagName(_ append: String) {
    if let existing = _tagName {
        _tagName = existing.appending(append)
    } else {
        _tagName = append
    }
    // Keep the lower-cased copy in step with the updated name.
    _normalName = _tagName?.lowercased()
}
/// Scalar convenience: renders the scalar as a string and forwards to the String overload.
func appendTagName(_ append: UnicodeScalar) {
    let text = "\(append)"
    appendTagName(text)
}
/// Starts the pending attribute name with `append`, or extends it when one is already pending.
func appendAttributeName(_ append: String) {
    if let existing = _pendingAttributeName {
        _pendingAttributeName = existing.appending(append)
    } else {
        _pendingAttributeName = append
    }
}
/// Scalar convenience: renders the scalar as a string and forwards to the String overload.
func appendAttributeName(_ append: UnicodeScalar) {
    let text = "\(append)"
    appendAttributeName(text)
}
func appendAttributeValue(_ append: String) {
ensureAttributeValue()
if (_pendingAttributeValue.length == 0) {
@ -186,28 +183,28 @@ open class Token
_pendingAttributeValue.append(append)
}
}
/// Appends a single scalar to the pending attribute value builder.
func appendAttributeValue(_ append: UnicodeScalar) {
    ensureAttributeValue()
    let builder = _pendingAttributeValue
    builder.appendCodePoint(append)
}
/// Appends a run of scalars to the pending attribute value builder.
func appendAttributeValue(_ append: [UnicodeScalar]) {
    ensureAttributeValue()
    let builder = _pendingAttributeValue
    builder.appendCodePoints(append)
}
/// Appends each code point in `appendCodepoints` to the pending attribute value.
///
/// A value that is not a valid Unicode scalar (for example a surrogate, a negative number or
/// anything above U+10FFFF) is appended as U+FFFD REPLACEMENT CHARACTER. The previous
/// implementation force-unwrapped `UnicodeScalar(codepoint)` and crashed on such input.
func appendAttributeValue(_ appendCodepoints: [Int]) {
    ensureAttributeValue()
    for codepoint in appendCodepoints {
        let scalar: UnicodeScalar = UnicodeScalar(codepoint) ?? "\u{FFFD}"
        _pendingAttributeValue.appendCodePoint(scalar)
    }
}
/// Records that the pending attribute has an explicitly empty value.
func setEmptyAttributeValue() {
    _hasEmptyAttributeValue = true
}
private func ensureAttributeValue() {
_hasPendingAttributeValue = true
// if on second hit, we'll need to move to the builder
@ -217,165 +214,161 @@ open class Token
}
}
}
/// Token for an opening tag.
final class StartTag: Tag {
    override init() {
        super.init()
        _attributes = Attributes()
        type = .StartTag
    }

    /// Resets the inherited tag state and installs a fresh attribute collection.
    @discardableResult
    override func reset() -> Tag {
        super.reset()
        _attributes = Attributes()
        // todo - would prefer these to be null, but need to check Element assertions
        return self
    }

    /// Sets the tag name (plus its lower-cased form) and replaces the attributes in one call.
    @discardableResult
    func nameAttr(_ name: String, _ attributes: Attributes) -> StartTag {
        _tagName = name
        _attributes = attributes
        _normalName = name.lowercased()
        return self
    }

    /// Renders `<name>` or, when attributes are present, `<name attributes>`.
    open override func toString() throws -> String {
        guard _attributes.size() > 0 else {
            return try "<" + name() + ">"
        }
        return try "<" + (name()) + " " + (_attributes.toString()) + ">"
    }
}
/// Token for a closing tag.
final class EndTag: Tag {
    override init() {
        super.init()
        type = .EndTag
    }

    /// Renders `</name>`.
    open override func toString() throws -> String {
        let tagName = try name()
        return "</" + tagName + ">"
    }
}
/// Token for a comment: its accumulated text plus a flag marking it as bogus.
final class Comment: Token {
    let data = StringBuilder()
    var bogus = false

    override init() {
        super.init()
        type = .Comment
    }

    /// Clears the comment text and the bogus flag so the instance can be reused.
    @discardableResult
    override func reset() -> Token {
        Token.reset(data)
        bogus = false
        return self
    }

    func getData() -> String {
        return data.toString()
    }

    /// Renders the comment text wrapped in `<!--` and `-->`.
    open override func toString() throws -> String {
        let text = getData()
        return "<!--" + text + "-->"
    }
}
/// Token carrying character data; `data` is nil until set.
final class Char: Token {
    public var data: String?

    override init() {
        super.init()
        type = .Char
    }

    /// Drops the stored data so the instance can be reused.
    @discardableResult
    override func reset() -> Token {
        data = nil
        return self
    }

    /// Stores `data` and returns self for chaining.
    @discardableResult
    func data(_ data: String) -> Char {
        self.data = data
        return self
    }

    func getData() -> String? {
        return data
    }

    /// Returns the stored data; validation throws first when none has been set.
    open override func toString() throws -> String {
        let current = getData()
        try Validate.notNull(obj: current)
        return current!
    }
}
/// Token marking the end of input; it carries no data.
final class EOF: Token {
    override init() {
        super.init()
        type = .EOF
    }

    /// Nothing to clear: returns self unchanged.
    @discardableResult
    override func reset() -> Token {
        return self
    }
}
/// True when this token's type tag is Doctype.
func isDoctype() -> Bool {
    return type == .Doctype
}
/// Forced downcast to Doctype; traps when the token is of another class.
func asDoctype() -> Doctype {
    let token = self as! Doctype
    return token
}
/// True when this token's type tag is StartTag.
func isStartTag() -> Bool {
    return type == .StartTag
}
/// Forced downcast to StartTag; traps when the token is of another class.
func asStartTag() -> StartTag {
    let token = self as! StartTag
    return token
}
/// True when this token's type tag is EndTag.
func isEndTag() -> Bool {
    return type == .EndTag
}
/// Forced downcast to EndTag; traps when the token is of another class.
func asEndTag() -> EndTag {
    let token = self as! EndTag
    return token
}
/// True when this token's type tag is Comment.
func isComment() -> Bool {
    return type == .Comment
}
/// Forced downcast to Comment; traps when the token is of another class.
func asComment() -> Comment {
    let token = self as! Comment
    return token
}
/// True when this token's type tag is Char.
func isCharacter() -> Bool {
    return type == .Char
}
/// Forced downcast to Char; traps when the token is of another class.
func asCharacter() -> Char {
    let token = self as! Char
    return token
}
/// True when this token's type tag is EOF.
func isEOF() -> Bool {
    return type == .EOF
}
public enum TokenType {
case Doctype
case StartTag

View File

@ -8,13 +8,12 @@
import Foundation
open class TokenQueue
{
private var queue : String
private var pos : Int = 0
private static let ESC : Character = "\\" // escape char for chomp balanced.
open class TokenQueue {
private var queue: String
private var pos: Int = 0
private static let ESC: Character = "\\" // escape char for chomp balanced.
/**
Create a new TokenQueue.
@param data string of data to back queue.
@ -22,27 +21,27 @@ open class TokenQueue
public init (_ data: String) {
    // The read position keeps its initial value of 0.
    self.queue = data
}
/**
 * Is the queue empty?
 * @return true if no data left in queue.
 */
open func isEmpty() -> Bool {
    let left = remainingLength()
    return left == 0
}
/// Number of characters between the read position and the end of the backing string.
private func remainingLength() -> Int {
    let total = queue.characters.count
    return total - pos
}
/**
 * Retrieves but does not remove the first character from the queue.
 * @return First character, or the NUL character (U+0000) if empty.
 */
open func peek() -> Character {
    guard !isEmpty() else {
        return Character(UnicodeScalar(0))
    }
    return queue[pos]
}
/**
Add a character to the start of the queue (will be the next character retrieved).
@param c character to add
@ -50,7 +49,7 @@ open class TokenQueue
open func addFirst(_ c: Character) {
    // Wrap the character in a string and reuse the String overload.
    let text = String(c)
    addFirst(text)
}
/**
Add a string to the start of the queue.
@param seq string to add.
@ -60,60 +59,60 @@ open class TokenQueue
queue = seq + queue.substring(pos)
pos = 0
}
/**
 * Tests if the next characters on the queue match the sequence. Case insensitive.
 * @param seq String to check queue for.
 * @return true if the next characters match.
 */
open func matches(_ seq: String) -> Bool {
    let length = seq.characters.count
    return queue.regionMatches(true, pos, seq, 0, length)
}
/**
 * Case sensitive match test.
 * @param seq string to case sensitively check for
 * @return true if matched, false if not
 */
open func matchesCS(_ seq: String) -> Bool {
    let matched = queue.startsWith(seq, pos)
    return matched
}
/**
 Tests if the next characters match any of the sequences. Case insensitive.
 @param seq list of strings to case insensitively check for
 @return true if any matched, false if none did
 */
open func matchesAny(_ seq: [String]) -> Bool {
    for candidate in seq where matches(candidate) {
        return true
    }
    return false
}
/// Variadic convenience for the array-based overload.
open func matchesAny(_ seq: String...) -> Bool {
    return matchesAny(seq)
}
/// True when the queue is not empty and its next character equals one of `seq`.
open func matchesAny(_ seq: Character...) -> Bool {
    guard !isEmpty() else {
        return false
    }
    let next: Character = queue[pos]
    return seq.contains(next)
}
/// True when at least two characters remain and they are "<" followed by a letter.
open func matchesStartTag() -> Bool {
    // micro opt for matching "<x"
    guard remainingLength() >= 2 else {
        return false
    }
    guard (queue[pos] as Character) == "<" else {
        return false
    }
    return Character.isLetter(queue.charAt(pos + 1))
}
/**
* Tests if the queue matches the sequence (as with match), and if they do, removes the matched string from the
* queue.
@ -121,7 +120,7 @@ open class TokenQueue
* @return true if found and removed, false if not found.
*/
@discardableResult
open func matchChomp(_ seq: String)->Bool {
open func matchChomp(_ seq: String) -> Bool {
if (matches(seq)) {
pos += seq.characters.count
return true
@ -129,41 +128,41 @@ open class TokenQueue
return false
}
}
/**
 Tests if queue starts with a whitespace character.
 @return if starts with whitespace; false when the queue is empty
 */
open func matchesWhitespace() -> Bool {
    if isEmpty() {
        return false
    }
    return StringUtil.isWhitespace(queue.charAt(pos))
}
/**
 Test if the queue matches a word character (letter or digit).
 @return if matches a word character; false when the queue is empty
 */
open func matchesWord() -> Bool {
    if isEmpty() {
        return false
    }
    return Character.isLetterOrDigit(queue.charAt(pos))
}
/**
 * Drops the next character off the queue. Does nothing when the queue is empty.
 */
open func advance() {
    guard !isEmpty() else {
        return
    }
    pos += 1
}
/**
 * Consume one character off queue.
 * @return first character on queue.
 */
open func consume() -> Character {
    // The position moves forward once the current character has been read.
    defer { pos += 1 }
    return queue.charAt(pos)
}
/**
* Consumes the supplied sequence of the queue. If the queue does not start with the supplied sequence, will
* throw an illegal state exception -- but you should be running match() against that condition.
@ -172,26 +171,26 @@ open class TokenQueue
* @param seq sequence to remove from head of queue.
*/
open func consume(_ seq: String) throws {
    guard matches(seq) else {
        //throw new IllegalStateException("Queue did not match expected sequence")
        throw Exception.Error(type: ExceptionType.IllegalArgumentException, Message: "Queue did not match expected sequence")
    }
    let len = seq.characters.count
    guard len <= remainingLength() else {
        //throw new IllegalStateException("Queue not long enough to consume sequence")
        throw Exception.Error(type: ExceptionType.IllegalArgumentException, Message: "Queue not long enough to consume sequence")
    }
    // Both checks passed: step over the matched sequence.
    pos += len
}
/**
* Pulls a string off the queue, up to but exclusive of the match sequence, or to the queue running out.
* @param seq String to end on (and not include in return, but leave on queue). <b>Case sensitive.</b>
* @return The matched data consumed from queue.
*/
@discardableResult
open func consumeTo(_ seq: String)->String {
open func consumeTo(_ seq: String) -> String {
let offset = queue.indexOf(seq, pos)
if (offset != -1) {
let consumed = queue.substring(pos, offset-pos)
@ -202,33 +201,32 @@ open class TokenQueue
}
return ""
}
/// Advances until the queue case-insensitively matches `seq` or runs out, and returns the text stepped over.
open func consumeToIgnoreCase(_ seq: String) -> String {
    let start = pos
    let first = seq.substring(0, 1)
    let canScan = first.lowercased() == first.uppercased() // if first is not cased, use index of

    while !isEmpty() && !matches(seq) {
        guard canScan else {
            pos += 1
            continue
        }
        let skip = queue.indexOf(first, pos) - pos
        if skip < 0 { // no chance of finding, grab to end
            pos = queue.characters.count
        } else if skip == 0 { // this char is the skip char, but not match, so force advance of pos
            pos += 1
        } else {
            pos += skip
        }
    }
    return queue.substring(start, pos - start)
}
/**
Consumes to the first sequence provided, or to the end of the queue. Leaves the terminator on the queue.
@param seq any number of terminators to consume to. <b>Case insensitive.</b>
@ -236,15 +234,15 @@ open class TokenQueue
*/
// todo: method name. It is not good that consumeTo is case sensitive while consumeToAny is not. And the only use
// for this is a case sensitive one...
/// Variadic convenience for the array-based overload.
open func consumeToAny(_ seq: String...) -> String {
    let terminators: [String] = seq
    return consumeToAny(terminators)
}
/// Advances until the queue matches one of `seq` or runs out, and returns the text stepped over.
open func consumeToAny(_ seq: [String]) -> String {
    let start = pos
    while !isEmpty() {
        if matchesAny(seq) {
            break
        }
        pos += 1
    }
    return queue.substring(start, pos - start)
}
/**
@ -255,18 +253,18 @@ open class TokenQueue
* @param seq String to match up to, and not include in return, and to pull off queue. <b>Case sensitive.</b>
* @return Data matched from queue.
*/
open func chompTo(_ seq: String) -> String {
    // Runs after the text before `seq` has been consumed: drop the terminator when present.
    defer { matchChomp(seq) }
    return consumeTo(seq)
}
/// Like chompTo, but scans for `seq` case insensitively.
open func chompToIgnoreCase(_ seq: String) -> String {
    // Runs after the scan: drop the terminator when present.
    defer { matchChomp(seq) }
    return consumeToIgnoreCase(seq) // case insensitive scan
}
/**
* Pulls a balanced string off the queue. E.g. if queue is "(one (two) three) four", (,) will return "one (two) three",
* and leave " four" on the queue. Unbalanced openers and closers can quoted (with ' or ") or escaped (with \). Those escapes will be left
@ -276,71 +274,68 @@ open class TokenQueue
* @param close closer
* @return data matched from the queue
*/
open func chompBalanced(_ open: Character, _ close: Character) -> String {
    var startIndex = -1
    var endIndex = -1
    var nesting = 0
    var previous: Character = Character(UnicodeScalar(0))
    var quoted = false

    repeat {
        guard !isEmpty() else {
            break
        }
        let current = consume()
        // Only a character that does not follow the escape character may toggle
        // quoting or change the nesting depth.
        if previous.unicodeScalar.value == 0 || previous != TokenQueue.ESC {
            let isQuoteMark = (current == "'" || current == "\"")
            if isQuoteMark && current != open {
                quoted = !quoted
            }
            if quoted {
                continue
            }
            if current == open {
                nesting += 1
                if startIndex == -1 {
                    startIndex = pos
                }
            } else if current == close {
                nesting -= 1
            }
        }
        if nesting > 0 && previous.unicodeScalar.value != 0 {
            endIndex = pos // don't include the outer match pair in the return
        }
        previous = current
    } while nesting > 0

    guard endIndex >= 0 else {
        return ""
    }
    return queue.substring(startIndex, endIndex - startIndex)
}
/**
 * Unescape a \ escaped string.
 *
 * A backslash opens an escape and is dropped; the character after it is kept as is, so an
 * escaped backslash ("\\") yields a single backslash. A trailing lone backslash is dropped.
 *
 * @param input backslash escaped string
 * @return unescaped string
 */
open static func unescape(_ input: String) -> String {
    let out = StringBuilder()
    // True while the previous character was a backslash that opened an escape and the
    // escaped character has not been seen yet.
    var escapePending = false
    for c in input.characters {
        if c == ESC {
            if escapePending {
                // Escaped backslash: emit one and close the escape. Previously the state was
                // left open here, so a third consecutive backslash was emitted as well
                // instead of starting a new escape.
                out.append(c)
                escapePending = false
            } else {
                escapePending = true
            }
        } else {
            out.append(c)
            escapePending = false
        }
    }
    return out.toString()
}
/**
* Pulls the next run of whitespace characters of the queue.
* @return Whether consuming whitespace or not
*/
@discardableResult
open func consumeWhitespace()->Bool {
open func consumeWhitespace() -> Bool {
var seen = false
while (matchesWhitespace()) {
pos+=1
@ -348,87 +343,86 @@ open class TokenQueue
}
return seen
}
/**
 * Retrieves the next run of word type (letter or digit) off the queue.
 * @return String of word characters from queue, or empty string if none.
 */
@discardableResult
open func consumeWord() -> String {
    let start = pos
    while true {
        guard matchesWord() else {
            break
        }
        pos += 1
    }
    return queue.substring(start, pos - start)
}
/**
 * Consume a tag name off the queue (word or :, _, -)
 *
 * @return tag name
 */
open func consumeTagName() -> String {
    let start = pos
    while !isEmpty() {
        guard matchesWord() || matchesAny(":", "_", "-") else {
            break
        }
        pos += 1
    }
    return queue.substring(start, pos - start)
}
/**
 * Consume a CSS element selector (tag name, but | instead of : for namespaces (or *| for wildcard namespace), to not conflict with :pseudo selects).
 *
 * @return tag name
 */
open func consumeElementSelector() -> String {
    let start = pos
    while !isEmpty() {
        guard matchesWord() || matchesAny("*|", "|", "_", "-") else {
            break
        }
        pos += 1
    }
    return queue.substring(start, pos - start)
}
/**
 Consume a CSS identifier (ID or class) off the queue (letter, digit, -, _)
 http://www.w3.org/TR/CSS2/syndata.html#value-def-identifier
 @return identifier
 */
open func consumeCssIdentifier() -> String {
    let start = pos
    while !isEmpty() {
        guard matchesWord() || matchesAny("-", "_") else {
            break
        }
        pos += 1
    }
    return queue.substring(start, pos - start)
}
/**
Consume an attribute key off the queue (letter, digit, -, _, :)
@return attribute key
*/
open func consumeAttributeKey()->String {
open func consumeAttributeKey() -> String {
let start = pos
while (!isEmpty() && (matchesWord() || matchesAny("-", "_", ":"))){
while (!isEmpty() && (matchesWord() || matchesAny("-", "_", ":"))) {
pos+=1
}
return queue.substring(start, pos-start)
}
/**
Consume and return whatever is left on the queue.
@return remainder of the queue.
*/
open func remainder()->String {
open func remainder() -> String {
let remainder = queue.substring(pos, queue.characters.count-pos)
pos = queue.characters.count
return remainder
}
open func toString()->String {
open func toString() -> String {
return queue.substring(pos)
}
}

View File

@ -8,46 +8,44 @@
import Foundation
final class Tokeniser
{
static let replacementChar : UnicodeScalar = "\u{FFFD}" // replaces null character
private static let notCharRefCharsSorted : [UnicodeScalar] = ["\t", "\n", "\r",UnicodeScalar.BackslashF, " ", "<", "&"].sorted()
private let reader : CharacterReader // html input
private let errors : ParseErrorList? // errors found while tokenising
final class Tokeniser {
static let replacementChar: UnicodeScalar = "\u{FFFD}" // replaces null character
private static let notCharRefCharsSorted: [UnicodeScalar] = ["\t", "\n", "\r", UnicodeScalar.BackslashF, " ", "<", "&"].sorted()
private let reader: CharacterReader // html input
private let errors: ParseErrorList? // errors found while tokenising
private var state: TokeniserState = TokeniserState.Data // current tokenisation state
private var emitPending: Token? // the token we are about to emit on next read
private var isEmitPending : Bool = false
private var charsString : String? = nil // characters pending an emit. Will fall to charsBuilder if more than one
private let charsBuilder : StringBuilder = StringBuilder(1024) // buffers characters to output as one token, if more than one emit per read
let dataBuffer : StringBuilder = StringBuilder(1024) // buffers data looking for </script>
var tagPending : Token.Tag = Token.Tag() // tag we are building up
let startPending : Token.StartTag = Token.StartTag()
private var isEmitPending: Bool = false
private var charsString: String? = nil // characters pending an emit. Will fall to charsBuilder if more than one
private let charsBuilder: StringBuilder = StringBuilder(1024) // buffers characters to output as one token, if more than one emit per read
let dataBuffer: StringBuilder = StringBuilder(1024) // buffers data looking for </script>
var tagPending: Token.Tag = Token.Tag() // tag we are building up
let startPending: Token.StartTag = Token.StartTag()
let endPending: Token.EndTag = Token.EndTag()
let charPending: Token.Char = Token.Char()
let doctypePending: Token.Doctype = Token.Doctype() // doctype building up
let commentPending: Token.Comment = Token.Comment() // comment building up
private var lastStartTag: String? // the last start tag emitted, to test appropriate end tag
private var selfClosingFlagAcknowledged: Bool = true
/// Creates a tokeniser over `reader`.
/// - Parameters:
///   - reader: the HTML input to tokenise.
///   - errors: list that receives tokenisation errors; the error helpers do nothing when it is nil.
init(_ reader: CharacterReader, _ errors: ParseErrorList?) {
self.reader = reader
self.errors = errors
}
func read()throws->Token {
if (!selfClosingFlagAcknowledged) {
error("Self closing flag not acknowledged")
selfClosingFlagAcknowledged = true
}
while (!isEmitPending){
while (!isEmitPending) {
try state.read(self, reader)
}
// if emit is pending, a non-character token was found: return any chars in buffer, and leave token for next read:
if (charsBuilder.length > 0) {
let str: String = charsBuilder.toString()
@ -55,7 +53,7 @@ final class Tokeniser
charsString = nil
return charPending.data(str)
} else if (charsString != nil) {
let token : Token = charPending.data(charsString!)
let token: Token = charPending.data(charsString!)
charsString = nil
return token
} else {
@ -63,84 +61,83 @@ final class Tokeniser
return emitPending!
}
}
/// Stores `token` as the pending token and flags that an emit is pending.
/// Throws (through `Validate.isFalse`) when a previously emitted token is still pending.
func emit(_ token: Token)throws {
try Validate.isFalse(val: isEmitPending, msg: "There is an unread token pending!")
emitPending = token
isEmitPending = true
if (token.type == Token.TokenType.StartTag) {
let startTag : Token.StartTag = token as! Token.StartTag
let startTag: Token.StartTag = token as! Token.StartTag
// remembered so a later end tag can be compared against it (see isAppropriateEndTagToken)
lastStartTag = startTag._tagName!
if (startTag._selfClosing){
if (startTag._selfClosing) {
// stays false until acknowledgeSelfClosingFlag() is called; read() reports an error otherwise
selfClosingFlagAcknowledged = false
}
} else if (token.type == Token.TokenType.EndTag) {
let endTag : Token.EndTag = token as! Token.EndTag
if (endTag._attributes.size() != 0){
let endTag: Token.EndTag = token as! Token.EndTag
if (endTag._attributes.size() != 0) {
error("Attributes incorrectly present on end tag")
}
}
}
func emit(_ str: String ) {
// buffer strings up until last string token found, to emit only one token for a run of character refs etc.
// does not set isEmitPending; read checks that
if (charsString == nil) {
charsString = str
}
else {
} else {
if (charsBuilder.length == 0) { // switching to string builder as more than one emit before read
charsBuilder.append(charsString!)
}
charsBuilder.append(str)
}
}
func emit(_ chars: [UnicodeScalar]) {
emit(String(chars.map{Character($0)}))
emit(String(chars.map {Character($0)}))
}
// func emit(_ codepoints: [Int]) {
// emit(String(codepoints, 0, codepoints.length));
// }
/// Buffers a single scalar as character data by forwarding it, as a String, to `emit(_: String)`.
func emit(_ c: UnicodeScalar) {
emit(String(c))
}
func getState()->TokeniserState {
func getState() -> TokeniserState {
return state
}
/// Switches the tokeniser to `state`; the reader position is not touched.
func transition(_ state: TokeniserState) {
self.state = state
}
/// Calls `reader.advance()` and then switches the tokeniser to `state`.
func advanceTransition(_ state: TokeniserState) {
reader.advance()
self.state = state
}
/// Marks the self-closing flag as handled, so the next `read()` does not report
/// "Self closing flag not acknowledged".
func acknowledgeSelfClosingFlag() {
selfClosingFlagAcknowledged = true
}
private var codepointHolder: [UnicodeScalar] = [UnicodeScalar(0)!] // holder to not have to keep creating arrays
private var multipointHolder: [UnicodeScalar] = [UnicodeScalar(0)!,UnicodeScalar(0)!]
private var multipointHolder: [UnicodeScalar] = [UnicodeScalar(0)!, UnicodeScalar(0)!]
func consumeCharacterReference(_ additionalAllowedCharacter: UnicodeScalar?, _ inAttribute: Bool)throws->[UnicodeScalar]? {
if (reader.isEmpty()){
if (reader.isEmpty()) {
return nil
}
if (additionalAllowedCharacter != nil && additionalAllowedCharacter == reader.current()){
if (additionalAllowedCharacter != nil && additionalAllowedCharacter == reader.current()) {
return nil
}
if (reader.matchesAnySorted(Tokeniser.notCharRefCharsSorted)){
if (reader.matchesAnySorted(Tokeniser.notCharRefCharsSorted)) {
return nil
}
var codeRef: [UnicodeScalar] = codepointHolder
reader.markPos()
if (reader.matchConsume("#")) { // numbered
@ -151,18 +148,16 @@ final class Tokeniser
reader.rewindToMark()
return nil
}
if (!reader.matchConsume(";")){
if (!reader.matchConsume(";")) {
characterReferenceError("missing semicolon") // missing semi
}
var charval : Int = -1
var charval: Int = -1
let base: Int = isHexMode ? 16 : 10
if let num = Int(numRef,radix: base)
{
if let num = Int(numRef, radix: base) {
charval = num
}
if (charval == -1 || (charval >= 0xD800 && charval <= 0xDFFF) || charval > 0x10FFFF) {
characterReferenceError("character outside of valid range")
codeRef[0] = Tokeniser.replacementChar
@ -175,14 +170,14 @@ final class Tokeniser
}
} else { // named
// get as many letters as possible, and look for matching entities.
let nameRef : String = reader.consumeLetterThenDigitSequence()
let nameRef: String = reader.consumeLetterThenDigitSequence()
let looksLegit: Bool = reader.matches(";")
// found if a base named entity without a ;, or an extended entity with the ;.
let found: Bool = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit))
if (!found) {
reader.rewindToMark()
if (looksLegit){ // named with semicolon
if (looksLegit) { // named with semicolon
characterReferenceError("invalid named referenece '\(nameRef)'")
}
return nil
@ -192,7 +187,7 @@ final class Tokeniser
reader.rewindToMark()
return nil
}
if (!reader.matchConsume(";")){
if (!reader.matchConsume(";")) {
characterReferenceError("missing semicolon") // missing semi
}
let numChars: Int = Entities.codepointsForName(nameRef, codepoints: &multipointHolder)
@ -207,114 +202,110 @@ final class Tokeniser
}
}
}
/// Resets the reusable start-tag token (when `start` is true) or the reusable end-tag
/// token (otherwise), makes it the pending tag, and returns it.
@discardableResult
func createTagPending(_ start: Bool)->Token.Tag {
    if start {
        tagPending = startPending.reset()
    } else {
        tagPending = endPending.reset()
    }
    return tagPending
}
/// Finalises the tag being built (`tagPending.finaliseTag()`) and emits it as the pending token.
/// Rethrows any error from finalising or emitting.
func emitTagPending()throws {
try tagPending.finaliseTag()
try emit(tagPending)
}
/// Resets the reusable comment token so a new comment can be built up.
func createCommentPending() {
commentPending.reset()
}
/// Emits the comment token being built; rethrows any error from `emit`.
func emitCommentPending()throws {
try emit(commentPending)
}
/// Resets the reusable doctype token so a new doctype can be built up.
func createDoctypePending() {
doctypePending.reset()
}
/// Emits the doctype token being built; rethrows any error from `emit`.
func emitDoctypePending()throws {
try emit(doctypePending)
}
/// Resets `dataBuffer` through `Token.reset` (presumably emptying it — confirm against Token).
func createTempBuffer() {
Token.reset(dataBuffer)
}
/// Reports whether the pending tag's name equals, ignoring case, the name of the last start tag emitted.
/// Returns false when no start tag has been emitted yet; rethrows any error from `tagPending.name()`.
func isAppropriateEndTagToken()throws->Bool {
if(lastStartTag != nil){
if(lastStartTag != nil) {
let s = try tagPending.name()
return s.equalsIgnoreCase(string: lastStartTag!)
}
return false
}
/// Returns the name of the last start tag emitted, or nil when none has been emitted.
func appropriateEndTagName()->String? {
if (lastStartTag == nil){
func appropriateEndTagName() -> String? {
if (lastStartTag == nil) {
return nil
}
return lastStartTag
}
/// Records an "Unexpected character" parse error for the reader's current character while in `state`.
/// Does nothing when there is no error list or `canAddError()` returns false.
func error(_ state: TokeniserState) {
if (errors != nil && errors!.canAddError()){
if (errors != nil && errors!.canAddError()) {
errors?.add(ParseError(reader.getPos(), "Unexpected character '\(String(reader.current()))' in input state [\(state.description)]"))
}
}
/// Records an unexpected-end-of-file parse error for `state` at the reader's current position.
/// Does nothing when there is no error list or `canAddError()` returns false.
func eofError(_ state: TokeniserState) {
if (errors != nil && errors!.canAddError()){
if (errors != nil && errors!.canAddError()) {
errors?.add(ParseError(reader.getPos(), "Unexpectedly reached end of file (EOF) in input state [\(state.description)]"))
}
}
/// Records an "Invalid character reference" parse error carrying `message` at the reader's current position.
/// Does nothing when there is no error list or `canAddError()` returns false.
private func characterReferenceError(_ message: String) {
if (errors != nil && errors!.canAddError()){
if (errors != nil && errors!.canAddError()) {
errors?.add(ParseError(reader.getPos(), "Invalid character reference: \(message)"))
}
}
/// Records a parse error with the given message at the reader's current position.
/// Does nothing when there is no error list or `canAddError()` returns false.
private func error(_ errorMsg: String) {
if (errors != nil && errors!.canAddError()){
if (errors != nil && errors!.canAddError()) {
errors?.add(ParseError(reader.getPos(), errorMsg))
}
}
/// Placeholder for namespace handling: currently always returns true.
func currentNodeInHtmlNS()->Bool {
func currentNodeInHtmlNS() -> Bool {
// todo: implement namespaces correctly
return true
// Intended logic, kept from the Java original:
// Element currentNode = currentNode()
// return currentNode != null && currentNode.namespace().equals("HTML")
}
/**
* Utility method to consume the reader until it is empty and unescape entities found within.
* @param inAttribute passed through to consumeCharacterReference for every "&" encountered
* @return unescaped string from reader
*/
func unescapeEntities(_ inAttribute: Bool)throws->String {
let builder : StringBuilder = StringBuilder()
let builder: StringBuilder = StringBuilder()
while (!reader.isEmpty()) {
// copy everything up to the next "&" unchanged
builder.append(reader.consumeTo("&"))
if (reader.matches("&")) {
reader.consume()
if let c = try consumeCharacterReference(nil, inAttribute)
{
if (c.count==0){
if let c = try consumeCharacterReference(nil, inAttribute) {
if (c.count==0) {
// empty result: keep the "&" literally
builder.append("&")
}else {
} else {
// a reference yields one code point, or two when c.count == 2
builder.appendCodePoint(c[0])
if (c.count == 2){
if (c.count == 2) {
builder.appendCodePoint(c[1])
}
}
}else {
} else {
// no character reference could be consumed: keep the "&" literally
builder.append("&")
}
}
}
}
return builder.toString()
}
}

View File

@ -12,22 +12,20 @@ protocol TokeniserStateProtocol {
func read(_ t: Tokeniser, _ r: CharacterReader)throws
}
public class TokeniserStateVars
{
public static let nullScalr : UnicodeScalar = "\u{0000}"
public class TokeniserStateVars {
public static let nullScalr: UnicodeScalar = "\u{0000}"
static let attributeSingleValueCharsSorted = ["'", "&", nullScalr].sorted()
static let attributeDoubleValueCharsSorted = ["\"", "&", nullScalr].sorted()
static let attributeNameCharsSorted = ["\t", "\n", "\r", UnicodeScalar.BackslashF, " ", "/", "=", ">", nullScalr, "\"", "'", "<"].sorted()
static let attributeValueUnquoted = ["\t", "\n", "\r", UnicodeScalar.BackslashF, " ", "&", ">", nullScalr, "\"", "'", "<", "=", "`"].sorted()
static let replacementChar : UnicodeScalar = Tokeniser.replacementChar
static let replacementStr : String = String(Tokeniser.replacementChar)
static let eof : UnicodeScalar = CharacterReader.EOF
static let replacementChar: UnicodeScalar = Tokeniser.replacementChar
static let replacementStr: String = String(Tokeniser.replacementChar)
static let eof: UnicodeScalar = CharacterReader.EOF
}
enum TokeniserState: TokeniserStateProtocol
{
enum TokeniserState: TokeniserStateProtocol {
case Data
case CharacterReferenceInData
case Rcdata
@ -95,11 +93,9 @@ enum TokeniserState: TokeniserStateProtocol
case AfterDoctypeSystemIdentifier
case BogusDoctype
case CdataSection
internal func read(_ t: Tokeniser, _ r: CharacterReader)throws
{
switch self
{
internal func read(_ t: Tokeniser, _ r: CharacterReader)throws {
switch self {
case .Data:
switch (r.current()) {
case "&":
@ -217,7 +213,7 @@ enum TokeniserState: TokeniserStateProtocol
//String tagName = r.consumeToAnySorted(tagCharsSorted).toLowerCase()
let tagName = r.consumeTagName()
t.tagPending.appendTagName(tagName)
switch (r.consume()) {
case "\t":
t.transition(.BeforeAttributeName)
@ -285,55 +281,54 @@ enum TokeniserState: TokeniserStateProtocol
t.dataBuffer.append(name)
return
}
func anythingElse(_ t: Tokeniser, _ r: CharacterReader)
{
func anythingElse(_ t: Tokeniser, _ r: CharacterReader) {
t.emit("</" + t.dataBuffer.toString())
r.unconsume()
t.transition(.Rcdata)
}
let c = r.consume()
switch (c) {
case "\t":
if (try t.isAppropriateEndTagToken()){
if (try t.isAppropriateEndTagToken()) {
t.transition(.BeforeAttributeName)
}else{
} else {
anythingElse(t, r)
}
break
case "\n":
if (try t.isAppropriateEndTagToken()){
if (try t.isAppropriateEndTagToken()) {
t.transition(.BeforeAttributeName)
}else{
} else {
anythingElse(t, r)
}
break
case "\r":
if (try t.isAppropriateEndTagToken()){
if (try t.isAppropriateEndTagToken()) {
t.transition(.BeforeAttributeName)
}else{
} else {
anythingElse(t, r)
}
break
case UnicodeScalar.BackslashF:
if (try t.isAppropriateEndTagToken()){
if (try t.isAppropriateEndTagToken()) {
t.transition(.BeforeAttributeName)
}else{
} else {
anythingElse(t, r)
}
break
case " ":
if (try t.isAppropriateEndTagToken()){
if (try t.isAppropriateEndTagToken()) {
t.transition(.BeforeAttributeName)
}else{
} else {
anythingElse(t, r)
}
break
case "/":
if (try t.isAppropriateEndTagToken()){
if (try t.isAppropriateEndTagToken()) {
t.transition(.SelfClosingStartTag)
}else{
} else {
anythingElse(t, r)
}
break
@ -341,8 +336,7 @@ enum TokeniserState: TokeniserStateProtocol
if (try t.isAppropriateEndTagToken()) {
try t.emitTagPending()
t.transition(.Data)
}
else{anythingElse(t, r)}
} else {anythingElse(t, r)}
break
default:
anythingElse(t, r)
@ -408,7 +402,7 @@ enum TokeniserState: TokeniserStateProtocol
t.transition(.Data)
return
}
switch (r.current()) {
case "-":
t.emit("-")
@ -433,7 +427,7 @@ enum TokeniserState: TokeniserStateProtocol
t.transition(.Data)
return
}
let c = r.consume()
switch (c) {
case "-":
@ -459,7 +453,7 @@ enum TokeniserState: TokeniserStateProtocol
t.transition(.Data)
return
}
let c = r.consume()
switch (c) {
case "-":
@ -601,7 +595,7 @@ enum TokeniserState: TokeniserStateProtocol
}
break
case .ScriptDataDoubleEscapeEnd:
TokeniserState.handleDataDoubleEscapeTag(t,r, .ScriptDataEscaped, .ScriptDataDoubleEscaped)
TokeniserState.handleDataDoubleEscapeTag(t, r, .ScriptDataEscaped, .ScriptDataDoubleEscaped)
break
case .BeforeAttributeName:
// from tagname <xxx
@ -671,7 +665,7 @@ enum TokeniserState: TokeniserStateProtocol
case .AttributeName:
let name = r.consumeToAnySorted(TokeniserStateVars.attributeNameCharsSorted)
t.tagPending.appendAttributeName(name)
let c = r.consume()
switch (c) {
case "\t":
@ -724,7 +718,7 @@ enum TokeniserState: TokeniserStateProtocol
case .AfterAttributeName:
let c = r.consume()
switch (c) {
case "\t","\n","\r",UnicodeScalar.BackslashF," ":
case "\t", "\n", "\r", UnicodeScalar.BackslashF, " ":
// ignore
break
case "/":
@ -746,7 +740,7 @@ enum TokeniserState: TokeniserStateProtocol
t.eofError(self)
t.transition(.Data)
break
case "\"","'","<":
case "\"", "'", "<":
t.error(self)
try t.tagPending.newAttribute()
t.tagPending.appendAttributeName(c)
@ -761,7 +755,7 @@ enum TokeniserState: TokeniserStateProtocol
case .BeforeAttributeValue:
let c = r.consume()
switch (c) {
case "\t","\n","\r",UnicodeScalar.BackslashF," ":
case "\t", "\n", "\r", UnicodeScalar.BackslashF, " ":
// ignore
break
case "\"":
@ -789,7 +783,7 @@ enum TokeniserState: TokeniserStateProtocol
try t.emitTagPending()
t.transition(.Data)
break
case "<","=","`":
case "<", "=", "`":
t.error(self)
t.tagPending.appendAttributeValue(c)
t.transition(.AttributeValue_unquoted)
@ -801,22 +795,22 @@ enum TokeniserState: TokeniserStateProtocol
break
case .AttributeValue_doubleQuoted:
let value = r.consumeToAny(TokeniserStateVars.attributeDoubleValueCharsSorted)
if (value.characters.count > 0){
if (value.characters.count > 0) {
t.tagPending.appendAttributeValue(value)
}else{
} else {
t.tagPending.setEmptyAttributeValue()
}
let c = r.consume()
switch (c) {
case "\"":
t.transition(.AfterAttributeValue_quoted)
break
case "&":
if let ref = try t.consumeCharacterReference("\"", true){
if let ref = try t.consumeCharacterReference("\"", true) {
t.tagPending.appendAttributeValue(ref)
}else{
} else {
t.tagPending.appendAttributeValue("&")
}
break
@ -835,22 +829,22 @@ enum TokeniserState: TokeniserStateProtocol
break
case .AttributeValue_singleQuoted:
let value = r.consumeToAny(TokeniserStateVars.attributeSingleValueCharsSorted)
if (value.characters.count > 0){
if (value.characters.count > 0) {
t.tagPending.appendAttributeValue(value)
}else{
} else {
t.tagPending.setEmptyAttributeValue()
}
let c = r.consume()
switch (c) {
case "'":
t.transition(.AfterAttributeValue_quoted)
break
case "&":
if let ref = try t.consumeCharacterReference("'", true){
if let ref = try t.consumeCharacterReference("'", true) {
t.tagPending.appendAttributeValue(ref)
}else{
} else {
t.tagPending.appendAttributeValue("&")
}
break
@ -869,19 +863,19 @@ enum TokeniserState: TokeniserStateProtocol
break
case .AttributeValue_unquoted:
let value = r.consumeToAnySorted(TokeniserStateVars.attributeValueUnquoted)
if (value.characters.count > 0){
if (value.characters.count > 0) {
t.tagPending.appendAttributeValue(value)
}
let c = r.consume()
switch (c) {
case "\t","\n","\r",UnicodeScalar.BackslashF," ":
case "\t", "\n", "\r", UnicodeScalar.BackslashF, " ":
t.transition(.BeforeAttributeName)
break
case "&":
if let ref = try t.consumeCharacterReference(">", true){
if let ref = try t.consumeCharacterReference(">", true) {
t.tagPending.appendAttributeValue(ref)
}else{
} else {
t.tagPending.appendAttributeValue("&")
}
break
@ -897,7 +891,7 @@ enum TokeniserState: TokeniserStateProtocol
t.eofError(self)
t.transition(.Data)
break
case "\"","'","<","=","`":
case "\"", "'", "<", "=", "`":
t.error(self)
t.tagPending.appendAttributeValue(c)
break
@ -910,7 +904,7 @@ enum TokeniserState: TokeniserStateProtocol
// CharacterReferenceInAttributeValue state handled inline
let c = r.consume()
switch (c) {
case "\t","\n","\r",UnicodeScalar.BackslashF," ":
case "\t", "\n", "\r", UnicodeScalar.BackslashF, " ":
t.transition(.BeforeAttributeName)
break
case "/":
@ -952,7 +946,7 @@ enum TokeniserState: TokeniserStateProtocol
// todo: handle bogus comment starting from eof. when does that trigger?
// rewind to capture character that lead us here
r.unconsume()
let comment : Token.Comment = Token.Comment()
let comment: Token.Comment = Token.Comment()
comment.bogus = true
comment.data.append(r.consumeTo(">"))
// todo: replace nullChar with replaceChar
@ -1128,7 +1122,7 @@ enum TokeniserState: TokeniserStateProtocol
case .Doctype:
let c = r.consume()
switch (c) {
case "\t","\n","\r",UnicodeScalar.BackslashF," ":
case "\t", "\n", "\r", UnicodeScalar.BackslashF, " ":
t.transition(.BeforeDoctypeName)
break
case TokeniserStateVars.eof:
@ -1154,7 +1148,7 @@ enum TokeniserState: TokeniserStateProtocol
}
let c = r.consume()
switch (c) {
case "\t","\n","\r",UnicodeScalar.BackslashF," ":
case "\t", "\n", "\r", UnicodeScalar.BackslashF, " ":
break // ignore whitespace
case TokeniserStateVars.nullScalr:
t.error(self)
@ -1187,7 +1181,7 @@ enum TokeniserState: TokeniserStateProtocol
try t.emitDoctypePending()
t.transition(.Data)
break
case "\t","\n","\r",UnicodeScalar.BackslashF," ":
case "\t", "\n", "\r", UnicodeScalar.BackslashF, " ":
t.transition(.AfterDoctypeName)
break
case TokeniserStateVars.nullScalr:
@ -1212,9 +1206,9 @@ enum TokeniserState: TokeniserStateProtocol
t.transition(.Data)
return
}
if (r.matchesAny("\t", "\n", "\r", UnicodeScalar.BackslashF, " ")){
if (r.matchesAny("\t", "\n", "\r", UnicodeScalar.BackslashF, " ")) {
r.advance() // ignore whitespace
}else if (r.matches(">")) {
} else if (r.matches(">")) {
try t.emitDoctypePending()
t.advanceTransition(.Data)
} else if (r.matchConsumeIgnoreCase("PUBLIC")) {
@ -1230,7 +1224,7 @@ enum TokeniserState: TokeniserStateProtocol
case .AfterDoctypePublicKeyword:
let c = r.consume()
switch (c) {
case "\t","\n","\r",UnicodeScalar.BackslashF," ":
case "\t", "\n", "\r", UnicodeScalar.BackslashF, " ":
t.transition(.BeforeDoctypePublicIdentifier)
break
case "\"":
@ -1264,7 +1258,7 @@ enum TokeniserState: TokeniserStateProtocol
case .BeforeDoctypePublicIdentifier:
let c = r.consume()
switch (c) {
case "\t","\n","\r",UnicodeScalar.BackslashF," ":
case "\t", "\n", "\r", UnicodeScalar.BackslashF, " ":
break
case "\"":
// set public id to empty string
@ -1347,7 +1341,7 @@ enum TokeniserState: TokeniserStateProtocol
case .AfterDoctypePublicIdentifier:
let c = r.consume()
switch (c) {
case "\t","\n","\r",UnicodeScalar.BackslashF," ":
case "\t", "\n", "\r", UnicodeScalar.BackslashF, " ":
t.transition(.BetweenDoctypePublicAndSystemIdentifiers)
break
case ">":
@ -1379,7 +1373,7 @@ enum TokeniserState: TokeniserStateProtocol
case .BetweenDoctypePublicAndSystemIdentifiers:
let c = r.consume()
switch (c) {
case "\t","\n","\r",UnicodeScalar.BackslashF," ":
case "\t", "\n", "\r", UnicodeScalar.BackslashF, " ":
break
case ">":
try t.emitDoctypePending()
@ -1410,7 +1404,7 @@ enum TokeniserState: TokeniserStateProtocol
case .AfterDoctypeSystemKeyword:
let c = r.consume()
switch (c) {
case "\t","\n","\r",UnicodeScalar.BackslashF," ":
case "\t", "\n", "\r", UnicodeScalar.BackslashF, " ":
t.transition(.BeforeDoctypeSystemIdentifier)
break
case ">":
@ -1444,7 +1438,7 @@ enum TokeniserState: TokeniserStateProtocol
case .BeforeDoctypeSystemIdentifier:
let c = r.consume()
switch (c) {
case "\t","\n","\r",UnicodeScalar.BackslashF," ":
case "\t", "\n", "\r", UnicodeScalar.BackslashF, " ":
break
case "\"":
// set system id to empty string
@ -1527,7 +1521,7 @@ enum TokeniserState: TokeniserStateProtocol
case .AfterDoctypeSystemIdentifier:
let c = r.consume()
switch (c) {
case "\t","\n","\r",UnicodeScalar.BackslashF," ":
case "\t", "\n", "\r", UnicodeScalar.BackslashF, " ":
break
case ">":
try t.emitDoctypePending()
@ -1569,15 +1563,7 @@ enum TokeniserState: TokeniserStateProtocol
break
}
}
/// Name of this state (the enum case name, e.g. "Data"), used in parse-error messages.
/// `String(describing: self)` is used because `type(of: self)` describes the enum type
/// and would therefore yield "TokeniserState" for every state.
/// NOTE(review): assumes TokeniserState does not adopt CustomStringConvertible elsewhere
/// (that would make this property call itself) — confirm.
var description: String {return String(describing: self)}
/**
* Handles RawtextEndTagName, ScriptDataEndTagName, and ScriptDataEscapedEndTagName. Same body impl, just
@ -1590,12 +1576,12 @@ enum TokeniserState: TokeniserStateProtocol
t.dataBuffer.append(name)
return
}
var needsExitTransition = false
if (try t.isAppropriateEndTagToken() && !r.isEmpty()) {
let c = r.consume()
switch (c) {
case "\t","\n","\r",UnicodeScalar.BackslashF," ":
case "\t", "\n", "\r", UnicodeScalar.BackslashF, " ":
t.transition(BeforeAttributeName)
break
case "/":
@ -1612,13 +1598,13 @@ enum TokeniserState: TokeniserStateProtocol
} else {
needsExitTransition = true
}
if (needsExitTransition) {
t.emit("</" + t.dataBuffer.toString())
t.transition(elseTransition)
}
}
private static func readData(_ t: Tokeniser, _ r: CharacterReader, _ current: TokeniserState, _ advance: TokeniserState)throws {
switch (r.current()) {
case "<":
@ -1638,17 +1624,17 @@ enum TokeniserState: TokeniserStateProtocol
break
}
}
/// Tries to consume a character reference from `t` and emits it, or emits a literal "&"
/// when none could be consumed; then transitions `t` to `advance`.
/// Rethrows any error from `consumeCharacterReference`.
private static func readCharRef(_ t: Tokeniser, _ advance: TokeniserState)throws {
let c = try t.consumeCharacterReference(nil, false)
if (c == nil){
if (c == nil) {
t.emit("&")
}else{
} else {
t.emit(c!)
}
t.transition(advance)
}
private static func readEndTag(_ t: Tokeniser, _ r: CharacterReader, _ a: TokeniserState, _ b: TokeniserState) {
if (r.matchesLetter()) {
t.createTagPending(false)
@ -1658,7 +1644,7 @@ enum TokeniserState: TokeniserStateProtocol
t.transition(b)
}
}
private static func handleDataDoubleEscapeTag(_ t: Tokeniser, _ r: CharacterReader, _ primary: TokeniserState, _ fallback: TokeniserState) {
if (r.matchesLetter()) {
let name = r.consumeLetterSequence()
@ -1666,13 +1652,13 @@ enum TokeniserState: TokeniserStateProtocol
t.emit(name)
return
}
let c = r.consume()
switch (c) {
case "\t","\n","\r",UnicodeScalar.BackslashF," ","/",">":
if (t.dataBuffer.toString() == "script"){
case "\t", "\n", "\r", UnicodeScalar.BackslashF, " ", "/", ">":
if (t.dataBuffer.toString() == "script") {
t.transition(primary)
}else{
} else {
t.transition(fallback)
}
t.emit(c)
@ -1682,8 +1668,5 @@ enum TokeniserState: TokeniserStateProtocol
t.transition(fallback)
}
}
}

View File

@ -17,23 +17,22 @@ public class TreeBuilder {
public var currentToken: Token? // currentToken is used only for error tracking.
public var errors: ParseErrorList // null when not tracking errors
public var settings: ParseSettings
private let start: Token.StartTag = Token.StartTag() // start tag to process
private let end: Token.EndTag = Token.EndTag()
public func defaultSettings()->ParseSettings{preconditionFailure("This method must be overridden")}
public func defaultSettings() -> ParseSettings {preconditionFailure("This method must be overridden")}
public init() {
doc = Document("")
reader = CharacterReader("")
tokeniser = Tokeniser(reader,nil)
tokeniser = Tokeniser(reader, nil)
stack = Array<Element>()
baseUri = ""
errors = ParseErrorList(0,0)
settings = ParseSettings(false,false)
errors = ParseErrorList(0, 0)
settings = ParseSettings(false, false)
}
public func initialiseParse(_ input: String, _ baseUri: String, _ errors: ParseErrorList, _ settings: ParseSettings) {
doc = Document(baseUri)
self.settings = settings
@ -43,29 +42,28 @@ public class TreeBuilder {
stack = Array<Element>()
self.baseUri = baseUri
}
/// Initialises the builder for `input` via `initialiseParse`, runs the parser, and returns `doc`.
/// Rethrows any error raised by `runParser()`.
func parse(_ input: String, _ baseUri: String, _ errors: ParseErrorList, _ settings: ParseSettings)throws->Document {
initialiseParse(input, baseUri, errors, settings)
try runParser()
return doc
}
/// Reads tokens from the tokeniser and passes each to `process(_:)`, stopping after an EOF token.
/// Rethrows any error from reading or processing.
public func runParser()throws {
while (true) {
let token: Token = try tokeniser.read()
try process(token)
// NOTE(review): reset() runs before the type check, so this relies on reset() leaving `type` intact — confirm.
token.reset()
if (token.type == Token.TokenType.EOF){
if (token.type == Token.TokenType.EOF) {
break
}
}
}
@discardableResult
public func process(_ token: Token)throws->Bool{preconditionFailure("This method must be overridden")}
public func process(_ token: Token)throws->Bool {preconditionFailure("This method must be overridden")}
@discardableResult
public func processStartTag(_ name: String)throws->Bool {
if (currentToken === start) { // don't recycle an in-use token
@ -73,7 +71,7 @@ public class TreeBuilder {
}
return try process(start.reset().name(name))
}
@discardableResult
public func processStartTag(_ name: String, _ attrs: Attributes)throws->Bool {
if (currentToken === start) { // don't recycle an in-use token
@ -83,18 +81,17 @@ public class TreeBuilder {
start.nameAttr(name, attrs)
return try process(start)
}
/// Processes an end tag named `name`.
/// The builder's shared end-tag token is reused unless it is the token currently being
/// processed, in which case a fresh `Token.EndTag` is created instead.
/// - Returns: the result of `process(_:)` for that end tag.
@discardableResult
public func processEndTag(_ name: String)throws->Bool {
    // Common path: the shared token is free, so recycle it.
    guard currentToken === end else {
        return try process(end.reset().name(name))
    }
    // don't recycle an in-use token
    return try process(Token.EndTag().name(name))
}
/// Returns the last element on `stack`, or nil when the stack is empty.
public func currentElement()->Element? {
public func currentElement() -> Element? {
let size: Int = stack.count
return size > 0 ? stack[size-1] : nil
}

View File

@ -15,113 +15,112 @@ private let alphaNumericSet = CharacterSet.alphanumerics
private let symbolSet = CharacterSet.symbols
private let digitSet = CharacterSet.decimalDigits
extension UnicodeScalar
{
public static let BackslashF : UnicodeScalar = UnicodeScalar(12)
extension UnicodeScalar {
public static let BackslashF: UnicodeScalar = UnicodeScalar(12)
/// Returns whether `set` contains this scalar.
func isMemberOfCharacterSet(_ set: CharacterSet) -> Bool {
return set.contains(self)
}
/// `true` for the ASCII space and for the control characters \t, \n, \v, \f and \r.
var isWhitespace: Bool {
    switch self {
    case " ", "\u{0009}"..."\u{000D}":
        // U+0009...U+000D is tab, line feed, vertical tab, form feed, carriage return.
        return true
    default:
        return false
    }
}
/// True for the space character, the control characters \t, \n, \r, \f, \v, and the
/// additional Unicode space, separator and zero-width characters listed in the cases below.
var isUnicodeSpace: Bool {
switch self {
case " ", "\t", "\n", "\r" ,UnicodeScalar.BackslashF: return true
case " ", "\t", "\n", "\r", UnicodeScalar.BackslashF: return true
case "\u{000C}", "\u{000B}", "\u{0085}": return true // Form Feed, vertical tab, next line (nel)
case "\u{00A0}", "\u{1680}", "\u{180E}": return true // No-break space, ogham space mark, mongolian vowel
case "\u{2000}"..."\u{200D}": return true // En quad, em quad, en space, em space, three-per-em space, four-per-em space, six-per-em space, figure space, punctuation space, thin space, hair space, zero width space, zero width non-joiner, zero width joiner.
case "\u{2028}", "\u{2029}": return true // Line separator, paragraph separator.
case "\u{202F}", "\u{205F}", "\u{2060}", "\u{3000}", "\u{FEFF}": return true // Narrow no-break space, medium mathematical space, word joiner, ideographic space, zero width no-break space.
default: return false
}
}
/// `true` if `self` normalized contains a single code unit that is in the categories of Uppercase and Titlecase Letters.
var isUppercase: Bool {
return isMemberOfCharacterSet(uppercaseSet)
}
/// `true` if `self` normalized contains a single code unit that is in the category of Lowercase Letters.
var isLowercase: Bool {
return isMemberOfCharacterSet(lowercaseSet)
}
/// `true` if `self` normalized contains a single code unit that is in the categories of Letters and Marks.
var isAlpha: Bool {
return isMemberOfCharacterSet(alphaSet)
}
/// `true` if `self` normalized contains a single code unit that is in the categories of Letters, Marks, and Numbers.
var isAlphaNumeric: Bool {
return isMemberOfCharacterSet(alphaNumericSet)
}
/// `true` if `self` normalized contains a single code unit that is in the category of Symbols. These characters include, for example, the dollar sign ($) and the plus (+) sign.
var isSymbol: Bool {
return isMemberOfCharacterSet(symbolSet)
}
/// `true` if `self` normalized contains a single code unit that is in the category of Decimal Numbers.
var isDigit: Bool {
return isMemberOfCharacterSet(digitSet)
}
/// `true` if `self` is an ASCII decimal digit, i.e. in the range "0"..."9".
var isDecimalDigit: Bool {
    switch self {
    case "0"..."9":
        return true
    default:
        return false
    }
}
/// `true` if `self` is an ASCII hexadecimal digit, i.e. "0"..."9", "a"..."f", "A"..."F".
var isHexadecimalDigit: Bool {
    // Range patterns avoid scanning a lookup string on every call.
    switch self {
    case "0"..."9", "a"..."f", "A"..."F":
        return true
    default:
        return false
    }
}
/// `true` if `self` is an ASCII octal digit, i.e. in the range "0"..."7".
var isOctalDigit: Bool {
    switch self {
    case "0"..."7":
        return true
    default:
        return false
    }
}
var uppercase: UnicodeScalar {
let str = String(self).uppercased()
return str.unicodeScalar(0)

View File

@ -8,80 +8,79 @@
import Foundation
struct Validate
{
struct Validate {
/**
* Validates that the object is not null
* @param obj object to test
*/
public static func notNull(obj:Any?) throws {
if (obj == nil){
if (obj == nil) {
throw Exception.Error(type: ExceptionType.IllegalArgumentException, Message: "Object must not be null")
}
}
/**
 * Ensures the supplied object is present, reporting a caller-provided message otherwise.
 * @param obj object to test
 * @param msg message carried by the thrown error if validation fails
 * @throws an IllegalArgumentException-typed `Exception.Error` when `obj` is nil
 */
public static func notNull(obj: AnyObject?, msg: String) throws {
    guard obj != nil else {
        throw Exception.Error(type: ExceptionType.IllegalArgumentException, Message: msg)
    }
}
/**
 * Ensures the supplied condition holds.
 * @param val condition to test
 * @throws an IllegalArgumentException-typed `Exception.Error` when `val` is false
 */
public static func isTrue(val: Bool) throws {
    guard val else {
        throw Exception.Error(type: ExceptionType.IllegalArgumentException, Message: "Must be true")
    }
}
/**
 * Ensures the supplied condition holds, reporting a caller-provided message otherwise.
 * @param val condition to test
 * @param msg message carried by the thrown error if validation fails
 * @throws an IllegalArgumentException-typed `Exception.Error` when `val` is false
 */
public static func isTrue(val: Bool, msg: String) throws {
    guard val else {
        throw Exception.Error(type: ExceptionType.IllegalArgumentException, Message: msg)
    }
}
/**
 * Ensures the supplied condition does not hold.
 * @param val condition to test
 * @throws an IllegalArgumentException-typed `Exception.Error` when `val` is true
 */
public static func isFalse(val: Bool) throws {
    guard !val else {
        throw Exception.Error(type: ExceptionType.IllegalArgumentException, Message: "Must be false")
    }
}
/**
 * Ensures the supplied condition does not hold, reporting a caller-provided message otherwise.
 * @param val condition to test
 * @param msg message carried by the thrown error if validation fails
 * @throws an IllegalArgumentException-typed `Exception.Error` when `val` is true
 */
public static func isFalse(val: Bool, msg: String) throws {
    guard !val else {
        throw Exception.Error(type: ExceptionType.IllegalArgumentException, Message: msg)
    }
}
/**
 * Ensures no element of the array is nil, using a default failure message.
 * @param objects the array to test
 * @throws whatever the two-argument overload throws when a nil element is found
 */
public static func noNullElements(objects: [AnyObject?]) throws {
    let defaultMessage = "Array must not contain any null objects"
    try noNullElements(objects: objects, msg: defaultMessage)
}
/**
* Validates that the array contains no null elements
* @param objects the array to test
@ -89,35 +88,34 @@ struct Validate
*/
public static func noNullElements(objects: [AnyObject?], msg: String) throws {
    // Fail on the first nil slot; an empty array passes trivially.
    for element in objects where element == nil {
        throw Exception.Error(type: ExceptionType.IllegalArgumentException, Message: msg)
    }
}
/**
 * Validates that the string is neither nil nor empty.
 * @param string the string to test
 * @throws an IllegalArgumentException-typed `Exception.Error` when `string` is nil or ""
 */
public static func notEmpty(string: String?) throws {
    // `isEmpty` answers in constant time, whereas counting characters walks the whole string.
    guard let candidate = string, !candidate.isEmpty else {
        throw Exception.Error(type: ExceptionType.IllegalArgumentException, Message: "String must not be empty")
    }
}
/**
 * Validates that the string is neither nil nor empty, reporting a caller-provided message otherwise.
 * @param string the string to test
 * @param msg message carried by the thrown error if validation fails
 * @throws an IllegalArgumentException-typed `Exception.Error` when `string` is nil or ""
 */
public static func notEmpty(string: String?, msg: String ) throws {
    // `isEmpty` answers in constant time, whereas counting characters walks the whole string.
    guard let candidate = string, !candidate.isEmpty else {
        throw Exception.Error(type: ExceptionType.IllegalArgumentException, Message: msg)
    }
}
/**
Cause a failure.
@param msg message to output.
@ -125,8 +123,7 @@ struct Validate
public static func fail(msg: String) throws {
// Unconditionally throws: there is no success path through this method.
throw Exception.Error(type: ExceptionType.IllegalArgumentException, Message: msg)
}
/**
Helper
*/

View File

@ -57,24 +57,22 @@
import Foundation
public class Whitelist {
private var tagNames : Set<TagName> // tags allowed, lower case. e.g. [p, br, span]
private var attributes : Dictionary<TagName, Set<AttributeKey>> // tag -> attribute[]. allowed attributes [href] for a tag.
private var enforcedAttributes : Dictionary<TagName, Dictionary<AttributeKey, AttributeValue>> // always set these attribute values
private var protocols : Dictionary<TagName, Dictionary<AttributeKey, Set<Protocol>>> // allowed URL protocols for attributes
private var preserveRelativeLinks : Bool // option to preserve relative links
private var tagNames: Set<TagName> // tags allowed, lower case. e.g. [p, br, span]
private var attributes: Dictionary<TagName, Set<AttributeKey>> // tag -> attribute[]. allowed attributes [href] for a tag.
private var enforcedAttributes: Dictionary<TagName, Dictionary<AttributeKey, AttributeValue>> // always set these attribute values
private var protocols: Dictionary<TagName, Dictionary<AttributeKey, Set<Protocol>>> // allowed URL protocols for attributes
private var preserveRelativeLinks: Bool // option to preserve relative links
/**
 Builds a freshly initialised whitelist with no tags added.
 This whitelist allows only text nodes: all HTML will be stripped.
 @return whitelist
 */
public static func none() -> Whitelist {
    let textOnly = Whitelist()
    return textOnly
}
/**
This whitelist allows only simple text formatting: <code>b, em, i, strong, u</code>. All other HTML (tags and
attributes) will be removed.
@ -84,7 +82,7 @@ public class Whitelist {
public static func simpleText() throws -> Whitelist {
    // addTags mutates and returns the same instance, so building in two steps is equivalent.
    let whitelist = Whitelist()
    try whitelist.addTags("b", "em", "i", "strong", "u")
    return whitelist
}
/**
<p>
This whitelist allows a fuller range of text nodes: <code>a, b, blockquote, br, cite, code, dd, dl, dt, em, i, li,
@ -106,18 +104,18 @@ public class Whitelist {
"a", "b", "blockquote", "br", "cite", "code", "dd", "dl", "dt", "em",
"i", "li", "ol", "p", "pre", "q", "small", "span", "strike", "strong", "sub",
"sup", "u", "ul")
.addAttributes("a", "href")
.addAttributes("blockquote", "cite")
.addAttributes("q", "cite")
.addProtocols("a", "href", "ftp", "http", "https", "mailto")
.addProtocols("blockquote", "cite", "http", "https")
.addProtocols("cite", "cite", "http", "https")
.addEnforcedAttribute("a", "rel", "nofollow")
}
/**
This whitelist allows the same text tags as {@link #basic}, and also allows <code>img</code> tags, with appropriate
attributes, with <code>src</code> pointing to <code>http</code> or <code>https</code>.
@ -129,9 +127,9 @@ public class Whitelist {
.addTags("img")
.addAttributes("img", "align", "alt", "height", "src", "title", "width")
.addProtocols("img", "src", "http", "https")
}
/**
This whitelist allows a full range of text and structural body HTML: <code>a, b, blockquote, br, caption, cite,
code, col, colgroup, dd, div, dl, dt, em, h1, h2, h3, h4, h5, h6, i, img, li, ol, p, pre, q, small, span, strike, strong, sub,
@ -150,7 +148,7 @@ public class Whitelist {
"i", "img", "li", "ol", "p", "pre", "q", "small", "span", "strike", "strong",
"sub", "sup", "table", "tbody", "td", "tfoot", "th", "thead", "tr", "u",
"ul")
.addAttributes("a", "href", "title")
.addAttributes("blockquote", "cite")
.addAttributes("col", "span", "width")
@ -164,18 +162,14 @@ public class Whitelist {
"th", "abbr", "axis", "colspan", "rowspan", "scope",
"width")
.addAttributes("ul", "type")
.addProtocols("a", "href", "ftp", "http", "https", "mailto")
.addProtocols("blockquote", "cite", "http", "https")
.addProtocols("cite", "cite", "http", "https")
.addProtocols("img", "src", "http", "https")
.addProtocols("q", "cite", "http", "https")
}
/**
Create a new, empty whitelist. Generally it will be better to start with a default prepared whitelist instead.
@ -191,7 +185,7 @@ public class Whitelist {
protocols = Dictionary<TagName, Dictionary<AttributeKey, Set<Protocol>>>()
preserveRelativeLinks = false
}
/**
Add a list of allowed elements to a whitelist. (If a tag is not allowed, it will be removed from the HTML.)
@ -200,14 +194,13 @@ public class Whitelist {
*/
@discardableResult
open func addTags(_ tags: String...) throws -> Whitelist {
    // Tags are validated and inserted one at a time, so names preceding an invalid one stay added.
    try tags.forEach { name in
        try Validate.notEmpty(string: name)
        tagNames.insert(TagName.valueOf(name))
    }
    return self
}
/**
Remove a list of allowed elements from a whitelist. (If a tag is not allowed, it will be removed from the HTML.)
@ -217,11 +210,11 @@ public class Whitelist {
@discardableResult
open func removeTags(_ tags: String...)throws ->Whitelist {
try Validate.notNull(obj:tags)
for tag in tags {
try Validate.notEmpty(string: tag)
let tagName : TagName = TagName.valueOf(tag)
let tagName: TagName = TagName.valueOf(tag)
if(tagNames.contains(tagName)) { // Only look in sub-maps if tag was allowed
tagNames.remove(tagName)
attributes.removeValue(forKey: tagName)
@ -231,7 +224,7 @@ public class Whitelist {
}
return self
}
/**
Add a list of allowed attributes to a tag. (If an attribute is not allowed on an element, it will be removed.)
<p>
@ -251,31 +244,29 @@ public class Whitelist {
open func addAttributes(_ tag: String, _ keys: String...) throws -> Whitelist {
    try Validate.notEmpty(string: tag)
    try Validate.isTrue(val: keys.count > 0, msg: "No attributes supplied.")

    // Registering attributes for a tag implicitly allows the tag itself.
    // Inserting into a Set is a no-op when an equal member is already present.
    let tagName = TagName.valueOf(tag)
    tagNames.insert(tagName)

    // Validate every key before touching the attribute map.
    var requested = Set<AttributeKey>()
    for key in keys {
        try Validate.notEmpty(string: key)
        requested.insert(AttributeKey.valueOf(key))
    }

    // Merge with whatever is already allowed for this tag; existing members are kept.
    if let existing = attributes[tagName] {
        attributes[tagName] = existing.union(requested)
    } else {
        attributes[tagName] = requested
    }
    return self
}
/**
Remove a list of allowed attributes from a tag. (If an attribute is not allowed on an element, it will be removed.)
<p>
@ -295,47 +286,42 @@ public class Whitelist {
open func removeAttributes(_ tag: String, _ keys: String...) throws -> Whitelist {
    try Validate.notEmpty(string: tag)
    try Validate.isTrue(val: keys.count > 0, msg: "No attributes supplied.")

    let tagName: TagName = TagName.valueOf(tag)
    var doomed = Set<AttributeKey>()
    for key in keys {
        try Validate.notEmpty(string: key)
        doomed.insert(AttributeKey.valueOf(key))
    }

    // Strips `doomed` from one tag's allowed set and drops the tag's entry once nothing is left.
    func strip(from name: TagName) {
        guard let allowed = attributes[name] else { return }
        let remaining = allowed.subtracting(doomed)
        attributes[name] = remaining.isEmpty ? nil : remaining
    }

    // Only look in sub-maps if tag was allowed
    if tagNames.contains(tagName) {
        strip(from: tagName)
    }

    // Attribute needs to be removed from all individually set tags
    if tag == ":all" {
        for name in attributes.keys {
            strip(from: name)
        }
    }
    return self
}
/**
Add an enforced attribute to a tag. An enforced attribute will always be added to the element. If the element
already has the attribute set, it will be overridden.
@ -354,24 +340,24 @@ public class Whitelist {
try Validate.notEmpty(string: tag)
try Validate.notEmpty(string: key)
try Validate.notEmpty(string: value)
let tagName : TagName = TagName.valueOf(tag)
if (!tagNames.contains(tagName)){
let tagName: TagName = TagName.valueOf(tag)
if (!tagNames.contains(tagName)) {
tagNames.insert(tagName)
}
let attrKey : AttributeKey = AttributeKey.valueOf(key)
let attrVal : AttributeValue = AttributeValue.valueOf(value)
let attrKey: AttributeKey = AttributeKey.valueOf(key)
let attrVal: AttributeValue = AttributeValue.valueOf(value)
if (enforcedAttributes[tagName] != nil) {
enforcedAttributes[tagName]?[attrKey] = attrVal
} else {
var attrMap : Dictionary<AttributeKey, AttributeValue> = Dictionary<AttributeKey, AttributeValue>()
var attrMap: Dictionary<AttributeKey, AttributeValue> = Dictionary<AttributeKey, AttributeValue>()
attrMap[attrKey] = attrVal
enforcedAttributes[tagName] = attrMap
}
return self
}
/**
Remove a previously configured enforced attribute from a tag.
@ -383,21 +369,21 @@ public class Whitelist {
open func removeEnforcedAttribute(_ tag: String, _ key: String) throws -> Whitelist {
    try Validate.notEmpty(string: tag)
    try Validate.notEmpty(string: key)

    let tagName: TagName = TagName.valueOf(tag)
    // Nothing to do unless the tag is allowed and has enforced attributes recorded.
    guard tagNames.contains(tagName), var enforced = enforcedAttributes[tagName] else {
        return self
    }

    enforced.removeValue(forKey: AttributeKey.valueOf(key))
    // Remove tag from enforced attribute map if no enforced attributes are present
    enforcedAttributes[tagName] = enforced.isEmpty ? nil : enforced
    return self
}
/**
* Configure this Whitelist to preserve relative links in an element's URL attribute, or convert them to absolute
* links. By default, this is <b>false</b>: URLs will be made absolute (e.g. start with an allowed protocol, like
@ -414,11 +400,11 @@ public class Whitelist {
* @see #addProtocols
*/
@discardableResult
open func preserveRelativeLinks(_ preserve: Bool) -> Whitelist {
    // The method shares its name with the stored flag; `self.` makes the assignment target explicit.
    self.preserveRelativeLinks = preserve
    return self
}
/**
Add allowed URL protocols for an element's URL attribute. This restricts the possible values of the attribute to
URLs with the defined protocol.
@ -440,19 +426,19 @@ public class Whitelist {
try Validate.notEmpty(string: tag)
try Validate.notEmpty(string: key)
try Validate.notNull(obj: protocols)
let tagName : TagName = TagName.valueOf(tag)
let attrKey : AttributeKey = AttributeKey.valueOf(key)
var attrMap : Dictionary<AttributeKey, Set<Protocol>>
var protSet : Set<Protocol>
let tagName: TagName = TagName.valueOf(tag)
let attrKey: AttributeKey = AttributeKey.valueOf(key)
var attrMap: Dictionary<AttributeKey, Set<Protocol>>
var protSet: Set<Protocol>
if (self.protocols[tagName] != nil) {
attrMap = self.protocols[tagName]!
} else {
attrMap = Dictionary<AttributeKey, Set<Protocol>>()
self.protocols[tagName] = attrMap
}
if (attrMap[attrKey] != nil) {
protSet = attrMap[attrKey]!
} else {
@ -460,17 +446,16 @@ public class Whitelist {
attrMap[attrKey] = protSet
self.protocols[tagName] = attrMap
}
for ptl in protocols
{
for ptl in protocols {
try Validate.notEmpty(string: ptl)
let prot : Protocol = Protocol.valueOf(ptl)
let prot: Protocol = Protocol.valueOf(ptl)
protSet.insert(prot)
}
attrMap[attrKey] = protSet
return self
}
/**
Remove allowed URL protocols for an element's URL attribute.
<p>
@ -486,44 +471,43 @@ public class Whitelist {
open func removeProtocols(_ tag: String, _ key: String, _ protocols: String...) throws -> Whitelist {
    try Validate.notEmpty(string: tag)
    try Validate.notEmpty(string: key)

    let tagName: TagName = TagName.valueOf(tag)
    let attrKey: AttributeKey = AttributeKey.valueOf(key)

    // No protocols recorded for this tag: nothing to remove.
    guard var attrMap = self.protocols[tagName] else {
        return self
    }

    if var protSet = attrMap[attrKey] {
        for ptl in protocols {
            try Validate.notEmpty(string: ptl)
            protSet.remove(Protocol.valueOf(ptl))
        }
        // Remove protocol set if empty
        attrMap[attrKey] = protSet.isEmpty ? nil : protSet
    }

    // Dictionaries are value types, so the edited copy must be written back. Previously an
    // unconditional write-back re-inserted the empty map right after the tag entry had been
    // removed; now the entry is dropped when empty and stored otherwise.
    self.protocols[tagName] = attrMap.isEmpty ? nil : attrMap
    return self
}
/**
 * Reports whether the named tag is in this whitelist's set of allowed tags.
 * @param tag test tag
 * @return true if allowed
 */
public func isSafeTag(_ tag: String) -> Bool {
    let candidate = TagName.valueOf(tag)
    return tagNames.contains(candidate)
}
/**
* Test if the supplied attribute is allowed by this whitelist for this tag
* @param tagName tag to consider allowing the attribute in
@ -531,10 +515,10 @@ public class Whitelist {
* @param attr attribute under test
* @return true if allowed
*/
public func isSafeAttribute(_ tagName: String, _ el: Element, _ attr: Attribute)->Bool {
let tag : TagName = TagName.valueOf(tagName)
let key : AttributeKey = AttributeKey.valueOf(attr.getKey())
public func isSafeAttribute(_ tagName: String, _ el: Element, _ attr: Attribute) -> Bool {
let tag: TagName = TagName.valueOf(tagName)
let key: AttributeKey = AttributeKey.valueOf(attr.getKey())
if (attributes[tag] != nil) {
if (attributes[tag]?.contains(key))! {
if (protocols[tag] != nil) {
@ -554,17 +538,16 @@ public class Whitelist {
private func testValidProtocol(_ el: Element, _ attr: Attribute, _ protocols: Set<Protocol>)throws->Bool {
// try to resolve relative urls to abs, and optionally update the attribute so output html has abs.
// rels without a baseuri get removed
var value : String = try el.absUrl(attr.getKey())
if (value.characters.count == 0){
var value: String = try el.absUrl(attr.getKey())
if (value.characters.count == 0) {
value = attr.getValue() // if it could not be made abs, run as-is to allow custom unknown protocols
if (!preserveRelativeLinks){
if (!preserveRelativeLinks) {
attr.setValue(value: value)
}
for ptl in protocols
{
var prot : String = ptl.toString()
for ptl in protocols {
var prot: String = ptl.toString()
if (prot=="#") { // allows anchor links
if (isValidAnchor(value)) {
return true
@ -572,93 +555,85 @@ public class Whitelist {
continue
}
}
prot += ":"
if (value.lowercased().hasPrefix(prot)) {
return true
}
}
}
return false
}
private func isValidAnchor(_ value: String) -> Bool {
    // Must start with "#"; checked first so the pattern is only evaluated for anchor candidates.
    guard value.startsWith("#") else {
        return false
    }
    // ...and the pattern for "contains whitespace" must produce no matches.
    let whitespaceMatches = Pattern(".*\\s.*").matcher(in: value).count
    return !(whitespaceMatches > 0)
}
public func getEnforcedAttributes(_ tagName: String) throws -> Attributes {
    let attrs: Attributes = Attributes()
    // Tags without enforced attributes yield an empty Attributes instance.
    guard let enforced = enforcedAttributes[TagName.valueOf(tagName)] else {
        return attrs
    }
    for (key, value) in enforced {
        try attrs.put(key.toString(), value.toString())
    }
    return attrs
}
}
// named types for config. All just hold strings, but here for my sanity.
/// Typed wrapper for a tag name; holds its string via `TypedValue`.
open class TagName: TypedValue {
    override init(_ value: String) {
        super.init(value)
    }

    /// Factory returning a new `TagName` wrapping `value`.
    static func valueOf(_ value: String) -> TagName {
        let name = TagName(value)
        return name
    }
}
/// Typed wrapper for an attribute key; holds its string via `TypedValue`.
open class AttributeKey: TypedValue {
    override init(_ value: String) {
        super.init(value)
    }

    /// Factory returning a new `AttributeKey` wrapping `value`.
    static func valueOf(_ value: String) -> AttributeKey {
        let key = AttributeKey(value)
        return key
    }
}
/// Typed wrapper for an attribute value; holds its string via `TypedValue`.
open class AttributeValue: TypedValue {
    override init(_ value: String) {
        super.init(value)
    }

    /// Factory returning a new `AttributeValue` wrapping `value`.
    static func valueOf(_ value: String) -> AttributeValue {
        let attributeValue = AttributeValue(value)
        return attributeValue
    }
}
/// Typed wrapper for a URL protocol name; holds its string via `TypedValue`.
open class Protocol: TypedValue {
    override init(_ value: String) {
        super.init(value)
    }

    /// Factory returning a new `Protocol` wrapping `value`.
    static func valueOf(_ value: String) -> Protocol {
        let proto = Protocol(value)
        return proto
    }
}
/// Base class for the named string wrappers used by the whitelist configuration.
open class TypedValue {
    /// The wrapped string; readable only within this file.
    fileprivate let value: String

    /// Wraps `value` unchanged.
    init(_ value: String) {
        self.value = value
    }

    /// Returns the wrapped string exactly as it was supplied to `init`.
    public func toString() -> String {
        let wrapped = value
        return wrapped
    }
}
@ -667,13 +642,12 @@ extension TypedValue: Hashable {
/// Hash derived from the wrapped string: `31 * 1 + value.hash`, with wrapping arithmetic.
public var hashValue: Int {
    let prime = 31
    let seed = 1
    // `&*` / `&+` wrap on overflow, matching the discarded-overflow-flag arithmetic used before.
    return (prime &* seed) &+ value.hash
}
}
/// Two typed values are equal when they are the same instance or wrap equal strings.
public func == (lhs: TypedValue, rhs: TypedValue) -> Bool {
    // Identity is checked first so the string comparison is skipped for the same object.
    return lhs === rhs || lhs.value == rhs.value
}

View File

@ -12,10 +12,10 @@ import Foundation
An XML Declaration.
@author Jonathan Hedley, jonathan@hedley.net */
public class XmlDeclaration : Node {
public class XmlDeclaration: Node {
private let _name: String
private let isProcessingInstruction: Bool // <! if true, <? if false, declaration (and last data char should be ?)
/**
Create a new XML declaration
@param name of declaration
@ -27,20 +27,19 @@ public class XmlDeclaration : Node {
self.isProcessingInstruction = isProcessingInstruction
super.init(baseUri)
}
/// Node name reported for every XML declaration; it is a fixed marker, not the declaration's own name.
public override func nodeName() -> String {
    let marker = "#declaration"
    return marker
}
/**
 * Returns the declaration name stored at construction time.
 * @return name of this declaration.
 */
public func name() -> String {
    let declarationName = _name
    return declarationName
}
/**
Get the unencoded XML declaration.
@return XML declaration
@ -48,42 +47,39 @@ public class XmlDeclaration : Node {
public func getWholeDeclaration() throws -> String {
    // NOTE(review): force-unwraps `attributes`; this traps if it is nil — confirm it is always set here.
    let rendered = try attributes!.html()
    return rendered.trim() // attr html starts with a " "
}
/// Writes the declaration as `<` + marker + name + attributes + marker + `>`,
/// where the marker is "!" when `isProcessingInstruction` is true and "?" otherwise.
override func outerHtmlHead(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings) {
    let marker = isProcessingInstruction ? "!" : "?"

    accum.append("<").append(marker).append(_name)
    do {
        try attributes?.html(accum: accum, out: out)
    } catch {
        // Attribute rendering errors are ignored; the closing marker is still written.
    }
    accum.append(marker).append(">")
}
// Intentionally empty: nothing is appended to `accum` for the tail of this node.
override func outerHtmlTail(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings) {}
/// Returns the node's outer HTML, or an empty string if rendering throws.
public override func toString() -> String {
    guard let html = try? outerHtml() else {
        return ""
    }
    return html
}
/// Creates a new declaration with the same name, base URI and flag, then hands it to `copy(clone:)`.
public override func copy(with zone: NSZone? = nil) -> Any {
    // NOTE(review): `baseUri!` traps when the base URI is nil — confirm it is always set.
    return copy(clone: XmlDeclaration(_name, baseUri!, isProcessingInstruction))
}
/// Creates a new declaration with the same name, base URI and flag, then hands it to `copy(clone:parent:)`.
public override func copy(parent: Node?) -> Node {
    // NOTE(review): `baseUri!` traps when the base URI is nil — confirm it is always set.
    let duplicate = XmlDeclaration(_name, baseUri!, isProcessingInstruction)
    return copy(clone: duplicate, parent: parent)
}
/// Forwards to the superclass implementation unchanged.
public override func copy(clone: Node, parent: Node?) -> Node {
    let copied = super.copy(clone: clone, parent: parent)
    return copied
}
}

View File

@ -14,29 +14,26 @@ import Foundation
* <p>Usage example: {@code Document xmlDoc = Jsoup.parse(html, baseUrl, Parser.xmlParser())}</p>
*
*/
public class XmlTreeBuilder : TreeBuilder {
public override init(){
public class XmlTreeBuilder: TreeBuilder {
public override init() {
super.init()
}
/// The XML builder's default parse settings: `ParseSettings.preserveCase`.
public override func defaultSettings() -> ParseSettings {
    let settings = ParseSettings.preserveCase
    return settings
}
/// Convenience overload: parses with error tracking disabled and case-preserving settings.
public func parse(_ input: String, _ baseUri: String) throws -> Document {
    let errors = ParseErrorList.noTracking()
    let settings = ParseSettings.preserveCase
    return try parse(input, baseUri, errors, settings)
}
// Runs the superclass initialisation, then pushes the document onto the element stack and
// switches the document's output syntax to XML.
override public func initialiseParse(_ input: String, _ baseUri: String, _ errors: ParseErrorList, _ settings: ParseSettings) {
super.initialiseParse(input, baseUri, errors, settings)
stack.append(doc) // place the document onto the stack. differs from HtmlTreeBuilder (not on stack)
doc.outputSettings().syntax(syntax: OutputSettings.Syntax.xml)
}
override public func process(_ token: Token)throws->Bool {
// start tag, end tag, doctype, comment, character, eof
switch (token.type) {
@ -62,11 +59,11 @@ public class XmlTreeBuilder : TreeBuilder {
}
return true
}
// Appends `node` as a child of the current element. Because of the optional chaining,
// nothing is appended when `currentElement()` returns nil.
private func insertNode(_ node: Node)throws {
try currentElement()?.appendChild(node)
}
@discardableResult
func insert(_ startTag: Token.StartTag)throws->Element {
let tag: Tag = try Tag.valueOf(startTag.name(), settings)
@ -84,7 +81,7 @@ public class XmlTreeBuilder : TreeBuilder {
}
return el
}
func insert(_ commentToken: Token.Comment)throws {
let comment: Comment = Comment(commentToken.getData(), baseUri)
var insert: Node = comment
@ -100,17 +97,17 @@ public class XmlTreeBuilder : TreeBuilder {
}
try insertNode(insert)
}
/// Wraps the character token's data in a `TextNode` and appends it to the current element.
func insert(_ characterToken: Token.Char) throws {
    // NOTE(review): `getData()!` traps when the token carries no data — confirm callers guarantee it.
    try insertNode(TextNode(characterToken.getData()!, baseUri))
}
/// Builds a `DocumentType` node from the doctype token and appends it to the current element.
func insert(_ d: Token.Doctype) throws {
    // The name goes through the active settings' tag normalisation; the identifiers are passed as-is.
    let normalizedName = settings.normalizeTag(d.getName())
    let publicId = d.getPublicIdentifier()
    let systemId = d.getSystemIdentifier()
    try insertNode(DocumentType(normalizedName, publicId, systemId, baseUri))
}
/**
* If the stack contains an element with this tag's name, pop up the stack to remove the first occurrence. If not
* found, skips.
@ -120,30 +117,27 @@ public class XmlTreeBuilder : TreeBuilder {
private func popStackToClose(_ endTag: Token.EndTag) throws {
    let elName: String = try endTag.name()

    // Search from the top of the stack downwards for the nearest element with the end tag's name.
    var match: Element? = nil
    var probe = stack.count - 1
    while probe >= 0 {
        let candidate: Element = stack[probe]
        if candidate.nodeName().equals(elName) {
            match = candidate
            break
        }
        probe -= 1
    }

    guard let firstFound = match else {
        return // not found, skip
    }

    // Pop from the top down; stop once the matched element itself has been removed.
    var pos = stack.count - 1
    while pos >= 0 {
        let next: Element = stack[pos]
        stack.remove(at: pos)
        if next == firstFound {
            break
        }
        pos -= 1
    }
}
func parseFragment(_ inputFragment: String, _ baseUri: String, _ errors: ParseErrorList, _ settings: ParseSettings)throws->Array<Node> {
initialiseParse(inputFragment, baseUri, errors, settings)
try runParser()

View File

@ -6,7 +6,6 @@
// Copyright © 2016 Nabil Chatbi. All rights reserved.
//
import XCTest
@testable import SwiftSoupTests

View File

@ -13,11 +13,11 @@ import XCTest
import SwiftSoup
class AttributeParseTest: XCTestCase {
func testparsesRoughAttributeString()throws {
let html: String = "<a id=\"123\" class=\"baz = 'bar'\" style = 'border: 2px'qux zim foo = 12 mux=18 />"
// should be: <id=123>, <class=baz = 'bar'>, <qux=>, <zim=>, <foo=12>, <mux.=18>
let el: Element = try SwiftSoup.parse(html).getElementsByTag("a").get(0)
let attr: Attributes = el.getAttributes()!
XCTAssertEqual(7, attr.size())
@ -29,7 +29,7 @@ class AttributeParseTest: XCTestCase {
XCTAssertEqual("12", attr.get(key: "foo"))
XCTAssertEqual("18", attr.get(key: "mux"))
}
func testhandlesNewLinesAndReturns()throws {
let html: String = "<a\r\nfoo='bar\r\nqux'\r\nbar\r\n=\r\ntwo>One</a>"
let el: Element = try SwiftSoup.parse(html).select("a").first()!
@ -37,7 +37,7 @@ class AttributeParseTest: XCTestCase {
XCTAssertEqual("bar\r\nqux", try el.attr("foo")) // currently preserves newlines in quoted attributes. todo confirm if should.
XCTAssertEqual("two", try el.attr("bar"))
}
func testparsesEmptyString()throws {
let html: String = "<a />"
let el: Element = try SwiftSoup.parse(html).getElementsByTag("a").get(0)
@ -58,55 +58,55 @@ class AttributeParseTest: XCTestCase {
let html: String = "<a id=1 href='?foo=bar&mid&lt=true'>One</a> <a id=2 href='?foo=bar&lt;qux&lg=1'>Two</a>"
let els: Elements = try SwiftSoup.parse(html).select("a")
XCTAssertEqual("?foo=bar&mid&lt=true", try els.first()!.attr("href"))
XCTAssertEqual("?foo=bar<qux&lg=1",try els.last()!.attr("href"))
XCTAssertEqual("?foo=bar<qux&lg=1", try els.last()!.attr("href"))
}
/// Ampersand sequences in the href that are not entity references must come back untouched.
func testmoreAttributeUnescapes() throws {
    let html: String = "<a href='&wr_id=123&mid-size=true&ok=&wr'>Check</a>"
    let expectedHref = "&wr_id=123&mid-size=true&ok=&wr"

    let anchors: Elements = try SwiftSoup.parse(html).select("a")

    XCTAssertEqual(expectedHref, try anchors.first()!.attr("href"))
}
/// A valueless attribute parses as a `BooleanAttribute`; valued and empty-valued ones do not,
/// and the element serialises back to the original markup.
func testparsesBooleanAttributes() throws {
    let html: String = "<a normal=\"123\" boolean empty=\"\"></a>"
    let el: Element = try SwiftSoup.parse(html).select("a").first()!

    XCTAssertEqual("123", try el.attr("normal"))
    XCTAssertEqual("", try el.attr("boolean"))
    XCTAssertEqual("", try el.attr("empty"))

    let parsed: Array<Attribute> = el.getAttributes()!.asList()
    XCTAssertEqual(3, parsed.count, "There should be 3 attribute present")

    // Assuming the list order always follows the parsed html
    XCTAssertFalse(parsed[0] is BooleanAttribute, "'normal' attribute should not be boolean")
    XCTAssertTrue(parsed[1] is BooleanAttribute, "'boolean' attribute should be boolean")
    XCTAssertFalse(parsed[2] is BooleanAttribute, "'empty' attribute should not be boolean")

    XCTAssertEqual(html, try el.outerHtml())
}
/// A stray "/" before an attribute name is dropped, under both the HTML and the XML parser.
func testdropsSlashFromAttributeName() throws {
    let html: String = "<img /onerror='doMyJob'/>"

    // HTML parser
    let htmlDoc: Document = try SwiftSoup.parse(html)
    XCTAssertTrue(try htmlDoc.select("img[onerror]").size() != 0, "SelfClosingStartTag ignores last character")
    XCTAssertEqual("<img onerror=\"doMyJob\">", try htmlDoc.body()!.html())

    // XML parser
    let xmlDoc: Document = try SwiftSoup.parse(html, "", Parser.xmlParser())
    XCTAssertEqual("<img onerror=\"doMyJob\" />", try xmlDoc.html())
}
/// Name/method pairs for the tests in this class.
static var allTests = [
    ("testparsesRoughAttributeString", testparsesRoughAttributeString),
    ("testhandlesNewLinesAndReturns", testhandlesNewLinesAndReturns),
    ("testparsesEmptyString", testparsesEmptyString),
    ("testcanStartWithEq", testcanStartWithEq),
    ("teststrictAttributeUnescapes", teststrictAttributeUnescapes),
    ("testmoreAttributeUnescapes", testmoreAttributeUnescapes),
    ("testparsesBooleanAttributes", testparsesBooleanAttributes),
    ("testdropsSlashFromAttributeName", testdropsSlashFromAttributeName)
]
}

View File

@ -9,26 +9,25 @@
import XCTest
@testable import SwiftSoup
/// Tests for the rendering of a single `Attribute`.
class AttributeTest: XCTestCase {

    /// An ampersand in the value is escaped in the rendered form, and `toString()` mirrors `html()`.
    func testHtml() {
        let attr = try! Attribute(key: "key", value: "value &")
        let expected = "key=\"value &amp;\""

        XCTAssertEqual(expected, attr.html())
        XCTAssertEqual(attr.html(), attr.toString())
    }

    /// Key and value built from the same string round-trip unchanged through `html()`.
    func testWithSupplementaryCharacterInAttributeKeyAndValue() {
        // NOTE(review): the test name mentions a supplementary character, but this is the ASCII
        // digit string "135361" — confirm whether code point 135361 was intended.
        let s: String = "135361"
        let value = "A" + s + "B"
        let attr = try! Attribute(key: s, value: value)

        XCTAssertEqual(s + "=\"A" + s + "B\"", attr.html())
        XCTAssertEqual(attr.html(), attr.toString())
    }

    /// Name/method pairs for the tests in this class.
    static var allTests = [
        ("testHtml", testHtml),
        ("testWithSupplementaryCharacterInAttributeKeyAndValue", testWithSupplementaryCharacterInAttributeKeyAndValue)
    ]
}

View File

@ -10,30 +10,29 @@ import XCTest
import SwiftSoup
class AttributesTest: XCTestCase {
/// Exercises case-sensitive and case-insensitive lookup, the `data-` dataset view, and HTML
/// rendering of an `Attributes` collection.
func testHtml() {
    let a: Attributes = Attributes()
    do {
        try a.put("Tot", "a&p")
        try a.put("Hello", "There")
        try a.put("data-name", "Jsoup")
    } catch {
        // Report a setup failure directly instead of letting it surface as a confusing
        // size mismatch in the assertions below.
        XCTFail("Attributes.put threw unexpectedly: \(error)")
    }

    XCTAssertEqual(3, a.size())
    XCTAssertTrue(a.hasKey(key: "Tot"))
    XCTAssertTrue(a.hasKey(key: "Hello"))
    XCTAssertTrue(a.hasKey(key: "data-name"))
    XCTAssertFalse(a.hasKey(key: "tot"))
    XCTAssertTrue(a.hasKeyIgnoreCase(key: "tot"))
    XCTAssertEqual("There", try a.getIgnoreCase(key: "hEllo"))

    XCTAssertEqual(1, a.dataset().count)
    XCTAssertEqual("Jsoup", a.dataset()["name"])
    XCTAssertEqual("", a.get(key: "tot"))
    XCTAssertEqual("a&p", a.get(key: "Tot"))
    XCTAssertEqual("a&p", try a.getIgnoreCase(key: "tot"))

    XCTAssertEqual(" Tot=\"a&amp;p\" Hello=\"There\" data-name=\"Jsoup\"", try a.html())
    XCTAssertEqual(try a.html(), try a.toString())
}
@ -52,16 +51,15 @@ class AttributesTest: XCTestCase {
// iterator.dropFirst()
// XCTAssertEqual(2, a.size())
// }
func testIterator() {
let a: Attributes = Attributes()
let datas: [[String]] = [["Tot", "raul"],["Hello", "pismuth"],["data-name", "Jsoup"]]
let datas: [[String]] = [["Tot", "raul"], ["Hello", "pismuth"], ["data-name", "Jsoup"]]
for atts in datas {
try! a.put(atts[0], atts[1])
}
var iterator = a.iterator()
XCTAssertTrue(iterator.next() != nil)
var i = 0
@ -72,19 +70,19 @@ class AttributesTest: XCTestCase {
}
XCTAssertEqual(datas.count, i)
}
/// An iterator over a freshly created `Attributes` yields nothing.
func testIteratorEmpty() {
    let emptyAttributes = Attributes()
    var cursor = emptyAttributes.iterator()
    let firstEntry = cursor.next()
    XCTAssertNil(firstEntry)
}
static var allTests = {
return [
("testHtml" , testHtml),
("testIterator" , testIterator),
("testIteratorEmpty" , testIteratorEmpty)
("testHtml", testHtml),
("testIterator", testIterator),
("testIteratorEmpty", testIteratorEmpty)
]
}()
}

View File

@ -10,7 +10,7 @@ import XCTest
import SwiftSoup
class CharacterReaderTest: XCTestCase {
func testConsume() {
let r = CharacterReader("one")
XCTAssertEqual(0, r.getPos())
@ -26,14 +26,14 @@ class CharacterReaderTest: XCTestCase {
XCTAssertTrue(r.isEmpty())
XCTAssertEqual(CharacterReader.EOF, r.consume())
}
func testUnconsume() {
let r = CharacterReader("one")
XCTAssertEqual("o", r.consume())
XCTAssertEqual("n", r.current())
r.unconsume()
XCTAssertEqual("o", r.current())
XCTAssertEqual("o", r.consume())
XCTAssertEqual("n", r.consume())
XCTAssertEqual("e", r.consume())
@ -43,13 +43,13 @@ class CharacterReaderTest: XCTestCase {
XCTAssertEqual("e", r.current())
XCTAssertEqual("e", r.consume())
XCTAssertTrue(r.isEmpty())
XCTAssertEqual(CharacterReader.EOF, r.consume())
r.unconsume()
XCTAssertTrue(r.isEmpty())
XCTAssertEqual(CharacterReader.EOF, r.current())
}
func testMark() {
let r = CharacterReader("one")
XCTAssertEqual("o", r.consume())
@ -60,7 +60,7 @@ class CharacterReaderTest: XCTestCase {
r.rewindToMark()
XCTAssertEqual("n", r.consume())
}
func testConsumeToEnd() {
let input = "one two three"
let r = CharacterReader(input)
@ -68,11 +68,11 @@ class CharacterReaderTest: XCTestCase {
XCTAssertEqual(input, toEnd)
XCTAssertTrue(r.isEmpty())
}
func testNextIndexOfChar() {
let input = "blah blah"
let r = CharacterReader(input)
XCTAssertEqual(-1, r.nextIndexOf("x"))
XCTAssertEqual(3, r.nextIndexOf("h"))
let pull = r.consumeTo("h")
@ -82,11 +82,11 @@ class CharacterReaderTest: XCTestCase {
XCTAssertEqual(" blah", r.consumeToEnd())
XCTAssertEqual(-1, r.nextIndexOf("x"))
}
func testNextIndexOfString() {
let input = "One Two something Two Three Four"
let r = CharacterReader(input)
XCTAssertEqual(-1, r.nextIndexOf("Foo"))
XCTAssertEqual(4, r.nextIndexOf("Two"))
XCTAssertEqual("One Two ", r.consumeTo("something"))
@ -94,12 +94,12 @@ class CharacterReaderTest: XCTestCase {
XCTAssertEqual("something Two Three Four", r.consumeToEnd())
XCTAssertEqual(-1, r.nextIndexOf("Two"))
}
/// Looking for a sequence that does not occur in the input reports -1.
func testNextIndexOfUnmatched() {
    let reader = CharacterReader("<[[one]]")
    let position = reader.nextIndexOf("]]>")
    XCTAssertEqual(-1, position)
}
func testConsumeToChar() {
let r = CharacterReader("One Two Three")
XCTAssertEqual("One ", r.consumeTo("T"))
@ -109,7 +109,7 @@ class CharacterReaderTest: XCTestCase {
XCTAssertEqual("T", r.consume())
XCTAssertEqual("hree", r.consumeTo("T")) // consume to end
}
func testConsumeToString() {
let r = CharacterReader("One Two Two Four")
XCTAssertEqual("One ", r.consumeTo("Two"))
@ -118,14 +118,14 @@ class CharacterReaderTest: XCTestCase {
XCTAssertEqual("T", r.consume())
XCTAssertEqual("wo Four", r.consumeTo("Qux"))
}
/// `advance()` moves past one character without returning it.
func testAdvance() {
    let reader = CharacterReader("One Two Three")

    let first = reader.consume()
    XCTAssertEqual("O", first)

    // Skip the "n" so that the next consumed character is the "e".
    reader.advance()

    let afterSkip = reader.consume()
    XCTAssertEqual("e", afterSkip)
}
func testConsumeToAny() {
let r = CharacterReader("One &bar; qux")
XCTAssertEqual("One ", r.consumeToAny("&", ";"))
@ -136,7 +136,7 @@ class CharacterReaderTest: XCTestCase {
XCTAssertEqual(";", r.consume())
XCTAssertEqual(" qux", r.consumeToAny("&", ";"))
}
func testConsumeLetterSequence() {
let r = CharacterReader("One &bar; qux")
XCTAssertEqual("One", r.consumeLetterSequence())
@ -144,7 +144,7 @@ class CharacterReaderTest: XCTestCase {
XCTAssertEqual("bar", r.consumeLetterSequence())
XCTAssertEqual("; qux", r.consumeToEnd())
}
func testConsumeLetterThenDigitSequence() {
let r = CharacterReader("One12 Two &bar; qux")
XCTAssertEqual("One12", r.consumeLetterThenDigitSequence())
@ -152,7 +152,7 @@ class CharacterReaderTest: XCTestCase {
XCTAssertEqual("Two", r.consumeLetterThenDigitSequence())
XCTAssertEqual(" &bar; qux", r.consumeToEnd())
}
func testMatches() {
let r = CharacterReader("One Two Three")
XCTAssertTrue(r.matches("O"))
@ -166,8 +166,7 @@ class CharacterReaderTest: XCTestCase {
XCTAssertEqual("ne Two Three", r.consumeToEnd())
XCTAssertFalse(r.matches("ne"))
}
func testMatchesIgnoreCase() {
let r = CharacterReader("One Two Three")
XCTAssertTrue(r.matchesIgnoreCase("O"))
@ -185,7 +184,7 @@ class CharacterReaderTest: XCTestCase {
XCTAssertEqual("ne Two Three", r.consumeToEnd())
XCTAssertFalse(r.matchesIgnoreCase("ne"))
}
func testContainsIgnoreCase() {
let r = CharacterReader("One TWO three")
XCTAssertTrue(r.containsIgnoreCase("two"))
@ -193,7 +192,7 @@ class CharacterReaderTest: XCTestCase {
// weird one: does not find one, because it scans for consistent case only
XCTAssertFalse(r.containsIgnoreCase("one"))
}
func testMatchesAny() {
//let scan = [" ", "\n", "\t"]
let r = CharacterReader("One\nTwo\tThree")
@ -203,7 +202,7 @@ class CharacterReaderTest: XCTestCase {
XCTAssertEqual("\n", r.consume())
XCTAssertFalse(r.matchesAny(" ", "\n", "\t"))
}
func testCachesStrings() {
let r = CharacterReader("Check\tCheck\tCheck\tCHOKE\tA string that is longer than 16 chars")
let one = r.consumeTo("\t")
@ -215,7 +214,7 @@ class CharacterReaderTest: XCTestCase {
let four = r.consumeTo("\t")
XCTAssertEqual("\t", r.consume())
let five = r.consumeTo("\t")
XCTAssertEqual("Check", one)
XCTAssertEqual("Check", two)
XCTAssertEqual("Check", three)
@ -226,45 +225,45 @@ class CharacterReaderTest: XCTestCase {
XCTAssertTrue(four != five)
XCTAssertEqual(five, "A string that is longer than 16 chars")
}
/// Exercises `rangeEquals(start, count, text)` against each tab-separated word
/// of the input, with one matching and at least one non-matching candidate
/// per offset.
func testRangeEquals() {
    let reader = CharacterReader("Check\tCheck\tCheck\tCHOKE")

    // (start offset, character count, candidate text, expected result)
    let expectations: [(start: Int, count: Int, text: String, matches: Bool)] = [
        (0, 5, "Check", true),
        (0, 5, "CHOKE", false),
        (0, 5, "Chec", false),
        (6, 5, "Check", true),
        (6, 5, "Chuck", false),
        (12, 5, "Check", true),
        (12, 5, "Cheeky", false),
        (18, 5, "CHOKE", true),
        (18, 5, "CHIKE", false)
    ]

    for expectation in expectations {
        let actual = reader.rangeEquals(expectation.start, expectation.count, expectation.text)
        if expectation.matches {
            XCTAssertTrue(actual)
        } else {
            XCTAssertFalse(actual)
        }
    }
}
static var allTests = {
return [
("testConsume" , testConsume),
("testUnconsume" , testUnconsume),
("testMark" , testMark),
("testConsumeToEnd" , testConsumeToEnd),
("testNextIndexOfChar" , testNextIndexOfChar),
("testNextIndexOfString" , testNextIndexOfString),
("testNextIndexOfUnmatched" , testNextIndexOfUnmatched),
("testConsumeToChar" , testConsumeToChar),
("testConsumeToString" , testConsumeToString),
("testAdvance" , testAdvance),
("testConsumeToAny" , testConsumeToAny),
("testConsumeLetterSequence" , testConsumeLetterSequence),
("testConsumeLetterThenDigitSequence" , testConsumeLetterThenDigitSequence),
("testMatches" , testMatches),
("testMatchesIgnoreCase" , testMatchesIgnoreCase),
("testContainsIgnoreCase" , testContainsIgnoreCase),
("testMatchesAny" , testMatchesAny),
("testCachesStrings" , testCachesStrings),
("testRangeEquals" , testRangeEquals)
("testConsume", testConsume),
("testUnconsume", testUnconsume),
("testMark", testMark),
("testConsumeToEnd", testConsumeToEnd),
("testNextIndexOfChar", testNextIndexOfChar),
("testNextIndexOfString", testNextIndexOfString),
("testNextIndexOfUnmatched", testNextIndexOfUnmatched),
("testConsumeToChar", testConsumeToChar),
("testConsumeToString", testConsumeToString),
("testAdvance", testAdvance),
("testConsumeToAny", testConsumeToAny),
("testConsumeLetterSequence", testConsumeLetterSequence),
("testConsumeLetterThenDigitSequence", testConsumeLetterThenDigitSequence),
("testMatches", testMatches),
("testMatchesIgnoreCase", testMatchesIgnoreCase),
("testContainsIgnoreCase", testContainsIgnoreCase),
("testMatchesAny", testMatchesAny),
("testCachesStrings", testCachesStrings),
("testRangeEquals", testRangeEquals)
]
}()
}

View File

@ -12,80 +12,79 @@ import SwiftSoup
class CssTest: XCTestCase {
var html: Document!
private var htmlString: String!
override func setUp() {
super.setUp()
let sb: StringBuilder = StringBuilder(string:"<html><head></head><body>")
sb.append("<div id='pseudo'>")
for i in 1...10{
for i in 1...10 {
sb.append("<p>\(i)</p>")
}
sb.append("</div>")
sb.append("<div id='type'>")
for i in 1...10{
for i in 1...10 {
sb.append("<p>\(i)</p>")
sb.append("<span>\(i)</span>")
sb.append("<em>\(i)</em>")
sb.append("<svg>\(i)</svg>")
}
sb.append("</div>")
sb.append("<span id='onlySpan'><br /></span>")
sb.append("<p class='empty'><!-- Comment only is still empty! --></p>")
sb.append("<div id='only'>")
sb.append("Some text before the <em>only</em> child in this div")
sb.append("</div>")
sb.append("</body></html>")
htmlString = sb.toString()
html = try! SwiftSoup.parse(htmlString)
}
/// `:first-child` selects the first paragraph inside `#pseudo`; the same
/// pseudo-class on `html` is expected to match nothing.
func testFirstChild() throws {
    let firstInPseudo: Elements = try html.select("#pseudo :first-child")
    check(firstInPseudo, "1")

    let htmlAsFirstChild: Elements = try html.select("html:first-child")
    check(htmlAsFirstChild)
}
/// `:last-child` selects the tenth paragraph inside `#pseudo`; the same
/// pseudo-class on `html` is expected to match nothing.
func testLastChild() throws {
    let lastInPseudo: Elements = try html.select("#pseudo :last-child")
    check(lastInPseudo, "10")

    let htmlAsLastChild: Elements = try html.select("html:last-child")
    check(htmlAsLastChild)
}
/// For every n in 1...10, `:nth-child(n)` inside `#pseudo` selects the single
/// element whose own text is n.
func testNthChild_simple() throws {
    try (1...10).forEach { position in
        let query = "#pseudo :nth-child(\(position))"
        let expectedText = "\(position)"
        try check(html.select(query), expectedText)
    }
}
/// `:nth-of-type(n)` applied to `svg` elements inside `#type` selects the
/// n-th one for every n in 1...10.
func testNthOfType_unknownTag() throws {
    try (1...10).forEach { position in
        let query = "#type svg:nth-of-type(\(position))"
        let expectedText = "\(position)"
        try check(html.select(query), expectedText)
    }
}
/// `:nth-last-child(n)` counts from the end: for n in 1...10 it selects the
/// element whose own text is 11 - n.
func testNthLastChild_simple() throws {
    try (1...10).forEach { position in
        let query = "#pseudo :nth-last-child(\(position))"
        let expectedText = String(11 - position)
        try check(html.select(query), expectedText)
    }
}
/// `p:nth-of-type(n)` inside `#type` selects the n-th paragraph for every n
/// in 1...10.
func testNthOfType_simple() throws {
    try (1...10).forEach { position in
        let query = "#type p:nth-of-type(\(position))"
        let expectedText = "\(position)"
        try check(html.select(query), expectedText)
    }
}
func testNthLastOfType_simple()throws {
for i in 1...10 {
try check(html.select("#type :nth-last-of-type(\(i))"), "\(11-i)","\(11-i)","\(11-i)","\(11-i)")
try check(html.select("#type :nth-last-of-type(\(i))"), "\(11-i)", "\(11-i)", "\(11-i)", "\(11-i)")
}
}
func testNthChild_advanced()throws {
try check(html.select("#pseudo :nth-child(-5)"))
try check(html.select("#pseudo :nth-child(odd)"), "1", "3", "5", "7", "9")
@ -98,7 +97,7 @@ class CssTest: XCTestCase {
try check(html.select("#pseudo :nth-child(-2n+5)"), "1", "3", "5")
try check(html.select("#pseudo :nth-child(+5)"), "5")
}
func testNthOfType_advanced()throws {
try check(html.select("#type :nth-of-type(-5)"))
try check(html.select("#type p:nth-of-type(odd)"), "1", "3", "5", "7", "9")
@ -111,8 +110,7 @@ class CssTest: XCTestCase {
try check(html.select("#type p:nth-of-type(-2n+5)"), "1", "3", "5")
try check(html.select("#type :nth-of-type(+5)"), "5", "5", "5", "5")
}
func testNthLastChild_advanced()throws {
try check(html.select("#pseudo :nth-last-child(-5)"))
try check(html.select("#pseudo :nth-last-child(odd)"), "2", "4", "6", "8", "10")
@ -122,11 +120,11 @@ class CssTest: XCTestCase {
try check(html.select("#pseudo :nth-last-child(even)"), "1", "3", "5", "7", "9")
try check(html.select("#pseudo :nth-last-child(2n)"), "1", "3", "5", "7", "9")
try check(html.select("#pseudo :nth-last-child(3n-1)"), "3", "6", "9")
try check(html.select("#pseudo :nth-last-child(-2n+5)"), "6", "8", "10")
try check(html.select("#pseudo :nth-last-child(+5)"), "6")
}
func testNthLastOfType_advanced()throws {
try check(html.select("#type :nth-last-of-type(-5)"))
try check(html.select("#type p:nth-last-of-type(odd)"), "2", "4", "6", "8", "10")
@ -136,19 +134,19 @@ class CssTest: XCTestCase {
try check(html.select("#type p:nth-last-of-type(even)"), "1", "3", "5", "7", "9")
try check(html.select("#type p:nth-last-of-type(2n)"), "1", "3", "5", "7", "9")
try check(html.select("#type p:nth-last-of-type(3n-1)"), "3", "6", "9")
try check(html.select("#type span:nth-last-of-type(-2n+5)"), "6", "8", "10")
try check(html.select("#type :nth-last-of-type(+5)"), "6", "6", "6", "6")
}
/// `:first-of-type` under every div except `#only` yields five elements, each
/// with own text "1".
func testFirstOfType() throws {
    let firsts: Elements = try html.select("div:not(#only) :first-of-type")
    let expectedTexts = [String](repeating: "1", count: 5)
    check(firsts, expectedTexts)
}
/// `:last-of-type` under every div except `#only` yields five elements, each
/// with own text "10".
func testLastOfType() throws {
    let lasts: Elements = try html.select("div:not(#only) :last-of-type")
    let expectedTexts = [String](repeating: "10", count: 5)
    check(lasts, expectedTexts)
}
func testEmpty()throws {
let sel: Elements = try html.select(":empty")
XCTAssertEqual(3, sel.size())
@ -156,15 +154,15 @@ class CssTest: XCTestCase {
XCTAssertEqual("br", sel.get(1).tagName())
XCTAssertEqual("p", sel.get(2).tagName())
}
/// `:only-child` finds the lone `<br>` inside a span, and the lone `<em>`
/// (own text "only") inside `#only`.
func testOnlyChild() throws {
    let onlyChildrenOfSpans: Elements = try html.select("span :only-child")
    XCTAssertEqual(1, onlyChildrenOfSpans.size())
    let loneChild = onlyChildrenOfSpans.get(0)
    XCTAssertEqual("br", loneChild.tagName())

    let onlyChildOfOnlyDiv: Elements = try html.select("#only :only-child")
    check(onlyChildOfOnlyDiv, "only")
}
func testOnlyOfType()throws {
let sel: Elements = try html.select(":only-of-type")
XCTAssertEqual(6, sel.size())
@ -176,12 +174,11 @@ class CssTest: XCTestCase {
XCTAssertTrue(sel.get(4).hasClass("empty"))
XCTAssertEqual("em", sel.get(5).tagName())
}
func check(_ resut: Elements, _ expectedContent: String... ) {
check(resut,expectedContent)
check(resut, expectedContent)
}
func check(_ result: Elements, _ expectedContent: [String] ) {
XCTAssertEqual(expectedContent.count, result.size())
for i in 0..<expectedContent.count {
@ -189,41 +186,38 @@ class CssTest: XCTestCase {
XCTAssertEqual(expectedContent[i], result.get(i).ownText())
}
}
/// `:root` matches exactly one element: `html` when selecting from the
/// document, and `body` when selecting from the `body` selection.
func testRoot() throws {
    // Shared assertions: exactly one match, and it carries the expected tag.
    func assertSingleRoot(_ found: Elements, tagNamed tagName: String) throws {
        XCTAssertEqual(1, found.size())
        XCTAssertNotNil(found.get(0))
        try XCTAssertEqual(Tag.valueOf(tagName), found.get(0).tag())
    }

    let documentRoot: Elements = try html.select(":root")
    try assertSingleRoot(documentRoot, tagNamed: "html")

    let bodyRoot: Elements = try html.select("body").select(":root")
    try assertSingleRoot(bodyRoot, tagNamed: "body")
}
static var allTests = {
return [
("testFirstChild" , testFirstChild),
("testLastChild" , testLastChild),
("testNthChild_simple" , testNthChild_simple),
("testNthOfType_unknownTag" , testNthOfType_unknownTag),
("testNthLastChild_simple" , testNthLastChild_simple),
("testNthOfType_simple" , testNthOfType_simple),
("testNthLastOfType_simple" , testNthLastOfType_simple),
("testNthChild_advanced" , testNthChild_advanced),
("testNthOfType_advanced" , testNthOfType_advanced),
("testNthLastChild_advanced" , testNthLastChild_advanced),
("testNthLastOfType_advanced" , testNthLastOfType_advanced),
("testFirstOfType" , testFirstOfType),
("testLastOfType" , testLastOfType),
("testEmpty" , testEmpty),
("testOnlyChild" , testOnlyChild),
("testOnlyOfType" , testOnlyOfType),
("testRoot" , testRoot)
("testFirstChild", testFirstChild),
("testLastChild", testLastChild),
("testNthChild_simple", testNthChild_simple),
("testNthOfType_unknownTag", testNthOfType_unknownTag),
("testNthLastChild_simple", testNthLastChild_simple),
("testNthOfType_simple", testNthOfType_simple),
("testNthLastOfType_simple", testNthLastOfType_simple),
("testNthChild_advanced", testNthChild_advanced),
("testNthOfType_advanced", testNthOfType_advanced),
("testNthLastChild_advanced", testNthLastChild_advanced),
("testNthLastOfType_advanced", testNthLastOfType_advanced),
("testFirstOfType", testFirstOfType),
("testLastOfType", testLastOfType),
("testEmpty", testEmpty),
("testOnlyChild", testOnlyChild),
("testOnlyOfType", testOnlyOfType),
("testRoot", testRoot)
]
}()
}

View File

@ -10,95 +10,94 @@ import XCTest
@testable import SwiftSoup
class DocumentTest: XCTestCase {
// Encodings shared by the charset tests in this class.
private static let charsetUtf8 = String.Encoding.utf8
// NOTE(review): the constant name and the trailing "ISO-8859-1" note both point
// at Latin-1, but the value is ISO-2022-JP (`String.Encoding.isoLatin1` is the
// Latin-1 encoding) — confirm which one is intended before relying on it.
private static let charsetIso8859 = String.Encoding.iso2022JP //"ISO-8859-1"
func testSetTextPreservesDocumentStructure() {
do{
do {
let doc: Document = try SwiftSoup.parse("<p>Hello</p>")
try doc.text("Replaced")
XCTAssertEqual("Replaced", try doc.text())
XCTAssertEqual("Replaced", try doc.body()!.text())
XCTAssertEqual(1, try doc.select("head").size())
}catch{
XCTAssertEqual(1,2)
} catch {
XCTAssertEqual(1, 2)
}
}
func testTitles() {
do{
do {
let noTitle: Document = try SwiftSoup.parse("<p>Hello</p>")
let withTitle: Document = try SwiftSoup.parse("<title>First</title><title>Ignore</title><p>Hello</p>")
XCTAssertEqual("", try noTitle.title())
try noTitle.title("Hello")
XCTAssertEqual("Hello", try noTitle.title())
XCTAssertEqual("Hello", try noTitle.select("title").first()?.text())
XCTAssertEqual("First", try withTitle.title())
try withTitle.title("Hello")
XCTAssertEqual("Hello", try withTitle.title())
XCTAssertEqual("Hello", try withTitle.select("title").first()?.text())
let normaliseTitle: Document = try SwiftSoup.parse("<title> Hello\nthere \n now \n")
XCTAssertEqual("Hello there now", try normaliseTitle.title())
}catch{
} catch {
}
}
func testOutputEncoding() {
do{
do {
let doc: Document = try SwiftSoup.parse("<p title=π>π & < > </p>")
// default is utf-8
XCTAssertEqual("<p title=\"π\">π &amp; &lt; &gt; </p>", try doc.body()?.html())
XCTAssertEqual("UTF-8",doc.outputSettings().charset().displayName())
XCTAssertEqual("UTF-8", doc.outputSettings().charset().displayName())
doc.outputSettings().charset(String.Encoding.ascii)
XCTAssertEqual(Entities.EscapeMode.base,doc.outputSettings().escapeMode())
XCTAssertEqual(Entities.EscapeMode.base, doc.outputSettings().escapeMode())
XCTAssertEqual("<p title=\"&#x3c0;\">&#x3c0; &amp; &lt; &gt; </p>", try doc.body()?.html())
doc.outputSettings().escapeMode(Entities.EscapeMode.extended)
XCTAssertEqual("<p title=\"&pi;\">&pi; &amp; &lt; &gt; </p>", try doc.body()?.html())
}catch
{
XCTAssertEqual(1,2)
} catch {
XCTAssertEqual(1, 2)
}
}
func testXhtmlReferences() {
let doc: Document = try! SwiftSoup.parse("&lt; &gt; &amp; &quot; &apos; &times;")
doc.outputSettings().escapeMode(Entities.EscapeMode.xhtml)
XCTAssertEqual("&lt; &gt; &amp; \" ' ×",try! doc.body()?.html())
XCTAssertEqual("&lt; &gt; &amp; \" ' ×", try! doc.body()?.html())
}
/// Parsing moves a `<p>` that follows `</body>` into the body, and renders
/// the markup inside `<noscript>` in the head as escaped text.
func testNormalisesStructure() {
    let input = "<html><head><script>one</script><noscript><p>two</p></noscript></head><body><p>three</p></body><p>four</p></html>"
    let expected = "<html><head><script>one</script><noscript>&lt;p&gt;two</noscript></head><body><p>three</p><p>four</p></body></html>"

    let parsed: Document = try! SwiftSoup.parse(input)
    let rendered = TextUtil.stripNewlines(try! parsed.html())

    XCTAssertEqual(expected, rendered)
}
func testClone() {
let doc: Document = try! SwiftSoup.parse("<title>Hello</title> <p>One<p>Two")
let clone: Document = doc.copy() as! Document
XCTAssertEqual("<html><head><title>Hello</title> </head><body><p>One</p><p>Two</p></body></html>",try! TextUtil.stripNewlines(clone.html()))
XCTAssertEqual("<html><head><title>Hello</title> </head><body><p>One</p><p>Two</p></body></html>", try! TextUtil.stripNewlines(clone.html()))
try! clone.title("Hello there")
try! clone.select("p").first()!.text("One more").attr("id", "1")
XCTAssertEqual("<html><head><title>Hello there</title> </head><body><p id=\"1\">One more</p><p>Two</p></body></html>", try! TextUtil.stripNewlines(clone.html()))
XCTAssertEqual("<html><head><title>Hello</title> </head><body><p>One</p><p>Two</p></body></html>", try! TextUtil.stripNewlines(doc.html()))
}
/// A copied document renders the same HTML as its source, doctype included.
func testClonesDeclarations() {
    let source: Document = try! SwiftSoup.parse("<!DOCTYPE html><html><head><title>Doctype test")
    let duplicate: Document = source.copy() as! Document

    // Source and copy must render identically...
    let sourceHtml = try! source.html()
    let duplicateHtml = try! duplicate.html()
    XCTAssertEqual(sourceHtml, duplicateHtml)

    // ...and the copy must still carry the doctype declaration.
    let expected = "<!doctype html><html><head><title>Doctype test</title></head><body></body></html>"
    let flattened = TextUtil.stripNewlines(try! duplicate.html())
    XCTAssertEqual(expected, flattened)
}
//todo:
// func testLocation()throws {
// File in = new ParseTest().getFile("/htmltests/yahoo-jp.html")
@ -114,11 +113,11 @@ class DocumentTest: XCTestCase {
// assertEquals("http://www.nytimes.com/2010/07/26/business/global/26bp.html?hp",location);
// assertEquals("http://www.nytimes.com/2010/07/26/business/global/26bp.html?hp",baseUri);
// }
func testHtmlAndXmlSyntax() {
let h: String = "<!DOCTYPE html><body><img async checked='checked' src='&<>\"'>&lt;&gt;&amp;&quot;<foo />bar"
let doc: Document = try! SwiftSoup.parse(h)
doc.outputSettings().syntax(syntax: OutputSettings.Syntax.html)
XCTAssertEqual("<!doctype html>\n" +
"<html>\n" +
@ -128,7 +127,7 @@ class DocumentTest: XCTestCase {
" <foo />bar\n" +
" </body>\n" +
"</html>", try! doc.html())
doc.outputSettings().syntax(syntax: OutputSettings.Syntax.xml)
XCTAssertEqual("<!DOCTYPE html>\n" +
"<html>\n" +
@ -139,22 +138,22 @@ class DocumentTest: XCTestCase {
" </body>\n" +
"</html>", try! doc.html())
}
/// A document produced by `SwiftSoup.parse` reports HTML as its output syntax.
func testHtmlParseDefaultsToHtmlOutputSyntax() {
    let parsed: Document = try! SwiftSoup.parse("x")
    let syntax: OutputSettings.Syntax = parsed.outputSettings().syntax()
    XCTAssertEqual(OutputSettings.Syntax.html, syntax)
}
/// With pretty-printing switched off, rendering into a `StringBuilder`
/// reproduces the input markup exactly.
func testHtmlAppendable() {
    let markup: String = "<html><head><title>Hello</title></head><body><p>One</p><p>Two</p></body></html>"
    let parsed: Document = try! SwiftSoup.parse(markup)

    // Install output settings with pretty-printing disabled.
    let compactSettings: OutputSettings = OutputSettings()
    compactSettings.prettyPrint(pretty: false)
    parsed.outputSettings(compactSettings)

    let buffer = StringBuilder()
    XCTAssertEqual(markup, try! parsed.html(buffer).toString())
}
//todo: // Ignored since this test can take awhile to run.
// func testOverflowClone() {
// let builder: StringBuilder = StringBuilder();
@ -166,74 +165,73 @@ class DocumentTest: XCTestCase {
// let doc: Document = try! Jsoup.parse(builder.toString());
// doc.copy();
// }
/// `equals` is true for a document compared with itself and false for a
/// second document parsed from identical markup.
func testDocumentsWithSameContentAreEqual() throws {
    let sharedMarkup = "<div/>One"
    let docA: Document = try SwiftSoup.parse(sharedMarkup)
    let docB: Document = try SwiftSoup.parse(sharedMarkup)
    // Parsed only for the disabled hash-code assertions below; result unused.
    _ = try SwiftSoup.parse("<div/>Two")

    let equalsTwin = docA.equals(docB)
    XCTAssertFalse(equalsTwin)

    let equalsItself = docA.equals(docA)
    XCTAssertTrue(equalsItself)
    //todo:
    // XCTAssertEqual(docA.hashCode(), docA.hashCode());
    // XCTAssertFalse(docA.hashCode() == docC.hashCode());
}
func testDocumentsWithSameContentAreVerifialbe() throws {
func testDocumentsWithSameContentAreVerifialbe() throws {
let docA: Document = try SwiftSoup.parse("<div/>One")
let docB: Document = try SwiftSoup.parse("<div/>One")
let docC: Document = try SwiftSoup.parse("<div/>Two")
XCTAssertTrue(try docA.hasSameValue(docB))
XCTAssertFalse(try docA.hasSameValue(docC))
}
func testMetaCharsetUpdateUtf8() {
let doc: Document = createHtmlDocument("changeThis")
doc.updateMetaCharsetElement(true)
do{
do {
try doc.charset(DocumentTest.charsetUtf8)
}catch{
} catch {
print("")
}
let htmlCharsetUTF8: String = "<html>\n" + " <head>\n" + " <meta charset=\"" + "UTF-8" + "\">\n" + " </head>\n" + " <body></body>\n" + "</html>"
XCTAssertEqual(htmlCharsetUTF8, try! doc.toString())
let selectedElement: Element = try! doc.select("meta[charset]").first()!
XCTAssertEqual(DocumentTest.charsetUtf8, doc.charset())
XCTAssertEqual("UTF-8", try! selectedElement.attr("charset"))
XCTAssertEqual(doc.charset(), doc.outputSettings().charset())
}
func testMetaCharsetUpdateIsoLatin2()throws {
let doc: Document = createHtmlDocument("changeThis")
doc.updateMetaCharsetElement(true)
try doc.charset(String.Encoding.isoLatin2)
let htmlCharsetISO = "<html>\n" +
" <head>\n" +
" <meta charset=\"" + String.Encoding.isoLatin2.displayName() + "\">\n" +
" </head>\n" +
" <body></body>\n" +
"</html>"
XCTAssertEqual(htmlCharsetISO,try doc.toString())
XCTAssertEqual(htmlCharsetISO, try doc.toString())
let selectedElement: Element = try doc.select("meta[charset]").first()!
XCTAssertEqual(String.Encoding.isoLatin2.displayName(), doc.charset().displayName())
XCTAssertEqual(String.Encoding.isoLatin2.displayName(), try selectedElement.attr("charset"))
XCTAssertEqual(doc.charset(), doc.outputSettings().charset())
}
func testMetaCharsetUpdateNoCharset()throws {
let docNoCharset: Document = Document.createShell("")
docNoCharset.updateMetaCharsetElement(true)
try docNoCharset.charset(String.Encoding.utf8)
try XCTAssertEqual(String.Encoding.utf8.displayName(), docNoCharset.select("meta[charset]").first()?.attr("charset"))
let htmlCharsetUTF8 = "<html>\n" +
" <head>\n" +
" <meta charset=\"" + String.Encoding.utf8.displayName() + "\">\n" +
@ -242,10 +240,10 @@ class DocumentTest: XCTestCase {
"</html>"
try XCTAssertEqual(htmlCharsetUTF8, docNoCharset.toString())
}
func testMetaCharsetUpdateDisabled()throws {
let docDisabled: Document = Document.createShell("")
let htmlNoCharset = "<html>\n" +
" <head></head>\n" +
" <body></body>\n" +
@ -253,10 +251,10 @@ class DocumentTest: XCTestCase {
try XCTAssertEqual(htmlNoCharset, docDisabled.toString())
try XCTAssertNil(docDisabled.select("meta[charset]").first())
}
func testMetaCharsetUpdateDisabledNoChanges()throws {
let doc: Document = createHtmlDocument("dontTouch")
let htmlCharset = "<html>\n" +
" <head>\n" +
" <meta charset=\"dontTouch\">\n" +
@ -265,137 +263,136 @@ class DocumentTest: XCTestCase {
" <body></body>\n" +
"</html>"
try XCTAssertEqual(htmlCharset, doc.toString())
var selectedElement: Element = try doc.select("meta[charset]").first()!
XCTAssertNotNil(selectedElement)
try XCTAssertEqual("dontTouch", selectedElement.attr("charset"))
selectedElement = try doc.select("meta[name=charset]").first()!
XCTAssertNotNil(selectedElement)
try XCTAssertEqual("dontTouch", selectedElement.attr("content"))
}
/// Setting a charset, without first calling `updateMetaCharsetElement(true)`,
/// still rewrites `<meta charset>` to UTF-8 and removes the
/// `<meta name=charset>` element.
func testMetaCharsetUpdateEnabledAfterCharsetChange() throws {
    let document: Document = createHtmlDocument("dontTouch")
    let utf8 = String.Encoding.utf8
    try document.charset(utf8)

    let charsetMeta: Element = try document.select("meta[charset]").first()!
    try XCTAssertEqual(utf8.displayName(), charsetMeta.attr("charset"))

    try XCTAssertTrue(document.select("meta[name=charset]").isEmpty())
}
/// After enabling meta-charset updates and setting UTF-8, the head holds a
/// single `<meta charset>` element; the `<meta name=charset>` element that
/// `createHtmlDocument` added is gone from the output.
func testMetaCharsetUpdateCleanup() throws {
    let document: Document = createHtmlDocument("dontTouch")
    document.updateMetaCharsetElement(true)
    try document.charset(String.Encoding.utf8)

    let charsetName = String.Encoding.utf8.displayName()
    let expectedLines = [
        "<html>",
        " <head>",
        " <meta charset=\"\(charsetName)\">",
        " </head>",
        " <body></body>",
        "</html>"
    ]
    let expectedHtml = expectedLines.joined(separator: "\n")

    try XCTAssertEqual(expectedHtml, document.toString())
}
/// With meta-charset updates enabled, setting UTF-8 on an XML document that
/// already has a declaration rewrites the declaration's `encoding` attribute
/// and keeps the document charset and output-settings charset equal.
func testMetaCharsetUpdateXmlUtf8() throws {
    let doc: Document = try createXmlDocument("1.0", "changeThis", true)
    doc.updateMetaCharsetElement(true)
    try doc.charset(String.Encoding.utf8)

    let xmlCharsetUTF8 = "<?xml version=\"1.0\" encoding=\"" + String.Encoding.utf8.displayName() + "\"?>\n" +
        "<root>\n" +
        " node\n" +
        "</root>"
    try XCTAssertEqual(xmlCharsetUTF8, doc.toString())

    // Report an unexpected node type as a test failure; the former `as!`
    // would have trapped and aborted the whole test run instead.
    guard let selectedNode = doc.childNode(0) as? XmlDeclaration else {
        XCTFail("Expected the first child node to be an XmlDeclaration")
        return
    }
    XCTAssertEqual(String.Encoding.utf8.displayName(), doc.charset().displayName())
    try XCTAssertEqual(String.Encoding.utf8.displayName(), selectedNode.attr("encoding"))
    XCTAssertEqual(doc.charset(), doc.outputSettings().charset())
}
/// With meta-charset updates enabled, setting ISO-2022-JP on an XML document
/// that already has a declaration rewrites the declaration's `encoding`
/// attribute and keeps the document charset and output-settings charset equal.
func testMetaCharsetUpdateXmlIso2022JP() throws {
    let doc: Document = try createXmlDocument("1.0", "changeThis", true)
    doc.updateMetaCharsetElement(true)
    try doc.charset(String.Encoding.iso2022JP)

    let xmlCharsetISO = "<?xml version=\"1.0\" encoding=\"" + String.Encoding.iso2022JP.displayName() + "\"?>\n" +
        "<root>\n" +
        " node\n" +
        "</root>"
    try XCTAssertEqual(xmlCharsetISO, doc.toString())

    // Report an unexpected node type as a test failure; the former `as!`
    // would have trapped and aborted the whole test run instead.
    guard let selectedNode = doc.childNode(0) as? XmlDeclaration else {
        XCTFail("Expected the first child node to be an XmlDeclaration")
        return
    }
    XCTAssertEqual(String.Encoding.iso2022JP.displayName(), doc.charset().displayName())
    try XCTAssertEqual(String.Encoding.iso2022JP.displayName(), selectedNode.attr("encoding"))
    XCTAssertEqual(doc.charset(), doc.outputSettings().charset())
}
/// With meta-charset updates enabled, setting UTF-8 on an XML document built
/// without a declaration results in an `<?xml ...?>` declaration as the first
/// child node, carrying version "1.0" and the UTF-8 encoding name.
func testMetaCharsetUpdateXmlNoCharset() throws {
    let doc: Document = try createXmlDocument("1.0", "none", false)
    doc.updateMetaCharsetElement(true)
    try doc.charset(String.Encoding.utf8)

    let xmlCharsetUTF8 = "<?xml version=\"1.0\" encoding=\"" + String.Encoding.utf8.displayName() + "\"?>\n" +
        "<root>\n" +
        " node\n" +
        "</root>"
    try XCTAssertEqual(xmlCharsetUTF8, doc.toString())

    // Report an unexpected node type as a test failure; the former `as!`
    // would have trapped and aborted the whole test run instead.
    guard let selectedNode = doc.childNode(0) as? XmlDeclaration else {
        XCTFail("Expected the first child node to be an XmlDeclaration")
        return
    }
    try XCTAssertEqual(String.Encoding.utf8.displayName(), selectedNode.attr("encoding"))
}
/// An XML document built without a declaration, and with no charset ever
/// set, renders just the root element.
func testMetaCharsetUpdateXmlDisabled() throws {
    let document: Document = try createXmlDocument("none", "none", false)

    let expectedXml = ["<root>", " node", "</root>"].joined(separator: "\n")

    try XCTAssertEqual(expectedXml, document.toString())
}
/// When no charset is set and meta-charset updates are never enabled, an
/// existing XML declaration keeps the `version` and `encoding` values it was
/// created with.
func testMetaCharsetUpdateXmlDisabledNoChanges() throws {
    let doc: Document = try createXmlDocument("dontTouch", "dontTouch", true)

    let xmlCharset = "<?xml version=\"dontTouch\" encoding=\"dontTouch\"?>\n" +
        "<root>\n" +
        " node\n" +
        "</root>"
    try XCTAssertEqual(xmlCharset, doc.toString())

    // Report an unexpected node type as a test failure; the former `as!`
    // would have trapped and aborted the whole test run instead.
    guard let selectedNode = doc.childNode(0) as? XmlDeclaration else {
        XCTFail("Expected the first child node to be an XmlDeclaration")
        return
    }
    try XCTAssertEqual("dontTouch", selectedNode.attr("encoding"))
    try XCTAssertEqual("dontTouch", selectedNode.attr("version"))
}
/// A document on which `updateMetaCharsetElement(true)` was never called
/// reports the flag as false.
func testMetaCharsetUpdatedDisabledPerDefault() {
    let document: Document = createHtmlDocument("none")
    let updatesEnabled: Bool = document.updateMetaCharsetElement()
    XCTAssertFalse(updatesEnabled)
}
private func createHtmlDocument(_ charset: String)->Document {
private func createHtmlDocument(_ charset: String) -> Document {
let doc: Document = Document.createShell("")
try! doc.head()?.appendElement("meta").attr("charset", charset)
try! doc.head()?.appendElement("meta").attr("name", "charset").attr("content", charset)
return doc
}
/// Builds a small XML-syntax document consisting of a `root` element with
/// the text "node", optionally preceded by an `<?xml ...?>` declaration.
///
/// - Parameters:
///   - version: Value for the declaration's `version` attribute.
///   - charset: Value for the declaration's `encoding` attribute.
///   - addDecl: When true, an XML declaration carrying `version` and
///     `charset` is prepended; when false, both values are unused.
/// - Returns: The constructed document, with its output syntax set to XML.
/// - Throws: Any error raised while appending elements or setting attributes.
func createXmlDocument(_ version: String, _ charset: String, _ addDecl: Bool) throws -> Document {
    let doc: Document = Document("")
    try doc.appendElement("root").text("node")
    doc.outputSettings().syntax(syntax: OutputSettings.Syntax.xml)

    if addDecl {
        let decl: XmlDeclaration = XmlDeclaration("xml", "", false)
        try decl.attr("version", version)
        try decl.attr("encoding", charset)
        // Prepend so the declaration becomes the document's first child node.
        try doc.prependChild(decl)
    }
    return doc
}
//todo:
@ -420,35 +417,35 @@ class DocumentTest: XCTestCase {
// assertTrue("Should have contained a '&#xa0;' or a '&nbsp;'.",
// output.contains("&#xa0;") || output.contains("&nbsp;"));
// }
static var allTests = {
return [
("testSetTextPreservesDocumentStructure" , testSetTextPreservesDocumentStructure),
("testTitles" , testTitles),
("testOutputEncoding" , testOutputEncoding),
("testXhtmlReferences" , testXhtmlReferences),
("testNormalisesStructure" , testNormalisesStructure),
("testClone" , testClone),
("testClonesDeclarations" , testClonesDeclarations),
("testHtmlAndXmlSyntax" , testHtmlAndXmlSyntax),
("testHtmlParseDefaultsToHtmlOutputSyntax" , testHtmlParseDefaultsToHtmlOutputSyntax),
("testHtmlAppendable" , testHtmlAppendable),
("testDocumentsWithSameContentAreEqual" , testDocumentsWithSameContentAreEqual),
("testDocumentsWithSameContentAreVerifialbe" , testDocumentsWithSameContentAreVerifialbe),
("testMetaCharsetUpdateUtf8" , testMetaCharsetUpdateUtf8),
("testMetaCharsetUpdateIsoLatin2" , testMetaCharsetUpdateIsoLatin2),
("testMetaCharsetUpdateNoCharset" , testMetaCharsetUpdateNoCharset),
("testMetaCharsetUpdateDisabled" , testMetaCharsetUpdateDisabled),
("testMetaCharsetUpdateDisabledNoChanges" , testMetaCharsetUpdateDisabledNoChanges),
("testMetaCharsetUpdateEnabledAfterCharsetChange" , testMetaCharsetUpdateEnabledAfterCharsetChange),
("testMetaCharsetUpdateCleanup" , testMetaCharsetUpdateCleanup),
("testMetaCharsetUpdateXmlUtf8" , testMetaCharsetUpdateXmlUtf8),
("testMetaCharsetUpdateXmlIso2022JP" , testMetaCharsetUpdateXmlIso2022JP),
("testMetaCharsetUpdateXmlNoCharset" , testMetaCharsetUpdateXmlNoCharset),
("testMetaCharsetUpdateXmlDisabled" , testMetaCharsetUpdateXmlDisabled),
("testMetaCharsetUpdateXmlDisabledNoChanges" , testMetaCharsetUpdateXmlDisabledNoChanges),
("testMetaCharsetUpdatedDisabledPerDefault" , testMetaCharsetUpdatedDisabledPerDefault)
("testSetTextPreservesDocumentStructure", testSetTextPreservesDocumentStructure),
("testTitles", testTitles),
("testOutputEncoding", testOutputEncoding),
("testXhtmlReferences", testXhtmlReferences),
("testNormalisesStructure", testNormalisesStructure),
("testClone", testClone),
("testClonesDeclarations", testClonesDeclarations),
("testHtmlAndXmlSyntax", testHtmlAndXmlSyntax),
("testHtmlParseDefaultsToHtmlOutputSyntax", testHtmlParseDefaultsToHtmlOutputSyntax),
("testHtmlAppendable", testHtmlAppendable),
("testDocumentsWithSameContentAreEqual", testDocumentsWithSameContentAreEqual),
("testDocumentsWithSameContentAreVerifialbe", testDocumentsWithSameContentAreVerifialbe),
("testMetaCharsetUpdateUtf8", testMetaCharsetUpdateUtf8),
("testMetaCharsetUpdateIsoLatin2", testMetaCharsetUpdateIsoLatin2),
("testMetaCharsetUpdateNoCharset", testMetaCharsetUpdateNoCharset),
("testMetaCharsetUpdateDisabled", testMetaCharsetUpdateDisabled),
("testMetaCharsetUpdateDisabledNoChanges", testMetaCharsetUpdateDisabledNoChanges),
("testMetaCharsetUpdateEnabledAfterCharsetChange", testMetaCharsetUpdateEnabledAfterCharsetChange),
("testMetaCharsetUpdateCleanup", testMetaCharsetUpdateCleanup),
("testMetaCharsetUpdateXmlUtf8", testMetaCharsetUpdateXmlUtf8),
("testMetaCharsetUpdateXmlIso2022JP", testMetaCharsetUpdateXmlIso2022JP),
("testMetaCharsetUpdateXmlNoCharset", testMetaCharsetUpdateXmlNoCharset),
("testMetaCharsetUpdateXmlDisabled", testMetaCharsetUpdateXmlDisabled),
("testMetaCharsetUpdateXmlDisabledNoChanges", testMetaCharsetUpdateXmlDisabledNoChanges),
("testMetaCharsetUpdatedDisabledPerDefault", testMetaCharsetUpdatedDisabledPerDefault)
]
}()
}

View File

@ -10,43 +10,42 @@ import XCTest
import SwiftSoup
class DocumentTypeTest: XCTestCase {
/// A `DocumentType` created from four empty strings must still yield a value.
func testConstructorValidationOkWithBlankName() {
    // Declared exactly once: a second `let fail` in this scope is an invalid redeclaration.
    let fail: DocumentType? = DocumentType("", "", "", "")
    XCTAssertNotNil(fail)
}
/// Creates a doctype named "html" with the remaining arguments empty and
/// checks that the optional holding it is not nil.
func testConstructorValidationThrowsExceptionOnNulls() {
    let docType: DocumentType? = DocumentType("html", "", "", "")
    XCTAssertFalse(docType == nil)
}
/// A doctype with a name but empty public/system arguments must be constructible.
func testConstructorValidationOkWithBlankPublicAndSystemIds() {
    let htmlOnly: DocumentType? = DocumentType("html", "", "", "")
    XCTAssertFalse(htmlOnly == nil)
}
/// Checks the serialised `<!doctype …>` / `<!DOCTYPE …>` text for four
/// combinations of name, public id and system id.
func testOuterHtmlGeneration() {
    // Each row: constructor arguments (the fourth argument is always "") and the expected markup.
    let cases: [(name: String, publicId: String, systemId: String, expected: String)] = [
        ("html", "", "", "<!doctype html>"),
        ("html", "-//IETF//DTD HTML//", "", "<!DOCTYPE html PUBLIC \"-//IETF//DTD HTML//\">"),
        ("html", "", "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd",
         "<!DOCTYPE html \"http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd\">"),
        ("notHtml", "--public", "--system", "<!DOCTYPE notHtml PUBLIC \"--public\" \"--system\">")
    ]

    for entry in cases {
        let docType = DocumentType(entry.name, entry.publicId, entry.systemId, "")
        XCTAssertEqual(entry.expected, try! docType.outerHtml())
    }
}
/// Name/method pairs for every test in this class, one entry per test.
/// Each test is listed exactly once so it is not registered twice.
static var allTests = {
    return [
        ("testConstructorValidationOkWithBlankName", testConstructorValidationOkWithBlankName),
        ("testConstructorValidationThrowsExceptionOnNulls", testConstructorValidationThrowsExceptionOnNulls),
        ("testConstructorValidationOkWithBlankPublicAndSystemIds", testConstructorValidationOkWithBlankPublicAndSystemIds),
        ("testOuterHtmlGeneration", testOuterHtmlGeneration)
    ]
}()
}

File diff suppressed because it is too large Load Diff

View File

@ -11,7 +11,7 @@ Tests for ElementList.
import XCTest
import SwiftSoup
class ElementsTest: XCTestCase {
func testFilter()throws {
let h: String = "<p>Excl</p><div class=headline><p>Hello</p><p>There</p></div><div class=headline><h1>Headline</h1></div>"
let doc: Document = try SwiftSoup.parse(h)
@ -20,7 +20,7 @@ class ElementsTest: XCTestCase {
try XCTAssertEqual("Hello", els.get(0).text())
try XCTAssertEqual("There", els.get(1).text())
}
func testAttributes()throws {
let h = "<p title=foo><p title=bar><p class=foo><p class=bar>"
let doc: Document = try SwiftSoup.parse(h)
@ -29,24 +29,24 @@ class ElementsTest: XCTestCase {
XCTAssertTrue(withTitle.hasAttr("title"))
XCTAssertFalse(withTitle.hasAttr("class"))
try XCTAssertEqual("foo", withTitle.attr("title"))
try withTitle.removeAttr("title")
XCTAssertEqual(2, withTitle.size()) // existing Elements are not reevaluated
try XCTAssertEqual(0, doc.select("p[title]").size())
let ps: Elements = try doc.select("p").attr("style", "classy")
XCTAssertEqual(4, ps.size())
try XCTAssertEqual("classy", ps.last()?.attr("style"))
try XCTAssertEqual("bar", ps.last()?.attr("class"))
}
/// `hasAttr` on the selected paragraphs is expected to report `class`
/// (carried by some of them) and not `style` (carried by none).
func testHasAttr() throws {
    let markup = "<p title=foo><p title=bar><p class=foo><p class=bar>"
    let parsed: Document = try SwiftSoup.parse(markup)
    let paragraphs: Elements = try parsed.select("p")

    XCTAssertTrue(paragraphs.hasAttr("class"))
    XCTAssertFalse(paragraphs.hasAttr("style"))
}
func testHasAbsAttr()throws {
let doc: Document = try SwiftSoup.parse("<a id=1 href='/foo'>One</a> <a id=2 href='https://jsoup.org'>Two</a>")
let one: Elements = try doc.select("#1")
@ -56,157 +56,157 @@ class ElementsTest: XCTestCase {
XCTAssertTrue(two.hasAttr("abs:href"))
XCTAssertTrue(both.hasAttr("abs:href")) // hits on #2
}
/// Reading `class` from the selected paragraphs is expected to give "foo".
func testAttr() throws {
    let markup = "<p title=foo><p title=bar><p class=foo><p class=bar>"
    let parsed: Document = try SwiftSoup.parse(markup)
    // Evaluated outside the assertion, as before, so a thrown error aborts the test.
    let classValue = try parsed.select("p").attr("class")
    XCTAssertEqual("foo", classValue)
}
/// Checks `attr("abs:href")` on three selections: the relative link alone,
/// the absolute link alone, and both links together.
func testAbsAttr() throws {
    let markup = "<a id=1 href='/foo'>One</a> <a id=2 href='https://jsoup.org'>Two</a>"
    let parsed: Document = try SwiftSoup.parse(markup)
    let relativeLink: Elements = try parsed.select("#1")
    let absoluteLink: Elements = try parsed.select("#2")
    let allLinks: Elements = try parsed.select("a")

    try XCTAssertEqual("", relativeLink.attr("abs:href"))
    try XCTAssertEqual("https://jsoup.org", absoluteLink.attr("abs:href"))
    try XCTAssertEqual("https://jsoup.org", allLinks.attr("abs:href"))
}
/// Exercises `hasClass`, `addClass`, `removeClass` and `toggleClass` on a
/// two-paragraph selection and checks the resulting class names.
func testClasses() throws {
    let markup = "<div><p class='mellow yellow'></p><p class='red green'></p>"
    let paragraphs: Elements = try SwiftSoup.parse(markup).select("p")

    XCTAssertTrue(paragraphs.hasClass("red"))
    XCTAssertFalse(paragraphs.hasClass("blue"))

    try paragraphs.addClass("blue")
    try paragraphs.removeClass("yellow")
    try paragraphs.toggleClass("mellow")

    try XCTAssertEqual("blue", paragraphs.get(0).className())
    try XCTAssertEqual("red green blue mellow", paragraphs.get(1).className())
}
/// The text of the children selected by `div > *` is expected to be
/// "Hello there world".
func testText() throws {
    let parsed: Document = try SwiftSoup.parse("<div><p>Hello<p>there<p>world</div>")
    try XCTAssertEqual("Hello there world", parsed.select("div > *").text())
}
/// `hasText` is expected to be true for the selection of both divs and
/// false for the second div alone (its paragraph is empty).
func testHasText() throws {
    let markup = "<div><p>Hello</p></div><div><p></p></div>"
    let parsed: Document = try SwiftSoup.parse(markup)
    let allDivs: Elements = try parsed.select("div")

    XCTAssertTrue(allDivs.hasText())
    try XCTAssertFalse(parsed.select("div + div").hasText())
}
/// `html()` on the two selected divs is expected to give both inner
/// paragraphs separated by a newline.
func testHtml() throws {
    let markup = "<div><p>Hello</p></div><div><p>There</p></div>"
    let selectedDivs: Elements = try SwiftSoup.parse(markup).select("div")
    try XCTAssertEqual("<p>Hello</p>\n<p>There</p>", selectedDivs.html())
}
/// `outerHtml()` on the two selected divs, with newlines stripped, is
/// expected to reproduce the input markup.
func testOuterHtml() throws {
    let markup = "<div><p>Hello</p></div><div><p>There</p></div>"
    let selectedDivs: Elements = try SwiftSoup.parse(markup).select("div")
    try XCTAssertEqual("<div><p>Hello</p></div><div><p>There</p></div>",
                       TextUtil.stripNewlines(selectedDivs.outerHtml()))
}
/// Prepends and appends markup to every paragraph, then replaces the inner
/// HTML, checking the second paragraph after each step.
func testSetHtml() throws {
    let parsed: Document = try SwiftSoup.parse("<p>One</p><p>Two</p><p>Three</p>")
    let paragraphs: Elements = try parsed.select("p")

    try paragraphs.prepend("<b>Bold</b>").append("<i>Ital</i>")
    XCTAssertEqual("<p><b>Bold</b>Two<i>Ital</i></p>",
                   try TextUtil.stripNewlines(paragraphs.get(1).outerHtml()))

    try paragraphs.html("<span>Gone</span>")
    XCTAssertEqual("<p><span>Gone</span></p>",
                   try TextUtil.stripNewlines(paragraphs.get(1).outerHtml()))
}
/// Reads `val()` from an input and a textarea, sets a new value on the
/// whole selection, and checks that both controls report it.
func testVal() throws {
    let markup = "<input value='one' /><textarea>two</textarea>"
    let controls: Elements = try SwiftSoup.parse(markup).select("input, textarea")

    XCTAssertEqual(2, controls.size())
    XCTAssertEqual("one", try controls.val())
    XCTAssertEqual("two", try controls.last()?.val())

    try controls.val("three")

    XCTAssertEqual("three", try controls.first()?.val())
    XCTAssertEqual("three", try controls.last()?.val())
    XCTAssertEqual("<textarea>three</textarea>", try controls.last()?.outerHtml())
}
/// `before` on the selected anchors is expected to insert the span
/// immediately ahead of each `<a>`.
func testBefore() throws {
    let parsed: Document = try SwiftSoup.parse("<p>This <a>is</a> <a>jsoup</a>.</p>")
    try parsed.select("a").before("<span>foo</span>")

    let expected = "<p>This <span>foo</span><a>is</a> <span>foo</span><a>jsoup</a>.</p>"
    try XCTAssertEqual(expected, TextUtil.stripNewlines(parsed.body()!.html()))
}
/// `after` on the selected anchors is expected to insert the span
/// immediately behind each `<a>`.
func testAfter() throws {
    let parsed: Document = try SwiftSoup.parse("<p>This <a>is</a> <a>jsoup</a>.</p>")
    try parsed.select("a").after("<span>foo</span>")

    let expected = "<p>This <a>is</a><span>foo</span> <a>jsoup</a><span>foo</span>.</p>"
    try XCTAssertEqual(expected, TextUtil.stripNewlines(parsed.body()!.html()))
}
/// Wrapping every `<b>` in `<i></i>` is expected to nest each bold element
/// inside its own italic element.
func testWrap() throws {
    let parsed: Document = try SwiftSoup.parse("<p><b>This</b> is <b>jsoup</b></p>")
    try parsed.select("b").wrap("<i></i>")
    try XCTAssertEqual("<p><i><b>This</b></i> is <i><b>jsoup</b></i></p>", parsed.body()?.html())
}
/// Wrapping every `<p>` in `<div></div>` is expected to give each paragraph
/// its own enclosing div.
func testWrapDiv()throws {
    let h = "<p><b>This</b> is <b>jsoup</b>.</p> <p>How do you like it?</p>"
    let doc: Document = try SwiftSoup.parse(h)
    try doc.select("p").wrap("<div></div>")
    // Asserted once; the identical second copy of this check was redundant.
    XCTAssertEqual("<div><p><b>This</b> is <b>jsoup</b>.</p></div> <div><p>How do you like it?</p></div>", try TextUtil.stripNewlines(doc.body()!.html()))
}
/// Unwrapping every `<font>` is expected to leave its children in place.
/// The input deliberately ends with an unterminated `</div`.
func testUnwrap() throws {
    let markup = "<div><font>One</font> <font><a href=\"/\">Two</a></font></div"
    let parsed: Document = try SwiftSoup.parse(markup)
    try parsed.select("font").unwrap()
    try XCTAssertEqual("<div>One <a href=\"/\">Two</a></div>", TextUtil.stripNewlines(parsed.body()!.html()))
}
/// Unwrapping every `<p>` is expected to leave the paragraph contents
/// directly in the body.
func testUnwrapP() throws {
    let markup = "<p><a>One</a> Two</p> Three <i>Four</i> <p>Fix <i>Six</i></p>"
    let parsed: Document = try SwiftSoup.parse(markup)
    try parsed.select("p").unwrap()
    try XCTAssertEqual("<a>One</a> Two Three <i>Four</i> Fix <i>Six</i>", TextUtil.stripNewlines(parsed.body()!.html()))
}
/// Unwrapping the spans must keep the single spaces between the words.
func testUnwrapKeepsSpace() throws {
    let parsed: Document = try SwiftSoup.parse("<p>One <span>two</span> <span>three</span> four</p>")
    try parsed.select("span").unwrap()
    try XCTAssertEqual("<p>One two three four</p>", parsed.body()?.html())
}
/// With pretty-printing off, emptying every `<p>` is expected to leave two
/// empty paragraphs and the space between them.
func testEmpty() throws {
    let markup = "<div><p>Hello <b>there</b></p> <p>now!</p></div>"
    let parsed: Document = try SwiftSoup.parse(markup)
    parsed.outputSettings().prettyPrint(pretty: false)

    try parsed.select("p").empty()
    try XCTAssertEqual("<div><p></p> <p></p></div>", parsed.body()?.html())
}
/// With pretty-printing off, removing every `<p>` is expected to leave only
/// the bare text inside the div.
func testRemove() throws {
    let markup = "<div><p>Hello <b>there</b></p> jsoup <p>now!</p></div>"
    let parsed: Document = try SwiftSoup.parse(markup)
    parsed.outputSettings().prettyPrint(pretty: false)

    try parsed.select("p").remove()
    try XCTAssertEqual("<div> jsoup </div>", parsed.body()?.html())
}
/// Both `eq(1)` and `get(1)` on the paragraph selection are expected to
/// give the element whose text is "there".
func testEq() throws {
    let parsed: Document = try SwiftSoup.parse("<p>Hello<p>there<p>world")
    try XCTAssertEqual("there", parsed.select("p").eq(1).text())
    try XCTAssertEqual("there", parsed.select("p").get(1).text())
}
func testIs()throws {
let h = "<p>Hello<p title=foo>there<p>world"
let doc: Document = try SwiftSoup.parse(h)
@ -214,41 +214,41 @@ class ElementsTest: XCTestCase {
try XCTAssertTrue(ps.is("[title=foo]"))
try XCTAssertFalse(ps.is("[title=bar]"))
}
/// `parents()` on the two paragraphs is expected to give div, body and
/// html, in that order.
func testParents() throws {
    let markup = "<div><p>Hello</p></div><p>There</p>"
    let ancestors: Elements = try SwiftSoup.parse(markup).select("p").parents()

    XCTAssertEqual(3, ancestors.size())
    XCTAssertEqual("div", ancestors.get(0).tagName())
    XCTAssertEqual("body", ancestors.get(1).tagName())
    XCTAssertEqual("html", ancestors.get(2).tagName())
}
/// `not(_:)` is expected to drop the matching div from the selection,
/// leaving div #1 for `:has(p > span)` and div #2 for `#1`.
func testNot() throws {
    let markup = "<div id=1><p>One</p></div> <div id=2><p><span>Two</span></p></div>"
    let parsed: Document = try SwiftSoup.parse(markup)

    let withoutSpanParent: Elements = try parsed.select("div").not(":has(p > span)")
    XCTAssertEqual(1, withoutSpanParent.size())
    XCTAssertEqual("1", withoutSpanParent.first()?.id())

    let withoutFirst: Elements = try parsed.select("div").not("#1")
    XCTAssertEqual(1, withoutFirst.size())
    XCTAssertEqual("2", withoutFirst.first()?.id())
}
/// Renaming every `<i>` to `em` is expected to show up in the body markup.
func testTagNameSet() throws {
    let parsed: Document = try SwiftSoup.parse("<p>Hello <i>there</i> <i>now</i></p>")
    try parsed.select("i").tagName("em")
    try XCTAssertEqual("<p>Hello <em>there</em> <em>now</em></p>", parsed.body()?.html())
}
func testTraverse()throws {
let doc: Document = try SwiftSoup.parse("<div><p>Hello</p></div><div>There</div>")
let accum: StringBuilder = StringBuilder()
class nv : NodeVisitor {
class nv: NodeVisitor {
let accum: StringBuilder
init(_ accum: StringBuilder) {
self.accum = accum
@ -263,12 +263,12 @@ class ElementsTest: XCTestCase {
try doc.select("div").traverse(nv(accum))
XCTAssertEqual("<div><p><#text></#text></p></div><div><#text></#text></div>", accum.toString())
}
func testForms()throws {
let doc: Document = try SwiftSoup.parse("<form id=1><input name=q></form><div /><form id=2><input name=f></form>")
let els: Elements = try doc.select("*")
XCTAssertEqual(9, els.size())
let forms: Array<FormElement> = els.forms()
XCTAssertEqual(2, forms.count)
//XCTAssertTrue(forms[0] != nil)
@ -276,47 +276,46 @@ class ElementsTest: XCTestCase {
XCTAssertEqual("1", forms[0].id())
XCTAssertEqual("2", forms[1].id())
}
/// A class name containing a hyphen must be found by `getElementsByClass`.
func testClassWithHyphen() throws {
    let parsed: Document = try SwiftSoup.parse("<p class='tab-nav'>Check</p>")
    let matches: Elements = try parsed.getElementsByClass("tab-nav")

    XCTAssertEqual(1, matches.size())
    XCTAssertEqual("Check", try matches.text())
}
/// Name/method pairs for every test in this class.
/// Each test is listed exactly once, and every entry except the last ends
/// with a comma so the array literal is well-formed.
static var allTests = {
    return [
        ("testFilter", testFilter),
        ("testAttributes", testAttributes),
        ("testHasAttr", testHasAttr),
        ("testHasAbsAttr", testHasAbsAttr),
        ("testAttr", testAttr),
        ("testAbsAttr", testAbsAttr),
        ("testClasses", testClasses),
        ("testText", testText),
        ("testHasText", testHasText),
        ("testHtml", testHtml),
        ("testOuterHtml", testOuterHtml),
        ("testSetHtml", testSetHtml),
        ("testVal", testVal),
        ("testBefore", testBefore),
        ("testAfter", testAfter),
        ("testWrap", testWrap),
        ("testWrapDiv", testWrapDiv),
        ("testUnwrap", testUnwrap),
        ("testUnwrapP", testUnwrapP),
        ("testUnwrapKeepsSpace", testUnwrapKeepsSpace),
        ("testEmpty", testEmpty),
        ("testRemove", testRemove),
        ("testEq", testEq),
        ("testIs", testIs),
        ("testParents", testParents),
        ("testNot", testNot),
        ("testTagNameSet", testTagNameSet),
        ("testTraverse", testTraverse),
        ("testForms", testForms),
        ("testClassWithHyphen", testClassWithHyphen)
    ]
}()
}

View File

@ -11,23 +11,23 @@ import XCTest
import SwiftSoup
class EntitiesTest: XCTestCase {
func testEscape()throws {
let text = "Hello &<> Å å π 新 there ¾ © »"
let escapedAscii = Entities.escape(text,OutputSettings().encoder(String.Encoding.ascii).escapeMode(Entities.EscapeMode.base))
let escapedAscii = Entities.escape(text, OutputSettings().encoder(String.Encoding.ascii).escapeMode(Entities.EscapeMode.base))
let escapedAsciiFull = Entities.escape(text, OutputSettings().charset(String.Encoding.ascii).escapeMode(Entities.EscapeMode.extended))
let escapedAsciiXhtml = Entities.escape(text, OutputSettings().charset(String.Encoding.ascii).escapeMode(Entities.EscapeMode.xhtml))
let escapedUtfFull = Entities.escape(text, OutputSettings().charset(String.Encoding.utf8).escapeMode(Entities.EscapeMode.extended))
let escapedUtfMin = Entities.escape(text, OutputSettings().charset(String.Encoding.utf8).escapeMode(Entities.EscapeMode.xhtml))
XCTAssertEqual("Hello &amp;&lt;&gt; &Aring; &aring; &#x3c0; &#x65b0; there &frac34; &copy; &raquo;", escapedAscii);
XCTAssertEqual("Hello &amp;&lt;&gt; &angst; &aring; &pi; &#x65b0; there &frac34; &copy; &raquo;", escapedAsciiFull);
XCTAssertEqual("Hello &amp;&lt;&gt; &#xc5; &#xe5; &#x3c0; &#x65b0; there &#xbe; &#xa9; &#xbb;", escapedAsciiXhtml);
XCTAssertEqual("Hello &amp;&lt;&gt; &Aring; &aring; &#x3c0; &#x65b0; there &frac34; &copy; &raquo;", escapedAscii)
XCTAssertEqual("Hello &amp;&lt;&gt; &angst; &aring; &pi; &#x65b0; there &frac34; &copy; &raquo;", escapedAsciiFull)
XCTAssertEqual("Hello &amp;&lt;&gt; &#xc5; &#xe5; &#x3c0; &#x65b0; there &#xbe; &#xa9; &#xbb;", escapedAsciiXhtml)
XCTAssertEqual("Hello &amp;&lt;&gt; Å å π 新 there ¾ © »", escapedUtfFull)
XCTAssertEqual("Hello &amp;&lt;&gt; Å å π 新 there ¾ © »", escapedUtfMin)
// odd that it's defined as aring in base but angst in full
// round trip
XCTAssertEqual(text, try Entities.unescape(escapedAscii))
XCTAssertEqual(text, try Entities.unescape(escapedAsciiFull))
@ -35,29 +35,27 @@ class EntitiesTest: XCTestCase {
XCTAssertEqual(text, try Entities.unescape(escapedUtfFull))
XCTAssertEqual(text, try Entities.unescape(escapedUtfMin))
}
// Checks the xhtml escape mode's two-way mapping between entity names and
// code points for amp, gt, lt and quot.
func testXhtml() {
//let text = "&amp; &gt; &lt; &quot;";
// Name -> code point.
XCTAssertEqual(38, Entities.EscapeMode.xhtml.codepointForName("amp"))
XCTAssertEqual(62, Entities.EscapeMode.xhtml.codepointForName("gt"))
XCTAssertEqual(60, Entities.EscapeMode.xhtml.codepointForName("lt"))
XCTAssertEqual(34, Entities.EscapeMode.xhtml.codepointForName("quot"))
// Code point -> name.
XCTAssertEqual("amp", Entities.EscapeMode.xhtml.nameForCodepoint(38))
XCTAssertEqual("gt", Entities.EscapeMode.xhtml.nameForCodepoint(62))
XCTAssertEqual("lt", Entities.EscapeMode.xhtml.nameForCodepoint(60))
XCTAssertEqual("quot", Entities.EscapeMode.xhtml.nameForCodepoint(34))
}
// Checks entity lookup by name through `Entities.getByName(name:)`.
func testGetByName() {
// The two checks below are disabled; the first carries a TODO about code point 8811 differing from Java.
//XCTAssertEqual("", Entities.getByName(name: "nGt"));//todo:nabil same codepoint 8811 in java but characters different
//XCTAssertEqual("fj", Entities.getByName(name: "fjlig"));
// NOTE(review): the expected literal for "gg" reads as an empty string here; it may hold a
// character that does not render — confirm against the entity table before relying on it.
XCTAssertEqual("", Entities.getByName(name: "gg"))
XCTAssertEqual("©", Entities.getByName(name: "copy"))
}
func testEscapeSupplementaryCharacter() {
let text: String = "𡃁"
let escapedAscii: String = Entities.escape(text, OutputSettings().charset(.ascii).escapeMode(Entities.EscapeMode.base))
@ -65,50 +63,49 @@ class EntitiesTest: XCTestCase {
let escapedUtf: String = Entities.escape(text, OutputSettings().charset(.utf8).escapeMode(Entities.EscapeMode.base))
XCTAssertEqual(text, escapedUtf)
}
/// `&nparsl;` is expected to unescape to the two-scalar sequence
/// U+2AFD U+20E5.
func testNotMissingMultis() throws {
    let escaped = "&nparsl;"
    let expected = "\u{2AFD}\u{20E5}"
    try XCTAssertEqual(expected, Entities.unescape(escaped))
}
// Checks that named entities in `text` unescape to the string in `un`.
func testnotMissingSupplementals()throws {
let text: String = "&npolint; &qfr;"
// NOTE(review): `text` holds two entities separated by a space, but the expected literal
// below shows a single visible character — confirm the literal was not truncated.
let un: String = "𝔮"//+"\u{D835}\u{DD2E}" // 𝔮
XCTAssertEqual(un, try Entities.unescape(text))
}
/// Unescapes a string mixing named, decimal and hex entities, with and
/// without trailing semicolons, and checks that unknown entities are kept.
func testUnescape() throws {
    let escaped = "Hello &AElig; &amp;&LT&gt; &reg &angst; &angst &#960; &#960 &#x65B0; there &! &frac34; &copy; &COPY;"
    let unknown = "&0987654321; &unknown"

    try XCTAssertEqual("Hello Æ &<> ® Å &angst π π 新 there &! ¾ © ©", Entities.unescape(escaped))
    try XCTAssertEqual("&0987654321; &unknown", Entities.unescape(unknown))
}
/// For attributes, enforce strict unescaping (must look like &#xxx; , not
/// just &#xxx): strict mode keeps `&amp=` as written, while the default
/// and explicit non-strict calls decode it.
func testStrictUnescape() throws {
    let input = "Hello &amp= &amp;"
    try XCTAssertEqual("Hello &amp= &", Entities.unescape(string: input, strict: true))
    try XCTAssertEqual("Hello &= &", Entities.unescape(input))
    try XCTAssertEqual("Hello &= &", Entities.unescape(string: input, strict: false))
}
/// Entity names are case-sensitive when escaping (`&Uuml;` vs `&uuml;`);
/// unescaping accepts both `&amp;` and `&AMP`.
func testCaseSensitive() throws {
    let plain = "Ü ü & &"
    let settings = OutputSettings().charset(.ascii).escapeMode(Entities.EscapeMode.extended)
    XCTAssertEqual("&Uuml; &uuml; &amp; &amp;", Entities.escape(plain, settings))

    let encoded = "&Uuml; &uuml; &amp; &AMP"
    try XCTAssertEqual("Ü ü & &", Entities.unescape(encoded))
}
/// Numeric entities 92 and 36 are expected to unescape to a backslash and
/// a dollar sign.
func testQuoteReplacements() throws {
    let encoded = "&#92; &#36;"
    let expected = "\\ $"
    try XCTAssertEqual(expected, Entities.unescape(encoded))
}
func testLetterDigitEntities()throws {
let html: String = "<p>&sup1;&sup2;&sup3;&frac14;&frac12;&frac34;</p>"
let doc: Document = try SwiftSoup.parse(html)
@ -119,43 +116,41 @@ class EntitiesTest: XCTestCase {
doc.outputSettings().charset(.utf8)
XCTAssertEqual("¹²³¼½¾", try p.html())
}
/// A URL query string containing `&` must come back from `unescape`
/// unchanged.
func testNoSpuriousDecodes() throws {
    let url = "http://www.foo.com?a=1&num_rooms=1&children=0&int=VA&b=2"
    try XCTAssertEqual(url, Entities.unescape(url))
}
/// Serialises an attribute value containing `<` under two escape modes:
/// `base` leaves it as written, `xhtml` emits `&lt;`.
func testUscapesGtInXmlAttributesButNotInHtml()throws {
    // https://github.com/jhy/jsoup/issues/528 - < is OK in HTML attribute values, but not in XML
    let docHtml: String = "<a title='<p>One</p>'>One</a>"
    let doc: Document = try SwiftSoup.parse(docHtml)
    let element: Element = try doc.select("a").first()!
    doc.outputSettings().escapeMode(Entities.EscapeMode.base)
    XCTAssertEqual("<a title=\"<p>One</p>\">One</a>", try element.outerHtml())
    doc.outputSettings().escapeMode(Entities.EscapeMode.xhtml)
    // Asserted once; the identical second copy of this check was redundant.
    XCTAssertEqual("<a title=\"&lt;p>One&lt;/p>\">One</a>", try element.outerHtml())
}
/// Name/method pairs for every test in this class.
/// Each test is listed exactly once, and every entry except the last ends
/// with a comma so the array literal is well-formed.
static var allTests = {
    return [
        ("testEscape", testEscape),
        ("testXhtml", testXhtml),
        ("testGetByName", testGetByName),
        ("testEscapeSupplementaryCharacter", testEscapeSupplementaryCharacter),
        ("testNotMissingMultis", testNotMissingMultis),
        ("testnotMissingSupplementals", testnotMissingSupplementals),
        ("testUnescape", testUnescape),
        ("testStrictUnescape", testStrictUnescape),
        ("testCaseSensitive", testCaseSensitive),
        ("testQuoteReplacements", testQuoteReplacements),
        ("testLetterDigitEntities", testLetterDigitEntities),
        ("testNoSpuriousDecodes", testNoSpuriousDecodes),
        ("testUscapesGtInXmlAttributesButNotInHtml", testUscapesGtInXmlAttributesButNotInHtml)
    ]
}()
}

View File

@ -10,13 +10,13 @@ import XCTest
import SwiftSoup
class FormElementTest: XCTestCase {
/// The form's `elements()` is expected to hold 8 entries for this markup,
/// which contains one each of: button, fieldset, input, keygen, object,
/// output, select, textarea.
func testHasAssociatedControls() throws {
    let markup = "<form id=1><button id=1><fieldset id=2 /><input id=3><keygen id=4><object id=5><output id=6>" +
        "<select id=7><option></select><textarea id=8><p id=9>"
    let parsed: Document = try SwiftSoup.parse(markup)
    let firstForm = try parsed.select("form").first()! as! FormElement
    XCTAssertEqual(8, firstForm.elements().size())
}
@ -72,8 +72,7 @@ class FormElementTest: XCTestCase {
//
// assertEquals("http://example.com/", con.request().url().toExternalForm());
// }
//TODO:
// @Test public void actionWithNoBaseUri() {
// String html = "<form><input name='q'></form>";
@ -91,28 +90,28 @@ class FormElementTest: XCTestCase {
// }
// assertTrue(threw);
// }
/// A `<form>` inserted through `html(_:)` after parsing must be a
/// `FormElement` with its input linked.
func testFormsAddedAfterParseAreFormElements() throws {
    let parsed: Document = try SwiftSoup.parse("<body />")
    try parsed.body()?.html("<form action='http://example.com/search'><input name='q' value='search'>")

    let inserted: Element = try parsed.select("form").first()!
    XCTAssertNotNil(inserted as? FormElement)

    let asForm = inserted as! FormElement
    XCTAssertEqual(1, asForm.elements().size())
}
func testControlsAddedAfterParseAreLinkedWithForms()throws {
let doc: Document = try SwiftSoup.parse("<body />")
try doc.body()?.html("<form />")
let formEl: Element = try doc.select("form").first()!
try formEl.append("<input name=foo value=bar>")
XCTAssertNotNil(formEl as? FormElement)
let form: FormElement = formEl as! FormElement
XCTAssertEqual(1, form.elements().size())
//todo:
///List<Connection.KeyVal> data = form.formData();
//assertEquals("foo=bar", data.get(0).toString());
@ -149,12 +148,12 @@ class FormElementTest: XCTestCase {
// assertEquals("pass", data.get(1).key());
// assertEquals("login", data.get(2).key());
// }
/// Name/method pairs for every test in this class.
/// Each test is listed exactly once, and every entry except the last ends
/// with a comma so the array literal is well-formed.
static var allTests = {
    return [
        ("testHasAssociatedControls", testHasAssociatedControls),
        ("testFormsAddedAfterParseAreFormElements", testFormsAddedAfterParseAreFormElements),
        ("testControlsAddedAfterParseAreLinkedWithForms", testControlsAddedAfterParseAreLinkedWithForms)
    ]
}()
}

View File

@ -13,7 +13,7 @@ import XCTest
import SwiftSoup
class HtmlParserTest: XCTestCase {
func testParsesSimpleDocument()throws {
let html: String = "<html><head><title>First!</title></head><body><p>First post! <img src=\"foo.png\" /></p></body></html>"
let doc: Document = try SwiftSoup.parse(html)
@ -24,32 +24,32 @@ class HtmlParserTest: XCTestCase {
XCTAssertEqual("foo.png", try img.attr("src"))
XCTAssertEqual("img", img.tagName())
}
/// An attribute value containing `>` inside quotes must be kept intact.
func testParsesRoughAttributes() throws {
    let markup = "<html><head><title>First!</title></head><body><p class=\"foo > bar\">First post! <img src=\"foo.png\" /></p></body></html>"
    let parsed: Document = try SwiftSoup.parse(markup)

    // need a better way to verify these:
    let paragraph: Element = parsed.body()!.child(0)
    XCTAssertEqual("p", paragraph.tagName())
    try XCTAssertEqual("foo > bar", paragraph.attr("class"))
}
/// Checks the serialised body for two inputs with malformed attributes.
func testParsesQuiteRoughAttributes() throws {
    // This gets a <p> with attr '=a' and an <a tag with an attribute named '<p'; and then auto-recreated.
    let malformed: Document = try SwiftSoup.parse("<p =a>One<a <p>Something</p>Else")
    try XCTAssertEqual("<p =a>One<a <p>Something</a></p>\n" +
        "<a <p>Else</a>", malformed.body()!.html())

    let dotted: Document = try SwiftSoup.parse("<p .....>")
    try XCTAssertEqual("<p .....></p>", dotted.body()!.html())
}
func testParsesComments()throws {
let html = "<html><head></head><body><img src=foo><!-- <table><tr><td></table> --><p>Hello</p></body></html>"
let doc = try SwiftSoup.parse(html)
let body: Element = doc.body()!
let comment: Comment = body.childNode(1)as! Comment // comment should not be sub of img, as it's an empty tag
XCTAssertEqual(" <table><tr><td></table> ", comment.getData())
@ -57,7 +57,7 @@ class HtmlParserTest: XCTestCase {
let text: TextNode = p.childNode(0)as! TextNode
XCTAssertEqual("Hello", text.getWholeText())
}
func testParsesUnterminatedComments()throws {
let html = "<p>Hello<!-- <tr><td>"
let doc: Document = try SwiftSoup.parse(html)
@ -68,26 +68,26 @@ class HtmlParserTest: XCTestCase {
let comment: Comment = p.childNode(1)as! Comment
XCTAssertEqual(" <tr><td>", comment.getData())
}
/// Unterminated tags are dropped.
/// jsoup used to parse this to <p>, but whatwg, webkit will drop.
func testDropsUnterminatedTag() throws {
    let openParagraph: Document = try SwiftSoup.parse("<p")
    try XCTAssertEqual(0, openParagraph.getElementsByTag("p").size())
    try XCTAssertEqual("", openParagraph.text())

    let openDiv: Document = try SwiftSoup.parse("<div id=1<p id='2'")
    try XCTAssertEqual("", openDiv.text())
}
/// A tag whose quoted attribute value is never closed yields no text.
/// jsoup used to parse this to <p id="foo">, but whatwg, webkit will drop.
func testDropsUnterminatedAttribute() throws {
    let parsed: Document = try SwiftSoup.parse("<p id=\"foo")
    try XCTAssertEqual("", parsed.text())
}
func testParsesUnterminatedTextarea()throws {
// don't parse right to end, but break on <p>
let doc: Document = try SwiftSoup.parse("<body><p><textarea>one<p>two")
@ -95,7 +95,7 @@ class HtmlParserTest: XCTestCase {
XCTAssertEqual("one", try t.text())
XCTAssertEqual("two", try doc.select("p").get(1).text())
}
func testParsesUnterminatedOption()throws {
// bit weird this -- browsers and spec get stuck in select until there's a </select>
let doc: Document = try SwiftSoup.parse("<body><p><select><option>One<option>Two</p><p>Three</p>")
@ -104,73 +104,73 @@ class HtmlParserTest: XCTestCase {
XCTAssertEqual("One", try options.first()!.text())
XCTAssertEqual("TwoThree", try options.last()!.text())
}
/// Whitespace before the closing `>` of start and end tags must not
/// appear in the serialised output.
func testSpaceAfterTag() throws {
    let markup = "<div > <a name=\"top\"></a ><p id=1 >Hello</p></div>"
    let parsed: Document = try SwiftSoup.parse(markup)
    try XCTAssertEqual("<div> <a name=\"top\"></a><p id=\"1\">Hello</p></div>",
                       TextUtil.stripNewlines(parsed.body()!.html()))
}
/// Parses head-type and body-type elements given without html/head/body
/// wrappers and checks where each one ends up in the document tree.
func testCreatesDocumentStructure()throws {
    let html = "<meta name=keywords /><link rel=stylesheet /><title>jsoup</title><p>Hello world</p>"
    let doc = try SwiftSoup.parse(html)
    let head: Element = doc.head()!
    let body: Element = doc.body()!

    XCTAssertEqual(1, doc.children().size()) // root node: contains html node
    XCTAssertEqual(2, doc.child(0).children().size()) // html node: head and body
    XCTAssertEqual(3, head.children().size())
    XCTAssertEqual(1, body.children().size())

    // Each of the checks below is asserted once; the identical second copies were redundant.
    XCTAssertEqual("keywords", try head.getElementsByTag("meta").get(0).attr("name"))
    XCTAssertEqual(0, try body.getElementsByTag("meta").size())
    XCTAssertEqual("jsoup", try doc.title())
    XCTAssertEqual("Hello world", try body.text())
    XCTAssertEqual("Hello world", try body.children().get(0).text())
}
/// Parses a bare body snippet and checks that the document text keeps the
/// words in their original order.
func testCreatesStructureFromBodySnippet()throws {
    // the bar baz stuff naturally goes into the body, but the 'foo' goes into root, and the normalisation routine
    // needs to move into the start of the body
    let html = "foo <b>bar</b> baz"
    let doc = try SwiftSoup.parse(html)
    // Asserted once; the identical second copy of this check was redundant.
    XCTAssertEqual("foo bar baz", try doc.text())
}
/// `&amp;` in both an attribute value and element text is expected to be
/// decoded to `&`.
func testHandlesEscapedData() throws {
    let parsed = try SwiftSoup.parse("<div title='Surf &amp; Turf'>Reef &amp; Beef</div>")
    let firstDiv: Element = try parsed.getElementsByTag("div").get(0)

    try XCTAssertEqual("Surf & Turf", firstDiv.attr("title"))
    try XCTAssertEqual("Reef & Beef", firstDiv.text())
}
/// Content of `<style>` and `<script>` is expected to be reported by
/// `data()` and left out of `text()`.
func testHandlesDataOnlyTags() throws {
    let styleMarkup = "<style>font-family: bold</style>"
    let styles: Elements = try SwiftSoup.parse(styleMarkup).getElementsByTag("style")
    XCTAssertEqual("font-family: bold", styles.get(0).data())
    try XCTAssertEqual("", styles.get(0).text())

    let scriptMarkup = "<p>Hello</p><script>obj.insert('<a rel=\"none\" />');\ni++;</script><p>There</p>"
    let parsed: Document = try SwiftSoup.parse(scriptMarkup)
    try XCTAssertEqual("Hello There", parsed.text())
    XCTAssertEqual("obj.insert('<a rel=\"none\" />');\ni++;", parsed.data())
}
/// Text before and after a `<script>` element must stay in the body.
func testHandlesTextAfterData() throws {
    let markup = "<html><body>pre <script>inner</script> aft</body></html>"
    let parsed: Document = try SwiftSoup.parse(markup)
    let expected = "<html><head></head><body>pre <script>inner</script> aft</body></html>"
    XCTAssertEqual(expected, try TextUtil.stripNewlines(parsed.html()))
}
/// A textarea's content is expected to be returned by both `text()` and
/// `val()`.
func testHandlesTextArea()throws {
    let doc: Document = try SwiftSoup.parse("<textarea>Hello</textarea>")
    let els: Elements = try doc.select("textarea")
    // Each check is asserted once; the identical second copies were redundant.
    XCTAssertEqual("Hello", try els.text())
    XCTAssertEqual("Hello", try els.val())
}
func testPreservesSpaceInTextArea()throws {
// preserve because the tag is marked as preserve white space
let doc: Document = try SwiftSoup.parse("<textarea>\n\tOne\n\tTwo\n\tThree\n</textarea>")
@ -179,9 +179,9 @@ class HtmlParserTest: XCTestCase {
XCTAssertEqual(expect, try el.text())
XCTAssertEqual(expect, try el.val())
XCTAssertEqual(expect, try el.html())
XCTAssertEqual("<textarea>\n\t" + expect + "\n</textarea>",try el.outerHtml()) // but preserved in round-trip html
XCTAssertEqual("<textarea>\n\t" + expect + "\n</textarea>", try el.outerHtml()) // but preserved in round-trip html
}
func testPreservesSpaceInScript()throws {
// preserve because it's content is a data node
let doc: Document = try SwiftSoup.parse("<script>\nOne\n\tTwo\n\tThree\n</script>")
@ -191,7 +191,7 @@ class HtmlParserTest: XCTestCase {
XCTAssertEqual("One\n\tTwo\n\tThree", try el.html())
XCTAssertEqual("<script>" + expect + "</script>", try el.outerHtml())
}
func testDoesNotCreateImplicitLists()throws {
// old jsoup used to wrap this in <ul>, but that's not to spec
let h: String = "<li>Point one<li>Point two"
@ -201,18 +201,18 @@ class HtmlParserTest: XCTestCase {
let lis: Elements = try doc.select("li")
XCTAssertEqual(2, lis.size())
XCTAssertEqual("body", lis.first()!.parent()!.tagName())
// no fiddling with non-implicit lists
let h2: String = "<ol><li><p>Point the first<li><p>Point the second"
let doc2: Document = try SwiftSoup.parse(h2)
XCTAssertEqual(0, try doc2.select("ul").size())
XCTAssertEqual(1, try doc2.select("ol").size())
XCTAssertEqual(2, try doc2.select("ol li").size())
XCTAssertEqual(2, try doc2.select("ol li p").size())
XCTAssertEqual(1, try doc2.select("ol li").get(0).children().size()) // one p in first li
}
func testDiscardsNakedTds()throws {
// jsoup used to make this into an implicit table; but browsers make it into a text run
let h: String = "<td>Hello<td><p>There<p>now"
@ -220,58 +220,58 @@ class HtmlParserTest: XCTestCase {
XCTAssertEqual("Hello<p>There</p><p>now</p>", try TextUtil.stripNewlines(doc.body()!.html()))
// <tbody> is introduced if no implicitly creating table, but allows tr to be directly under table
}
/// Cells written without their `<tr>`/`<tbody>` wrappers, including those of a
/// table nested inside a cell, must be serialized with the wrappers present.
func testHandlesNestedImplicitTable() throws {
    let markup = "<table><td>1</td></tr> <td>2</td></tr> <td> <table><td>3</td> <td>4</td></table> <tr><td>5</table>"
    let expected = "<table><tbody><tr><td>1</td></tr> <tr><td>2</td></tr> <tr><td> <table><tbody><tr><td>3</td> <td>4</td></tr></tbody></table> </td></tr><tr><td>5</td></tr></tbody></table>"
    let parsed: Document = try SwiftSoup.parse(markup)
    XCTAssertEqual(expected, try TextUtil.stripNewlines(parsed.body()!.html()))
}
/// Parses the WHATWG "expenses" table example, which omits most end tags, and
/// checks the serialized body has every row, cell and group explicitly closed.
func testHandlesWhatWgExpensesTableExample() throws {
    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tabular-data.html#examples-0
    let markup = "<table> <colgroup> <col> <colgroup> <col> <col> <col> <thead> <tr> <th> <th>2008 <th>2007 <th>2006 <tbody> <tr> <th scope=rowgroup> Research and development <td> $ 1,109 <td> $ 782 <td> $ 712 <tr> <th scope=row> Percentage of net sales <td> 3.4% <td> 3.3% <td> 3.7% <tbody> <tr> <th scope=rowgroup> Selling, general, and administrative <td> $ 3,761 <td> $ 2,963 <td> $ 2,433 <tr> <th scope=row> Percentage of net sales <td> 11.6% <td> 12.3% <td> 12.6% </table>"
    let expected = "<table> <colgroup> <col> </colgroup><colgroup> <col> <col> <col> </colgroup><thead> <tr> <th> </th><th>2008 </th><th>2007 </th><th>2006 </th></tr></thead><tbody> <tr> <th scope=\"rowgroup\"> Research and development </th><td> $ 1,109 </td><td> $ 782 </td><td> $ 712 </td></tr><tr> <th scope=\"row\"> Percentage of net sales </th><td> 3.4% </td><td> 3.3% </td><td> 3.7% </td></tr></tbody><tbody> <tr> <th scope=\"rowgroup\"> Selling, general, and administrative </th><td> $ 3,761 </td><td> $ 2,963 </td><td> $ 2,433 </td></tr><tr> <th scope=\"row\"> Percentage of net sales </th><td> 11.6% </td><td> 12.3% </td><td> 12.6% </td></tr></tbody></table>"
    let parsed = try SwiftSoup.parse(markup)
    XCTAssertEqual(expected, try TextUtil.stripNewlines(parsed.body()!.html()))
}
/// A table that already carries an explicit `<tbody>` must round-trip unchanged.
func testHandlesTbodyTable() throws {
    let table = "<table><tbody><tr><td>aaa</td><td>bbb</td></tr></tbody></table>"
    let page = "<html><head></head><body><table><tbody><tr><td>aaa</td><td>bbb</td></tr></tbody></table></body></html>"
    let parsed: Document = try SwiftSoup.parse(page)
    XCTAssertEqual(table, try TextUtil.stripNewlines(parsed.body()!.html()))
}
/// An unclosed `<caption>` followed by cells must be closed before the
/// generated `<tbody>`/`<tr>` in the serialized output.
func testHandlesImplicitCaptionClose() throws {
    let markup = "<table><caption>A caption<td>One<td>Two"
    let expected = "<table><caption>A caption</caption><tbody><tr><td>One</td><td>Two</td></tr></tbody></table>"
    let parsed = try SwiftSoup.parse(markup)
    XCTAssertEqual(expected, try TextUtil.stripNewlines(parsed.body()!.html()))
}
/// Tables opened while another table's cell is still open must end up nested
/// inside that cell in the serialized output.
func testNoTableDirectInTable() throws {
    let markup = "<table> <td>One <td><table><td>Two</table> <table><td>Three"
    let expected = "<table> <tbody><tr><td>One </td><td><table><tbody><tr><td>Two</td></tr></tbody></table> <table><tbody><tr><td>Three</td></tr></tbody></table></td></tr></tbody></table>"
    let parsed: Document = try SwiftSoup.parse(markup)
    XCTAssertEqual(expected, try TextUtil.stripNewlines(parsed.body()!.html()))
}
/// A doubled `</tr></tr>` inside a nested table must be ignored; otherwise the
/// extra end tag would close the table early.
func testIgnoresDupeEndTrTag() throws {
    // two </tr></tr>, must ignore or will close table
    let markup = "<table><tr><td>One</td><td><table><tr><td>Two</td></tr></tr></table></td><td>Three</td></tr></table>"
    let expected = "<table><tbody><tr><td>One</td><td><table><tbody><tr><td>Two</td></tr></tbody></table></td><td>Three</td></tr></tbody></table>"
    let parsed: Document = try SwiftSoup.parse(markup)
    XCTAssertEqual(expected, try TextUtil.stripNewlines(parsed.body()!.html()))
}
/// Only the first `<base href>` counts: the document and all three anchors
/// report the same base URI, and each `href` is resolved against it.
func testHandlesBaseTags() throws {
    // only listen to the first base href
    let markup = "<a href=1>#</a><base href='/2/'><a href='3'>#</a><base href='http://bar'><a href=/4>#</a>"
    let document = try SwiftSoup.parse(markup, "http://foo/")
    let firstBase = "http://foo/2/"

    // gets set once, so doc and descendants have first only
    XCTAssertEqual(firstBase, document.getBaseUri())

    let links: Elements = try document.getElementsByTag("a")
    XCTAssertEqual(3, links.size())

    for index in 0..<3 {
        XCTAssertEqual(firstBase, links.get(index).getBaseUri())
    }

    let absoluteHrefs = ["http://foo/2/1", "http://foo/2/3", "http://foo/4"]
    for (index, href) in absoluteHrefs.enumerated() {
        XCTAssertEqual(href, try links.get(index).absUrl("href"))
    }
}
func testHandlesProtocolRelativeUrl()throws {
let base = "https://example.com/"
let html = "<img src='//example.net/img.jpg'>"
@ -279,7 +279,7 @@ class HtmlParserTest: XCTestCase {
let el: Element = try doc.select("img").first()!
XCTAssertEqual("https://example.net/img.jpg", try el.absUrl("src"))
}
func testHandlesCdata()throws {
// todo: as this is html namespace, should actually treat as bogus comment, not cdata. keep as cdata for now
let h = "<div id=1><![CDATA[<html>\n<foo><&amp;]]></div>" // the &amp; in there should remain literal
@ -289,20 +289,20 @@ class HtmlParserTest: XCTestCase {
XCTAssertEqual(0, div.children().size())
XCTAssertEqual(1, div.childNodeSize()) // no elements, one text node
}
/// Regression test: a CDATA section that is still open at end of input must
/// parse without crashing and leave exactly one child node in the body.
func testHandlesUnclosedCdataAtEOF() throws {
    // https://github.com/jhy/jsoup/issues/349 would crash, as character reader would try to seek past EOF
    let truncatedCdata = "<![CDATA[]]"
    let parsed = try SwiftSoup.parse(truncatedCdata)
    let bodyNodeCount = parsed.body()!.childNodeSize()
    XCTAssertEqual(1, bodyNodeCount)
}
/// Stray `<` characters that do not start a valid tag must be kept as text.
func testHandlesInvalidStartTags() throws {
    // parse to <div {#text=Hello < There <&>}>
    let markup = "<div>Hello < There <&amp;></div>"
    let expectedText = "Hello < There <&>"
    let parsed: Document = try SwiftSoup.parse(markup)
    XCTAssertEqual(expectedText, try parsed.select("div").first()!.text())
}
func testHandlesUnknownTags()throws {
let h = "<div><foo title=bar>Hello<foo title=qux>there</foo></div>"
let doc = try SwiftSoup.parse(h)
@ -312,49 +312,49 @@ class HtmlParserTest: XCTestCase {
XCTAssertEqual("qux", try foos.last()!.attr("title"))
XCTAssertEqual("there", try foos.last()!.text())
}
/// Unknown tags nested inside paragraphs must round-trip through a body
/// fragment parse without being restructured.
func testHandlesUnknownInlineTags() throws {
    let fragment = "<p><cust>Test</cust></p><p><cust><cust>Test</cust></cust></p>"
    let body = try SwiftSoup.parseBodyFragment(fragment).body()!
    let rendered: String = try body.html()
    XCTAssertEqual(fragment, TextUtil.stripNewlines(rendered))
}
/// A body fragment is placed inside `<body>` (leading comment included), and
/// its relative link resolves against the supplied base URI.
func testParsesBodyFragment() throws {
    let fragment = "<!-- comment --><p><a href='foo'>One</a></p>"
    let expectedBody = "<body><!-- comment --><p><a href=\"foo\">One</a></p></body>"
    let expectedLink = "http://example.com/foo"
    let parsed: Document = try SwiftSoup.parseBodyFragment(fragment, "http://example.com")
    XCTAssertEqual(expectedBody, try TextUtil.stripNewlines(parsed.body()!.outerHtml()))
    XCTAssertEqual(expectedLink, try parsed.select("a").first()!.absUrl("href"))
}
/// Namespaced unknown tags (`foo:bar`, `abc:def`) must keep their names,
/// nesting and self-closing form; only attribute quoting is normalized.
func testHandlesUnknownNamespaceTags() throws {
    // note that the first foo:bar should not really be allowed to be self closing, if parsed in html mode.
    let markup = "<foo:bar id='1' /><abc:def id=2>Foo<p>Hello</p></abc:def><foo:bar>There</foo:bar>"
    let expected = "<foo:bar id=\"1\" /><abc:def id=\"2\">Foo<p>Hello</p></abc:def><foo:bar>There</foo:bar>"
    let parsed: Document = try SwiftSoup.parse(markup)
    XCTAssertEqual(expected, try TextUtil.stripNewlines(parsed.body()!.html()))
}
/// Self-closing syntax on known tags is accepted but serialized with an
/// explicit end tag; unknown tags may stay self-closing.
func testHandlesKnownEmptyBlocks() throws {
    // if a known tag, allow self closing outside of spec, but force an end tag. unknown tags can be self closing.
    let markup = "<div id='1' /><script src='/foo' /><div id=2><img /><img></div><a id=3 /><i /><foo /><foo>One</foo> <hr /> hr text <hr> hr text two"
    let expected = "<div id=\"1\"></div><script src=\"/foo\"></script><div id=\"2\"><img><img></div><a id=\"3\"></a><i></i><foo /><foo>One</foo> <hr> hr text <hr> hr text two"
    let parsed = try SwiftSoup.parse(markup)
    XCTAssertEqual(expected, try TextUtil.stripNewlines(parsed.body()!.html()))
}
/// A trailing `/` in an unquoted attribute value belongs to the value and must
/// not be read as a self-closing marker.
func testHandlesSolidusAtAttributeEnd() throws {
    // this test makes sure [<a href=/>link</a>] is parsed as [<a href="/">link</a>], not [<a href="" /><a>link</a>]
    let markup = "<a href=/>link</a>"
    let expected = "<a href=\"/\">link</a>"
    let parsed = try SwiftSoup.parse(markup)
    XCTAssertEqual(expected, try parsed.body()!.html())
}
/// Repeated `</body>`/`</html>` end tags must not drop later content: all
/// three paragraphs end up as children of the body.
func testHandlesMultiClosingBody() throws {
    let markup = "<body><p>Hello</body><p>there</p></body></body></html><p>now"
    let parsed: Document = try SwiftSoup.parse(markup)
    let expectedParagraphs = 3
    XCTAssertEqual(expectedParagraphs, try parsed.select("p").size())
    XCTAssertEqual(expectedParagraphs, parsed.body()!.children().size())
}
func testHandlesUnclosedDefinitionLists()throws {
// jsoup used to create a <dl>, but that's not to spec
let h: String = "<dt>Foo<dd>Bar<dt>Qux<dd>Zug"
@ -363,18 +363,18 @@ class HtmlParserTest: XCTestCase {
XCTAssertEqual(4, try doc.select("dt, dd").size())
let dts: Elements = try doc.select("dt")
XCTAssertEqual(2, dts.size())
XCTAssertEqual("Zug",try dts.get(1).nextElementSibling()?.text())
XCTAssertEqual("Zug", try dts.get(1).nextElementSibling()?.text())
}
/// `<div>` blocks placed inside `<dt>`/`<dd>` must stay where they were
/// written: each div keeps its definition element as parent and the list
/// serializes unchanged apart from attribute quoting.
func testHandlesBlocksInDefinitions() throws {
    // per the spec, dt and dd are inline, but in practise are block
    let h = "<dl><dt><div id=1>Term</div></dt><dd><div id=2>Def</div></dd></dl>"
    let doc = try SwiftSoup.parse(h)
    XCTAssertEqual("dt", try doc.select("#1").first()!.parent()!.tagName())
    XCTAssertEqual("dd", try doc.select("#2").first()!.parent()!.tagName())
    // Asserted once; the block previously carried the same assertion twice.
    XCTAssertEqual("<dl><dt><div id=\"1\">Term</div></dt><dd><div id=\"2\">Def</div></dd></dl>", try TextUtil.stripNewlines(doc.body()!.html()))
}
func testHandlesFrames()throws {
let h = "<html><head><script></script><noscript></noscript></head><frameset><frame src=foo></frame><frame src=foo></frameset></html>"
let doc = try SwiftSoup.parse(h)
@ -382,14 +382,14 @@ class HtmlParserTest: XCTestCase {
try TextUtil.stripNewlines(doc.html()))
// no body auto vivification
}
/// Content that follows a `</frameset>` (here an empty table) must be dropped,
/// and no `<body>` may be created for it.
func testIgnoresContentAfterFrameset() throws {
    let h = "<html><head><title>One</title></head><frameset><frame /><frame /></frameset><table></table></html>"
    let doc = try SwiftSoup.parse(h)
    // Asserted once; the block previously carried the same assertion twice.
    XCTAssertEqual("<html><head><title>One</title></head><frameset><frame><frame></frameset></html>", try TextUtil.stripNewlines(doc.html()))
    // no body, no table. No crash!
}
func testHandlesJavadocFont()throws {
let h = "<TD BGCOLOR=\"#EEEEFF\" CLASS=\"NavBarCell1\"> <A HREF=\"deprecated-list.html\"><FONT CLASS=\"NavBarFont1\"><B>Deprecated</B></FONT></A>&nbsp;</TD>"
let doc = try SwiftSoup.parse(h)
@ -398,103 +398,103 @@ class HtmlParserTest: XCTestCase {
XCTAssertEqual("font", a.child(0).tagName())
XCTAssertEqual("b", a.child(0).child(0).tagName())
}
/// A `<base>` tag without an `href` must not disturb the base URI passed to the
/// parser: the raw `href` stays relative and `abs:href` resolves against it.
func testHandlesBaseWithoutHref() throws {
    let h = "<head><base target='_blank'></head><body><a href=/foo>Test</a></body>"
    let doc = try SwiftSoup.parse(h, "http://example.com/")
    let a: Element = try doc.select("a").first()!
    XCTAssertEqual("/foo", try a.attr("href"))
    // Asserted once; the block previously carried the same assertion twice.
    XCTAssertEqual("http://example.com/foo", try a.attr("abs:href"))
}
/// Text scattered before, between and after the structural tags must all be
/// collected into the body of a normalized html/head/body skeleton.
func testNormalisesDocument() throws {
    let markup = "<!doctype html>One<html>Two<head>Three<link></head>Four<body>Five </body>Six </html>Seven "
    let expected = "<!doctype html><html><head></head><body>OneTwoThree<link>FourFive Six Seven </body></html>"
    let parsed = try SwiftSoup.parse(markup)
    XCTAssertEqual(expected, try TextUtil.stripNewlines(parsed.html()))
}
/// Parsing the empty string must still yield the html/head/body skeleton.
func testNormalisesEmptyDocument() throws {
    let skeleton = "<html><head></head><body></body></html>"
    let parsed = try SwiftSoup.parse("")
    XCTAssertEqual(skeleton, try TextUtil.stripNewlines(parsed.html()))
}
/// A document with a body but no head must gain an empty `<head>` and have its
/// unclosed body/html tags closed.
func testNormalisesHeadlessBody() throws {
    let markup = "<html><body><span class=\"foo\">bar</span>"
    let expected = "<html><head></head><body><span class=\"foo\">bar</span></body></html>"
    let parsed = try SwiftSoup.parse(markup)
    XCTAssertEqual(expected, try TextUtil.stripNewlines(parsed.html()))
}
/// A `<body>` tag that appears after content has started must still contribute
/// its attributes to the real body, with the earlier content moved inside it.
func testNormalisedBodyAfterContent() throws {
    let markup = "<font face=Arial><body class=name><div>One</div></body></font>"
    let expected = "<html><head></head><body class=\"name\"><font face=\"Arial\"><div>One</div></font></body></html>"
    let parsed = try SwiftSoup.parse(markup)
    XCTAssertEqual(expected, try TextUtil.stripNewlines(parsed.html()))
}
/// With an unquoted `content` value, `charset=gb2312` is parsed as its own
/// attribute on the `<meta>` element.
func testfindsCharsetInMalformedMeta() throws {
    // example cited for reason of html5's <meta charset> element
    let markup = "<meta http-equiv=Content-Type content=text/html; charset=gb2312>"
    let parsed = try SwiftSoup.parse(markup)
    let expectedCharset = "gb2312"
    XCTAssertEqual(expectedCharset, try parsed.select("meta").attr("charset"))
}
/// `<hgroup>` is allowed inside a heading; unclosed headings within each
/// hgroup are closed at the hgroup boundary.
func testHgroup() throws {
    // jsoup used to not allow hgroup in h{n}, but that's not in spec, and browsers are OK
    let markup = "<h1>Hello <h2>There <hgroup><h1>Another<h2>headline</hgroup> <hgroup><h1>More</h1><p>stuff</p></hgroup>"
    let expected = "<h1>Hello </h1><h2>There <hgroup><h1>Another</h1><h2>headline</h2></hgroup> <hgroup><h1>More</h1><p>stuff</p></hgroup></h2>"
    let parsed = try SwiftSoup.parse(markup)
    XCTAssertEqual(expected, try TextUtil.stripNewlines(parsed.body()!.html()))
}
/// Tag names containing `_` or `-` must be accepted and serialized as written,
/// with only the attribute quoting normalized.
func testRelaxedTags() throws {
    let doc = try SwiftSoup.parse("<abc_def id=1>Hello</abc_def> <abc-def>There</abc-def>")
    XCTAssertEqual("<abc_def id=\"1\">Hello</abc_def> <abc-def>There</abc-def>", try TextUtil.stripNewlines(doc.body()!.html()))
}
/// A heading may contain block content such as `<div>`, but a heading opened
/// inside another heading closes the outer one first.
func testHeaderContents() throws {
    // h* tags (h1 .. h9) in browsers can handle any internal content other than other h*. which is not per any
    // spec, which defines them as containing phrasing content only. so, reality over theory.
    let doc = try SwiftSoup.parse("<h1>Hello <div>There</div> now</h1> <h2>More <h3>Content</h3></h2>")
    XCTAssertEqual("<h1>Hello <div>There</div> now</h1> <h2>More </h2><h3>Content</h3>", try TextUtil.stripNewlines(doc.body()!.html()))
}
/// A `<span>` containing a `<div>` and a nested `<span>` must round-trip
/// without being split or re-nested.
func testSpanContents() throws {
    // like h1 tags, the spec says SPAN is phrasing only, but browsers and publisher treat span as a block tag
    let doc = try SwiftSoup.parse("<span>Hello <div>there</div> <span>now</span></span>")
    XCTAssertEqual("<span>Hello <div>there</div> <span>now</span></span>", try TextUtil.stripNewlines(doc.body()!.html()))
}
/// An `<img>` inside a `<noscript>` in the head must not become an element; it
/// is expected to be serialized as escaped text.
func testNoImagesInNoScriptInHead() throws {
    // jsoup used to allow, but against spec if parsing with noscript
    let doc = try SwiftSoup.parse("<html><head><noscript><img src='foo'></noscript></head><body><p>Hello</p></body></html>")
    XCTAssertEqual("<html><head><noscript>&lt;img src=\"foo\"&gt;</noscript></head><body><p>Hello</p></body></html>", try TextUtil.stripNewlines(doc.html()))
}
/// An `<a>` wrapping both block (`<div>`) and inline (`<span>`) content must
/// round-trip unchanged.
func testAFlowContents() throws {
    // html5 has <a> as either phrasing or block
    let doc = try SwiftSoup.parse("<a>Hello <div>there</div> <span>now</span></a>")
    XCTAssertEqual("<a>Hello <div>there</div> <span>now</span></a>", try TextUtil.stripNewlines(doc.body()!.html()))
}
/// A `<font>` wrapping both block (`<div>`) and inline (`<span>`) content must
/// round-trip unchanged.
func testFontFlowContents() throws {
    // html5 has no definition of <font>; often used as flow
    let doc = try SwiftSoup.parse("<font>Hello <div>there</div> <span>now</span></font>")
    XCTAssertEqual("<font>Hello <div>there</div> <span>now</span></font>", try TextUtil.stripNewlines(doc.body()!.html()))
}
/// Misnested `<b><i></b></i>`: the `<i>` is closed together with the `<b>` and
/// reopened around the text that follows.
func testhandlesMisnestedTagsBI() throws {
    // whatwg: <b><i></b></i>
    let h = "<p>1<b>2<i>3</b>4</i>5</p>"
    let doc = try SwiftSoup.parse(h)
    XCTAssertEqual("<p>1<b>2<i>3</i></b><i>4</i>5</p>", try doc.body()!.html())
    // adoption agency on </b>, reconstruction of formatters on 4.
}
/// Misnested `<b><p></b></p>`: the `<b>` is closed before the paragraph and
/// reopened inside it around the text it covered.
func testhandlesMisnestedTagsBP() throws {
    // whatwg: <b><p></b></p>
    let h = "<b>1<p>2</b>3</p>"
    let doc = try SwiftSoup.parse(h)
    XCTAssertEqual("<b>1</b>\n<p><b>2</b>3</p>", try doc.body()!.html())
}
func testhandlesUnexpectedMarkupInTables()throws {
// whatwg - tests markers in active formatting (if they didn't work, would get in in table)
// also tests foster parenting
@ -502,7 +502,7 @@ class HtmlParserTest: XCTestCase {
let doc = try SwiftSoup.parse(h)
XCTAssertEqual("<b></b><b>bbb</b><table><tbody><tr><td>aaa</td></tr></tbody></table><b>ccc</b>", try TextUtil.stripNewlines(doc.body()!.html()))
}
func testHandlesUnclosedFormattingElements()throws {
// whatwg: formatting elements get collected and applied, but excess elements are thrown away
let h = "<!DOCTYPE html>\n" +
@ -524,21 +524,21 @@ class HtmlParserTest: XCTestCase {
"</html>"
XCTAssertEqual(want, try doc.html())
}
/// An anchor left open across a `<p>` is closed before the paragraph and
/// re-created inside it with the same `href`.
func testhandlesUnclosedAnchors() throws {
    let markup = "<a href='http://example.com/'>Link<p>Error link</a>"
    let expected = "<a href=\"http://example.com/\">Link</a>\n<p><a href=\"http://example.com/\">Error link</a></p>"
    let parsed = try SwiftSoup.parse(markup)
    XCTAssertEqual(expected, try parsed.body()!.html())
}
/// Formatting elements still open when a paragraph ends are rebuilt, with
/// their attributes, inside the next paragraph.
func testreconstructFormattingElements() throws {
    // tests attributes and multi b
    let markup = "<p><b class=one>One <i>Two <b>Three</p><p>Hello</p>"
    let expected = "<p><b class=\"one\">One <i>Two <b>Three</b></i></b></p>\n<p><b class=\"one\"><i><b>Hello</b></i></b></p>"
    let parsed = try SwiftSoup.parse(markup)
    XCTAssertEqual(expected, try parsed.body()!.html())
}
func testreconstructFormattingElementsInTable()throws {
// tests that tables get formatting markers -- the <b> applies outside the table and does not leak in,
// and the <i> inside the table and does not leak out.
@ -555,19 +555,19 @@ class HtmlParserTest: XCTestCase {
" </table> <p>Five</p></b>"
XCTAssertEqual(want, try doc.body()!.html())
}
/// Comments that precede any markup must stay ahead of the generated `<html>`
/// element rather than being moved into the body.
func testcommentBeforeHtml() throws {
    let markup = "<!-- comment --><!-- comment 2 --><p>One</p>"
    let expected = "<!-- comment --><!-- comment 2 --><html><head></head><body><p>One</p></body></html>"
    let parsed = try SwiftSoup.parse(markup)
    XCTAssertEqual(expected, try TextUtil.stripNewlines(parsed.html()))
}
/// A self-closed `<td />` must be serialized as an empty cell with an explicit
/// end tag.
func testemptyTdTag() throws {
    let markup = "<table><tr><td>One</td><td id='2' /></tr></table>"
    let expectedRow = "<td>One</td>\n<td id=\"2\"></td>"
    let parsed = try SwiftSoup.parse(markup)
    XCTAssertEqual(expectedRow, try parsed.select("tr").first()!.html())
}
func testhandlesSolidusInA()throws {
// test for bug #66
let h = "<a class=lp href=/lib/14160711/>link text</a>"
@ -576,7 +576,7 @@ class HtmlParserTest: XCTestCase {
XCTAssertEqual("link text", try a.text())
XCTAssertEqual("/lib/14160711/", try a.attr("href"))
}
func testhandlesSpanInTbody()throws {
// test for bug 64
let h = "<table><tbody><span class='1'><tr><td>One</td></tr><tr><td>Two</td></tr></span></tbody></table>"
@ -584,77 +584,77 @@ class HtmlParserTest: XCTestCase {
XCTAssertEqual(try doc.select("span").first()!.children().size(), 0) // the span gets closed
XCTAssertEqual(try doc.select("table").size(), 1) // only one table
}
/// Name/method pairs for every test in this class. Each test is listed exactly
/// once; a new test must be added here by hand to appear in this list.
static var allTests = {
    return [
        ("testParsesSimpleDocument", testParsesSimpleDocument),
        ("testParsesRoughAttributes", testParsesRoughAttributes),
        ("testParsesQuiteRoughAttributes", testParsesQuiteRoughAttributes),
        ("testParsesComments", testParsesComments),
        ("testParsesUnterminatedComments", testParsesUnterminatedComments),
        ("testDropsUnterminatedTag", testDropsUnterminatedTag),
        ("testDropsUnterminatedAttribute", testDropsUnterminatedAttribute),
        ("testParsesUnterminatedTextarea", testParsesUnterminatedTextarea),
        ("testParsesUnterminatedOption", testParsesUnterminatedOption),
        ("testSpaceAfterTag", testSpaceAfterTag),
        ("testCreatesDocumentStructure", testCreatesDocumentStructure),
        ("testCreatesStructureFromBodySnippet", testCreatesStructureFromBodySnippet),
        ("testHandlesEscapedData", testHandlesEscapedData),
        ("testHandlesDataOnlyTags", testHandlesDataOnlyTags),
        ("testHandlesTextAfterData", testHandlesTextAfterData),
        ("testHandlesTextArea", testHandlesTextArea),
        ("testPreservesSpaceInTextArea", testPreservesSpaceInTextArea),
        ("testPreservesSpaceInScript", testPreservesSpaceInScript),
        ("testDoesNotCreateImplicitLists", testDoesNotCreateImplicitLists),
        ("testDiscardsNakedTds", testDiscardsNakedTds),
        ("testHandlesNestedImplicitTable", testHandlesNestedImplicitTable),
        ("testHandlesWhatWgExpensesTableExample", testHandlesWhatWgExpensesTableExample),
        ("testHandlesTbodyTable", testHandlesTbodyTable),
        ("testHandlesImplicitCaptionClose", testHandlesImplicitCaptionClose),
        ("testNoTableDirectInTable", testNoTableDirectInTable),
        ("testIgnoresDupeEndTrTag", testIgnoresDupeEndTrTag),
        ("testHandlesBaseTags", testHandlesBaseTags),
        ("testHandlesProtocolRelativeUrl", testHandlesProtocolRelativeUrl),
        ("testHandlesCdata", testHandlesCdata),
        ("testHandlesUnclosedCdataAtEOF", testHandlesUnclosedCdataAtEOF),
        ("testHandlesInvalidStartTags", testHandlesInvalidStartTags),
        ("testHandlesUnknownTags", testHandlesUnknownTags),
        ("testHandlesUnknownInlineTags", testHandlesUnknownInlineTags),
        ("testParsesBodyFragment", testParsesBodyFragment),
        ("testHandlesUnknownNamespaceTags", testHandlesUnknownNamespaceTags),
        ("testHandlesKnownEmptyBlocks", testHandlesKnownEmptyBlocks),
        ("testHandlesSolidusAtAttributeEnd", testHandlesSolidusAtAttributeEnd),
        ("testHandlesMultiClosingBody", testHandlesMultiClosingBody),
        ("testHandlesUnclosedDefinitionLists", testHandlesUnclosedDefinitionLists),
        ("testHandlesBlocksInDefinitions", testHandlesBlocksInDefinitions),
        ("testHandlesFrames", testHandlesFrames),
        ("testIgnoresContentAfterFrameset", testIgnoresContentAfterFrameset),
        ("testHandlesJavadocFont", testHandlesJavadocFont),
        ("testHandlesBaseWithoutHref", testHandlesBaseWithoutHref),
        ("testNormalisesDocument", testNormalisesDocument),
        ("testNormalisesEmptyDocument", testNormalisesEmptyDocument),
        ("testNormalisesHeadlessBody", testNormalisesHeadlessBody),
        ("testNormalisedBodyAfterContent", testNormalisedBodyAfterContent),
        ("testfindsCharsetInMalformedMeta", testfindsCharsetInMalformedMeta),
        ("testHgroup", testHgroup),
        ("testRelaxedTags", testRelaxedTags),
        ("testHeaderContents", testHeaderContents),
        ("testSpanContents", testSpanContents),
        ("testNoImagesInNoScriptInHead", testNoImagesInNoScriptInHead),
        ("testAFlowContents", testAFlowContents),
        ("testFontFlowContents", testFontFlowContents),
        ("testhandlesMisnestedTagsBI", testhandlesMisnestedTagsBI),
        ("testhandlesMisnestedTagsBP", testhandlesMisnestedTagsBP),
        ("testhandlesUnexpectedMarkupInTables", testhandlesUnexpectedMarkupInTables),
        ("testHandlesUnclosedFormattingElements", testHandlesUnclosedFormattingElements),
        ("testhandlesUnclosedAnchors", testhandlesUnclosedAnchors),
        ("testreconstructFormattingElements", testreconstructFormattingElements),
        ("testreconstructFormattingElementsInTable", testreconstructFormattingElementsInTable),
        ("testcommentBeforeHtml", testcommentBeforeHtml),
        ("testemptyTdTag", testemptyTdTag),
        ("testhandlesSolidusInA", testhandlesSolidusInA),
        ("testhandlesSpanInTbody", testhandlesSpanInTbody)
    ]
}()
}

View File

@ -10,103 +10,102 @@ import XCTest
import SwiftSoup
class NodeTest: XCTestCase {
/// Checks `absUrl` on elements built with no base URI, a usable base URI and
/// an unknown-scheme base URI: absolute attribute values are returned as-is,
/// relative ones are resolved only when a base exists, and missing attributes
/// give an empty string.
func testHandlesBaseUri() {
    do {
        let tag: Tag = try Tag.valueOf("a")
        let attribs: Attributes = Attributes()
        try attribs.put("relHref", "/foo")
        try attribs.put("absHref", "http://bar/qux")

        let noBase: Element = Element(tag, "", attribs)
        XCTAssertEqual("", try noBase.absUrl("relHref")) // with no base, should NOT fallback to href attrib, whatever it is
        XCTAssertEqual("http://bar/qux", try noBase.absUrl("absHref")) // no base but valid attrib, return attrib

        let withBase: Element = Element(tag, "http://foo/", attribs)
        XCTAssertEqual("http://foo/foo", try withBase.absUrl("relHref")) // construct abs from base + rel
        XCTAssertEqual("http://bar/qux", try withBase.absUrl("absHref")) // href is abs, so returns that
        XCTAssertEqual("", try withBase.absUrl("noval"))

        let dodgyBase: Element = Element(tag, "wtf://no-such-protocol/", attribs)
        XCTAssertEqual("http://bar/qux", try dodgyBase.absUrl("absHref")) // base fails, but href good, so get that
        //TODO:Nabil in swift an url with scheme wtf is valid , find a method to validate schemes
        //XCTAssertEqual("", try dodgyBase.absUrl("relHref")); // base fails, only rel href, so return nothing
    } catch {
        // Report the thrown error itself instead of an unrelated failed comparison.
        XCTFail("Unexpected error: \(error)")
    }
}
/// Setting the base URI on a document must also be visible on its descendant
/// elements.
func testSetBaseUriIsRecursive() {
    do {
        let doc: Document = try SwiftSoup.parse("<div><p></p></div>")
        let baseUri: String = "https://jsoup.org"
        try doc.setBaseUri(baseUri)

        XCTAssertEqual(baseUri, doc.getBaseUri())
        XCTAssertEqual(baseUri, try doc.select("div").first()?.getBaseUri())
        XCTAssertEqual(baseUri, try doc.select("p").first()?.getBaseUri())
    } catch {
        // Report the thrown error itself instead of an unrelated failed comparison.
        XCTFail("Unexpected error: \(error)")
    }
}
/// Reading an attribute with the `abs:` prefix returns the value resolved
/// against the document's base URI, while the plain key returns it verbatim.
func testHandlesAbsPrefix() {
    do {
        let doc: Document = try SwiftSoup.parse("<a href=/foo>Hello</a>", "https://jsoup.org/")
        let a: Element? = try doc.select("a").first()
        XCTAssertEqual("/foo", try a?.attr("href"))
        XCTAssertEqual("https://jsoup.org/foo", try a?.attr("abs:href"))
        //XCTAssertTrue(a!.hasAttr("abs:href"));//TODO:nabil
    } catch {
        // Report the thrown error itself instead of an unrelated failed comparison.
        XCTFail("Unexpected error: \(error)")
    }
}
/// `abs:src` on an image resolves against the base URI and must agree with
/// `absUrl("src")`.
func testHandlesAbsOnImage() {
    do {
        let doc: Document = try SwiftSoup.parse("<p><img src=\"/rez/osi_logo.png\" /></p>", "https://jsoup.org/")
        let img: Element? = try doc.select("img").first()
        XCTAssertEqual("https://jsoup.org/rez/osi_logo.png", try img?.attr("abs:src"))
        XCTAssertEqual(try img?.absUrl("src"), try img?.attr("abs:src"))
    } catch {
        // Report the thrown error itself instead of an unrelated failed comparison.
        XCTFail("Unexpected error: \(error)")
    }
}
/// `hasAttr("abs:href")` is true only when the attribute can be made absolute:
/// false for a relative link in a document with no base URI, true for a link
/// that is already absolute.
func testHandlesAbsPrefixOnHasAttr() {
    do {
        // 1: no abs url; 2: has abs url
        let doc: Document = try SwiftSoup.parse("<a id=1 href='/foo'>One</a> <a id=2 href='https://jsoup.org/'>Two</a>")
        let one: Element = try doc.select("#1").first()!
        let two: Element = try doc.select("#2").first()!

        XCTAssertFalse(one.hasAttr("abs:href"))
        XCTAssertTrue(one.hasAttr("href"))
        XCTAssertEqual("", try one.absUrl("href"))

        XCTAssertTrue(two.hasAttr("abs:href"))
        XCTAssertTrue(two.hasAttr("href"))
        XCTAssertEqual("https://jsoup.org/", try two.absUrl("href"))
    } catch {
        // Report the thrown error itself instead of an unrelated failed comparison.
        XCTFail("Unexpected error: \(error)")
    }
}
/// An attribute whose literal name is `abs:href` must be returned verbatim and
/// not treated as a request to absolutize `href`.
func testLiteralAbsPrefix() {
    do {
        // if there is a literal attribute "abs:xxx", don't try and make absolute.
        let doc: Document = try SwiftSoup.parse("<a abs:href='odd'>One</a>")
        let el: Element = try doc.select("a").first()!
        XCTAssertTrue(el.hasAttr("abs:href"))
        XCTAssertEqual("odd", try el.attr("abs:href"))
    } catch {
        // Report the thrown error itself instead of an unrelated failed comparison.
        XCTFail("Unexpected error: \(error)")
    }
}
//TODO:Nabil
/*
@ -123,217 +122,217 @@ class NodeTest: XCTestCase {
}
*/
/// A relative link must resolve against a `file://localhost/...` base URI.
func testHandleAbsOnLocalhostFileUris() {
    do {
        let doc: Document = try SwiftSoup.parse("<a href='password'>One/a><a href='/var/log/messages'>Two</a>", "file://localhost/etc/")
        let one: Element? = try doc.select("a").first()
        XCTAssertEqual("file://localhost/etc/password", try one?.absUrl("href"))
    } catch {
        // Report the thrown error itself instead of an unrelated failed comparison.
        XCTFail("Unexpected error: \(error)")
    }
}
/// Protocol-relative URLs (`//host/path`) must take their scheme from the
/// document's base URI, for both `absUrl` and the `abs:` attribute prefix.
func testHandlesAbsOnProtocolessAbsoluteUris() {
    do {
        let doc1: Document = try SwiftSoup.parse("<a href='//example.net/foo'>One</a>", "http://example.com/")
        let doc2: Document = try SwiftSoup.parse("<a href='//example.net/foo'>One</a>", "https://example.com/")

        let one: Element? = try doc1.select("a").first()
        let two: Element? = try doc2.select("a").first()

        XCTAssertEqual("http://example.net/foo", try one?.absUrl("href"))
        XCTAssertEqual("https://example.net/foo", try two?.absUrl("href"))

        let doc3: Document = try SwiftSoup.parse("<img src=//www.google.com/images/errors/logo_sm.gif alt=Google>", "https://google.com")
        XCTAssertEqual("https://www.google.com/images/errors/logo_sm.gif", try doc3.select("img").attr("abs:src"))
    } catch {
        // Report the thrown error itself instead of an unrelated failed comparison.
        XCTFail("Unexpected error: \(error)")
    }
}
/// A query-only link replaces just the query of the base URI, and a relative
/// file link with a query replaces the last path segment.
func testAbsHandlesRelativeQuery() {
    do {
        let doc: Document = try SwiftSoup.parse("<a href='?foo'>One</a> <a href='bar.html?foo'>Two</a>", "https://jsoup.org/path/file?bar")

        let a1: Element? = try doc.select("a").first()
        XCTAssertEqual("https://jsoup.org/path/file?foo", try a1?.absUrl("href"))

        let a2: Element? = try doc.select("a").get(1)
        XCTAssertEqual("https://jsoup.org/path/bar.html?foo", try a2?.absUrl("href"))
    } catch {
        // Report the thrown error itself instead of an unrelated failed comparison.
        XCTFail("Unexpected error: \(error)")
    }
}
/// A `./`-prefixed link must resolve against a base URI that has no trailing
/// slash or path.
func testAbsHandlesDotFromIndex() {
    do {
        let doc: Document = try SwiftSoup.parse("<a href='./one/two.html'>One</a>", "http://example.com")
        let a1: Element? = try doc.select("a").first()
        XCTAssertEqual("http://example.com/one/two.html", try a1?.absUrl("href"))
    } catch {
        // Report the thrown error itself instead of an unrelated failed comparison.
        XCTFail("Unexpected error: \(error)")
    }
}
/// Removing the first child node of a paragraph must drop it from both the
/// paragraph's text and its html.
func testRemove() {
    do {
        let doc: Document = try SwiftSoup.parse("<p>One <span>two</span> three</p>")
        let p: Element? = try doc.select("p").first()
        try p?.childNode(0).remove()

        XCTAssertEqual("two three", try p?.text())
        XCTAssertEqual("<span>two</span> three", TextUtil.stripNewlines(try p!.html()))
    } catch {
        // Report the thrown error itself instead of an unrelated failed comparison.
        XCTFail("Unexpected error: \(error)")
    }
}
/// Replacing a child node with a newly created element must put the new
/// element in the old node's position.
func testReplace() {
    do {
        let doc: Document = try SwiftSoup.parse("<p>One <span>two</span> three</p>")
        let p: Element? = try doc.select("p").first()
        let insert: Element = try doc.createElement("em").text("foo")
        try p?.childNode(1).replaceWith(insert)

        XCTAssertEqual("One <em>foo</em> three", try p?.html())
    } catch {
        // Report the thrown error itself instead of an unrelated failed comparison.
        XCTFail("Unexpected error: \(error)")
    }
}
// Checks that both an element and the document itself report the document as owner,
// and that the document has no parent.
func testOwnerDocument() {
    do {
        let doc: Document = try SwiftSoup.parse("<p>Hello")
        let p: Element? = try doc.select("p").first()
        XCTAssertTrue(p?.ownerDocument() == doc)
        XCTAssertTrue(doc.ownerDocument() == doc)
        XCTAssertNil(doc.parent())
    } catch {
        // Surface the thrown error instead of an opaque "1 is not equal to 2" failure.
        XCTFail("Unexpected error: \(error)")
    }
}
// Inserts first an Element and then an HTML string before <b>, checking the body markup each time.
func testBefore() {
    do {
        let doc: Document = try SwiftSoup.parse("<p>One <b>two</b> three</p>")
        let newNode: Element = Element(try Tag.valueOf("em"), "")
        try newNode.appendText("four")
        try doc.select("b").first()?.before(newNode)
        XCTAssertEqual("<p>One <em>four</em><b>two</b> three</p>", try doc.body()?.html())
        try doc.select("b").first()?.before("<i>five</i>")
        XCTAssertEqual("<p>One <em>four</em><i>five</i><b>two</b> three</p>", try doc.body()?.html())
    } catch {
        // Surface the thrown error instead of an opaque "1 is not equal to 2" failure.
        XCTFail("Unexpected error: \(error)")
    }
}
// Inserts first an Element and then an HTML string after <b>, checking the body markup each time.
func testAfter() {
    do {
        let doc: Document = try SwiftSoup.parse("<p>One <b>two</b> three</p>")
        let newNode: Element = Element(try Tag.valueOf("em"), "")
        try newNode.appendText("four")
        try _ = doc.select("b").first()?.after(newNode)
        XCTAssertEqual("<p>One <b>two</b><em>four</em> three</p>", try doc.body()?.html())
        try doc.select("b").first()?.after("<i>five</i>")
        XCTAssertEqual("<p>One <b>two</b><i>five</i><em>four</em> three</p>", try doc.body()?.html())
    } catch {
        // Surface the thrown error instead of an opaque "1 is not equal to 2" failure.
        XCTFail("Unexpected error: \(error)")
    }
}
// Unwraps a <span> and checks that its children take its place, and that the returned
// node is the span's former first child, now parented by the <div>.
func testUnwrap() {
    do {
        let doc: Document = try SwiftSoup.parse("<div>One <span>Two <b>Three</b></span> Four</div>")
        let span: Element? = try doc.select("span").first()
        let twoText: Node? = span?.childNode(0)
        let node: Node? = try span?.unwrap()
        XCTAssertEqual("<div>One Two <b>Three</b> Four</div>", TextUtil.stripNewlines(try doc.body()!.html()))
        XCTAssertTrue(((node as? TextNode) != nil))
        XCTAssertEqual("Two ", (node as? TextNode)?.text())
        XCTAssertEqual(node, twoText)
        XCTAssertEqual(node?.parent(), try doc.select("div").first())
    } catch {
        // Surface the thrown error instead of an opaque "1 is not equal to 2" failure.
        XCTFail("Unexpected error: \(error)")
    }
}
// Unwrapping an element without children removes it and returns nil.
func testUnwrapNoChildren() {
    do {
        let doc: Document = try SwiftSoup.parse("<div>One <span></span> Two</div>")
        let span: Element? = try doc.select("span").first()
        let node: Node? = try span?.unwrap()
        XCTAssertEqual("<div>One Two</div>", TextUtil.stripNewlines(try doc.body()!.html()))
        XCTAssertTrue(node == nil)
    } catch {
        // Surface the thrown error instead of an opaque "1 is not equal to 2" failure.
        XCTFail("Unexpected error: \(error)")
    }
}
// Traverses the first <div> with a visitor and checks the order of head/tail callbacks.
func testTraverse() {
    do {
        let doc: Document = try SwiftSoup.parse("<div><p>Hello</p></div><div>There</div>")
        let accum: StringBuilder = StringBuilder()

        // Visitor that appends "<name>" when a node is entered and "</name>" when it is left.
        class TagTraceVisitor: NodeVisitor {
            let accum: StringBuilder

            init (_ accum: StringBuilder) {
                self.accum = accum
            }

            func head(_ node: Node, _ depth: Int)throws {
                accum.append("<" + node.nodeName() + ">")
            }

            func tail(_ node: Node, _ depth: Int)throws {
                accum.append("</" + node.nodeName() + ">")
            }
        }

        try doc.select("div").first()?.traverse(TagTraceVisitor(accum))
        XCTAssertEqual("<div><p><#text></#text></p></div>", accum.toString())
    } catch {
        // Surface the thrown error instead of an opaque "1 is not equal to 2" failure.
        XCTFail("Unexpected error: \(error)")
    }
}
// A freshly constructed element with no parent reports index 0, no siblings,
// and nil for previous/next sibling lookups.
func testOrphanNodeReturnsNullForSiblingElements() {
    do {
        let node: Node = Element(try Tag.valueOf("p"), "")
        let el: Element = Element(try Tag.valueOf("p"), "")
        XCTAssertEqual(0, node.siblingIndex)
        XCTAssertEqual(0, node.siblingNodes().count)
        XCTAssertNil(node.previousSibling())
        XCTAssertNil(node.nextSibling())
        XCTAssertEqual(0, el.siblingElements().size())
        XCTAssertNil(try el.previousElementSibling())
        XCTAssertNil(try el.nextElementSibling())
    } catch {
        // Surface the thrown error instead of an opaque "1 is not equal to 2" failure.
        XCTFail("Unexpected error: \(error)")
    }
}
// siblingNodes() of the middle paragraph returns the other two paragraphs, not itself.
func testNodeIsNotASiblingOfItself() {
    do {
        let doc: Document = try SwiftSoup.parse("<div><p>One<p>Two<p>Three</div>")
        let p2: Element = try doc.select("p").get(1)
        XCTAssertEqual("Two", try p2.text())
        let nodes = p2.siblingNodes()
        XCTAssertEqual(2, nodes.count)
        XCTAssertEqual("<p>One</p>", try nodes[0].outerHtml())
        XCTAssertEqual("<p>Three</p>", try nodes[1].outerHtml())
    } catch {
        // Surface the thrown error instead of an opaque "1 is not equal to 2" failure.
        XCTFail("Unexpected error: \(error)")
    }
}
func testChildNodesCopy() {
do{
do {
let doc: Document = try SwiftSoup.parse("<div id=1>Text 1 <p>One</p> Text 2 <p>Two<p>Three</div><div id=2>")
let div1: Element? = try doc.select("#1").first()
let div2: Element? = try doc.select("#2").first()
@ -345,56 +344,56 @@ class NodeTest: XCTestCase {
XCTAssertEqual("Text 1 ", tn1?.text())
try div2?.insertChildren(-1, divChildren!)
XCTAssertEqual("<div id=\"1\">Text 1 <p>One</p> Text 2 <p>Two</p><p>Three</p></div><div id=\"2\">Text 1 updated"+"<p>One</p> Text 2 <p>Two</p><p>Three</p></div>", TextUtil.stripNewlines(try doc.body()!.html()))
}catch{
XCTAssertEqual(1,2)
} catch {
XCTAssertEqual(1, 2)
}
}
// Mutating an element after the document was copied must not affect the copy.
func testSupportsClone() {
    do {
        let doc: Document = try SwiftSoup.parse("<div class=foo>Text</div>")
        let el: Element = try doc.select("div").first()!
        XCTAssertTrue(el.hasClass("foo"))

        let elClone: Element = try (doc.copy() as! Document).select("div").first()!
        XCTAssertTrue(elClone.hasClass("foo"))
        XCTAssertTrue(try elClone.text() == "Text")

        // Change the original only; the clone is expected to keep its class and text.
        try el.removeClass("foo")
        try el.text("None")
        XCTAssertFalse(el.hasClass("foo"))
        XCTAssertTrue(elClone.hasClass("foo"))
        XCTAssertTrue(try el.text() == "None")
        XCTAssertTrue(try elClone.text()=="Text")
    } catch {
        // Surface the thrown error instead of an opaque "1 is not equal to 2" failure.
        XCTFail("Unexpected error: \(error)")
    }
}
// Name/method pairs for this test case, one entry per test method.
static var allTests = {
    return [
        ("testHandlesBaseUri", testHandlesBaseUri),
        ("testSetBaseUriIsRecursive", testSetBaseUriIsRecursive),
        ("testHandlesAbsPrefix", testHandlesAbsPrefix),
        ("testHandlesAbsOnImage", testHandlesAbsOnImage),
        ("testHandlesAbsPrefixOnHasAttr", testHandlesAbsPrefixOnHasAttr),
        ("testLiteralAbsPrefix", testLiteralAbsPrefix),
        ("testHandleAbsOnLocalhostFileUris", testHandleAbsOnLocalhostFileUris),
        ("testHandlesAbsOnProtocolessAbsoluteUris", testHandlesAbsOnProtocolessAbsoluteUris),
        ("testAbsHandlesRelativeQuery", testAbsHandlesRelativeQuery),
        ("testAbsHandlesDotFromIndex", testAbsHandlesDotFromIndex),
        ("testRemove", testRemove),
        ("testReplace", testReplace),
        ("testOwnerDocument", testOwnerDocument),
        ("testBefore", testBefore),
        ("testAfter", testAfter),
        ("testUnwrap", testUnwrap),
        ("testUnwrapNoChildren", testUnwrapNoChildren),
        ("testTraverse", testTraverse),
        ("testOrphanNodeReturnsNullForSiblingElements", testOrphanNodeReturnsNullForSiblingElements),
        ("testNodeIsNotASiblingOfItself", testNodeIsNotASiblingOfItself),
        ("testChildNodesCopy", testChildNodesCopy),
        ("testSupportsClone", testSupportsClone)
    ]
}()
}

View File

@ -10,29 +10,29 @@ import XCTest
import SwiftSoup
// Tests for ParseSettings name normalization.
class ParseSettingsTest: XCTestCase {

    // Checks normalizeTag / normalizeAttribute for all four flag combinations.
    // Per the assertions below, a `true` first argument keeps tag case and a `true`
    // second argument keeps attribute case; `false` lower-cases the name.
    func testCaseSupport() {
        let bothOn = ParseSettings(true, true)
        let bothOff = ParseSettings(false, false)
        let tagOn = ParseSettings(true, false)
        let attrOn = ParseSettings(false, true)

        XCTAssertEqual("FOO", bothOn.normalizeTag("FOO"))
        XCTAssertEqual("FOO", bothOn.normalizeAttribute("FOO"))

        XCTAssertEqual("foo", bothOff.normalizeTag("FOO"))
        XCTAssertEqual("foo", bothOff.normalizeAttribute("FOO"))

        XCTAssertEqual("FOO", tagOn.normalizeTag("FOO"))
        XCTAssertEqual("foo", tagOn.normalizeAttribute("FOO"))

        XCTAssertEqual("foo", attrOn.normalizeTag("FOO"))
        XCTAssertEqual("FOO", attrOn.normalizeAttribute("FOO"))
    }

    // Name/method pairs for this test case, one entry per test method.
    static var allTests = {
        return [
            ("testCaseSupport", testCaseSupport)
        ]
    }()
}

View File

@ -10,7 +10,7 @@ import XCTest
import SwiftSoup
class QueryParserTest: XCTestCase {
func testOrGetsCorrectPrecedence()throws {
// tests that a selector "a b, c d, e f" evals to (a AND b) OR (c AND d) OR (e AND f)"
// top level or, three child ands
@ -18,7 +18,7 @@ class QueryParserTest: XCTestCase {
XCTAssertTrue((eval as? CombiningEvaluator.Or) != nil)
let or: CombiningEvaluator.Or = eval as! CombiningEvaluator.Or
XCTAssertEqual(3, or.evaluators.count)
for innerEval:Evaluator in or.evaluators {
for innerEval: Evaluator in or.evaluators {
XCTAssertTrue((innerEval as? CombiningEvaluator.And) != nil)
let and: CombiningEvaluator.And = innerEval as! CombiningEvaluator.And
XCTAssertEqual(2, and.evaluators.count)
@ -26,27 +26,27 @@ class QueryParserTest: XCTestCase {
XCTAssertTrue((and.evaluators[1] as? StructuralEvaluator.Parent) != nil)
}
}
// Parses a grouped selector and checks it becomes an Or of two And evaluators,
// each with two child evaluators and the expected string form.
func testParsesMultiCorrectly()throws {
    let eval: Evaluator = try QueryParser.parse(".foo > ol, ol > li + li")

    // Conditional casts keep a wrong evaluator type a test failure instead of a crash.
    guard let orEval = eval as? CombiningEvaluator.Or else {
        XCTFail("Expected the top-level evaluator to be CombiningEvaluator.Or")
        return
    }
    XCTAssertEqual(2, orEval.evaluators.count)

    // Only index into the children once the count is known to be what the test expects.
    guard orEval.evaluators.count == 2,
        let andLeft = orEval.evaluators[0] as? CombiningEvaluator.And,
        let andRight = orEval.evaluators[1] as? CombiningEvaluator.And else {
        XCTFail("Expected both children of the Or evaluator to be CombiningEvaluator.And")
        return
    }

    XCTAssertEqual("ol :ImmediateParent.foo", andLeft.toString())
    XCTAssertEqual(2, andLeft.evaluators.count)
    XCTAssertEqual("li :prevli :ImmediateParentol", andRight.toString())
    XCTAssertEqual(2, andRight.evaluators.count)
}
// Name/method pairs for this test case, one entry per test method.
static var allTests = {
    return [
        ("testOrGetsCorrectPrecedence", testOrGetsCorrectPrecedence),
        ("testParsesMultiCorrectly", testParsesMultiCorrectly)
    ]
}()
}

View File

@ -10,7 +10,7 @@ import XCTest
import SwiftSoup
class SelectorTest: XCTestCase {
func testByTag()throws {
// should be case insensitive
let els: Elements = try SwiftSoup.parse("<div id=1><div id=2><p>Hello</p></div></div><DIV id=3>").select("DIV")
@ -18,129 +18,129 @@ class SelectorTest: XCTestCase {
XCTAssertEqual("1", els.get(0).id())
XCTAssertEqual("2", els.get(1).id())
XCTAssertEqual("3", els.get(2).id())
let none: Elements = try SwiftSoup.parse("<div id=1><div id=2><p>Hello</p></div></div><div id=3>").select("span")
XCTAssertEqual(0, none.size())
}
// "#foo" returns every element carrying that id, in document order, and nothing when no element has it.
func testById() throws {
    let matches = try SwiftSoup.parse("<div><p id=foo>Hello</p><p id=foo>Foo two!</p></div>").select("#foo")
    XCTAssertEqual(2, matches.size())
    try XCTAssertEqual("Hello", matches.get(0).text())
    try XCTAssertEqual("Foo two!", matches.get(1).text())

    let missing = try SwiftSoup.parse("<div id=1></div>").select("#foo")
    XCTAssertEqual(0, missing.size())
}
// Class selectors match regardless of letter case and return nothing for an unknown class.
func testByClass() throws {
    let mixedCase = try SwiftSoup.parse("<p id=0 class='ONE two'><p id=1 class='one'><p id=2 class='two'>").select("P.One")
    XCTAssertEqual(2, mixedCase.size())
    XCTAssertEqual("0", mixedCase.get(0).id())
    XCTAssertEqual("1", mixedCase.get(1).id())

    let unknown = try SwiftSoup.parse("<div class='one'></div>").select(".foo")
    XCTAssertEqual(0, unknown.size())

    let hyphenated = try SwiftSoup.parse("<div class='One-Two'></div>").select(".one-two")
    XCTAssertEqual(1, hyphenated.size())
}
// Exercises the attribute selectors: presence, =, !=, ^=, $= and *=.
func testByAttribute() throws {
    let markup = "<div Title=Foo /><div Title=Bar /><div Style=Qux /><div title=Bam /><div title=SLAM />" +
        "<div data-name='with spaces'/>"
    let doc = try SwiftSoup.parse(markup)

    // Presence
    let titled = try doc.select("[title]")
    XCTAssertEqual(4, titled.size())

    // Equality, with differing key/value case and with quoted values
    let upperKey = try doc.select("[TITLE=foo]")
    XCTAssertEqual(1, upperKey.size())
    let quotedLower = try doc.select("[title=\"foo\"]")
    XCTAssertEqual(1, quotedLower.size())
    let quotedExact = try doc.select("[title=\"Foo\"]")
    XCTAssertEqual(1, quotedExact.size())
    let spaced = try doc.select("[data-name=\"with spaces\"]")
    XCTAssertEqual(1, spaced.size())
    try XCTAssertEqual("with spaces", spaced.first()?.attr("data-name"))

    // Inequality
    let notBar = try doc.select("div[title!=bar]")
    XCTAssertEqual(5, notBar.size())
    try XCTAssertEqual("Foo", notBar.first()?.attr("title"))

    // Prefix
    let prefixed = try doc.select("[title^=ba]")
    XCTAssertEqual(2, prefixed.size())
    try XCTAssertEqual("Bar", prefixed.first()?.attr("title"))
    try XCTAssertEqual("Bam", prefixed.last()?.attr("title"))

    // Suffix
    let suffixed = try doc.select("[title$=am]")
    XCTAssertEqual(2, suffixed.size())
    try XCTAssertEqual("Bam", suffixed.first()?.attr("title"))
    try XCTAssertEqual("SLAM", suffixed.last()?.attr("title"))

    // Substring
    let containing = try doc.select("[title*=a]")
    XCTAssertEqual(3, containing.size())
    try XCTAssertEqual("Bar", containing.first()?.attr("title"))
    try XCTAssertEqual("SLAM", containing.last()?.attr("title"))
}
// Selects "abc:def" elements with the "abc|def" syntax, alone and combined with class and :contains.
func testNamespacedTag() throws {
    let doc = try SwiftSoup.parse("<div><abc:def id=1>Hello</abc:def></div> <abc:def class=bold id=2>There</abc:def>")

    let tagOnly = try doc.select("abc|def")
    XCTAssertEqual(2, tagOnly.size())
    XCTAssertEqual("1", tagOnly.first()?.id())
    XCTAssertEqual("2", tagOnly.last()?.id())

    let classOnly = try doc.select(".bold")
    XCTAssertEqual(1, classOnly.size())
    XCTAssertEqual("2", classOnly.last()?.id())

    let tagAndClass = try doc.select("abc|def.bold")
    XCTAssertEqual(1, tagAndClass.size())
    XCTAssertEqual("2", tagAndClass.last()?.id())

    let tagAndText = try doc.select("abc|def:contains(e)")
    XCTAssertEqual(2, tagAndText.size())
    XCTAssertEqual("1", tagAndText.first()?.id())
    XCTAssertEqual("2", tagAndText.last()?.id())
}
// Same checks as the namespaced-tag test, but with a wildcard namespace ("*|def").
func testWildcardNamespacedTag() throws {
    let doc = try SwiftSoup.parse("<div><abc:def id=1>Hello</abc:def></div> <abc:def class=bold id=2>There</abc:def>")

    let tagOnly = try doc.select("*|def")
    XCTAssertEqual(2, tagOnly.size())
    XCTAssertEqual("1", tagOnly.first()?.id())
    XCTAssertEqual("2", tagOnly.last()?.id())

    let classOnly = try doc.select(".bold")
    XCTAssertEqual(1, classOnly.size())
    XCTAssertEqual("2", classOnly.last()?.id())

    let tagAndClass = try doc.select("*|def.bold")
    XCTAssertEqual(1, tagAndClass.size())
    XCTAssertEqual("2", tagAndClass.last()?.id())

    let tagAndText = try doc.select("*|def:contains(e)")
    XCTAssertEqual(2, tagAndText.size())
    XCTAssertEqual("1", tagAndText.first()?.id())
    XCTAssertEqual("2", tagAndText.last()?.id())
}
// "[^data-]" matches elements having any attribute whose name starts with "data-".
func testByAttributeStarting() throws {
    let doc = try SwiftSoup.parse("<div id=1 data-name=jsoup>Hello</div><p data-val=5 id=2>There</p><p id=3>No</p>")

    let anyTag = try doc.select("[^data-]")
    XCTAssertEqual(2, anyTag.size())
    XCTAssertEqual("1", anyTag.first()?.id())
    XCTAssertEqual("2", anyTag.last()?.id())

    let paragraphsOnly = try doc.select("p[^data-]")
    XCTAssertEqual(1, paragraphsOnly.size())
    XCTAssertEqual("2", paragraphsOnly.first()?.id())
}
func testByAttributeRegex()throws {
let doc: Document = try SwiftSoup.parse("<p><img src=foo.png id=1><img src=bar.jpg id=2><img src=qux.JPEG id=3><img src=old.gif><img></p>")
let imgs: Elements = try doc.select("img[src~=(?i)\\.(png|jpe?g)]")
@ -149,7 +149,7 @@ class SelectorTest: XCTestCase {
XCTAssertEqual("2", imgs.get(1).id())
XCTAssertEqual("3", imgs.get(2).id())
}
func testByAttributeRegexCharacterClass()throws {
let doc: Document = try SwiftSoup.parse("<p><img src=foo.png id=1><img src=bar.jpg id=2><img src=qux.JPEG id=3><img src=old.gif id=4></p>")
let imgs: Elements = try doc.select("img[src~=[o]]")
@ -157,14 +157,14 @@ class SelectorTest: XCTestCase {
XCTAssertEqual("1", imgs.get(0).id())
XCTAssertEqual("4", imgs.get(1).id())
}
// An attribute regex containing "|" still works when combined with a descendant selector.
func testByAttributeRegexCombined() throws {
    let doc = try SwiftSoup.parse("<div><table class=x><td>Hello</td></table></div>")
    let tables = try doc.select("div table[class~=x|y]")
    XCTAssertEqual(1, tables.size())
    XCTAssertEqual("Hello", try tables.text())
}
func testCombinedWithContains()throws {
let doc: Document = try SwiftSoup.parse("<p id=1>One</p><p>Two +</p><p>Three +</p>")
let els: Elements = try doc.select("p#1 + :contains(+)")
@ -172,7 +172,7 @@ class SelectorTest: XCTestCase {
try XCTAssertEqual("Two +", els.text())
XCTAssertEqual("p", els.first()?.tagName())
}
func testAllElements()throws {
let h: String = "<div><p>Hello</p><p><b>there</b></p></div>"
let doc: Document = try SwiftSoup.parse(h)
@ -182,19 +182,19 @@ class SelectorTest: XCTestCase {
XCTAssertEqual(3, allUnderDiv.size())
XCTAssertEqual("p", allUnderDiv.first()?.tagName())
}
// "*.first" matches every element with class "first".
func testAllWithClass() throws {
    let markup = "<p class=first>One<p class=first>Two<p>Three"
    let firstClassed = try SwiftSoup.parse(markup).select("*.first")
    XCTAssertEqual(2, firstClassed.size())
}
func testGroupOr()throws {
let h: String = "<div title=foo /><div title=bar /><div /><p></p><img /><span title=qux>"
let doc: Document = try SwiftSoup.parse(h)
let els: Elements = try doc.select("p,div,[title]")
XCTAssertEqual(5, els.size())
XCTAssertEqual("div", els.get(0).tagName())
try XCTAssertEqual("foo", els.get(0).attr("title"))
@ -206,103 +206,103 @@ class SelectorTest: XCTestCase {
XCTAssertEqual("p", els.get(3).tagName())
XCTAssertEqual("span", els.get(4).tagName())
}
// A comma group of attribute selectors returns the union of both matches.
func testGroupOrAttribute() throws {
    let markup = "<div id=1 /><div id=2 /><div title=foo /><div title=bar />"
    let union = try SwiftSoup.parse(markup).select("[id],[title=foo]")

    XCTAssertEqual(3, union.size())
    XCTAssertEqual("1", union.get(0).id())
    XCTAssertEqual("2", union.get(1).id())
    XCTAssertEqual("foo", try union.get(2).attr("title"))
}
// Descendant selectors evaluated from a root element: the root itself can anchor the
// selector, but ancestors above the root cannot.
func testDescendant() throws {
    let markup = "<div class=head><p class=first>Hello</p><p>There</p></div><p>None</p>"
    let doc = try SwiftSoup.parse(markup)
    let root = try doc.getElementsByClass("HEAD").first()!

    let paragraphs = try root.select(".head p")
    XCTAssertEqual(2, paragraphs.size())
    XCTAssertEqual("Hello", try paragraphs.get(0).text())
    XCTAssertEqual("There", try paragraphs.get(1).text())

    let firstParagraph = try root.select("p.first")
    XCTAssertEqual(1, firstParagraph.size())
    XCTAssertEqual("Hello", try firstParagraph.get(0).text())

    // "p .first" asks for a .first element below a <p>; the <p> itself must not match.
    let nested = try root.select("p .first")
    XCTAssertEqual(0, nested.size())

    let aboveRoot = try root.select("body div.head")
    XCTAssertEqual(0, aboveRoot.size())
}
// Compound selectors (no space) apply to one element; a space makes the right side a descendant.
func testAnd() throws {
    let markup = "<div id=1 class='foo bar' title=bar name=qux><p class=foo title=bar>Hello</p></div"
    let doc = try SwiftSoup.parse(markup)

    let compound = try doc.select("div.foo")
    XCTAssertEqual(1, compound.size())
    XCTAssertEqual("div", compound.first()?.tagName())

    // With a space this behaves like "div *.foo".
    let descendant = try doc.select("div .foo")
    XCTAssertEqual(1, descendant.size())
    XCTAssertEqual("p", descendant.first()?.tagName())

    // Id, two classes and two attribute values on the same element.
    let verySpecific = try doc.select("div#1.foo.bar[title=bar][name=qux]")
    XCTAssertEqual(1, verySpecific.size())
    XCTAssertEqual("div", verySpecific.first()?.tagName())

    let explicitWildcard = try doc.select("div *.foo")
    XCTAssertEqual(1, explicitWildcard.size())
    XCTAssertEqual("p", explicitWildcard.first()?.tagName())
}
// A three-level descendant selector evaluated from the first .head element.
func testDeeperDescendant() throws {
    let markup = "<div class=head><p><span class=first>Hello</div><div class=head><p class=first><span>Another</span><p>Again</div>"
    let doc = try SwiftSoup.parse(markup)
    let root = try doc.getElementsByClass("head").first()!

    let deep = try root.select("div p .first")
    XCTAssertEqual(1, deep.size())
    XCTAssertEqual("Hello", try deep.first()?.text())
    XCTAssertEqual("span", deep.first()?.tagName())

    // Selectors anchored above the root element find nothing.
    let aboveRoot = try root.select("body p .first")
    XCTAssertEqual(0, aboveRoot.size())
}
// The child combinator ">" matches direct children only.
func testParentChildElement() throws {
    let markup = "<div id=1><div id=2><div id = 3></div></div></div><div id=4></div>"
    let doc = try SwiftSoup.parse(markup)

    let childDivs = try doc.select("div > div")
    XCTAssertEqual(2, childDivs.size())
    XCTAssertEqual("2", childDivs.get(0).id()) // child of div 1
    XCTAssertEqual("3", childDivs.get(1).id()) // child of div 2

    let childrenOfFirst = try doc.select("div#1 > div")
    XCTAssertEqual(1, childrenOfFirst.size())
    XCTAssertEqual("2", childrenOfFirst.get(0).id())
}
// Child combinator with class constraints on the parent, then on both parent and child.
func testParentWithClassChild() throws {
    let markup = "<h1 class=foo><a href=1 /></h1><h1 class=foo><a href=2 class=bar /></h1><h1><a href=3 /></h1>"
    let doc = try SwiftSoup.parse(markup)

    let anyHeadingLinks = try doc.select("h1 > a")
    XCTAssertEqual(3, anyHeadingLinks.size())
    XCTAssertEqual("a", anyHeadingLinks.first()?.tagName())

    let fooHeadingLinks = try doc.select("h1.foo > a")
    XCTAssertEqual(2, fooHeadingLinks.size())
    XCTAssertEqual("a", fooHeadingLinks.first()?.tagName())

    let barLinks = try doc.select("h1.foo > a.bar")
    XCTAssertEqual(1, barLinks.size())
}
func testParentChildStar()throws {
let h: String = "<div id=1><p>Hello<p><b>there</b></p></div><div id=2><span>Hi</span></div>"
let doc: Document = try SwiftSoup.parse(h)
@ -312,7 +312,7 @@ class SelectorTest: XCTestCase {
XCTAssertEqual("p", divChilds.get(1).tagName())
XCTAssertEqual("span", divChilds.get(2).tagName())
}
func testMultiChildDescent()throws {
let h: String = "<div id=foo><h1 class=bar><a href=http://example.com/>One</a></h1></div>"
let doc: Document = try SwiftSoup.parse(h)
@ -320,17 +320,17 @@ class SelectorTest: XCTestCase {
XCTAssertEqual(1, els.size())
XCTAssertEqual("a", els.first()?.tagName())
}
// Tag names, attribute names and attribute values in selectors are matched case-insensitively.
func testCaseInsensitive()throws {
    // Mixed case in the markup so that simply lower-casing the selector value cannot produce a match.
    let h: String = "<dIv tItle=bAr><div>"
    let doc: Document = try SwiftSoup.parse(h)
    XCTAssertEqual(2, try doc.select("DIV").size())
    XCTAssertEqual(1, try doc.select("DIV[TITLE]").size())
    XCTAssertEqual(1, try doc.select("DIV[TITLE=BAR]").size())
    // The selector previously lacked its closing "]"; use a well-formed selector so the
    // zero-match result reflects the value mismatch rather than lenient parsing.
    XCTAssertEqual(0, try doc.select("DIV[TITLE=BARBARELLA]").size())
}
func testAdjacentSiblings()throws {
let h: String = "<ol><li>One<li>Two<li>Three</ol>"
let doc: Document = try SwiftSoup.parse(h)
@ -339,7 +339,7 @@ class SelectorTest: XCTestCase {
try XCTAssertEqual("Two", sibs.get(0).text())
try XCTAssertEqual("Three", sibs.get(1).text())
}
func testAdjacentSiblingsWithId()throws {
let h: String = "<ol><li id=1>One<li id=2>Two<li id=3>Three</ol>"
let doc: Document = try SwiftSoup.parse(h)
@ -347,35 +347,35 @@ class SelectorTest: XCTestCase {
XCTAssertEqual(1, sibs.size())
try XCTAssertEqual("Two", sibs.get(0).text())
}
// "+" requires the immediately following sibling; li#3 does not directly follow li#1.
func testNotAdjacent() throws {
    let markup = "<ol><li id=1>One<li id=2>Two<li id=3>Three</ol>"
    let nonAdjacent = try SwiftSoup.parse(markup).select("li#1 + li#3")
    XCTAssertEqual(0, nonAdjacent.size())
}
// Child, descendant and adjacent-sibling combinators mixed in one selector.
func testMixCombinator() throws {
    let markup = "<div class=foo><ol><li>One<li>Two<li>Three</ol></div>"
    let followers = try SwiftSoup.parse(markup).select("body > div.foo li + li")

    XCTAssertEqual(2, followers.size())
    XCTAssertEqual("Two", try followers.get(0).text())
    XCTAssertEqual("Three", try followers.get(1).text())
}
// A comma group whose members each use combinators returns the union of both members.
func testMixCombinatorGroup() throws {
    let markup = "<div class=foo><ol><li>One<li>Two<li>Three</ol></div>"
    let union = try SwiftSoup.parse(markup).select(".foo > ol, ol > li + li")

    XCTAssertEqual(3, union.size())
    XCTAssertEqual("ol", union.get(0).tagName())
    XCTAssertEqual("Two", try union.get(1).text())
    XCTAssertEqual("Three", try union.get(2).text())
}
func testGeneralSiblings()throws {
let h: String = "<ol><li id=1>One<li id=2>Two<li id=3>Three</ol>"
let doc: Document = try SwiftSoup.parse(h)
@ -383,41 +383,41 @@ class SelectorTest: XCTestCase {
XCTAssertEqual(1, els.size())
try XCTAssertEqual("Three", els.first()?.text())
}
// for http://github.com/jhy/jsoup/issues#issue/10
// Ids and classes containing letters, digits, "-" and "_" are found both through the
// lookup methods and through selectors. "." is deliberately not covered: it is allowed
// by the HTML spec but not by CSS identifiers.
func testCharactersInIdAndClass() throws {
    let markup = "<div><p id='a1-foo_bar'>One</p><p class='b2-qux_bif'>Two</p></div>"
    let doc = try SwiftSoup.parse(markup)

    let byIdLookup = try doc.getElementById("a1-foo_bar")!
    XCTAssertEqual("One", try byIdLookup.text())

    let byClassLookup = try doc.getElementsByClass("b2-qux_bif").first()!
    try XCTAssertEqual("Two", byClassLookup.text())

    let byIdSelector = try doc.select("#a1-foo_bar").first()!
    try XCTAssertEqual("One", byIdSelector.text())

    let byClassSelector = try doc.select(".b2-qux_bif").first()!
    try XCTAssertEqual("Two", byClassSelector.text())
}
// for http://github.com/jhy/jsoup/issues#issue/13
// A selector may start with a combinator, which is then applied relative to the context element(s).
func testSupportsLeadingCombinator() throws {
    let flatDoc = try SwiftSoup.parse("<div><p><span>One</span><span>Two</span></p></div>")
    let paragraph = try flatDoc.select("div > p").first()!
    let spans = try paragraph.select("> span")
    XCTAssertEqual(2, spans.size())
    XCTAssertEqual("One", try spans.first()?.text())

    // make sure doesn't get nested
    let nestedDoc = try SwiftSoup.parse("<div id=1><div id=2><div id=3></div></div></div>")
    let firstChildDiv = try nestedDoc.select("div").select(" > div").first()!
    XCTAssertEqual("2", firstChildDiv.id())
}
func testPseudoLessThan()throws {
let doc: Document = try SwiftSoup.parse("<div><p>One</p><p>Two</p><p>Three</>p></div><div><p>Four</p>")
let ps: Elements = try doc.select("div p:lt(2)")
@ -426,7 +426,7 @@ class SelectorTest: XCTestCase {
try XCTAssertEqual("Two", ps.get(1).text())
try XCTAssertEqual("Four", ps.get(2).text())
}
func testPseudoGreaterThan()throws {
let doc: Document = try SwiftSoup.parse("<div><p>One</p><p>Two</p><p>Three</p></div><div><p>Four</p>")
let ps: Elements = try doc.select("div p:gt(0)")
@ -434,215 +434,215 @@ class SelectorTest: XCTestCase {
try XCTAssertEqual("Two", ps.get(0).text())
try XCTAssertEqual("Three", ps.get(1).text())
}
// ":eq(n)" selects elements by their index among siblings.
func testPseudoEquals() throws {
    let doc = try SwiftSoup.parse("<div><p>One</p><p>Two</p><p>Three</>p></div><div><p>Four</p>")

    let firstParagraphs = try doc.select("div p:eq(0)")
    XCTAssertEqual(2, firstParagraphs.size())
    XCTAssertEqual("One", try firstParagraphs.get(0).text())
    XCTAssertEqual("Four", try firstParagraphs.get(1).text())

    let firstOfFirstDiv = try doc.select("div:eq(0) p:eq(0)")
    XCTAssertEqual(1, firstOfFirstDiv.size())
    XCTAssertEqual("One", try firstOfFirstDiv.get(0).text())
    XCTAssertEqual("p", firstOfFirstDiv.get(0).tagName())
}
// Chaining ":gt(0):lt(2)" leaves only the element at index 1.
func testPseudoBetween() throws {
    let doc = try SwiftSoup.parse("<div><p>One</p><p>Two</p><p>Three</>p></div><div><p>Four</p>")
    let middle = try doc.select("div p:gt(0):lt(2)")
    XCTAssertEqual(1, middle.size())
    XCTAssertEqual("Two", try middle.get(0).text())
}
// An index pseudo-selector combined with a class-qualified ancestor.
func testPseudoCombined() throws {
    let doc = try SwiftSoup.parse("<div class='foo'><p>One</p><p>Two</p></div><div><p>Three</p><p>Four</p></div>")
    let laterInFoo = try doc.select("div.foo p:gt(0)")
    XCTAssertEqual(1, laterInFoo.size())
    XCTAssertEqual("Two", try laterInFoo.get(0).text())
}
// ":has(...)" keeps elements that contain a match for the inner selector.
func testPseudoHas()throws {
    let doc: Document = try SwiftSoup.parse("<div id=0><p><span>Hello</span></p></div> <div id=1><span class=foo>There</span></div> <div id=2><p>Not</p></div>")

    let divs1: Elements = try doc.select("div:has(span)")
    XCTAssertEqual(2, divs1.size())
    XCTAssertEqual("0", divs1.get(0).id())
    XCTAssertEqual("1", divs1.get(1).id())

    // The selector previously lacked its closing ")"; use the well-formed form so the
    // test does not depend on how unbalanced parentheses happen to be parsed.
    let divs2: Elements = try doc.select("div:has([class])")
    XCTAssertEqual(1, divs2.size())
    XCTAssertEqual("1", divs2.get(0).id())

    // A group inside :has matches when any member of the group matches.
    let divs3: Elements = try doc.select("div:has(span, p)")
    XCTAssertEqual(3, divs3.size())
    XCTAssertEqual("0", divs3.get(0).id())
    XCTAssertEqual("1", divs3.get(1).id())
    XCTAssertEqual("2", divs3.get(2).id())

    let els1: Elements = try doc.body()!.select(":has(p)")
    XCTAssertEqual(3, els1.size()) // body, div, div
    XCTAssertEqual("body", els1.first()?.tagName())
    XCTAssertEqual("0", els1.get(1).id())
    XCTAssertEqual("2", els1.get(2).id())
}
// ":has" may itself contain ":has", ":matches" or ":contains".
func testNestedHas() throws {
    let doc = try SwiftSoup.parse("<div><p><span>One</span></p></div> <div><p>Two</p></div>")

    let viaNestedHas = try doc.select("div:has(p:has(span))")
    XCTAssertEqual(1, viaNestedHas.size())
    XCTAssertEqual("One", try viaNestedHas.first()?.text())

    // test matches in has
    let viaMatches = try doc.select("div:has(p:matches((?i)two))")
    XCTAssertEqual(1, viaMatches.size())
    XCTAssertEqual("div", viaMatches.first()?.tagName())
    XCTAssertEqual("Two", try viaMatches.first()?.text())

    // test contains in has
    let viaContains = try doc.select("div:has(p:contains(two))")
    XCTAssertEqual(1, viaContains.size())
    XCTAssertEqual("div", viaContains.first()?.tagName())
    XCTAssertEqual("Two", try viaContains.first()?.text())
}
// ":contains(text)" matches elements by their text (including descendants' text), ignoring case.
func testPseudoContains()throws {
    let doc: Document = try SwiftSoup.parse("<div><p>The Rain.</p> <p class=light>The <i>rain</i>.</p> <p>Rain, the.</p></div>")

    let ps1: Elements = try doc.select("p:contains(Rain)")
    XCTAssertEqual(3, ps1.size())

    let ps2: Elements = try doc.select("p:contains(the rain)")
    XCTAssertEqual(2, ps2.size())
    try XCTAssertEqual("The Rain.", ps2.first()?.html())
    try XCTAssertEqual("The <i>rain</i>.", ps2.last()?.html())

    let ps3: Elements = try doc.select("p:contains(the Rain):has(i)")
    XCTAssertEqual(1, ps3.size())
    try XCTAssertEqual("light", ps3.first()?.className())

    let ps4: Elements = try doc.select(".light:contains(rain)")
    XCTAssertEqual(1, ps4.size())
    // Check the result of this query; the assertion used to re-check ps3 by mistake.
    try XCTAssertEqual("light", ps4.first()?.className())

    let ps5: Elements = try doc.select(":contains(rain)")
    XCTAssertEqual(8, ps5.size()) // html, body, div,...
}
// ":contains" accepts balanced parentheses in its argument and a backslash-escaped lone ")".
func testPsuedoContainsWithParentheses() throws {
    let doc = try SwiftSoup.parse("<div><p id=1>This (is good)</p><p id=2>This is bad)</p>")

    let balanced = try doc.select("p:contains(this (is good))")
    XCTAssertEqual(1, balanced.size())
    XCTAssertEqual("1", balanced.first()?.id())

    let escaped = try doc.select("p:contains(this is bad\\))")
    XCTAssertEqual(1, escaped.size())
    XCTAssertEqual("2", escaped.first()?.id())
}
// ":containsOwn" looks only at the element's own text, not at text inside child elements.
func testContainsOwn() throws {
    let doc = try SwiftSoup.parse("<p id=1>Hello <b>there</b> now</p>")

    let ownText = try doc.select("p:containsOwn(Hello now)")
    XCTAssertEqual(1, ownText.size())
    XCTAssertEqual("1", ownText.first()?.id())

    // "there" lives in the <b> child, so the <p> must not match.
    try XCTAssertEqual(0, doc.select("p:containsOwn(there)").size())
}
// ":matches(regex)" tests element text against a regular expression.
func testMatches() throws {
    let doc = try SwiftSoup.parse("<p id=1>The <i>Rain</i></p> <p id=2>There are 99 bottles.</p> <p id=3>Harder (this)</p> <p id=4>Rain</p>")

    // Case-sensitive by default, so this finds nothing.
    let caseSensitive = try doc.select("p:matches(The rain)")
    XCTAssertEqual(0, caseSensitive.size())

    // The (?i) flag makes the pattern case-insensitive.
    let caseInsensitive = try doc.select("p:matches((?i)the rain)")
    XCTAssertEqual(1, caseInsensitive.size())
    XCTAssertEqual("1", caseInsensitive.first()?.id())

    // Anchors bound the whole text.
    let anchored = try doc.select("p:matches((?i)^rain$)")
    XCTAssertEqual(1, anchored.size())
    XCTAssertEqual("4", anchored.first()?.id())

    let digits = try doc.select("p:matches(\\d+)")
    XCTAssertEqual(1, digits.size())
    XCTAssertEqual("2", digits.first()?.id())

    // Escaped parentheses inside the pattern exercise bracket matching.
    let bracketed = try doc.select("p:matches(\\w+\\s+\\(\\w+\\))")
    XCTAssertEqual(1, bracketed.size())
    XCTAssertEqual("3", bracketed.first()?.id())

    // :matches chained with another pseudo-selector.
    let chained = try doc.select("p:matches((?i)the):has(i)")
    XCTAssertEqual(1, chained.size())
    XCTAssertEqual("1", chained.first()?.id())
}
// ":matchesOwn" applies the regex to the element's own text only.
func testMatchesOwn() throws {
    let doc = try SwiftSoup.parse("<p id=1>Hello <b>there</b> now</p>")

    let ownText = try doc.select("p:matchesOwn((?i)hello now)")
    XCTAssertEqual(1, ownText.size())
    XCTAssertEqual("1", ownText.first()?.id())

    // "there" lives in the <b> child, so the <p> must not match.
    try XCTAssertEqual(0, doc.select("p:matchesOwn(there)").size())
}
// Tag names containing "_" or "-" can be selected directly.
func testRelaxedTags() throws {
    let doc = try SwiftSoup.parse("<abc_def id=1>Hello</abc_def> <abc-def id=2>There</abc-def>")

    let underscored = try doc.select("abc_def")
    XCTAssertEqual(1, underscored.size())
    XCTAssertEqual("1", underscored.first()?.id())

    let hyphenated = try doc.select("abc-def")
    XCTAssertEqual(1, hyphenated.size())
    XCTAssertEqual("2", hyphenated.first()?.id())
}
// ":not(...)" excludes elements matching an attribute selector or a nested pseudo-selector.
func testNotParas() throws {
    let doc = try SwiftSoup.parse("<p id=1>One</p> <p>Two</p> <p><span>Three</span></p>")

    let withoutIdOne = try doc.select("p:not([id=1])")
    XCTAssertEqual(2, withoutIdOne.size())
    XCTAssertEqual("Two", try withoutIdOne.first()?.text())
    XCTAssertEqual("Three", try withoutIdOne.last()?.text())

    let withoutSpan = try doc.select("p:not(:has(span))")
    XCTAssertEqual(2, withoutSpan.size())
    XCTAssertEqual("One", try withoutSpan.first()?.text())
    XCTAssertEqual("Two", try withoutSpan.last()?.text())
}
/// `:not(p)` evaluated from <body> yields every non-<p> element in that
/// subtree. The assertions expect two results: <body> itself and the <span>.
func testNotAll()throws {
    let doc: Document = try SwiftSoup.parse("<p>Two</p> <p><span>Three</span></p>")

    let nonParagraphs: Elements = try doc.body()!.select(":not(p)")

    XCTAssertEqual(2, nonParagraphs.size())
    XCTAssertEqual("body", nonParagraphs.first()?.tagName())
    XCTAssertEqual("span", nonParagraphs.last()?.tagName())
}
/// `div:not(.left)` keeps only the div that lacks the `left` class.
func testNotClass()throws {
    let html = "<div class=left>One</div><div class=right id=1><p>Two</p></div>"
    let doc: Document = try SwiftSoup.parse(html)

    let notLeft: Elements = try doc.select("div:not(.left)")

    XCTAssertEqual(1, notLeft.size())
    XCTAssertEqual("1", notLeft.first()?.id())
}
/// A comma inside an attribute value or inside a `:matches(...)` pattern must
/// not be treated as the selector-group separator.
func testHandlesCommasInSelector()throws {
    let doc: Document = try SwiftSoup.parse("<p name='1,2'>One</p><div>Two</div><ol><li>123</li><li>Text</li></ol>")

    // Comma inside an attribute value.
    let byName: Elements = try doc.select("[name=1,2]")
    XCTAssertEqual(1, byName.size())

    // One real group separator plus a comma inside the regex character class.
    let grouped: Elements = try doc.select("div, li:matches([0-9,]+)")
    XCTAssertEqual(2, grouped.size())
    XCTAssertEqual("div", grouped.get(0).tagName())
    XCTAssertEqual("li", grouped.get(1).tagName())
    XCTAssertEqual("123", try grouped.get(1).text())
}
func testSelectSupplementaryCharacter()throws {
#if !os(Linux)
let s = String(Character(UnicodeScalar(135361)!))
@ -655,34 +655,34 @@ class SelectorTest: XCTestCase {
/// Attribute-value matching with a trailing space: the unquoted and quoted
/// forms both match the two divs, while the backslash-escaped form matches none.
func testSelectClassWithSpace()throws {
    let html: String = "<div class=\"value\">class without space</div>\n"
        + "<div class=\"value \">class with space</div>"
    let doc: Document = try SwiftSoup.parse(html)

    // Unquoted value with a trailing space.
    let unquoted: Elements = try doc.select("div[class=value ]")
    XCTAssertEqual(2, unquoted.size())
    XCTAssertEqual("class without space", try unquoted.get(0).text())
    XCTAssertEqual("class with space", try unquoted.get(1).text())

    // Quoted value with a trailing space.
    let quoted: Elements = try doc.select("div[class=\"value \"]")
    XCTAssertEqual(2, quoted.size())
    XCTAssertEqual("class without space", try quoted.get(0).text())
    XCTAssertEqual("class with space", try quoted.get(1).text())

    // Quoted value with a backslash before the space.
    let escaped: Elements = try doc.select("div[class=\"value\\ \"]")
    XCTAssertEqual(0, escaped.size())
}
/// Two divs with identical markup must both survive a sub-select on the
/// result set.
func testSelectSameElements()throws {
    let doc: Document = try SwiftSoup.parse("<div>one</div><div>one</div>")

    let divs: Elements = try doc.select("div")
    XCTAssertEqual(2, divs.size())

    let containingOne: Elements = try divs.select(":contains(one)")
    XCTAssertEqual(2, containingOne.size())
}
func testAttributeWithBrackets()throws {
let html: String = "<div data='End]'>One</div> <div data='[Another)]]'>Two</div>"
let doc: Document = try SwiftSoup.parse(html)
@ -692,62 +692,62 @@ class SelectorTest: XCTestCase {
XCTAssertEqual("One", try doc.select("div[data=\"End]\"").first()?.text())
XCTAssertEqual("Two", try doc.select("div[data=\"[Another)]]\"").first()?.text())
}
/// Name/method pairs for this test case, listed explicitly for test runners
/// that do not discover test methods automatically.
/// Each test appears exactly once.
static var allTests = {
    return [
        ("testByTag", testByTag),
        ("testById", testById),
        ("testByClass", testByClass),
        ("testByAttribute", testByAttribute),
        ("testNamespacedTag", testNamespacedTag),
        ("testWildcardNamespacedTag", testWildcardNamespacedTag),
        ("testByAttributeStarting", testByAttributeStarting),
        ("testByAttributeRegex", testByAttributeRegex),
        ("testByAttributeRegexCharacterClass", testByAttributeRegexCharacterClass),
        ("testByAttributeRegexCombined", testByAttributeRegexCombined),
        ("testCombinedWithContains", testCombinedWithContains),
        ("testAllElements", testAllElements),
        ("testAllWithClass", testAllWithClass),
        ("testGroupOr", testGroupOr),
        ("testGroupOrAttribute", testGroupOrAttribute),
        ("testDescendant", testDescendant),
        ("testAnd", testAnd),
        ("testDeeperDescendant", testDeeperDescendant),
        ("testParentChildElement", testParentChildElement),
        ("testParentWithClassChild", testParentWithClassChild),
        ("testParentChildStar", testParentChildStar),
        ("testMultiChildDescent", testMultiChildDescent),
        ("testCaseInsensitive", testCaseInsensitive),
        ("testAdjacentSiblings", testAdjacentSiblings),
        ("testAdjacentSiblingsWithId", testAdjacentSiblingsWithId),
        ("testNotAdjacent", testNotAdjacent),
        ("testMixCombinator", testMixCombinator),
        ("testMixCombinatorGroup", testMixCombinatorGroup),
        ("testGeneralSiblings", testGeneralSiblings),
        ("testCharactersInIdAndClass", testCharactersInIdAndClass),
        ("testSupportsLeadingCombinator", testSupportsLeadingCombinator),
        ("testPseudoLessThan", testPseudoLessThan),
        ("testPseudoGreaterThan", testPseudoGreaterThan),
        ("testPseudoEquals", testPseudoEquals),
        ("testPseudoBetween", testPseudoBetween),
        ("testPseudoCombined", testPseudoCombined),
        ("testPseudoHas", testPseudoHas),
        ("testNestedHas", testNestedHas),
        ("testPseudoContains", testPseudoContains),
        ("testPsuedoContainsWithParentheses", testPsuedoContainsWithParentheses),
        ("testContainsOwn", testContainsOwn),
        ("testMatches", testMatches),
        ("testMatchesOwn", testMatchesOwn),
        ("testRelaxedTags", testRelaxedTags),
        ("testNotParas", testNotParas),
        ("testNotAll", testNotAll),
        ("testNotClass", testNotClass),
        ("testHandlesCommasInSelector", testHandlesCommasInSelector),
        ("testSelectSupplementaryCharacter", testSelectSupplementaryCharacter),
        ("testSelectClassWithSpace", testSelectClassWithSpace),
        ("testSelectSameElements", testSelectSameElements),
        ("testAttributeWithBrackets", testAttributeWithBrackets)
    ]
}()
}

View File

@ -10,17 +10,17 @@ import XCTest
import SwiftSoup
class StringUtilTest: XCTestCase {
/// Per-test setup hook; currently only forwards to the superclass.
override func setUp() {
super.setUp()
// Put setup code here. This method is called before the invocation of each test method in the class.
}
/// Per-test teardown hook; currently only forwards to the superclass.
override func tearDown() {
// Put teardown code here. This method is called after the invocation of each test method in the class.
super.tearDown()
}
// func testSite()
// {
// let myURLString = "http://comcast.net"
@ -40,42 +40,41 @@ class StringUtilTest: XCTestCase {
// print("Error")
// }
// }
/// `StringUtil.join` concatenates the elements with the given separator.
/// A single element (including the empty string) is returned unchanged.
func testJoin() {
    XCTAssertEqual("", StringUtil.join([""], sep: " "))
    XCTAssertEqual("one", StringUtil.join(["one"], sep: " "))
    XCTAssertEqual("one two three", StringUtil.join(["one", "two", "three"], sep: " "))
}
/// `StringUtil.padding(n)` is expected to return a string of `n` spaces.
/// Expected values are built with `String(repeating:count:)` so each width is
/// explicit and cannot be altered by whitespace-collapsing tools.
func testPadding() {
    for width in [0, 1, 2, 15] {
        let expected = String(repeating: " ", count: width)
        XCTAssertEqual(expected, StringUtil.padding(width), "padding(\(width))")
    }
}
/// `StringUtil.isBlank` is true for empty and whitespace-only strings and
/// false once any non-whitespace character is present.
func testIsBlank() {
    //XCTAssertTrue(StringUtil.isBlank(nil))
    let blankInputs = ["", " ", " \r\n "]
    for candidate in blankInputs {
        XCTAssertTrue(StringUtil.isBlank(candidate))
    }

    let nonBlankInputs = ["hello", " hello "]
    for candidate in nonBlankInputs {
        XCTAssertFalse(StringUtil.isBlank(candidate))
    }
}
/// `StringUtil.isNumeric` accepts digit-only strings and rejects anything
/// containing spaces, letters or a decimal point.
func testIsNumeric() {
    // XCTAssertFalse(StringUtil.isNumeric(nil))
    let rejected = [" ", "123 546", "hello", "123.334"]
    for candidate in rejected {
        XCTAssertFalse(StringUtil.isNumeric(candidate))
    }

    let accepted = ["1", "1234"]
    for candidate in accepted {
        XCTAssertTrue(StringUtil.isNumeric(candidate))
    }
}
func testIsWhitespace() {
XCTAssertTrue(StringUtil.isWhitespace("\t"))
XCTAssertTrue(StringUtil.isWhitespace("\n"))
@ -83,12 +82,12 @@ class StringUtilTest: XCTestCase {
XCTAssertTrue(StringUtil.isWhitespace(Character.BackslashF))
XCTAssertTrue(StringUtil.isWhitespace("\r\n"))
XCTAssertTrue(StringUtil.isWhitespace(" "))
XCTAssertFalse(StringUtil.isWhitespace("\u{00a0}"))
XCTAssertFalse(StringUtil.isWhitespace("\u{2000}"))
XCTAssertFalse(StringUtil.isWhitespace("\u{3000}"))
}
func testNormaliseWhiteSpace() {
XCTAssertEqual(" ", StringUtil.normaliseWhitespace(" \r \n \r\n"))
XCTAssertEqual(" hello there ", StringUtil.normaliseWhitespace(" hello \r \n there \n"))
@ -99,12 +98,12 @@ class StringUtilTest: XCTestCase {
/// A run of spaces after the prefix must collapse to a single space, both via
/// `StringUtil.normaliseWhitespace` and via the text extracted from a parsed
/// document.
///
/// NOTE(review): the prefix is written with escaped backslashes, so it is the
/// literal text `\u{d869}...` rather than the supplementary characters the
/// test name refers to. It is kept as-is to avoid changing what is parsed —
/// confirm whether real characters were intended.
func testNormaliseWhiteSpaceHandlesHighSurrogates()throws {
    let prefix = "\\u{d869}\\u{deb2}\\u{304b}\\u{309a}"
    // The input carries an explicit three-space run so the collapse is exercised.
    let test71540chars = prefix + String(repeating: " ", count: 3) + "1"
    let test71540charsExpectedSingleWhitespace = prefix + " 1"

    XCTAssertEqual(test71540charsExpectedSingleWhitespace, StringUtil.normaliseWhitespace(test71540chars))

    let extractedText = try SwiftSoup.parse(test71540chars).text()
    XCTAssertEqual(test71540charsExpectedSingleWhitespace, extractedText)
}
func testResolvesRelativeUrls() {
XCTAssertEqual("http://example.com/one/two?three", StringUtil.resolve("http://example.com", relUrl: "./one/two?three"))
XCTAssertEqual("http://example.com/one/two?three", StringUtil.resolve("http://example.com?one", relUrl: "./one/two?three"))
@ -127,7 +126,7 @@ class StringUtilTest: XCTestCase {
self.measure {
// Put the code you want to measure the time of here.
}
}
}

View File

@ -10,10 +10,10 @@ import XCTest
import SwiftSoup
/// Placeholder test case; it currently declares no test methods.
class SwiftSoupTests: XCTestCase {
    /// Explicit list of tests for runners that need one. It is empty, so the
    /// element type is spelled out: an empty array literal has nothing to
    /// infer its type from.
    static var allTests: [(String, (SwiftSoupTests) -> () throws -> Void)] = {
        return []
    }()
}

View File

@ -10,77 +10,77 @@ import XCTest
import SwiftSoup
/// Tests for `Tag`: lookup via `valueOf`, equality, and the block / inline /
/// self-closing classification of known and unknown tags.
class TagTest: XCTestCase {

    /// Without parse settings, "P" and "p" yield different tags.
    func testIsCaseSensitive() throws {
        let p1: Tag = try Tag.valueOf("P")
        let p2: Tag = try Tag.valueOf("p")
        XCTAssertFalse(p1.equals(p2))
    }

    /// With `ParseSettings.htmlDefault`, "P" and "p" yield equal tags.
    func testCanBeInsensitive() throws {
        let p1: Tag = try Tag.valueOf("P", ParseSettings.htmlDefault)
        let p2: Tag = try Tag.valueOf("p", ParseSettings.htmlDefault)
        XCTAssertEqual(p1, p2)
    }

    /// Surrounding whitespace in the tag name is ignored.
    func testTrims() throws {
        let p1: Tag = try Tag.valueOf("p")
        let p2: Tag = try Tag.valueOf(" p ")
        XCTAssertEqual(p1, p2)
    }

    /// Two lookups of the same name are equal via both `equals` and `==`.
    func testEquality() throws {
        let p1: Tag = try Tag.valueOf("p")
        let p2: Tag = try Tag.valueOf("p")
        XCTAssertTrue(p1.equals(p2))
        XCTAssertTrue(p1 == p2)
    }

    /// <div> is a block tag and is formatted as a block.
    func testDivSemantics() throws {
        let div = try Tag.valueOf("div")
        XCTAssertTrue(div.isBlock())
        XCTAssertTrue(div.formatAsBlock())
    }

    /// <p> is a block tag but is not formatted as a block.
    func testPSemantics() throws {
        let p = try Tag.valueOf("p")
        XCTAssertTrue(p.isBlock())
        XCTAssertFalse(p.formatAsBlock())
    }

    /// <img> is inline and self-closing, and is not a block tag.
    func testImgSemantics() throws {
        let img = try Tag.valueOf("img")
        XCTAssertTrue(img.isInline())
        XCTAssertTrue(img.isSelfClosing())
        XCTAssertFalse(img.isBlock())
    }

    /// A tag name with no built-in definition is inline and formatted as a block.
    func testDefaultSemantics() throws {
        let foo = try Tag.valueOf("FOO") // not defined
        let foo2 = try Tag.valueOf("FOO")
        XCTAssertEqual(foo, foo2)
        XCTAssertTrue(foo.isInline())
        XCTAssertTrue(foo.formatAsBlock())
    }

    /// A whitespace-only name is rejected with a thrown error.
    func testValueOfChecksNotEmpty() {
        XCTAssertThrowsError(try Tag.valueOf(" "))
    }

    /// Name/method pairs for runners that do not discover tests automatically.
    /// Each test appears exactly once.
    static var allTests = {
        return [
            ("testIsCaseSensitive", testIsCaseSensitive),
            ("testCanBeInsensitive", testCanBeInsensitive),
            ("testTrims", testTrims),
            ("testEquality", testEquality),
            ("testDivSemantics", testDivSemantics),
            ("testPSemantics", testPSemantics),
            ("testImgSemantics", testImgSemantics),
            ("testDefaultSemantics", testDefaultSemantics),
            ("testValueOfChecksNotEmpty", testValueOfChecksNotEmpty)
        ]
    }()
}

View File

@ -10,41 +10,41 @@ import XCTest
@testable import SwiftSoup
class TextNodeTest: XCTestCase {
/// `TextNode.isBlank()` is true for empty or whitespace-only text and false
/// when the text contains any visible characters.
func testBlank() {
    let blankTexts = ["", " ", " \n\n "]
    let nonBlankTexts = ["Hello", " \nHello "]

    // Build all nodes first, then check them, in the original order.
    let blankNodes = blankTexts.map { TextNode($0, "") }
    let nonBlankNodes = nonBlankTexts.map { TextNode($0, "") }

    for node in blankNodes {
        XCTAssertTrue(node.isBlank())
    }
    for node in nonBlankNodes {
        XCTAssertFalse(node.isBlank())
    }
}
/// Reads and rewrites text nodes: entity-decoded text on read, replacement via
/// `text(_:)`, and replacement via the "text" attribute with re-encoding on
/// output.
func testTextBean()throws {
    let doc = try SwiftSoup.parse("<p>One <span>two &amp;</span> three &amp;</p>")
    // `p` is declared once; a second declaration in this scope is a redeclaration error.
    let p: Element = try doc.select("p").first()!
    let span: Element = try doc.select("span").first()!

    // Text is returned with the entity decoded.
    XCTAssertEqual("two &", try span.text())
    let spanText: TextNode = span.childNode(0) as! TextNode
    XCTAssertEqual("two &", spanText.text())

    // The third child of <p> is the trailing text node.
    let tn: TextNode = p.childNode(2) as! TextNode
    XCTAssertEqual(" three &", tn.text())

    // Replace the text through the setter.
    tn.text(" POW!")
    XCTAssertEqual("One <span>two &amp;</span> POW!", TextUtil.stripNewlines(try p.html()))

    // Replace the text through the "text" attribute; '&' is escaped again in the HTML output.
    try _ = tn.attr("text", "kablam &")
    XCTAssertEqual("kablam &", tn.text())
    XCTAssertEqual("One <span>two &amp;</span>kablam &amp;", try TextUtil.stripNewlines(p.html()))
}
func testSplitText()throws {
let doc: Document = try SwiftSoup.parse("<div>Hello there</div>")
let div: Element = try doc.select("div").first()!
@ -56,33 +56,32 @@ class TextNodeTest: XCTestCase {
XCTAssertEqual("Hello there!", try div.text())
XCTAssertTrue(tn.parent() == tail.parent())
}
/// Splits "Hello there" at offset 6 and wraps the tail in <b>, expecting
/// "Hello <b>there</b>" once newlines are stripped from the output.
func testSplitAnEmbolden()throws {
    let doc: Document = try SwiftSoup.parse("<div>Hello there</div>")
    let container: Element = try doc.select("div").first()!
    let head: TextNode = container.childNode(0) as! TextNode

    let tailNode: TextNode = try head.splitText(6)
    try tailNode.wrap("<b></b>")

    // Known wart noted by the original author: the raw output contains a
    // newline before <b>, hence stripNewlines.
    XCTAssertEqual("Hello <b>there</b>", TextUtil.stripNewlines(try container.html()))
}
/// A character outside the Basic Multilingual Plane (scalar 135361) must
/// round-trip through parsing and `outerHtml()`. Skipped on Linux.
func testWithSupplementaryCharacter()throws {
    #if !os(Linux)
        let supplementary = String(Character(UnicodeScalar(135361)!))
        let doc: Document = try SwiftSoup.parse(supplementary)
        let t: TextNode = doc.body()!.textNodes()[0]
        XCTAssertEqual(supplementary, try t.outerHtml().trim())
    #endif
}
/// Name/method pairs for runners that do not discover tests automatically.
/// Each test appears exactly once.
static var allTests = {
    return [
        ("testBlank", testBlank),
        ("testTextBean", testTextBean),
        ("testSplitText", testSplitText),
        ("testSplitAnEmbolden", testSplitAnEmbolden),
        ("testWithSupplementaryCharacter", testWithSupplementaryCharacter)
    ]
}()
}

View File

@ -10,7 +10,7 @@ import Foundation
@testable import SwiftSoup
class TextUtil {
public static func stripNewlines(_ text: String)->String {
public static func stripNewlines(_ text: String) -> String {
let regex = try! NCRegularExpression(pattern: "\\n\\s*", options: .caseInsensitive)
var str = text
str = regex.stringByReplacingMatches(in: str, options: [], range: NSRange(0..<str.utf16.count), withTemplate: "")

View File

@ -10,68 +10,68 @@ import XCTest
import SwiftSoup
/// Tests for `TokenQueue`: balanced chomping, unescaping, case-insensitive
/// chomping and pushing text back onto the front of the queue.
class TokenQueueTest: XCTestCase {

    /// `chompBalanced` returns the contents of the outermost parentheses,
    /// keeping nested pairs intact.
    func testChompBalanced() {
        let tq = TokenQueue(":contains(one (two) three) four")
        let pre = tq.consumeTo("(")
        let guts = tq.chompBalanced("(", ")")
        let remainder = tq.remainder()

        XCTAssertEqual(":contains", pre)
        XCTAssertEqual("one (two) three", guts)
        XCTAssertEqual(" four", remainder)
    }

    /// Backslash-escaped parentheses do not count towards balancing and are
    /// returned still escaped; `unescape` then removes the backslashes.
    func testChompEscapedBalanced() {
        let tq = TokenQueue(":contains(one (two) \\( \\) \\) three) four")
        let pre = tq.consumeTo("(")
        let guts = tq.chompBalanced("(", ")")
        let remainder = tq.remainder()

        XCTAssertEqual(":contains", pre)
        XCTAssertEqual("one (two) \\( \\) \\) three", guts)
        XCTAssertEqual("one (two) ( ) ) three", TokenQueue.unescape(guts))
        XCTAssertEqual(" four", remainder)
    }

    /// With unbalanced input, everything after the first opener is returned.
    func testChompBalancedMatchesAsMuchAsPossible() {
        let tq = TokenQueue("unbalanced(something(or another")
        tq.consumeTo("(")
        let match = tq.chompBalanced("(", ")")
        XCTAssertEqual("something(or another", match)
    }

    /// `unescape` drops one level of backslash escaping.
    func testUnescape() {
        XCTAssertEqual("one ( ) \\", TokenQueue.unescape("one \\( \\) \\\\"))
    }

    /// `chompToIgnoreCase` stops at the target regardless of case, and returns
    /// the whole remaining input when the target is absent.
    func testChompToIgnoreCase() {
        let t = "<textarea>one < two </TEXTarea>"
        var tq = TokenQueue(t)
        var data = tq.chompToIgnoreCase("</textarea")
        XCTAssertEqual("<textarea>one < two ", data)

        tq = TokenQueue("<textarea> one two < three </oops>")
        data = tq.chompToIgnoreCase("</textarea")
        XCTAssertEqual("<textarea> one two < three </oops>", data)
    }

    /// `addFirst` pushes text onto the front of the unconsumed remainder.
    func testAddFirst() {
        let tq = TokenQueue("One Two")
        tq.consumeWord()
        tq.addFirst("Three")
        XCTAssertEqual("Three Two", tq.remainder())
    }

    /// Name/method pairs for runners that do not discover tests automatically.
    /// Each test appears exactly once.
    static var allTests = {
        return [
            ("testChompBalanced", testChompBalanced),
            ("testChompEscapedBalanced", testChompEscapedBalanced),
            ("testChompBalancedMatchesAsMuchAsPossible", testChompBalancedMatchesAsMuchAsPossible),
            ("testUnescape", testUnescape),
            ("testChompToIgnoreCase", testChompToIgnoreCase),
            ("testAddFirst", testAddFirst)
        ]
    }()
}

View File

@ -10,36 +10,36 @@ import XCTest
import SwiftSoup
class XmlTreeBuilderTest: XCTestCase {
/// Parses a small XML document with `XmlTreeBuilder` and checks the serialised
/// output and the absolute-URL resolution of a relative `href`.
func testSimpleXmlParse()throws {
    let xml = "<doc id=2 href='/bar'>Foo <br /><link>One</link><link>Two</link></doc>"
    let tb: XmlTreeBuilder = XmlTreeBuilder()
    let doc: Document = try tb.parse(xml, "http://foo.com/")

    // Attribute values come back double-quoted.
    XCTAssertEqual("<doc id=\"2\" href=\"/bar\">Foo <br /><link>One</link><link>Two</link></doc>", try TextUtil.stripNewlines(doc.html()))
    // The relative href resolves against the base URI given to the parser.
    XCTAssertEqual(try doc.getElementById("2")?.absUrl("href"), "http://foo.com/bar")
}
/// `</val>` closes the inner <val>; the unmatched `</bar>` is ignored, so
/// "Three" ends up inside the outer <val>.
func testPopToClose()throws {
    let xml = "<doc><val>One<val>Two</val></bar>Three</doc>"
    let tb: XmlTreeBuilder = XmlTreeBuilder()
    let doc = try tb.parse(xml, "http://foo.com/")
    XCTAssertEqual("<doc><val>One<val>Two</val>Three</val></doc>", try TextUtil.stripNewlines(doc.html()))
}
/// A doctype, a comment, text and a self-closing element round-trip unchanged
/// through the XML tree builder.
func testCommentAndDocType()throws {
    let xml = "<!DOCTYPE HTML><!-- a comment -->One <qux />Two"
    let tb: XmlTreeBuilder = XmlTreeBuilder()
    let doc = try tb.parse(xml, "http://foo.com/")
    XCTAssertEqual("<!DOCTYPE HTML><!-- a comment -->One <qux />Two", try TextUtil.stripNewlines(doc.html()))
}
/// Passing `Parser.xmlParser()` to `SwiftSoup.parse` gives the same result as
/// using `XmlTreeBuilder` directly: the stray `</bar>` is dropped.
func testSupplyParserToJsoupClass()throws {
    let xml = "<doc><val>One<val>Two</val></bar>Three</doc>"
    let doc = try SwiftSoup.parse(xml, "http://foo.com/", Parser.xmlParser())
    try XCTAssertEqual("<doc><val>One<val>Two</val>Three</val></doc>", TextUtil.stripNewlines(doc.html()))
}
//TODO: nabil
// public void testSupplyParserToConnection() throws IOException {
// String xmlUrl = "http://direct.infohound.net/tools/jsoup-xml-test.xml";
@ -57,7 +57,7 @@ class XmlTreeBuilderTest: XCTestCase {
// XCTAssertEqual(0, xmlDoc.select("head").size()); // xml parser does not
// XCTAssertEqual(0, autoXmlDoc.select("head").size()); // xml parser does not
// }
//TODO: nabil
// func testSupplyParserToDataStream()throws {
// let testBundle = Bundle(for: type(of: self))
@ -68,40 +68,39 @@ class XmlTreeBuilderTest: XCTestCase {
// XCTAssertEqual("<doc><val>One<val>Two</val>Three</val></doc>",
// TextUtil.stripNewlines(doc.html()));
// }
/// The HTML parser treats <br> as a known void tag, so "<br>one</br>" becomes
/// two <br> elements. The XML parser knows nothing about the tag and must keep
/// "<br>one</br>" as written.
func testDoesNotForceSelfClosingKnownTags()throws {
    let htmlDoc = try SwiftSoup.parse("<br>one</br>")
    XCTAssertEqual("<br>one\n<br>", try htmlDoc.body()?.html())

    // `xmlDoc` is declared once; a second declaration in this scope is a redeclaration error.
    let xmlDoc = try SwiftSoup.parse("<br>one</br>", "", Parser.xmlParser())
    XCTAssertEqual("<br>one</br>", try xmlDoc.html())
}
/// An `<?xml ...?>` prolog is parsed into a "#declaration" node, and a
/// trailing comment into a "#comment" node.
func testHandlesXmlDeclarationAsDeclaration()throws {
    let html = "<?xml encoding='UTF-8' ?><body>One</body><!-- comment -->"
    let doc = try SwiftSoup.parse(html, "", Parser.xmlParser())

    try XCTAssertEqual("<?xml encoding=\"UTF-8\"?> <body> One </body> <!-- comment -->", StringUtil.normaliseWhitespace(doc.outerHtml()))
    XCTAssertEqual("#declaration", doc.childNode(0).nodeName())
    XCTAssertEqual("#comment", doc.childNode(2).nodeName())
}
/// `Parser.parseXmlFragment` returns the three top-level nodes of the
/// fragment and resolves relative URLs against the supplied base URI.
func testXmlFragment()throws {
    let fragment = "<one src='/foo/' />Two<three><four /></three>"
    let parsed: [Node] = try Parser.parseXmlFragment(fragment, "http://example.com/")

    XCTAssertEqual(3, parsed.count)

    let firstNode = parsed[0]
    XCTAssertEqual("http://example.com/foo/", try firstNode.absUrl("src"))
    XCTAssertEqual("one", firstNode.nodeName())

    let secondNode = parsed[1] as? TextNode
    XCTAssertEqual("Two", secondNode?.text())
}
/// A document produced by the XML parser has its output syntax set to `.xml`.
/// NOTE(review): the method name says "Html", but the assertion checks for the
/// XML syntax; the name is kept because `allTests` refers to it.
func testXmlParseDefaultsToHtmlOutputSyntax()throws {
    let xmlParsed = try SwiftSoup.parse("x", "", Parser.xmlParser())
    let actualSyntax = xmlParsed.outputSettings().syntax()
    XCTAssertEqual(OutputSettings.Syntax.xml, actualSyntax)
}
func testDoesHandleEOFInTag()throws {
let html = "<img src=asdf onerror=\"alert(1)\" x="
let xmlDoc = try SwiftSoup.parse(html, "", Parser.xmlParser())
@ -116,7 +115,7 @@ class XmlTreeBuilderTest: XCTestCase {
// XCTAssertEqual("<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?> <data>äöåéü</data>",
// TextUtil.stripNewlines(doc.html()));
// }
func testParseDeclarationAttributes()throws {
let xml = "<?xml version='1' encoding='UTF-8' something='else'?><val>One</val>"
let doc = try SwiftSoup.parse(xml, "", Parser.xmlParser())
@ -127,13 +126,13 @@ class XmlTreeBuilderTest: XCTestCase {
try XCTAssertEqual("version=\"1\" encoding=\"UTF-8\" something=\"else\"", decl.getWholeDeclaration())
try XCTAssertEqual("<?xml version=\"1\" encoding=\"UTF-8\" something=\"else\"?>", decl.outerHtml())
}
/// The XML parser keeps the upper-case `<?XML` of a declaration on output and
/// only normalises the attribute quoting.
func testCaseSensitiveDeclaration()throws {
    let declaration = "<?XML version='1' encoding='UTF-8' something='else'?>"
    let parsed = try SwiftSoup.parse(declaration, "", Parser.xmlParser())
    XCTAssertEqual("<?XML version=\"1\" encoding=\"UTF-8\" something=\"else\"?>", try parsed.outerHtml())
}
func testCreatesValidProlog()throws {
let document = Document.createShell("")
document.outputSettings().syntax(syntax: OutputSettings.Syntax.xml)
@ -144,37 +143,37 @@ class XmlTreeBuilderTest: XCTestCase {
" <body></body>\n" +
"</html>", document.outerHtml())
}
/// With default settings the XML parser preserves tag and attribute case.
/// NOTE(review): the name lacks the `test` prefix that XCTest's automatic
/// discovery looks for, so this may only run via `allTests` — confirm intent.
func preservesCaseByDefault()throws {
    let upperCased = "<TEST ID=1>Check</TEST>"
    let parsed = try SwiftSoup.parse(upperCased, "", Parser.xmlParser())
    XCTAssertEqual("<TEST ID=\"1\">Check</TEST>", try TextUtil.stripNewlines(parsed.html()))
}
/// With `ParseSettings.htmlDefault` the XML parser lower-cases tag and
/// attribute names.
/// NOTE(review): the name lacks the `test` prefix that XCTest's automatic
/// discovery looks for, so this may only run via `allTests` — confirm intent.
func canNormalizeCase()throws {
    let upperCased = "<TEST ID=1>Check</TEST>"
    let normalisingParser = Parser.xmlParser().settings(ParseSettings.htmlDefault)
    let parsed = try SwiftSoup.parse(upperCased, "", normalisingParser)
    XCTAssertEqual("<test id=\"1\">Check</test>", try TextUtil.stripNewlines(parsed.html()))
}
/// Name/method pairs for runners that do not discover tests automatically.
/// Each test appears exactly once.
static var allTests = {
    return [
        ("testSimpleXmlParse", testSimpleXmlParse),
        ("testPopToClose", testPopToClose),
        ("testCommentAndDocType", testCommentAndDocType),
        ("testSupplyParserToJsoupClass", testSupplyParserToJsoupClass),
        ("testDoesNotForceSelfClosingKnownTags", testDoesNotForceSelfClosingKnownTags),
        ("testHandlesXmlDeclarationAsDeclaration", testHandlesXmlDeclarationAsDeclaration),
        ("testXmlFragment", testXmlFragment),
        ("testXmlParseDefaultsToHtmlOutputSyntax", testXmlParseDefaultsToHtmlOutputSyntax),
        ("testDoesHandleEOFInTag", testDoesHandleEOFInTag),
        ("testParseDeclarationAttributes", testParseDeclarationAttributes),
        ("testCaseSensitiveDeclaration", testCaseSensitiveDeclaration),
        ("testCreatesValidProlog", testCreatesValidProlog),
        ("preservesCaseByDefault", preservesCaseByDefault),
        ("canNormalizeCase", canNormalizeCase)
    ]
}()
}