//
// EntitiesTest.swift
// SwiftSoup
//
// Created by Nabil Chatbi on 09/10/16.
// Copyright © 2016 Nabil Chatbi.. All rights reserved.
//

import Foundation
|
||
import XCTest
|
||
import SwiftSoup
|
||
|
||
/// Tests for `Entities`: escaping text to HTML entities under the different
/// escape modes / output encodings, and unescaping entity references back to text.
///
/// NOTE(review): the entity-encoded string literals in this class (`&amp;`,
/// `&#x3c0;`, `&nparsl;`, ...) were restored from a copy of the file in which
/// HTML entities had been decoded to their characters. Confirm the exact
/// entity spellings against the upstream test suite.
class EntitiesTest: XCTestCase {

    /// On Apple platforms, checks that `allTests` (the list used by the Linux
    /// test runner) has as many entries as the test suite XCTest discovers.
    func testLinuxTestSuiteIncludesAllTests() {
        #if os(macOS) || os(iOS) || os(tvOS) || os(watchOS)
            let thisClass = type(of: self)
            let linuxCount = thisClass.allTests.count
            let darwinCount = Int(thisClass.defaultTestSuite.testCaseCount)
            XCTAssertEqual(linuxCount, darwinCount, "\(darwinCount - linuxCount) tests are missing from allTests")
        #endif
    }

    /// Escapes a mixed ASCII / non-ASCII string under each escape mode and
    /// encoding, then checks that every escaped form unescapes to the input.
    func testEscape()throws {
        let text = "Hello &<> Å å π 新 there ¾ © »"

        let escapedAscii = Entities.escape(text, OutputSettings().encoder(String.Encoding.ascii).escapeMode(Entities.EscapeMode.base))
        let escapedAsciiFull = Entities.escape(text, OutputSettings().charset(String.Encoding.ascii).escapeMode(Entities.EscapeMode.extended))
        let escapedAsciiXhtml = Entities.escape(text, OutputSettings().charset(String.Encoding.ascii).escapeMode(Entities.EscapeMode.xhtml))
        let escapedUtfFull = Entities.escape(text, OutputSettings().charset(String.Encoding.utf8).escapeMode(Entities.EscapeMode.extended))
        let escapedUtfFull2 = Entities.escape(text)
        let escapedUtfMin = Entities.escape(text, OutputSettings().charset(String.Encoding.utf8).escapeMode(Entities.EscapeMode.xhtml))

        // ASCII output must replace every non-ASCII character with an entity;
        // UTF-8 output only needs the markup-significant characters escaped.
        XCTAssertEqual("Hello &amp;&lt;&gt; &Aring; &aring; &#x3c0; &#x65b0; there &frac34; &copy; &raquo;", escapedAscii)
        XCTAssertEqual("Hello &amp;&lt;&gt; &angst; &aring; &pi; &#x65b0; there &frac34; &copy; &raquo;", escapedAsciiFull)
        XCTAssertEqual("Hello &amp;&lt;&gt; &#xc5; &#xe5; &#x3c0; &#x65b0; there &#xbe; &#xa9; &#xbb;", escapedAsciiXhtml)
        XCTAssertEqual("Hello &amp;&lt;&gt; Å å π 新 there ¾ © »", escapedUtfFull)
        XCTAssertEqual("Hello &amp;&lt;&gt; Å å π 新 there ¾ © »", escapedUtfFull2)
        XCTAssertEqual("Hello &amp;&lt;&gt; Å å π 新 there ¾ © »", escapedUtfMin)
        // odd that it's defined as aring in base but angst in full

        // round trip
        XCTAssertEqual(text, try Entities.unescape(escapedAscii))
        XCTAssertEqual(text, try Entities.unescape(escapedAsciiFull))
        XCTAssertEqual(text, try Entities.unescape(escapedAsciiXhtml))
        XCTAssertEqual(text, try Entities.unescape(escapedUtfFull))
        XCTAssertEqual(text, try Entities.unescape(escapedUtfFull2))
        XCTAssertEqual(text, try Entities.unescape(escapedUtfMin))
    }

    /// Checks the name <-> code point mapping of the xhtml escape mode for
    /// `amp`, `gt`, `lt` and `quot`.
    func testXhtml() {
        //let text = "&amp; &gt; &lt; &quot;";
        XCTAssertEqual(UnicodeScalar(38), Entities.EscapeMode.xhtml.codepointForName("amp"))
        XCTAssertEqual(UnicodeScalar(62), Entities.EscapeMode.xhtml.codepointForName("gt"))
        XCTAssertEqual(UnicodeScalar(60), Entities.EscapeMode.xhtml.codepointForName("lt"))
        XCTAssertEqual(UnicodeScalar(34), Entities.EscapeMode.xhtml.codepointForName("quot"))

        XCTAssertEqual("amp", Entities.EscapeMode.xhtml.nameForCodepoint(UnicodeScalar(38)!))
        XCTAssertEqual("gt", Entities.EscapeMode.xhtml.nameForCodepoint(UnicodeScalar(62)!))
        XCTAssertEqual("lt", Entities.EscapeMode.xhtml.nameForCodepoint(UnicodeScalar(60)!))
        XCTAssertEqual("quot", Entities.EscapeMode.xhtml.nameForCodepoint(UnicodeScalar(34)!))
    }

    /// Looks up entity values by name.
    func testGetByName() {
        //XCTAssertEqual("≫⃒", Entities.getByName(name: "nGt"));//todo:nabil same codepoint 8811 in java but characters different
        //XCTAssertEqual("fj", Entities.getByName(name: "fjlig"));
        XCTAssertEqual("≫", Entities.getByName(name: "gg"))
        XCTAssertEqual("©", Entities.getByName(name: "copy"))
    }

    /// A character outside the Basic Multilingual Plane (U+210C1) must become
    /// a numeric entity for ASCII output and stay untouched for UTF-8 output.
    func testEscapeSupplementaryCharacter() {
        let text: String = "𡃁"
        let escapedAscii: String = Entities.escape(text, OutputSettings().charset(.ascii).escapeMode(Entities.EscapeMode.base))
        XCTAssertEqual("&#x210c1;", escapedAscii)
        let escapedUtf: String = Entities.escape(text, OutputSettings().charset(.utf8).escapeMode(Entities.EscapeMode.base))
        XCTAssertEqual(text, escapedUtf)
    }

    /// A named entity that expands to two code points must unescape to both.
    func testNotMissingMultis()throws {
        let text: String = "&nparsl;"
        let un: String = "\u{2AFD}\u{20E5}"
        XCTAssertEqual(un, try Entities.unescape(text))
    }

    /// Named entities that map to supplementary-plane characters must unescape.
    func testnotMissingSupplementals()throws {
        let text: String = "&npolint; &qfr;"
        let un: String = "⨔ 𝔮"//+"\u{D835}\u{DD2E}" // 𝔮
        XCTAssertEqual(un, try Entities.unescape(text))
    }

    /// Unescapes named, decimal and hex references, with and without a
    /// trailing semicolon, and leaves unknown references untouched.
    func testUnescape()throws {
        let text: String = "Hello &AElig; &amp;&LT&gt; &reg &angst; &angst &#960; &#960 &#x65B0; there &! &frac34; &copy; &COPY;"
        XCTAssertEqual("Hello Æ &<> ® Å &angst π π 新 there &! ¾ © ©", try Entities.unescape(text))

        XCTAssertEqual("&0987654321; &unknown", try Entities.unescape("&0987654321; &unknown"))
    }

    func testStrictUnescape()throws { // for attributes, enforce strict unescaping (must look like &#xxx; , not just &#xxx)
        let text: String = "Hello &amp= &amp;"
        // Strict mode leaves the unterminated `&amp` as-is; lenient mode decodes it.
        XCTAssertEqual("Hello &amp= &", try Entities.unescape(string: text, strict: true))
        XCTAssertEqual("Hello &= &", try Entities.unescape(text))
        XCTAssertEqual("Hello &= &", try Entities.unescape(string: text, strict: false))
    }

    /// Entity names differing only by case (`Uuml` / `uuml`, `amp` / `AMP`)
    /// must escape and unescape to the right characters.
    func testCaseSensitive()throws {
        let unescaped: String = "Ü ü & &"
        XCTAssertEqual("&Uuml; &uuml; &amp; &amp;",
                       Entities.escape(unescaped, OutputSettings().charset(.ascii).escapeMode(Entities.EscapeMode.extended)))

        let escaped: String = "&Uuml; &uuml; &amp; &AMP"
        XCTAssertEqual("Ü ü & &", try Entities.unescape(escaped))
    }

    /// Numeric references for `\` and `$` must unescape to those literal characters.
    func testQuoteReplacements()throws {
        let escaped: String = "&#92; &#36;"
        let unescaped: String = "\\ $"

        XCTAssertEqual(unescaped, try Entities.unescape(escaped))
    }

    /// Entities whose names mix letters and digits (`sup1`, `frac14`, ...)
    /// must survive a parse / serialize cycle for both ASCII and UTF-8 output.
    func testLetterDigitEntities()throws {
        let html: String = "<p>&sup1;&sup2;&sup3;&frac14;&frac12;&frac34;</p>"
        let doc: Document = try SwiftSoup.parse(html)
        doc.outputSettings().charset(.ascii)
        // Fail the test instead of crashing the run if the element is missing.
        guard let p = try doc.select("p").first() else {
            XCTFail("Parsed document should contain a <p> element")
            return
        }
        XCTAssertEqual("&sup1;&sup2;&sup3;&frac14;&frac12;&frac34;", try p.html())
        XCTAssertEqual("¹²³¼½¾", try p.text())
        doc.outputSettings().charset(.utf8)
        XCTAssertEqual("¹²³¼½¾", try p.html())
    }

    /// Query-string parameters that merely start like entity names
    /// (`&num_rooms`, `&int`, ...) must not be decoded.
    func testNoSpuriousDecodes()throws {
        let string: String = "http://www.foo.com?a=1&num_rooms=1&children=0&int=VA&b=2"
        XCTAssertEqual(string, try Entities.unescape(string))
    }

    func testUscapesGtInXmlAttributesButNotInHtml()throws {
        // https://github.com/jhy/jsoup/issues/528 - < is OK in HTML attribute values, but not in XML

        let docHtml: String = "<a title='<p>One</p>'>One</a>"
        let doc: Document = try SwiftSoup.parse(docHtml)
        // Fail the test instead of crashing the run if the element is missing.
        guard let element = try doc.select("a").first() else {
            XCTFail("Parsed document should contain an <a> element")
            return
        }

        doc.outputSettings().escapeMode(Entities.EscapeMode.base)
        XCTAssertEqual("<a title=\"<p>One</p>\">One</a>", try element.outerHtml())

        doc.outputSettings().escapeMode(Entities.EscapeMode.xhtml)
        XCTAssertEqual("<a title=\"&lt;p>One</p>\">One</a>", try element.outerHtml())
    }

    /// Test list for platforms without Objective-C runtime test discovery;
    /// `testLinuxTestSuiteIncludesAllTests` checks its count on Apple platforms.
    static var allTests = {
        return [
            ("testLinuxTestSuiteIncludesAllTests", testLinuxTestSuiteIncludesAllTests),
            ("testEscape", testEscape),
            ("testXhtml", testXhtml),
            ("testGetByName", testGetByName),
            ("testEscapeSupplementaryCharacter", testEscapeSupplementaryCharacter),
            ("testNotMissingMultis", testNotMissingMultis),
            ("testnotMissingSupplementals", testnotMissingSupplementals),
            ("testUnescape", testUnescape),
            ("testStrictUnescape", testStrictUnescape),
            ("testCaseSensitive", testCaseSensitive),
            ("testQuoteReplacements", testQuoteReplacements),
            ("testLetterDigitEntities", testLetterDigitEntities),
            ("testNoSpuriousDecodes", testNoSpuriousDecodes),
            ("testUscapesGtInXmlAttributesButNotInHtml", testUscapesGtInXmlAttributesButNotInHtml)
        ]
    }()
}
|