Convert all tag sets to constant arrays, use native contains()

This commit is contained in:
Garth Snyder 2019-03-03 19:54:09 -08:00
parent 904ebe27fa
commit 6af808ef63
3 changed files with 176 additions and 143 deletions

View File

@ -12,21 +12,27 @@ import Foundation
* HTML Tree Builder; creates a DOM from Tokens.
*/
class HtmlTreeBuilder: TreeBuilder {
// tag searches
public static let TagsSearchInScope: [String] = ["applet", "caption", "html", "table", "td", "th", "marquee", "object"]
private static let TagSearchList: [String] = ["ol", "ul"]
private static let TagSearchButton: [String] = ["button"]
private static let TagSearchTableScope: [String] = ["html", "table"]
private static let TagSearchSelectScope: [String] = ["optgroup", "option"]
private static let TagSearchEndTags: [String] = ["dd", "dt", "li", "option", "optgroup", "p", "rp", "rt"]
private static let TagSearchSpecial: [String] = ["address", "applet", "area", "article", "aside", "base", "basefont", "bgsound",
"blockquote", "body", "br", "button", "caption", "center", "col", "colgroup", "command", "dd",
"details", "dir", "div", "dl", "dt", "embed", "fieldset", "figcaption", "figure", "footer", "form",
"frame", "frameset", "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html",
"iframe", "img", "input", "isindex", "li", "link", "listing", "marquee", "menu", "meta", "nav",
"noembed", "noframes", "noscript", "object", "ol", "p", "param", "plaintext", "pre", "script",
"section", "select", "style", "summary", "table", "tbody", "td", "textarea", "tfoot", "th", "thead",
"title", "tr", "ul", "wbr", "xmp"]
private enum TagSets {
// tag searches
static let inScope = ["applet", "caption", "html", "table", "td", "th", "marquee", "object"]
static let list = ["ol", "ul"]
static let button = ["button"]
static let tableScope = ["html", "table"]
static let selectScope = ["optgroup", "option"]
static let endTags = ["dd", "dt", "li", "option", "optgroup", "p", "rp", "rt"]
static let titleTextarea = ["title", "textarea"]
static let frames = ["iframe", "noembed", "noframes", "style", "xmp"]
static let special: Set<String> = ["address", "applet", "area", "article", "aside", "base", "basefont", "bgsound",
"blockquote", "body", "br", "button", "caption", "center", "col", "colgroup", "command", "dd",
"details", "dir", "div", "dl", "dt", "embed", "fieldset", "figcaption", "figure", "footer", "form",
"frame", "frameset", "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html",
"iframe", "img", "input", "isindex", "li", "link", "listing", "marquee", "menu", "meta", "nav",
"noembed", "noframes", "noscript", "object", "ol", "p", "param", "plaintext", "pre", "script",
"section", "select", "style", "summary", "table", "tbody", "td", "textarea", "tfoot", "th", "thead",
"title", "tr", "ul", "wbr", "xmp"]
}
private var _state: HtmlTreeBuilderState = HtmlTreeBuilderState.Initial // the current state
private var _originalState: HtmlTreeBuilderState = HtmlTreeBuilderState.Initial // original / marked state
@ -71,19 +77,19 @@ class HtmlTreeBuilder: TreeBuilder {
}
// initialise the tokeniser state:
let contextTag: String = context.tagName()
if (StringUtil.inString(contextTag, haystack: "title", "textarea")) {
tokeniser.transition(TokeniserState.Rcdata)
} else if (StringUtil.inString(contextTag, haystack: "iframe", "noembed", "noframes", "style", "xmp")) {
tokeniser.transition(TokeniserState.Rawtext)
} else if (contextTag=="script") {
tokeniser.transition(TokeniserState.ScriptData)
} else if (contextTag==("noscript")) {
tokeniser.transition(TokeniserState.Data) // if scripting enabled, rawtext
} else if (contextTag=="plaintext") {
tokeniser.transition(TokeniserState.Data)
} else {
tokeniser.transition(TokeniserState.Data) // default
switch context.tagName() {
case TagSets.titleTextarea:
tokeniser.transition(TokeniserState.Rcdata)
case TagSets.frames:
tokeniser.transition(TokeniserState.Rawtext)
case "script":
tokeniser.transition(TokeniserState.ScriptData)
case "noscript":
tokeniser.transition(TokeniserState.Data) // if scripting enabled, rawtext
case "plaintext":
tokeniser.transition(TokeniserState.Data)
default:
tokeniser.transition(TokeniserState.Data)
}
root = try Element(Tag.valueOf("html", settings), baseUri)
@ -346,7 +352,7 @@ class HtmlTreeBuilder: TreeBuilder {
for pos in (0..<stack.count).reversed() {
let next: Element = stack[pos]
stack.remove(at: pos)
if (StringUtil.inString(next.nodeName(), elNames)) {
if elNames.contains(next.nodeName()) {
break
}
}
@ -381,7 +387,8 @@ class HtmlTreeBuilder: TreeBuilder {
private func clearStackToContext(_ nodeNames: [String]) {
for pos in (0..<stack.count).reversed() {
let next: Element = stack[pos]
if (StringUtil.inString(next.nodeName(), nodeNames) || next.nodeName()=="html") {
let nextName = next.nodeName()
if nodeNames.contains(nextName) || nextName == "html" {
break
} else {
stack.remove(at: pos)
@ -482,25 +489,21 @@ class HtmlTreeBuilder: TreeBuilder {
}
}
// todo: tidy up in specific scope methods
private var specificScopeTarget: [String?] = [nil]
private func inSpecificScope(_ targetName: String, _ baseTypes: [String], _ extraTypes: [String]?)throws->Bool {
specificScopeTarget[0] = targetName
return try inSpecificScope(specificScopeTarget, baseTypes, extraTypes)
private func inSpecificScope(_ targetName: String, _ baseTypes: [String], _ extraTypes: [String]? = nil)throws->Bool {
return try inSpecificScope([targetName], baseTypes, extraTypes)
}
private func inSpecificScope(_ targetNames: [String?], _ baseTypes: [String], _ extraTypes: [String]?)throws->Bool {
private func inSpecificScope(_ targetNames: [String], _ baseTypes: [String], _ extraTypes: [String]? = nil)throws->Bool {
for pos in (0..<stack.count).reversed() {
let el: Element = stack[pos]
let elName: String = el.nodeName()
if (StringUtil.inString(elName, targetNames)) {
let el = stack[pos]
let elName = el.nodeName()
if targetNames.contains(elName) {
return true
}
if (StringUtil.inString(elName, baseTypes)) {
if baseTypes.contains(elName) {
return false
}
if (extraTypes != nil && StringUtil.inString(elName, extraTypes!)) {
if let extraTypes = extraTypes, extraTypes.contains(elName) {
return false
}
}
@ -509,39 +512,34 @@ class HtmlTreeBuilder: TreeBuilder {
}
func inScope(_ targetNames: [String])throws->Bool {
return try inSpecificScope(targetNames, HtmlTreeBuilder.TagsSearchInScope, nil)
return try inSpecificScope(targetNames, TagSets.inScope)
}
func inScope(_ targetName: String)throws->Bool {
return try inScope(targetName, nil)
}
func inScope(_ targetName: String, _ extras: [String]?)throws->Bool {
return try inSpecificScope(targetName, HtmlTreeBuilder.TagsSearchInScope, extras)
func inScope(_ targetName: String, _ extras: [String]? = nil)throws->Bool {
return try inSpecificScope(targetName, TagSets.inScope, extras)
// todo: in mathml namespace: mi, mo, mn, ms, mtext annotation-xml
// todo: in svg namespace: forignOjbect, desc, title
}
func inListItemScope(_ targetName: String)throws->Bool {
return try inScope(targetName, HtmlTreeBuilder.TagSearchList)
return try inScope(targetName, TagSets.list)
}
func inButtonScope(_ targetName: String)throws->Bool {
return try inScope(targetName, HtmlTreeBuilder.TagSearchButton)
return try inScope(targetName, TagSets.button)
}
func inTableScope(_ targetName: String)throws->Bool {
return try inSpecificScope(targetName, HtmlTreeBuilder.TagSearchTableScope, nil)
return try inSpecificScope(targetName, TagSets.tableScope)
}
func inSelectScope(_ targetName: String)throws->Bool {
for pos in (0..<stack.count).reversed() {
let el: Element = stack[pos]
let elName: String = el.nodeName()
if (elName.equals(targetName)) {
let elName = stack[pos].nodeName()
if elName == targetName {
return true
}
if (!StringUtil.inString(elName, HtmlTreeBuilder.TagSearchSelectScope)) { // all elements except
if !TagSets.selectScope.contains(elName) {
return false
}
}
@ -595,23 +593,28 @@ class HtmlTreeBuilder: TreeBuilder {
process, then the UA must perform the above steps as if that element was not in the above list.
*/
func generateImpliedEndTags(_ excludeTag: String?) {
while ((excludeTag != nil && !currentElement()!.nodeName().equals(excludeTag!)) &&
StringUtil.inString(currentElement()!.nodeName(), HtmlTreeBuilder.TagSearchEndTags)) {
pop()
func generateImpliedEndTags(_ excludeTag: String? = nil) {
// Is this correct? I get the sense that something is supposed to happen here
// even if excludeTag == nil. But the original code doesn't seem to do that. -GS
//
// while ((excludeTag != nil && !currentElement()!.nodeName().equals(excludeTag!)) &&
// StringUtil.inString(currentElement()!.nodeName(), HtmlTreeBuilder.TagSearchEndTags)) {
// pop()
// }
guard let excludeTag = excludeTag else { return }
while true {
let nodeName = currentElement()!.nodeName()
guard nodeName != excludeTag else { return }
guard TagSets.endTags.contains(nodeName) else { return }
pop()
}
}
func generateImpliedEndTags() {
generateImpliedEndTags(nil)
}
func isSpecial(_ el: Element) -> Bool {
// todo: mathml's mi, mo, mn
// todo: svg's foreigObject, desc, title
let name: String = el.nodeName()
return StringUtil.inString(name, HtmlTreeBuilder.TagSearchSpecial)
return TagSets.special.contains(name)
}
func lastFormattingElement() -> Element? {
@ -769,3 +772,10 @@ class HtmlTreeBuilder: TreeBuilder {
}
}
}
fileprivate func ~= (pattern: [String], value: String) -> Bool {
return pattern.contains(value)
}

View File

@ -36,6 +36,29 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
case AfterAfterBody
case AfterAfterFrameset
case ForeignContent
private enum TagSets {
static let outer = ["head", "body", "html", "br"]
static let outer2 = ["body", "html", "br"]
static let outer3 = ["body", "html"]
static let baseEtc = ["base", "basefont", "bgsound", "command", "link"]
static let baseEtc2 = ["basefont", "bgsound", "link", "meta", "noframes", "style"]
static let baseEtc3 = ["base", "basefont", "bgsound", "link", "meta", "noframes", "script", "style", "title"]
static let headNoscript = ["head", "noscript"]
static let table = ["table", "tbody", "tfoot", "thead", "tr"]
static let tableSections = ["tbody", "tfoot", "thead"]
static let tableMix = ["body", "caption", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr"]
static let tableMix2 = ["body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr"]
static let tableMix3 = ["caption", "col", "colgroup", "tbody", "tfoot", "thead"]
static let tableMix4 = ["body", "caption", "col", "colgroup", "html", "td", "th", "tr"]
static let tableMix5 = ["caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr"]
static let tableMix6 = ["body", "caption", "col", "colgroup", "html", "td", "th"]
static let tableMix7 = ["body", "caption", "col", "colgroup", "html"]
static let tableMix8 = ["caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"]
static let tableRowsAndCols = ["caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr"]
static let thTd = ["th", "td"]
static let inputKeygenTextarea = ["input", "keygen", "textarea"]
}
private static let nullString: String = "\u{0000}"
@ -83,10 +106,10 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
try tb.insert(t.asComment())
} else if (HtmlTreeBuilderState.isWhitespace(t)) {
return true // ignore whitespace
} else if (t.isStartTag() && (t.asStartTag().normalName()?.equals("html"))!) {
} else if t.startTagNormalName() == "html" {
try tb.insert(t.asStartTag())
tb.transition(.BeforeHead)
} else if (t.isEndTag() && (StringUtil.inString(t.asEndTag().normalName()!, haystack: "head", "body", "html", "br"))) {
} else if let nName = t.endTagNormalName(), TagSets.outer.contains(nName) {
return try anythingElse(t, tb)
} else if (t.isEndTag()) {
tb.error(self)
@ -103,13 +126,13 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
} else if (t.isDoctype()) {
tb.error(self)
return false
} else if (t.isStartTag() && (t.asStartTag().normalName()?.equals("html"))!) {
} else if t.startTagNormalName() == "html" {
return try HtmlTreeBuilderState.InBody.process(t, tb) // does not transition
} else if (t.isStartTag() && (t.asStartTag().normalName()?.equals("head"))!) {
} else if t.startTagNormalName() == "head" {
let head: Element = try tb.insert(t.asStartTag())
tb.setHeadElement(head)
tb.transition(.InHead)
} else if (t.isEndTag() && (StringUtil.inString(t.asEndTag().normalName()!, haystack: "head", "body", "html", "br"))) {
} else if let nName = t.endTagNormalName(), TagSets.outer.contains(nName) {
try tb.processStartTag("head")
return try tb.process(t)
} else if (t.isEndTag()) {
@ -142,7 +165,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
var name: String = start.normalName()!
if (name.equals("html")) {
return try HtmlTreeBuilderState.InBody.process(t, tb)
} else if (StringUtil.inString(name, haystack: "base", "basefont", "bgsound", "command", "link")) {
} else if TagSets.baseEtc.contains(name) {
let el: Element = try tb.insertEmpty(start)
// jsoup special: update base the frist time it is seen
if (name.equals("base") && el.hasAttr("href")) {
@ -153,7 +176,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
// todo: charset switches
} else if (name.equals("title")) {
try HtmlTreeBuilderState.handleRcData(start, tb)
} else if (StringUtil.inString(name, haystack: "noframes", "style")) {
} else if name == "noframes" || name == "style" {
try HtmlTreeBuilderState.handleRawtext(start, tb)
} else if (name.equals("noscript")) {
// else if noscript && scripting flag = true: rawtext (jsoup doesn't run script, to handle as noscript)
@ -179,7 +202,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
if (name?.equals("head"))! {
tb.pop()
tb.transition(.AfterHead)
} else if (name != nil && StringUtil.inString(name!, haystack: "body", "html", "br")) {
} else if let name = name, TagSets.outer2.contains(name) {
return try anythingElse(t, tb)
} else {
tb.error(self)
@ -198,17 +221,16 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
}
if (t.isDoctype()) {
tb.error(self)
} else if (t.isStartTag() && (t.asStartTag().normalName()?.equals("html"))!) {
} else if t.startTagNormalName() == "html" {
return try tb.process(t, .InBody)
} else if (t.isEndTag() && (t.asEndTag().normalName()?.equals("noscript"))!) {
} else if t.endTagNormalName() == "noscript" {
tb.pop()
tb.transition(.InHead)
} else if (HtmlTreeBuilderState.isWhitespace(t) || t.isComment() || (t.isStartTag() && StringUtil.inString(t.asStartTag().normalName()!,
haystack: "basefont", "bgsound", "link", "meta", "noframes", "style"))) {
} else if HtmlTreeBuilderState.isWhitespace(t) || t.isComment() || (t.isStartTag() && TagSets.baseEtc2.contains(t.asStartTag().normalName()!)) {
return try tb.process(t, .InHead)
} else if (t.isEndTag() && (t.asEndTag().normalName()?.equals("br"))!) {
} else if t.endTagNormalName() == "br" {
return try anythingElse(t, tb)
} else if ((t.isStartTag() && StringUtil.inString(t.asStartTag().normalName()!, haystack: "head", "noscript")) || t.isEndTag()) {
} else if (t.isStartTag() && TagSets.headNoscript.contains(t.asStartTag().normalName()!)) || t.isEndTag() {
tb.error(self)
return false
} else {
@ -241,7 +263,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
} else if (name.equals("frameset")) {
try tb.insert(startTag)
tb.transition(.InFrameset)
} else if (StringUtil.inString(name, haystack: "base", "basefont", "bgsound", "link", "meta", "noframes", "script", "style", "title")) {
} else if TagSets.baseEtc3.contains(name) {
tb.error(self)
let head: Element = tb.getHeadElement()!
tb.push(head)
@ -254,7 +276,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
try anythingElse(t, tb)
}
} else if (t.isEndTag()) {
if (StringUtil.inString(t.asEndTag().normalName()!, haystack: "body", "html")) {
if TagSets.outer3.contains(t.asEndTag().normalName()!) {
try anythingElse(t, tb)
} else {
tb.error(self)
@ -853,7 +875,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
func anythingElse(_ t: Token, _ tb: HtmlTreeBuilder)throws->Bool {
tb.error(self)
var processed: Bool
if (tb.currentElement() != nil && StringUtil.inString(tb.currentElement()!.nodeName(), haystack: "table", "tbody", "tfoot", "thead", "tr")) {
if let cur = tb.currentElement(), TagSets.table.contains(cur.nodeName()) {
tb.setFosterInserts(true)
processed = try tb.process(t, .InBody)
tb.setFosterInserts(false)
@ -889,11 +911,11 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
} else if (name.equals("col")) {
try tb.processStartTag("colgroup")
return try tb.process(t)
} else if (StringUtil.inString(name, haystack: "tbody", "tfoot", "thead")) {
} else if TagSets.tableSections.contains(name) {
tb.clearStackToTableContext()
try tb.insert(startTag)
tb.transition(.InTableBody)
} else if (StringUtil.inString(name, haystack: "td", "th", "tr")) {
} else if ["td", "th", "tr"].contains(name) {
try tb.processStartTag("tbody")
return try tb.process(t)
} else if (name.equals("table")) {
@ -901,7 +923,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
let processed: Bool = try tb.processEndTag("table")
if (processed) // only ignored if in fragment
{return try tb.process(t)}
} else if (StringUtil.inString(name, haystack: "style", "script")) {
} else if ["style", "script"].contains(name) {
return try tb.process(t, .InHead)
} else if (name.equals("input")) {
if (!startTag._attributes.get(key: "type").equalsIgnoreCase(string: "hidden")) {
@ -932,8 +954,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
tb.popStackToClose("table")
}
tb.resetInsertionMode()
} else if (StringUtil.inString(name,
haystack: "body", "caption", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr")) {
} else if TagSets.tableMix.contains(name) {
tb.error(self)
return false
} else {
@ -970,7 +991,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
if (!HtmlTreeBuilderState.isWhitespace(character)) {
// InTable anything else section:
tb.error(self)
if (tb.currentElement() != nil && StringUtil.inString(tb.currentElement()!.nodeName(), haystack: "table", "tbody", "tfoot", "thead", "tr")) {
if tb.currentElement() != nil && TagSets.table.contains(tb.currentElement()!.nodeName()) {
tb.setFosterInserts(true)
try tb.process(Token.Char().data(character), .InBody)
tb.setFosterInserts(false)
@ -988,7 +1009,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
}
return true
case .InCaption:
if (t.isEndTag() && t.asEndTag().normalName()!.equals("caption")) {
if t.endTagNormalName() == "caption" {
let endTag: Token.EndTag = t.asEndTag()
let name: String? = endTag.normalName()
if (try name != nil && !tb.inTableScope(name!)) {
@ -1003,18 +1024,21 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
tb.clearFormattingElementsToLastMarker()
tb.transition(.InTable)
}
} else if ((
t.isStartTag() && StringUtil.inString(t.asStartTag().normalName()!,
haystack: "caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr") ||
t.isEndTag() && t.asEndTag().normalName()!.equals("table"))
) {
} else if (t.isStartTag() && TagSets.tableRowsAndCols.contains(t.asStartTag().normalName()!)) ||
(t.isEndTag() && t.asEndTag().normalName()!.equals("table"))
{
// Note: original code relies on && precedence being higher than ||
//
// if ((t.isStartTag() && StringUtil.inString(t.asStartTag().normalName()!,
// haystack: "caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr") ||
// t.isEndTag() && t.asEndTag().normalName()!.equals("table"))) {
tb.error(self)
let processed: Bool = try tb.processEndTag("caption")
if (processed) {
return try tb.process(t)
}
} else if (t.isEndTag() && StringUtil.inString(t.asEndTag().normalName()!,
haystack: "body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr")) {
} else if let nName = t.endTagNormalName(), TagSets.tableMix2.contains(nName) {
tb.error(self)
return false
} else {
@ -1102,11 +1126,11 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
tb.clearStackToTableBodyContext()
try tb.insert(startTag)
tb.transition(.InRow)
} else if (StringUtil.inString(name, haystack: "th", "td")) {
} else if let name = name, TagSets.thTd.contains(name) {
tb.error(self)
try tb.processStartTag("tr")
return try tb.process(startTag)
} else if (StringUtil.inString(name, haystack: "caption", "col", "colgroup", "tbody", "tfoot", "thead")) {
} else if let name = name, TagSets.tableMix3.contains(name) {
return try exitTableBody(t, tb)
} else {
return try anythingElse(t, tb)
@ -1115,8 +1139,8 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
case .EndTag:
let endTag: Token.EndTag = t.asEndTag()
let name = endTag.normalName()
if (StringUtil.inString(name, haystack: "tbody", "tfoot", "thead")) {
if (try !tb.inTableScope(name!)) {
if let name = name, TagSets.tableSections.contains(name) {
if (try !tb.inTableScope(name)) {
tb.error(self)
return false
} else {
@ -1126,7 +1150,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
}
} else if ("table".equals(name)) {
return try exitTableBody(t, tb)
} else if (StringUtil.inString(name, haystack: "body", "caption", "col", "colgroup", "html", "td", "th", "tr")) {
} else if let name = name, TagSets.tableMix4.contains(name) {
tb.error(self)
return false
} else {
@ -1155,12 +1179,12 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
let startTag: Token.StartTag = t.asStartTag()
let name: String? = startTag.normalName()
if (StringUtil.inString(name, haystack: "th", "td")) {
if let name = name, TagSets.thTd.contains(name) {
tb.clearStackToTableRowContext()
try tb.insert(startTag)
tb.transition(.InCell)
tb.insertMarkerToFormattingElements()
} else if (StringUtil.inString(name, haystack: "caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr")) {
} else if let name = name, TagSets.tableMix5.contains(name) {
return try handleMissingTr(t, tb)
} else {
return try anythingElse(t, tb)
@ -1179,14 +1203,14 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
tb.transition(.InTableBody)
} else if ("table".equals(name)) {
return try handleMissingTr(t, tb)
} else if (StringUtil.inString(name, haystack: "tbody", "tfoot", "thead")) {
if (try !tb.inTableScope(name!)) {
} else if let name = name, TagSets.tableSections.contains(name) {
if (try !tb.inTableScope(name)) {
tb.error(self)
return false
}
try tb.processEndTag("tr")
return try tb.process(t)
} else if (StringUtil.inString(name, haystack: "body", "caption", "col", "colgroup", "html", "td", "th")) {
} else if let name = name, TagSets.tableMix6.contains(name) {
tb.error(self)
return false
} else {
@ -1213,24 +1237,24 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
let endTag: Token.EndTag = t.asEndTag()
let name: String? = endTag.normalName()
if (StringUtil.inString(name, haystack: "td", "th")) {
if (try !tb.inTableScope(name!)) {
if let name = name, TagSets.thTd.contains(name) {
if (try !tb.inTableScope(name)) {
tb.error(self)
tb.transition(.InRow) // might not be in scope if empty: <td /> and processing fake end tag
return false
}
tb.generateImpliedEndTags()
if (!name!.equals(tb.currentElement()?.nodeName())) {
if (!name.equals(tb.currentElement()?.nodeName())) {
tb.error(self)
}
tb.popStackToClose(name!)
tb.popStackToClose(name)
tb.clearFormattingElementsToLastMarker()
tb.transition(.InRow)
} else if (StringUtil.inString(name, haystack: "body", "caption", "col", "colgroup", "html")) {
} else if let name = name, TagSets.tableMix7.contains(name) {
tb.error(self)
return false
} else if (StringUtil.inString(name, haystack: "table", "tbody", "tfoot", "thead", "tr")) {
if (try !tb.inTableScope(name!)) {
} else if let name = name, TagSets.table.contains(name) {
if (try !tb.inTableScope(name)) {
tb.error(self)
return false
}
@ -1239,9 +1263,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
} else {
return try anythingElse(t, tb)
}
} else if (t.isStartTag() &&
StringUtil.inString(t.asStartTag().normalName(),
haystack: "caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr")) {
} else if let nName = t.startTagNormalName(), TagSets.tableRowsAndCols.contains(nName) {
if (try !(tb.inTableScope("td") || tb.inTableScope("th"))) {
tb.error(self)
return false
@ -1293,7 +1315,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
} else if ("select".equals(name)) {
tb.error(self)
return try tb.processEndTag("select")
} else if (StringUtil.inString(name, haystack: "input", "keygen", "textarea")) {
} else if let name = name, TagSets.inputKeygenTextarea.contains(name) {
tb.error(self)
if (try !tb.inSelectScope("select")) {
return false // frag
@ -1346,17 +1368,17 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
}
return true
case .InSelectInTable:
if (t.isStartTag() && StringUtil.inString(t.asStartTag().normalName(), haystack: "caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th")) {
if let nName = t.startTagNormalName(), TagSets.tableMix8.contains(nName) {
tb.error(self)
try tb.processEndTag("select")
return try tb.process(t)
} else if (t.isEndTag() && StringUtil.inString(t.asEndTag().normalName(), haystack: "caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th")) {
} else if let nName = t.endTagNormalName(), TagSets.tableMix8.contains(nName) {
tb.error(self)
if (try t.asEndTag().normalName() != nil && tb.inTableScope(t.asEndTag().normalName()!)) {
if try tb.inTableScope(nName) {
try tb.processEndTag("select")
return try (tb.process(t))
} else {
return false
return false
}
} else {
return try tb.process(t, .InSelect)
@ -1369,9 +1391,9 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
} else if (t.isDoctype()) {
tb.error(self)
return false
} else if (t.isStartTag() && "html".equals(t.asStartTag().normalName())) {
} else if t.startTagNormalName() == "html" {
return try tb.process(t, .InBody)
} else if (t.isEndTag() && "html".equals(t.asEndTag().normalName())) {
} else if t.endTagNormalName() == "html" {
if (tb.isFragmentParsing()) {
tb.error(self)
return false
@ -1410,7 +1432,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
tb.error(self)
return false
}
} else if (t.isEndTag() && "frameset".equals(t.asEndTag().normalName())) {
} else if t.endTagNormalName() == "frameset" {
if ("html".equals(tb.currentElement()?.nodeName())) { // frag
tb.error(self)
return false
@ -1439,11 +1461,11 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
} else if (t.isDoctype()) {
tb.error(self)
return false
} else if (t.isStartTag() && "html".equals(t.asStartTag().normalName())) {
} else if t.startTagNormalName() == "html" {
return try tb.process(t, .InBody)
} else if (t.isEndTag() && "html".equals(t.asEndTag().normalName())) {
} else if t.endTagNormalName() == "html" {
tb.transition(.AfterAfterFrameset)
} else if (t.isStartTag() && "noframes".equals(t.asStartTag().normalName())) {
} else if t.startTagNormalName() == "noframes" {
return try tb.process(t, .InHead)
} else if (t.isEOF()) {
// cool your heels, we're complete
@ -1470,11 +1492,11 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
if (t.isComment()) {
try tb.insert(t.asComment())
} else if (t.isDoctype() || HtmlTreeBuilderState.isWhitespace(t) || (t.isStartTag() && "html".equals(t.asStartTag().normalName()))) {
} else if (t.isDoctype() || HtmlTreeBuilderState.isWhitespace(t) || (t.startTagNormalName() == "html")) {
return try tb.process(t, .InBody)
} else if (t.isEOF()) {
// nice work chuck
} else if (t.isStartTag() && "noframes".equals(t.asStartTag().normalName())) {
} else if t.startTagNormalName() == "noframes" {
return try tb.process(t, .InHead)
} else {
tb.error(self)
@ -1547,3 +1569,17 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
fileprivate static let InBodyEndTableFosters: [String] = ["table", "tbody", "tfoot", "thead", "tr"]
}
}
fileprivate extension Token {
func endTagNormalName() -> String? {
guard isEndTag() else { return nil }
return asEndTag().normalName()
}
func startTagNormalName() -> String? {
guard isStartTag() else { return nil }
return asStartTag().normalName()
}
}

View File

@ -162,19 +162,6 @@ open class StringUtil {
}
}
public static func inString(_ needle: String?, haystack: String...) -> Bool {
return inString(needle, haystack)
}
public static func inString(_ needle: String?, _ haystack: [String?]) -> Bool {
if(needle == nil) {return false}
for hay in haystack {
if(hay != nil && hay! == needle!) {
return true
}
}
return false
}
// open static func inSorted(_ needle: String, haystack: [String]) -> Bool {
// return binarySearch(haystack, searchItem: needle) >= 0
// }