1673 lines
59 KiB
Swift
1673 lines
59 KiB
Swift
//
|
|
// TokeniserState.swift
|
|
// SwiftSoup
|
|
//
|
|
// Created by Nabil Chatbi on 12/10/16.
|
|
// Copyright © 2016 Nabil Chatbi.. All rights reserved.
|
|
//
|
|
|
|
import Foundation
|
|
|
|
/// Contract for a single tokeniser state: something that can perform one
/// read step given a tokeniser and a character reader.
protocol TokeniserStateProtocol {
    /// Executes this state's read step.
    ///
    /// - Parameters:
    ///   - t: The tokeniser the state operates on.
    ///   - r: The character reader the state takes its input from.
    /// - Throws: Any error raised while performing the step.
    func read(_ t: Tokeniser, _ r: CharacterReader) throws
}
|
|
|
|
/// Constants shared by the tokeniser states: the delimiter sets handed to the
/// character reader's `consumeToAny…` calls, plus the replacement and EOF markers.
public class TokeniserStateVars {
    /// Stop characters for a single-quoted attribute value: `'`, `&`, NUL.
    static let attributeSingleValueCharsSorted = [Byte.apostrophe, Byte.ampersand, Byte.null].sorted()

    /// Stop characters for a double-quoted attribute value: `"`, `&`, NUL.
    static let attributeDoubleValueCharsSorted = [Byte.quote, Byte.ampersand, Byte.null].sorted()

    /// Stop characters for an attribute name.
    ///
    /// The `.AttributeName` state consumes up to any of these and then switches
    /// on the stop character. It has an explicit `Byte.quote` case and no case
    /// for a backslash, so the set must contain the double quote: listing the
    /// backslash instead let `"` slip into the name without a parse error and
    /// caused `\` to be consumed and silently dropped.
    static let attributeNameCharsSorted = [
        Byte.horizontalTab, Byte.newLine, Byte.carriageReturn, Byte.formfeed, Byte.space,
        Byte.forwardSlash, Byte.equals, Byte.greaterThan, Byte.null,
        Byte.quote, Byte.apostrophe, Byte.lessThan
    ].sorted()

    /// Stop characters for an unquoted attribute value.
    static let attributeValueUnquoted = [
        Byte.horizontalTab, Byte.newLine, Byte.carriageReturn, Byte.formfeed, Byte.space,
        Byte.ampersand, Byte.greaterThan, Byte.null,
        Byte.quote, Byte.apostrophe, Byte.lessThan, Byte.equals, Byte.backquote
    ].sorted()

    /// Replacement character emitted or appended in place of NUL input.
    static let replacementChar: Byte = Byte.replacementChar

    /// `replacementChar` as a `String`, for APIs that append strings.
    static let replacementStr: String = String(Byte.replacementChar)

    /// Sentinel the states compare against to detect end of input.
    static let eof: Byte = Byte.EOF
}
|
|
|
|
enum TokeniserState: TokeniserStateProtocol {
|
|
case Data
|
|
case CharacterReferenceInData
|
|
case Rcdata
|
|
case CharacterReferenceInRcdata
|
|
case Rawtext
|
|
case ScriptData
|
|
case PLAINTEXT
|
|
case TagOpen
|
|
case EndTagOpen
|
|
case TagName
|
|
case RcdataLessthanSign
|
|
case RCDATAEndTagOpen
|
|
case RCDATAEndTagName
|
|
case RawtextLessthanSign
|
|
case RawtextEndTagOpen
|
|
case RawtextEndTagName
|
|
case ScriptDataLessthanSign
|
|
case ScriptDataEndTagOpen
|
|
case ScriptDataEndTagName
|
|
case ScriptDataEscapeStart
|
|
case ScriptDataEscapeStartDash
|
|
case ScriptDataEscaped
|
|
case ScriptDataEscapedDash
|
|
case ScriptDataEscapedDashDash
|
|
case ScriptDataEscapedLessthanSign
|
|
case ScriptDataEscapedEndTagOpen
|
|
case ScriptDataEscapedEndTagName
|
|
case ScriptDataDoubleEscapeStart
|
|
case ScriptDataDoubleEscaped
|
|
case ScriptDataDoubleEscapedDash
|
|
case ScriptDataDoubleEscapedDashDash
|
|
case ScriptDataDoubleEscapedLessthanSign
|
|
case ScriptDataDoubleEscapeEnd
|
|
case BeforeAttributeName
|
|
case AttributeName
|
|
case AfterAttributeName
|
|
case BeforeAttributeValue
|
|
case AttributeValue_doubleQuoted
|
|
case AttributeValue_singleQuoted
|
|
case AttributeValue_unquoted
|
|
case AfterAttributeValue_quoted
|
|
case SelfClosingStartTag
|
|
case BogusComment
|
|
case MarkupDeclarationOpen
|
|
case CommentStart
|
|
case CommentStartDash
|
|
case Comment
|
|
case CommentEndDash
|
|
case CommentEnd
|
|
case CommentEndBang
|
|
case Doctype
|
|
case BeforeDoctypeName
|
|
case DoctypeName
|
|
case AfterDoctypeName
|
|
case AfterDoctypePublicKeyword
|
|
case BeforeDoctypePublicIdentifier
|
|
case DoctypePublicIdentifier_doubleQuoted
|
|
case DoctypePublicIdentifier_singleQuoted
|
|
case AfterDoctypePublicIdentifier
|
|
case BetweenDoctypePublicAndSystemIdentifiers
|
|
case AfterDoctypeSystemKeyword
|
|
case BeforeDoctypeSystemIdentifier
|
|
case DoctypeSystemIdentifier_doubleQuoted
|
|
case DoctypeSystemIdentifier_singleQuoted
|
|
case AfterDoctypeSystemIdentifier
|
|
case BogusDoctype
|
|
case CdataSection
|
|
|
|
internal func read(_ t: Tokeniser, _ r: CharacterReader)throws {
|
|
switch self {
|
|
case .Data:
|
|
switch (r.current()) {
|
|
case Byte.ampersand:
|
|
t.advanceTransition(.CharacterReferenceInData)
|
|
break
|
|
case Byte.lessThan:
|
|
t.advanceTransition(.TagOpen)
|
|
break
|
|
case Byte.null:
|
|
t.error(self) // NOT replacement character (oddly?)
|
|
t.emit(r.consume())
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
try t.emit(Token.EOF())
|
|
break
|
|
default:
|
|
let data: String = r.consumeData()
|
|
t.emit(data)
|
|
break
|
|
}
|
|
break
|
|
case .CharacterReferenceInData:
|
|
try TokeniserState.readCharRef(t, .Data)
|
|
break
|
|
case .Rcdata:
|
|
switch (r.current()) {
|
|
case Byte.ampersand:
|
|
t.advanceTransition(.CharacterReferenceInRcdata)
|
|
break
|
|
case Byte.lessThan:
|
|
t.advanceTransition(.RcdataLessthanSign)
|
|
break
|
|
case Byte.null:
|
|
t.error(self)
|
|
r.advance()
|
|
t.emit(TokeniserStateVars.replacementChar)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
try t.emit(Token.EOF())
|
|
break
|
|
default:
|
|
let data = r.consumeToAny(Byte.ampersand, Byte.lessThan, Byte.null)
|
|
t.emit(data)
|
|
break
|
|
}
|
|
break
|
|
case .CharacterReferenceInRcdata:
|
|
try TokeniserState.readCharRef(t, .Rcdata)
|
|
break
|
|
case .Rawtext:
|
|
try TokeniserState.readData(t, r, self, .RawtextLessthanSign)
|
|
break
|
|
case .ScriptData:
|
|
try TokeniserState.readData(t, r, self, .ScriptDataLessthanSign)
|
|
break
|
|
case .PLAINTEXT:
|
|
switch (r.current()) {
|
|
case Byte.null:
|
|
t.error(self)
|
|
r.advance()
|
|
t.emit(TokeniserStateVars.replacementChar)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
try t.emit(Token.EOF())
|
|
break
|
|
default:
|
|
let data = r.consumeTo(Byte.null)
|
|
t.emit(data)
|
|
break
|
|
}
|
|
break
|
|
case .TagOpen:
|
|
// from < in data
|
|
switch (r.current()) {
|
|
case Byte.exclamation:
|
|
t.advanceTransition(.MarkupDeclarationOpen)
|
|
break
|
|
case Byte.forwardSlash:
|
|
t.advanceTransition(.EndTagOpen)
|
|
break
|
|
case Byte.questionMark:
|
|
t.advanceTransition(.BogusComment)
|
|
break
|
|
default:
|
|
if (r.matchesLetter()) {
|
|
t.createTagPending(true)
|
|
t.transition(.TagName)
|
|
} else {
|
|
t.error(self)
|
|
t.emit(Byte.lessThan) // char that got us here
|
|
t.transition(.Data)
|
|
}
|
|
break
|
|
}
|
|
break
|
|
case .EndTagOpen:
|
|
if (r.isEmpty()) {
|
|
t.eofError(self)
|
|
t.emit("</")
|
|
t.transition(.Data)
|
|
} else if (r.matchesLetter()) {
|
|
t.createTagPending(false)
|
|
t.transition(.TagName)
|
|
} else if (r.matches(Byte.greaterThan)) {
|
|
t.error(self)
|
|
t.advanceTransition(.Data)
|
|
} else {
|
|
t.error(self)
|
|
t.advanceTransition(.BogusComment)
|
|
}
|
|
break
|
|
case .TagName:
|
|
// from < or </ in data, will have start or end tag pending
|
|
// previous TagOpen state did NOT consume, will have a letter char in current
|
|
//String tagName = r.consumeToAnySorted(tagCharsSorted).toLowerCase()
|
|
let tagName = r.consumeTagName()
|
|
t.tagPending.appendTagName(tagName)
|
|
|
|
switch (r.consume()) {
|
|
case Byte.horizontalTab:
|
|
t.transition(.BeforeAttributeName)
|
|
break
|
|
case Byte.newLine:
|
|
t.transition(.BeforeAttributeName)
|
|
break
|
|
case Byte.carriageReturn:
|
|
t.transition(.BeforeAttributeName)
|
|
break
|
|
case Byte.formfeed:
|
|
t.transition(.BeforeAttributeName)
|
|
break
|
|
case Byte.space:
|
|
t.transition(.BeforeAttributeName)
|
|
break
|
|
case Byte.forwardSlash:
|
|
t.transition(.SelfClosingStartTag)
|
|
break
|
|
case Byte.greaterThan:
|
|
try t.emitTagPending()
|
|
t.transition(.Data)
|
|
break
|
|
case Byte.null: // replacement
|
|
t.tagPending.appendTagName(TokeniserStateVars.replacementStr)
|
|
break
|
|
case TokeniserStateVars.eof: // should emit pending tag?
|
|
t.eofError(self)
|
|
t.transition(.Data)
|
|
// no default, as covered with above consumeToAny
|
|
default:
|
|
break
|
|
}
|
|
case .RcdataLessthanSign:
|
|
if (r.matches(Byte.forwardSlash)) {
|
|
t.createTempBuffer()
|
|
t.advanceTransition(.RCDATAEndTagOpen)
|
|
} else if (r.matchesLetter() && t.appropriateEndTagName() != nil && !r.containsIgnoreCase("</" + t.appropriateEndTagName()!)) {
|
|
// diverge from spec: got a start tag, but there's no appropriate end tag (</title>), so rather than
|
|
// consuming to EOF break out here
|
|
t.tagPending = t.createTagPending(false).name(t.appropriateEndTagName()!)
|
|
try t.emitTagPending()
|
|
r.unconsume() // undo Byte.lessThan
|
|
t.transition(.Data)
|
|
} else {
|
|
t.emit(Byte.lessThan)
|
|
t.transition(.Rcdata)
|
|
}
|
|
break
|
|
case .RCDATAEndTagOpen:
|
|
if (r.matchesLetter()) {
|
|
t.createTagPending(false)
|
|
t.tagPending.appendTagName(r.current())
|
|
t.dataBuffer.append(r.current())
|
|
t.advanceTransition(.RCDATAEndTagName)
|
|
} else {
|
|
t.emit("</")
|
|
t.transition(.Rcdata)
|
|
}
|
|
break
|
|
case .RCDATAEndTagName:
|
|
if (r.matchesLetter()) {
|
|
let name = r.consumeLetterSequence()
|
|
t.tagPending.appendTagName(name)
|
|
t.dataBuffer.append(name)
|
|
return
|
|
}
|
|
|
|
func anythingElse(_ t: Tokeniser, _ r: CharacterReader) {
|
|
t.emit("</" + t.dataBuffer.toString())
|
|
r.unconsume()
|
|
t.transition(.Rcdata)
|
|
}
|
|
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.horizontalTab:
|
|
if (try t.isAppropriateEndTagToken()) {
|
|
t.transition(.BeforeAttributeName)
|
|
} else {
|
|
anythingElse(t, r)
|
|
}
|
|
break
|
|
case Byte.newLine:
|
|
if (try t.isAppropriateEndTagToken()) {
|
|
t.transition(.BeforeAttributeName)
|
|
} else {
|
|
anythingElse(t, r)
|
|
}
|
|
break
|
|
case Byte.carriageReturn:
|
|
if (try t.isAppropriateEndTagToken()) {
|
|
t.transition(.BeforeAttributeName)
|
|
} else {
|
|
anythingElse(t, r)
|
|
}
|
|
break
|
|
case Byte.formfeed:
|
|
if (try t.isAppropriateEndTagToken()) {
|
|
t.transition(.BeforeAttributeName)
|
|
} else {
|
|
anythingElse(t, r)
|
|
}
|
|
break
|
|
case Byte.space:
|
|
if (try t.isAppropriateEndTagToken()) {
|
|
t.transition(.BeforeAttributeName)
|
|
} else {
|
|
anythingElse(t, r)
|
|
}
|
|
break
|
|
case Byte.forwardSlash:
|
|
if (try t.isAppropriateEndTagToken()) {
|
|
t.transition(.SelfClosingStartTag)
|
|
} else {
|
|
anythingElse(t, r)
|
|
}
|
|
break
|
|
case Byte.greaterThan:
|
|
if (try t.isAppropriateEndTagToken()) {
|
|
try t.emitTagPending()
|
|
t.transition(.Data)
|
|
} else {anythingElse(t, r)}
|
|
break
|
|
default:
|
|
anythingElse(t, r)
|
|
break
|
|
}
|
|
break
|
|
case .RawtextLessthanSign:
|
|
if (r.matches(Byte.forwardSlash)) {
|
|
t.createTempBuffer()
|
|
t.advanceTransition(.RawtextEndTagOpen)
|
|
} else {
|
|
t.emit(Byte.lessThan)
|
|
t.transition(.Rawtext)
|
|
}
|
|
break
|
|
case .RawtextEndTagOpen:
|
|
TokeniserState.readEndTag(t, r, .RawtextEndTagName, .Rawtext)
|
|
break
|
|
case .RawtextEndTagName:
|
|
try TokeniserState.handleDataEndTag(t, r, .Rawtext)
|
|
break
|
|
case .ScriptDataLessthanSign:
|
|
switch (r.consume()) {
|
|
case Byte.forwardSlash:
|
|
t.createTempBuffer()
|
|
t.transition(.ScriptDataEndTagOpen)
|
|
break
|
|
case Byte.exclamation:
|
|
t.emit("<!")
|
|
t.transition(.ScriptDataEscapeStart)
|
|
break
|
|
default:
|
|
t.emit(Byte.lessThan)
|
|
r.unconsume()
|
|
t.transition(.ScriptData)
|
|
}
|
|
break
|
|
case .ScriptDataEndTagOpen:
|
|
TokeniserState.readEndTag(t, r, .ScriptDataEndTagName, .ScriptData)
|
|
break
|
|
case .ScriptDataEndTagName:
|
|
try TokeniserState.handleDataEndTag(t, r, .ScriptData)
|
|
break
|
|
case .ScriptDataEscapeStart:
|
|
if (r.matches(Byte.hyphen)) {
|
|
t.emit(Byte.hyphen)
|
|
t.advanceTransition(.ScriptDataEscapeStartDash)
|
|
} else {
|
|
t.transition(.ScriptData)
|
|
}
|
|
break
|
|
case .ScriptDataEscapeStartDash:
|
|
if (r.matches(Byte.hyphen)) {
|
|
t.emit(Byte.hyphen)
|
|
t.advanceTransition(.ScriptDataEscapedDashDash)
|
|
} else {
|
|
t.transition(.ScriptData)
|
|
}
|
|
break
|
|
case .ScriptDataEscaped:
|
|
if (r.isEmpty()) {
|
|
t.eofError(self)
|
|
t.transition(.Data)
|
|
return
|
|
}
|
|
|
|
switch (r.current()) {
|
|
case Byte.hyphen:
|
|
t.emit(Byte.hyphen)
|
|
t.advanceTransition(.ScriptDataEscapedDash)
|
|
break
|
|
case Byte.lessThan:
|
|
t.advanceTransition(.ScriptDataEscapedLessthanSign)
|
|
break
|
|
case Byte.null:
|
|
t.error(self)
|
|
r.advance()
|
|
t.emit(TokeniserStateVars.replacementChar)
|
|
break
|
|
default:
|
|
let data = r.consumeToAny(Byte.hyphen, Byte.lessThan, Byte.null)
|
|
t.emit(data)
|
|
}
|
|
break
|
|
case .ScriptDataEscapedDash:
|
|
if (r.isEmpty()) {
|
|
t.eofError(self)
|
|
t.transition(.Data)
|
|
return
|
|
}
|
|
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.hyphen:
|
|
t.emit(c)
|
|
t.transition(.ScriptDataEscapedDashDash)
|
|
break
|
|
case Byte.lessThan:
|
|
t.transition(.ScriptDataEscapedLessthanSign)
|
|
break
|
|
case Byte.null:
|
|
t.error(self)
|
|
t.emit(TokeniserStateVars.replacementChar)
|
|
t.transition(.ScriptDataEscaped)
|
|
break
|
|
default:
|
|
t.emit(c)
|
|
t.transition(.ScriptDataEscaped)
|
|
}
|
|
break
|
|
case .ScriptDataEscapedDashDash:
|
|
if (r.isEmpty()) {
|
|
t.eofError(self)
|
|
t.transition(.Data)
|
|
return
|
|
}
|
|
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.hyphen:
|
|
t.emit(c)
|
|
break
|
|
case Byte.lessThan:
|
|
t.transition(.ScriptDataEscapedLessthanSign)
|
|
break
|
|
case Byte.greaterThan:
|
|
t.emit(c)
|
|
t.transition(.ScriptData)
|
|
break
|
|
case Byte.null:
|
|
t.error(self)
|
|
t.emit(TokeniserStateVars.replacementChar)
|
|
t.transition(.ScriptDataEscaped)
|
|
break
|
|
default:
|
|
t.emit(c)
|
|
t.transition(.ScriptDataEscaped)
|
|
}
|
|
break
|
|
case .ScriptDataEscapedLessthanSign:
|
|
if (r.matchesLetter()) {
|
|
t.createTempBuffer()
|
|
t.dataBuffer.append(r.current())
|
|
t.emit("<" + String(r.current()))
|
|
t.advanceTransition(.ScriptDataDoubleEscapeStart)
|
|
} else if (r.matches(Byte.forwardSlash)) {
|
|
t.createTempBuffer()
|
|
t.advanceTransition(.ScriptDataEscapedEndTagOpen)
|
|
} else {
|
|
t.emit(Byte.lessThan)
|
|
t.transition(.ScriptDataEscaped)
|
|
}
|
|
break
|
|
case .ScriptDataEscapedEndTagOpen:
|
|
if (r.matchesLetter()) {
|
|
t.createTagPending(false)
|
|
t.tagPending.appendTagName(r.current())
|
|
t.dataBuffer.append(r.current())
|
|
t.advanceTransition(.ScriptDataEscapedEndTagName)
|
|
} else {
|
|
t.emit("</")
|
|
t.transition(.ScriptDataEscaped)
|
|
}
|
|
break
|
|
case .ScriptDataEscapedEndTagName:
|
|
try TokeniserState.handleDataEndTag(t, r, .ScriptDataEscaped)
|
|
break
|
|
case .ScriptDataDoubleEscapeStart:
|
|
TokeniserState.handleDataDoubleEscapeTag(t, r, .ScriptDataDoubleEscaped, .ScriptDataEscaped)
|
|
break
|
|
case .ScriptDataDoubleEscaped:
|
|
let c = r.current()
|
|
switch (c) {
|
|
case Byte.hyphen:
|
|
t.emit(c)
|
|
t.advanceTransition(.ScriptDataDoubleEscapedDash)
|
|
break
|
|
case Byte.lessThan:
|
|
t.emit(c)
|
|
t.advanceTransition(.ScriptDataDoubleEscapedLessthanSign)
|
|
break
|
|
case Byte.null:
|
|
t.error(self)
|
|
r.advance()
|
|
t.emit(TokeniserStateVars.replacementChar)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
t.eofError(self)
|
|
t.transition(.Data)
|
|
break
|
|
default:
|
|
let data = r.consumeToAny(Byte.hyphen, Byte.lessThan, Byte.null)
|
|
t.emit(data)
|
|
}
|
|
break
|
|
case .ScriptDataDoubleEscapedDash:
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.hyphen:
|
|
t.emit(c)
|
|
t.transition(.ScriptDataDoubleEscapedDashDash)
|
|
break
|
|
case Byte.lessThan:
|
|
t.emit(c)
|
|
t.transition(.ScriptDataDoubleEscapedLessthanSign)
|
|
break
|
|
case Byte.null:
|
|
t.error(self)
|
|
t.emit(TokeniserStateVars.replacementChar)
|
|
t.transition(.ScriptDataDoubleEscaped)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
t.eofError(self)
|
|
t.transition(.Data)
|
|
break
|
|
default:
|
|
t.emit(c)
|
|
t.transition(.ScriptDataDoubleEscaped)
|
|
}
|
|
break
|
|
case .ScriptDataDoubleEscapedDashDash:
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.hyphen:
|
|
t.emit(c)
|
|
break
|
|
case Byte.lessThan:
|
|
t.emit(c)
|
|
t.transition(.ScriptDataDoubleEscapedLessthanSign)
|
|
break
|
|
case Byte.greaterThan:
|
|
t.emit(c)
|
|
t.transition(.ScriptData)
|
|
break
|
|
case Byte.null:
|
|
t.error(self)
|
|
t.emit(TokeniserStateVars.replacementChar)
|
|
t.transition(.ScriptDataDoubleEscaped)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
t.eofError(self)
|
|
t.transition(.Data)
|
|
break
|
|
default:
|
|
t.emit(c)
|
|
t.transition(.ScriptDataDoubleEscaped)
|
|
}
|
|
break
|
|
case .ScriptDataDoubleEscapedLessthanSign:
|
|
if (r.matches(Byte.forwardSlash)) {
|
|
t.emit(Byte.forwardSlash)
|
|
t.createTempBuffer()
|
|
t.advanceTransition(.ScriptDataDoubleEscapeEnd)
|
|
} else {
|
|
t.transition(.ScriptDataDoubleEscaped)
|
|
}
|
|
break
|
|
case .ScriptDataDoubleEscapeEnd:
|
|
TokeniserState.handleDataDoubleEscapeTag(t, r, .ScriptDataEscaped, .ScriptDataDoubleEscaped)
|
|
break
|
|
case .BeforeAttributeName:
|
|
// from tagname <xxx
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.horizontalTab:
|
|
t.transition(.SelfClosingStartTag)
|
|
break
|
|
case Byte.newLine:
|
|
t.transition(.SelfClosingStartTag)
|
|
break
|
|
case Byte.carriageReturn:
|
|
t.transition(.SelfClosingStartTag)
|
|
break
|
|
case Byte.formfeed:
|
|
t.transition(.SelfClosingStartTag)
|
|
break
|
|
case Byte.space:
|
|
break // ignore whitespace
|
|
case Byte.forwardSlash:
|
|
t.transition(.SelfClosingStartTag)
|
|
break
|
|
case Byte.greaterThan:
|
|
try t.emitTagPending()
|
|
t.transition(.Data)
|
|
break
|
|
case Byte.null:
|
|
t.error(self)
|
|
try t.tagPending.newAttribute()
|
|
r.unconsume()
|
|
t.transition(.AttributeName)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
t.eofError(self)
|
|
t.transition(.Data)
|
|
break
|
|
case Byte.quote:
|
|
t.error(self)
|
|
try t.tagPending.newAttribute()
|
|
t.tagPending.appendAttributeName(c)
|
|
t.transition(.AttributeName)
|
|
break
|
|
case Byte.apostrophe:
|
|
t.error(self)
|
|
try t.tagPending.newAttribute()
|
|
t.tagPending.appendAttributeName(c)
|
|
t.transition(.AttributeName)
|
|
break
|
|
case Byte.lessThan:
|
|
t.error(self)
|
|
try t.tagPending.newAttribute()
|
|
t.tagPending.appendAttributeName(c)
|
|
t.transition(.AttributeName)
|
|
break
|
|
case Byte.equals:
|
|
t.error(self)
|
|
try t.tagPending.newAttribute()
|
|
t.tagPending.appendAttributeName(c)
|
|
t.transition(.AttributeName)
|
|
break
|
|
default: // A-Z, anything else
|
|
try t.tagPending.newAttribute()
|
|
r.unconsume()
|
|
t.transition(.AttributeName)
|
|
}
|
|
break
|
|
case .AttributeName:
|
|
let name = r.consumeToAnySorted(TokeniserStateVars.attributeNameCharsSorted)
|
|
t.tagPending.appendAttributeName(name)
|
|
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.horizontalTab:
|
|
t.transition(.AfterAttributeName)
|
|
break
|
|
case Byte.newLine:
|
|
t.transition(.AfterAttributeName)
|
|
break
|
|
case Byte.carriageReturn:
|
|
t.transition(.AfterAttributeName)
|
|
break
|
|
case Byte.formfeed:
|
|
t.transition(.AfterAttributeName)
|
|
break
|
|
case Byte.space:
|
|
t.transition(.AfterAttributeName)
|
|
break
|
|
case Byte.forwardSlash:
|
|
t.transition(.SelfClosingStartTag)
|
|
break
|
|
case Byte.equals:
|
|
t.transition(.BeforeAttributeValue)
|
|
break
|
|
case Byte.greaterThan:
|
|
try t.emitTagPending()
|
|
t.transition(.Data)
|
|
break
|
|
case Byte.null:
|
|
t.error(self)
|
|
t.tagPending.appendAttributeName(TokeniserStateVars.replacementChar)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
t.eofError(self)
|
|
t.transition(.Data)
|
|
break
|
|
case Byte.quote:
|
|
t.error(self)
|
|
t.tagPending.appendAttributeName(c)
|
|
case Byte.apostrophe:
|
|
t.error(self)
|
|
t.tagPending.appendAttributeName(c)
|
|
case Byte.lessThan:
|
|
t.error(self)
|
|
t.tagPending.appendAttributeName(c)
|
|
// no default, as covered in consumeToAny
|
|
default:
|
|
break
|
|
}
|
|
break
|
|
case .AfterAttributeName:
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.horizontalTab, Byte.newLine, Byte.carriageReturn, Byte.formfeed, Byte.space:
|
|
// ignore
|
|
break
|
|
case Byte.forwardSlash:
|
|
t.transition(.SelfClosingStartTag)
|
|
break
|
|
case Byte.equals:
|
|
t.transition(.BeforeAttributeValue)
|
|
break
|
|
case Byte.greaterThan:
|
|
try t.emitTagPending()
|
|
t.transition(.Data)
|
|
break
|
|
case Byte.null:
|
|
t.error(self)
|
|
t.tagPending.appendAttributeName(TokeniserStateVars.replacementChar)
|
|
t.transition(.AttributeName)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
t.eofError(self)
|
|
t.transition(.Data)
|
|
break
|
|
case Byte.quote, Byte.apostrophe, Byte.lessThan:
|
|
t.error(self)
|
|
try t.tagPending.newAttribute()
|
|
t.tagPending.appendAttributeName(c)
|
|
t.transition(.AttributeName)
|
|
break
|
|
default: // A-Z, anything else
|
|
try t.tagPending.newAttribute()
|
|
r.unconsume()
|
|
t.transition(.AttributeName)
|
|
}
|
|
break
|
|
case .BeforeAttributeValue:
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.horizontalTab, Byte.newLine, Byte.carriageReturn, Byte.formfeed, Byte.space:
|
|
// ignore
|
|
break
|
|
case Byte.quote:
|
|
t.transition(.AttributeValue_doubleQuoted)
|
|
break
|
|
case Byte.ampersand:
|
|
r.unconsume()
|
|
t.transition(.AttributeValue_unquoted)
|
|
break
|
|
case Byte.apostrophe:
|
|
t.transition(.AttributeValue_singleQuoted)
|
|
break
|
|
case Byte.null:
|
|
t.error(self)
|
|
t.tagPending.appendAttributeValue(TokeniserStateVars.replacementChar)
|
|
t.transition(.AttributeValue_unquoted)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
t.eofError(self)
|
|
try t.emitTagPending()
|
|
t.transition(.Data)
|
|
break
|
|
case Byte.greaterThan:
|
|
t.error(self)
|
|
try t.emitTagPending()
|
|
t.transition(.Data)
|
|
break
|
|
case Byte.lessThan, Byte.equals, Byte.backquote:
|
|
t.error(self)
|
|
t.tagPending.appendAttributeValue(c)
|
|
t.transition(.AttributeValue_unquoted)
|
|
break
|
|
default:
|
|
r.unconsume()
|
|
t.transition(.AttributeValue_unquoted)
|
|
}
|
|
break
|
|
case .AttributeValue_doubleQuoted:
|
|
let value = r.consumeToAny(TokeniserStateVars.attributeDoubleValueCharsSorted)
|
|
if (value.characters.count > 0) {
|
|
t.tagPending.appendAttributeValue(value)
|
|
} else {
|
|
t.tagPending.setEmptyAttributeValue()
|
|
}
|
|
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.quote:
|
|
t.transition(.AfterAttributeValue_quoted)
|
|
break
|
|
case Byte.ampersand:
|
|
|
|
if let ref = try t.consumeCharacterReference(Byte.quote, true) {
|
|
t.tagPending.appendAttributeValue(ref)
|
|
} else {
|
|
t.tagPending.appendAttributeValue(Byte.ampersand)
|
|
}
|
|
break
|
|
case Byte.null:
|
|
t.error(self)
|
|
t.tagPending.appendAttributeValue(TokeniserStateVars.replacementChar)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
t.eofError(self)
|
|
t.transition(.Data)
|
|
break
|
|
// no default, handled in consume to any above
|
|
default:
|
|
break
|
|
}
|
|
break
|
|
case .AttributeValue_singleQuoted:
|
|
let value = r.consumeToAny(TokeniserStateVars.attributeSingleValueCharsSorted)
|
|
if (value.characters.count > 0) {
|
|
t.tagPending.appendAttributeValue(value)
|
|
} else {
|
|
t.tagPending.setEmptyAttributeValue()
|
|
}
|
|
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.apostrophe:
|
|
t.transition(.AfterAttributeValue_quoted)
|
|
break
|
|
case Byte.ampersand:
|
|
|
|
if let ref = try t.consumeCharacterReference(Byte.apostrophe, true) {
|
|
t.tagPending.appendAttributeValue(ref)
|
|
} else {
|
|
t.tagPending.appendAttributeValue(Byte.ampersand)
|
|
}
|
|
break
|
|
case Byte.null:
|
|
t.error(self)
|
|
t.tagPending.appendAttributeValue(TokeniserStateVars.replacementChar)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
t.eofError(self)
|
|
t.transition(.Data)
|
|
break
|
|
// no default, handled in consume to any above
|
|
default:
|
|
break
|
|
}
|
|
break
|
|
case .AttributeValue_unquoted:
|
|
let value = r.consumeToAnySorted(TokeniserStateVars.attributeValueUnquoted)
|
|
if (value.characters.count > 0) {
|
|
t.tagPending.appendAttributeValue(value)
|
|
}
|
|
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.horizontalTab, Byte.newLine, Byte.carriageReturn, Byte.formfeed, Byte.space:
|
|
t.transition(.BeforeAttributeName)
|
|
break
|
|
case Byte.ampersand:
|
|
if let ref = try t.consumeCharacterReference(Byte.greaterThan, true) {
|
|
t.tagPending.appendAttributeValue(ref)
|
|
} else {
|
|
t.tagPending.appendAttributeValue(Byte.ampersand)
|
|
}
|
|
break
|
|
case Byte.greaterThan:
|
|
try t.emitTagPending()
|
|
t.transition(.Data)
|
|
break
|
|
case Byte.null:
|
|
t.error(self)
|
|
t.tagPending.appendAttributeValue(TokeniserStateVars.replacementChar)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
t.eofError(self)
|
|
t.transition(.Data)
|
|
break
|
|
case Byte.quote, Byte.apostrophe, Byte.lessThan, Byte.equals, Byte.backquote:
|
|
t.error(self)
|
|
t.tagPending.appendAttributeValue(c)
|
|
break
|
|
// no default, handled in consume to any above
|
|
default:
|
|
break
|
|
}
|
|
break
|
|
case .AfterAttributeValue_quoted:
|
|
// CharacterReferenceInAttributeValue state handled inline
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.horizontalTab, Byte.newLine, Byte.carriageReturn, Byte.formfeed, Byte.space:
|
|
t.transition(.BeforeAttributeName)
|
|
break
|
|
case Byte.forwardSlash:
|
|
t.transition(.SelfClosingStartTag)
|
|
break
|
|
case Byte.greaterThan:
|
|
try t.emitTagPending()
|
|
t.transition(.Data)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
t.eofError(self)
|
|
t.transition(.Data)
|
|
break
|
|
default:
|
|
t.error(self)
|
|
r.unconsume()
|
|
t.transition(.BeforeAttributeName)
|
|
}
|
|
break
|
|
case .SelfClosingStartTag:
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.greaterThan:
|
|
t.tagPending._selfClosing = true
|
|
try t.emitTagPending()
|
|
t.transition(.Data)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
t.eofError(self)
|
|
t.transition(.Data)
|
|
break
|
|
default:
|
|
t.error(self)
|
|
r.unconsume()
|
|
t.transition(.BeforeAttributeName)
|
|
}
|
|
break
|
|
case .BogusComment:
|
|
// todo: handle bogus comment starting from eof. when does that trigger?
|
|
// rewind to capture the character that led us here
|
|
r.unconsume()
|
|
let comment: Token.Comment = Token.Comment()
|
|
comment.bogus = true
|
|
comment.data.append(r.consumeTo(Byte.greaterThan))
|
|
// todo: replace nullChar with replaceChar
|
|
try t.emit(comment)
|
|
t.advanceTransition(.Data)
|
|
break
|
|
case .MarkupDeclarationOpen:
|
|
if (r.matchConsume("--".makeBytes())) {
|
|
t.createCommentPending()
|
|
t.transition(.CommentStart)
|
|
} else if (r.matchConsumeIgnoreCase("DOCTYPE")) {
|
|
t.transition(.Doctype)
|
|
} else if (r.matchConsume("[CDATA[".makeBytes())) {
|
|
// todo: should actually check current namespace, and only non-html allows cdata. until namespace
|
|
// is implemented properly, keep handling as cdata
|
|
//} else if (!t.currentNodeInHtmlNS() && r.matchConsume("[CDATA[")) {
|
|
t.transition(.CdataSection)
|
|
} else {
|
|
t.error(self)
|
|
t.advanceTransition(.BogusComment) // advance so self character gets in bogus comment data's rewind
|
|
}
|
|
break
|
|
case .CommentStart:
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.hyphen:
|
|
t.transition(.CommentStartDash)
|
|
break
|
|
case Byte.null:
|
|
t.error(self)
|
|
t.commentPending.data.append(TokeniserStateVars.replacementChar)
|
|
t.transition(.Comment)
|
|
break
|
|
case Byte.greaterThan:
|
|
t.error(self)
|
|
try t.emitCommentPending()
|
|
t.transition(.Data)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
t.eofError(self)
|
|
try t.emitCommentPending()
|
|
t.transition(.Data)
|
|
break
|
|
default:
|
|
t.commentPending.data.append(c)
|
|
t.transition(.Comment)
|
|
}
|
|
break
|
|
case .CommentStartDash:
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.hyphen:
|
|
t.transition(.CommentStartDash)
|
|
break
|
|
case Byte.null:
|
|
t.error(self)
|
|
t.commentPending.data.append(TokeniserStateVars.replacementChar)
|
|
t.transition(.Comment)
|
|
break
|
|
case Byte.greaterThan:
|
|
t.error(self)
|
|
try t.emitCommentPending()
|
|
t.transition(.Data)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
t.eofError(self)
|
|
try t.emitCommentPending()
|
|
t.transition(.Data)
|
|
break
|
|
default:
|
|
t.commentPending.data.append(c)
|
|
t.transition(.Comment)
|
|
}
|
|
break
|
|
case .Comment:
|
|
let c = r.current()
|
|
switch (c) {
|
|
case Byte.hyphen:
|
|
t.advanceTransition(.CommentEndDash)
|
|
break
|
|
case Byte.null:
|
|
t.error(self)
|
|
r.advance()
|
|
t.commentPending.data.append(TokeniserStateVars.replacementChar)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
t.eofError(self)
|
|
try t.emitCommentPending()
|
|
t.transition(.Data)
|
|
break
|
|
default:
|
|
t.commentPending.data.append(r.consumeToAny(Byte.hyphen, Byte.null))
|
|
}
|
|
break
|
|
case .CommentEndDash:
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.hyphen:
|
|
t.transition(.CommentEnd)
|
|
break
|
|
case Byte.null:
|
|
t.error(self)
|
|
t.commentPending.data.append(Byte.hyphen).append(TokeniserStateVars.replacementChar)
|
|
t.transition(.Comment)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
t.eofError(self)
|
|
try t.emitCommentPending()
|
|
t.transition(.Data)
|
|
break
|
|
default:
|
|
t.commentPending.data.append(Byte.hyphen).append(c)
|
|
t.transition(.Comment)
|
|
}
|
|
break
|
|
case .CommentEnd:
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.greaterThan:
|
|
try t.emitCommentPending()
|
|
t.transition(.Data)
|
|
break
|
|
case Byte.null:
|
|
t.error(self)
|
|
t.commentPending.data.append("--").append(TokeniserStateVars.replacementChar)
|
|
t.transition(.Comment)
|
|
break
|
|
case Byte.exclamation:
|
|
t.error(self)
|
|
t.transition(.CommentEndBang)
|
|
break
|
|
case Byte.hyphen:
|
|
t.error(self)
|
|
t.commentPending.data.append(Byte.hyphen)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
t.eofError(self)
|
|
try t.emitCommentPending()
|
|
t.transition(.Data)
|
|
break
|
|
default:
|
|
t.error(self)
|
|
t.commentPending.data.append("--").append(c)
|
|
t.transition(.Comment)
|
|
}
|
|
break
|
|
case .CommentEndBang:
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.hyphen:
|
|
t.commentPending.data.append("--!")
|
|
t.transition(.CommentEndDash)
|
|
break
|
|
case Byte.greaterThan:
|
|
try t.emitCommentPending()
|
|
t.transition(.Data)
|
|
break
|
|
case Byte.null:
|
|
t.error(self)
|
|
t.commentPending.data.append("--!").append(TokeniserStateVars.replacementChar)
|
|
t.transition(.Comment)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
t.eofError(self)
|
|
try t.emitCommentPending()
|
|
t.transition(.Data)
|
|
break
|
|
default:
|
|
t.commentPending.data.append("--!").append(c)
|
|
t.transition(.Comment)
|
|
}
|
|
break
|
|
case .Doctype:
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.horizontalTab, Byte.newLine, Byte.carriageReturn, Byte.formfeed, Byte.space:
|
|
t.transition(.BeforeDoctypeName)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
t.eofError(self)
|
|
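// note: intended to fall through to the > case, but Swift cases do not fall through implicitly and there is no `fallthrough` here, so EOF only reports the error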
|
|
case Byte.greaterThan: // catch invalid <!DOCTYPE>
|
|
t.error(self)
|
|
t.createDoctypePending()
|
|
t.doctypePending.forceQuirks = true
|
|
try t.emitDoctypePending()
|
|
t.transition(.Data)
|
|
break
|
|
default:
|
|
t.error(self)
|
|
t.transition(.BeforeDoctypeName)
|
|
}
|
|
break
|
|
case .BeforeDoctypeName:
|
|
if (r.matchesLetter()) {
|
|
t.createDoctypePending()
|
|
t.transition(.DoctypeName)
|
|
return
|
|
}
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.horizontalTab, Byte.newLine, Byte.carriageReturn, Byte.formfeed, Byte.space:
|
|
break // ignore whitespace
|
|
case Byte.null:
|
|
t.error(self)
|
|
t.createDoctypePending()
|
|
t.doctypePending.name.append(TokeniserStateVars.replacementChar)
|
|
t.transition(.DoctypeName)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
t.eofError(self)
|
|
t.createDoctypePending()
|
|
t.doctypePending.forceQuirks = true
|
|
try t.emitDoctypePending()
|
|
t.transition(.Data)
|
|
break
|
|
default:
|
|
t.createDoctypePending()
|
|
t.doctypePending.name.append(c)
|
|
t.transition(.DoctypeName)
|
|
}
|
|
break
|
|
case .DoctypeName:
|
|
if (r.matchesLetter()) {
|
|
let name = r.consumeLetterSequence()
|
|
t.doctypePending.name.append(name)
|
|
return
|
|
}
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.greaterThan:
|
|
try t.emitDoctypePending()
|
|
t.transition(.Data)
|
|
break
|
|
case Byte.horizontalTab, Byte.newLine, Byte.carriageReturn, Byte.formfeed, Byte.space:
|
|
t.transition(.AfterDoctypeName)
|
|
break
|
|
case Byte.null:
|
|
t.error(self)
|
|
t.doctypePending.name.append(TokeniserStateVars.replacementChar)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
t.eofError(self)
|
|
t.doctypePending.forceQuirks = true
|
|
try t.emitDoctypePending()
|
|
t.transition(.Data)
|
|
break
|
|
default:
|
|
t.doctypePending.name.append(c)
|
|
}
|
|
break
|
|
case .AfterDoctypeName:
|
|
if (r.isEmpty()) {
|
|
t.eofError(self)
|
|
t.doctypePending.forceQuirks = true
|
|
try t.emitDoctypePending()
|
|
t.transition(.Data)
|
|
return
|
|
}
|
|
if (r.matchesAny(Byte.horizontalTab, Byte.newLine, Byte.carriageReturn, Byte.formfeed, Byte.space)) {
|
|
r.advance() // ignore whitespace
|
|
} else if (r.matches(Byte.greaterThan)) {
|
|
try t.emitDoctypePending()
|
|
t.advanceTransition(.Data)
|
|
} else if (r.matchConsumeIgnoreCase(DocumentType.PUBLIC_KEY)) {
|
|
t.doctypePending.pubSysKey = DocumentType.PUBLIC_KEY
|
|
t.transition(.AfterDoctypePublicKeyword)
|
|
} else if (r.matchConsumeIgnoreCase(DocumentType.SYSTEM_KEY)) {
|
|
t.doctypePending.pubSysKey = DocumentType.SYSTEM_KEY;
|
|
t.transition(.AfterDoctypeSystemKeyword)
|
|
} else {
|
|
t.error(self)
|
|
t.doctypePending.forceQuirks = true
|
|
t.advanceTransition(.BogusDoctype)
|
|
}
|
|
break
|
|
case .AfterDoctypePublicKeyword:
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.horizontalTab, Byte.newLine, Byte.carriageReturn, Byte.formfeed, Byte.space:
|
|
t.transition(.BeforeDoctypePublicIdentifier)
|
|
break
|
|
case Byte.quote:
|
|
t.error(self)
|
|
// set public id to empty string
|
|
t.transition(.DoctypePublicIdentifier_doubleQuoted)
|
|
break
|
|
case Byte.apostrophe:
|
|
t.error(self)
|
|
// set public id to empty string
|
|
t.transition(.DoctypePublicIdentifier_singleQuoted)
|
|
break
|
|
case Byte.greaterThan:
|
|
t.error(self)
|
|
t.doctypePending.forceQuirks = true
|
|
try t.emitDoctypePending()
|
|
t.transition(.Data)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
t.eofError(self)
|
|
t.doctypePending.forceQuirks = true
|
|
try t.emitDoctypePending()
|
|
t.transition(.Data)
|
|
break
|
|
default:
|
|
t.error(self)
|
|
t.doctypePending.forceQuirks = true
|
|
t.transition(.BogusDoctype)
|
|
}
|
|
break
|
|
case .BeforeDoctypePublicIdentifier:
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.horizontalTab, Byte.newLine, Byte.carriageReturn, Byte.formfeed, Byte.space:
|
|
break
|
|
case Byte.quote:
|
|
// set public id to empty string
|
|
t.transition(.DoctypePublicIdentifier_doubleQuoted)
|
|
break
|
|
case Byte.apostrophe:
|
|
// set public id to empty string
|
|
t.transition(.DoctypePublicIdentifier_singleQuoted)
|
|
break
|
|
case Byte.greaterThan:
|
|
t.error(self)
|
|
t.doctypePending.forceQuirks = true
|
|
try t.emitDoctypePending()
|
|
t.transition(.Data)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
t.eofError(self)
|
|
t.doctypePending.forceQuirks = true
|
|
try t.emitDoctypePending()
|
|
t.transition(.Data)
|
|
break
|
|
default:
|
|
t.error(self)
|
|
t.doctypePending.forceQuirks = true
|
|
t.transition(.BogusDoctype)
|
|
}
|
|
break
|
|
case .DoctypePublicIdentifier_doubleQuoted:
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.quote:
|
|
t.transition(.AfterDoctypePublicIdentifier)
|
|
break
|
|
case Byte.null:
|
|
t.error(self)
|
|
t.doctypePending.publicIdentifier.append(TokeniserStateVars.replacementChar)
|
|
break
|
|
case Byte.greaterThan:
|
|
t.error(self)
|
|
t.doctypePending.forceQuirks = true
|
|
try t.emitDoctypePending()
|
|
t.transition(.Data)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
t.eofError(self)
|
|
t.doctypePending.forceQuirks = true
|
|
try t.emitDoctypePending()
|
|
t.transition(.Data)
|
|
break
|
|
default:
|
|
t.doctypePending.publicIdentifier.append(c)
|
|
}
|
|
break
|
|
case .DoctypePublicIdentifier_singleQuoted:
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.apostrophe:
|
|
t.transition(.AfterDoctypePublicIdentifier)
|
|
break
|
|
case Byte.null:
|
|
t.error(self)
|
|
t.doctypePending.publicIdentifier.append(TokeniserStateVars.replacementChar)
|
|
break
|
|
case Byte.greaterThan:
|
|
t.error(self)
|
|
t.doctypePending.forceQuirks = true
|
|
try t.emitDoctypePending()
|
|
t.transition(.Data)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
t.eofError(self)
|
|
t.doctypePending.forceQuirks = true
|
|
try t.emitDoctypePending()
|
|
t.transition(.Data)
|
|
break
|
|
default:
|
|
t.doctypePending.publicIdentifier.append(c)
|
|
}
|
|
break
|
|
case .AfterDoctypePublicIdentifier:
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.horizontalTab, Byte.newLine, Byte.carriageReturn, Byte.formfeed, Byte.space:
|
|
t.transition(.BetweenDoctypePublicAndSystemIdentifiers)
|
|
break
|
|
case Byte.greaterThan:
|
|
try t.emitDoctypePending()
|
|
t.transition(.Data)
|
|
break
|
|
case Byte.quote:
|
|
t.error(self)
|
|
// system id empty
|
|
t.transition(.DoctypeSystemIdentifier_doubleQuoted)
|
|
break
|
|
case Byte.apostrophe:
|
|
t.error(self)
|
|
// system id empty
|
|
t.transition(.DoctypeSystemIdentifier_singleQuoted)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
t.eofError(self)
|
|
t.doctypePending.forceQuirks = true
|
|
try t.emitDoctypePending()
|
|
t.transition(.Data)
|
|
break
|
|
default:
|
|
t.error(self)
|
|
t.doctypePending.forceQuirks = true
|
|
t.transition(.BogusDoctype)
|
|
}
|
|
break
|
|
case .BetweenDoctypePublicAndSystemIdentifiers:
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.horizontalTab, Byte.newLine, Byte.carriageReturn, Byte.formfeed, Byte.space:
|
|
break
|
|
case Byte.greaterThan:
|
|
try t.emitDoctypePending()
|
|
t.transition(.Data)
|
|
break
|
|
case Byte.quote:
|
|
t.error(self)
|
|
// system id empty
|
|
t.transition(.DoctypeSystemIdentifier_doubleQuoted)
|
|
break
|
|
case Byte.apostrophe:
|
|
t.error(self)
|
|
// system id empty
|
|
t.transition(.DoctypeSystemIdentifier_singleQuoted)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
t.eofError(self)
|
|
t.doctypePending.forceQuirks = true
|
|
try t.emitDoctypePending()
|
|
t.transition(.Data)
|
|
break
|
|
default:
|
|
t.error(self)
|
|
t.doctypePending.forceQuirks = true
|
|
t.transition(.BogusDoctype)
|
|
}
|
|
break
|
|
case .AfterDoctypeSystemKeyword:
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.horizontalTab, Byte.newLine, Byte.carriageReturn, Byte.formfeed, Byte.space:
|
|
t.transition(.BeforeDoctypeSystemIdentifier)
|
|
break
|
|
case Byte.greaterThan:
|
|
t.error(self)
|
|
t.doctypePending.forceQuirks = true
|
|
try t.emitDoctypePending()
|
|
t.transition(.Data)
|
|
break
|
|
case Byte.quote:
|
|
t.error(self)
|
|
// system id empty
|
|
t.transition(.DoctypeSystemIdentifier_doubleQuoted)
|
|
break
|
|
case Byte.apostrophe:
|
|
t.error(self)
|
|
// system id empty
|
|
t.transition(.DoctypeSystemIdentifier_singleQuoted)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
t.eofError(self)
|
|
t.doctypePending.forceQuirks = true
|
|
try t.emitDoctypePending()
|
|
t.transition(.Data)
|
|
break
|
|
default:
|
|
t.error(self)
|
|
t.doctypePending.forceQuirks = true
|
|
try t.emitDoctypePending()
|
|
}
|
|
break
|
|
case .BeforeDoctypeSystemIdentifier:
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.horizontalTab, Byte.newLine, Byte.carriageReturn, Byte.formfeed, Byte.space:
|
|
break
|
|
case Byte.quote:
|
|
// set system id to empty string
|
|
t.transition(.DoctypeSystemIdentifier_doubleQuoted)
|
|
break
|
|
case Byte.apostrophe:
|
|
// set public id to empty string
|
|
t.transition(.DoctypeSystemIdentifier_singleQuoted)
|
|
break
|
|
case Byte.greaterThan:
|
|
t.error(self)
|
|
t.doctypePending.forceQuirks = true
|
|
try t.emitDoctypePending()
|
|
t.transition(.Data)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
t.eofError(self)
|
|
t.doctypePending.forceQuirks = true
|
|
try t.emitDoctypePending()
|
|
t.transition(.Data)
|
|
break
|
|
default:
|
|
t.error(self)
|
|
t.doctypePending.forceQuirks = true
|
|
t.transition(.BogusDoctype)
|
|
}
|
|
break
|
|
case .DoctypeSystemIdentifier_doubleQuoted:
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.quote:
|
|
t.transition(.AfterDoctypeSystemIdentifier)
|
|
break
|
|
case Byte.null:
|
|
t.error(self)
|
|
t.doctypePending.systemIdentifier.append(TokeniserStateVars.replacementChar)
|
|
break
|
|
case Byte.greaterThan:
|
|
t.error(self)
|
|
t.doctypePending.forceQuirks = true
|
|
try t.emitDoctypePending()
|
|
t.transition(.Data)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
t.eofError(self)
|
|
t.doctypePending.forceQuirks = true
|
|
try t.emitDoctypePending()
|
|
t.transition(.Data)
|
|
break
|
|
default:
|
|
t.doctypePending.systemIdentifier.append(c)
|
|
}
|
|
break
|
|
case .DoctypeSystemIdentifier_singleQuoted:
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.apostrophe:
|
|
t.transition(.AfterDoctypeSystemIdentifier)
|
|
break
|
|
case Byte.null:
|
|
t.error(self)
|
|
t.doctypePending.systemIdentifier.append(TokeniserStateVars.replacementChar)
|
|
break
|
|
case Byte.greaterThan:
|
|
t.error(self)
|
|
t.doctypePending.forceQuirks = true
|
|
try t.emitDoctypePending()
|
|
t.transition(.Data)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
t.eofError(self)
|
|
t.doctypePending.forceQuirks = true
|
|
try t.emitDoctypePending()
|
|
t.transition(.Data)
|
|
break
|
|
default:
|
|
t.doctypePending.systemIdentifier.append(c)
|
|
}
|
|
break
|
|
case .AfterDoctypeSystemIdentifier:
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.horizontalTab, Byte.newLine, Byte.carriageReturn, Byte.formfeed, Byte.space:
|
|
break
|
|
case Byte.greaterThan:
|
|
try t.emitDoctypePending()
|
|
t.transition(.Data)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
t.eofError(self)
|
|
t.doctypePending.forceQuirks = true
|
|
try t.emitDoctypePending()
|
|
t.transition(.Data)
|
|
break
|
|
default:
|
|
t.error(self)
|
|
t.transition(.BogusDoctype)
|
|
// NOT force quirks
|
|
}
|
|
break
|
|
case .BogusDoctype:
|
|
let c = r.consume()
|
|
switch (c) {
|
|
case Byte.greaterThan:
|
|
try t.emitDoctypePending()
|
|
t.transition(.Data)
|
|
break
|
|
case TokeniserStateVars.eof:
|
|
try t.emitDoctypePending()
|
|
t.transition(.Data)
|
|
break
|
|
default:
|
|
// ignore char
|
|
break
|
|
}
|
|
break
|
|
case .CdataSection:
|
|
let data = r.consumeTo("]]>")
|
|
t.emit(data)
|
|
r.matchConsume("]]>".makeBytes())
|
|
t.transition(.Data)
|
|
break
|
|
}
|
|
}
|
|
|
|
/**
 * Textual form of this value's dynamic type, as produced by `String(describing:)`.
 * NOTE(review): this describes the type rather than the individual case — confirm that is intended.
 */
var description: String {
    let stateType = type(of: self)
    return String(describing: stateType)
}
|
|
/**
 * Shared body for RawtextEndTagName, ScriptDataEndTagName, and ScriptDataEscapedEndTagName.
 * The three states differ only in the state they fall back to, which is supplied
 * as `elseTransition`.
 *
 * - Parameters:
 *   - t: tokeniser whose pending tag and data buffer are updated.
 *   - r: reader the characters are taken from.
 *   - elseTransition: state to enter after emitting "</" plus the buffered text.
 * - Throws: whatever `isAppropriateEndTagToken()` or `emitTagPending()` throw.
 */
private static func handleDataEndTag(_ t: Tokeniser, _ r: CharacterReader, _ elseTransition: TokeniserState)throws {
    // A run of letters extends both the pending tag name and the data buffer.
    guard !r.matchesLetter() else {
        let letters = r.consumeLetterSequence()
        t.tagPending.appendTagName(letters)
        t.dataBuffer.append(letters)
        return
    }

    // Emits "</" followed by the buffered text, then leaves via the caller's fallback state.
    func exitToFallback() {
        t.emit("</" + t.dataBuffer.toString())
        t.transition(elseTransition)
    }

    // Without an appropriate end tag, or with no input left, take the fallback immediately.
    guard try t.isAppropriateEndTagToken() && !r.isEmpty() else {
        exitToFallback()
        return
    }

    let next = r.consume()
    switch next {
    case Byte.horizontalTab, Byte.newLine, Byte.carriageReturn, Byte.formfeed, Byte.space:
        t.transition(.BeforeAttributeName)
    case Byte.forwardSlash:
        t.transition(.SelfClosingStartTag)
    case Byte.greaterThan:
        try t.emitTagPending()
        t.transition(.Data)
    default:
        // Any other character is kept in the buffer and emitted along with it.
        t.dataBuffer.append(next)
        exitToFallback()
    }
}
|
|
|
|
/**
 * Shared reader that dispatches on `r.current()`.
 *
 * - Parameters:
 *   - t: tokeniser that receives emitted output, errors and transitions.
 *   - r: reader the characters are taken from.
 *   - current: state reported to `t.error` when a null character is seen.
 *   - advance: state passed to `t.advanceTransition` when `<` is seen.
 * - Throws: whatever emitting the EOF token throws.
 */
private static func readData(_ t: Tokeniser, _ r: CharacterReader, _ current: TokeniserState, _ advance: TokeniserState)throws {
    let head = r.current()
    switch head {
    case Byte.lessThan:
        t.advanceTransition(advance)
    case Byte.null:
        // Report the null, step past it, and emit the replacement character in its place.
        t.error(current)
        r.advance()
        t.emit(TokeniserStateVars.replacementChar)
    case TokeniserStateVars.eof:
        try t.emit(Token.EOF())
    default:
        // Emit everything up to the next `<` or null in one piece.
        let chunk = r.consumeToAny(Byte.lessThan, Byte.null)
        t.emit(chunk)
    }
}
|
|
|
|
/**
 * Asks the tokeniser to consume a character reference and emits the outcome,
 * then moves to `advance`.
 *
 * When `consumeCharacterReference(nil, false)` returns nil, an ampersand is
 * emitted instead; otherwise the returned value is emitted.
 *
 * - Parameters:
 *   - t: tokeniser used to consume the reference, emit, and transition.
 *   - advance: state to enter once the output has been emitted.
 * - Throws: whatever `consumeCharacterReference` throws.
 */
private static func readCharRef(_ t: Tokeniser, _ advance: TokeniserState)throws {
    // Optional binding replaces the previous nil comparison followed by a force unwrap.
    if let reference = try t.consumeCharacterReference(nil, false) {
        t.emit(reference)
    } else {
        t.emit(Byte.ampersand)
    }
    t.transition(advance)
}
|
|
|
|
/**
 * Chooses between starting an end tag and emitting a literal "</".
 *
 * - Parameters:
 *   - t: tokeniser that receives the pending tag, emitted text and transition.
 *   - r: reader whose next character is checked with `matchesLetter()`.
 *   - a: state entered when a letter follows (after `createTagPending(false)`).
 *   - b: state entered otherwise, after "</" has been emitted.
 */
private static func readEndTag(_ t: Tokeniser, _ r: CharacterReader, _ a: TokeniserState, _ b: TokeniserState) {
    // No letter next: emit the "</" text and leave via `b`.
    guard r.matchesLetter() else {
        t.emit("</")
        t.transition(b)
        return
    }

    t.createTagPending(false)
    t.transition(a)
}
|
|
|
|
/**
 * Shared body for states that buffer a letter sequence and then pick the next
 * state by checking whether the buffer reads "script".
 *
 * - Parameters:
 *   - t: tokeniser whose data buffer is appended to and which receives output.
 *   - r: reader the characters are taken from.
 *   - primary: state entered when the buffered text equals "script".
 *   - fallback: state entered in every other case.
 */
private static func handleDataDoubleEscapeTag(_ t: Tokeniser, _ r: CharacterReader, _ primary: TokeniserState, _ fallback: TokeniserState) {
    // Letters are both buffered (for the later comparison) and emitted as-is.
    guard !r.matchesLetter() else {
        let letters = r.consumeLetterSequence()
        t.dataBuffer.append(letters)
        t.emit(letters)
        return
    }

    let terminator = r.consume()
    switch terminator {
    case Byte.horizontalTab, Byte.newLine, Byte.carriageReturn, Byte.formfeed, Byte.space, Byte.forwardSlash, Byte.greaterThan:
        // The comparison with "script" is an exact string equality on the buffer contents.
        let target = t.dataBuffer.toString() == "script" ? primary : fallback
        t.transition(target)
        t.emit(terminator)
    default:
        // Not a terminator: put the character back and fall back without emitting it.
        r.unconsume()
        t.transition(fallback)
    }
}
|
|
|
|
}
|