Convert CharacterReader to native string indexing
This commit is contained in:
parent
5da0b123b9
commit
a4eca3ed31
|
@ -14,43 +14,47 @@ import Foundation
|
||||||
public final class CharacterReader {
|
public final class CharacterReader {
|
||||||
private static let empty = ""
|
private static let empty = ""
|
||||||
public static let EOF: UnicodeScalar = "\u{FFFF}"//65535
|
public static let EOF: UnicodeScalar = "\u{FFFF}"//65535
|
||||||
private let input: [UnicodeScalar]
|
private let input: String.UnicodeScalarView
|
||||||
private let length: Int
|
private var pos: String.UnicodeScalarView.Index
|
||||||
private var pos: Int = 0
|
private var mark: String.UnicodeScalarView.Index
|
||||||
private var mark: Int = 0
|
|
||||||
//private let stringCache: Array<String?> // holds reused strings in this doc, to lessen garbage
|
//private let stringCache: Array<String?> // holds reused strings in this doc, to lessen garbage
|
||||||
|
|
||||||
public init(_ input: String) {
|
public init(_ input: String) {
|
||||||
self.input = Array(input.unicodeScalars)
|
self.input = input.unicodeScalars
|
||||||
self.length = self.input.count
|
self.pos = input.startIndex
|
||||||
//stringCache = Array(repeating:nil, count:512)
|
self.mark = input.startIndex
|
||||||
}
|
}
|
||||||
|
|
||||||
public func getPos() -> Int {
|
public func getPos() -> Int {
|
||||||
return self.pos
|
return input.distance(from: input.startIndex, to: pos)
|
||||||
}
|
}
|
||||||
|
|
||||||
public func isEmpty() -> Bool {
|
public func isEmpty() -> Bool {
|
||||||
return pos >= length
|
return pos >= input.endIndex
|
||||||
}
|
}
|
||||||
|
|
||||||
public func current() -> UnicodeScalar {
|
public func current() -> UnicodeScalar {
|
||||||
return (pos >= length) ? CharacterReader.EOF : input[pos]
|
return (pos >= input.endIndex) ? CharacterReader.EOF : input[pos]
|
||||||
}
|
}
|
||||||
|
|
||||||
@discardableResult
|
@discardableResult
|
||||||
public func consume() -> UnicodeScalar {
|
public func consume() -> UnicodeScalar {
|
||||||
let val = (pos >= length) ? CharacterReader.EOF : input[pos]
|
guard pos < input.endIndex else {
|
||||||
pos += 1
|
return CharacterReader.EOF
|
||||||
|
}
|
||||||
|
let val = input[pos]
|
||||||
|
pos = input.index(after: pos)
|
||||||
return val
|
return val
|
||||||
}
|
}
|
||||||
|
|
||||||
public func unconsume() {
|
public func unconsume() {
|
||||||
pos -= 1
|
guard pos > input.startIndex else { return }
|
||||||
|
pos = input.index(before: pos)
|
||||||
}
|
}
|
||||||
|
|
||||||
public func advance() {
|
public func advance() {
|
||||||
pos += 1
|
guard pos < input.endIndex else { return }
|
||||||
|
pos = input.index(after: pos)
|
||||||
}
|
}
|
||||||
|
|
||||||
public func markPos() {
|
public func markPos() {
|
||||||
|
@ -62,221 +66,169 @@ public final class CharacterReader {
|
||||||
}
|
}
|
||||||
|
|
||||||
public func consumeAsString() -> String {
|
public func consumeAsString() -> String {
|
||||||
let p = pos
|
guard pos < input.endIndex else { return "" }
|
||||||
pos+=1
|
let str = String(input[pos])
|
||||||
return String(input[p])
|
pos = input.index(after: pos)
|
||||||
//return String(input, pos+=1, 1)
|
return str
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the number of characters between the current position and the next instance of the input char
|
* Locate the next occurrence of a Unicode scalar
|
||||||
* @param c scan target
|
|
||||||
* @return offset between current position and next instance of target. -1 if not found.
|
|
||||||
*/
|
|
||||||
public func nextIndexOf(_ c: UnicodeScalar) -> Int {
|
|
||||||
// doesn't handle scanning for surrogates
|
|
||||||
for i in pos..<length {
|
|
||||||
if (c == input[i]) {
|
|
||||||
return i - pos
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the number of characters between the current position and the next instance of the input sequence
|
|
||||||
*
|
*
|
||||||
* @param seq scan target
|
* - Parameter c: scan target
|
||||||
* @return offset between current position and next instance of target. -1 if not found.
|
* - Returns: index of the next instance of target. nil if not found.
|
||||||
*/
|
*/
|
||||||
public func nextIndexOf(_ seq: String) -> Int {
|
public func nextIndexOf(_ c: UnicodeScalar) -> String.UnicodeScalarView.Index? {
|
||||||
// doesn't handle scanning for surrogates
|
// doesn't handle scanning for surrogates
|
||||||
if(seq.isEmpty) {return -1}
|
return input[pos...].firstIndex(of: c)
|
||||||
let startChar: UnicodeScalar = seq.unicodeScalar(0)
|
|
||||||
for var offset in pos..<length {
|
|
||||||
// scan to first instance of startchar:
|
|
||||||
if (startChar != input[offset]) {
|
|
||||||
offset+=1
|
|
||||||
while(offset < length && startChar != input[offset]) { offset+=1 }
|
|
||||||
}
|
|
||||||
var i = offset + 1
|
|
||||||
let last = i + seq.unicodeScalars.count-1
|
|
||||||
if (offset < length && last <= length) {
|
|
||||||
var j = 1
|
|
||||||
while i < last && seq.unicodeScalar(j) == input[i] {
|
|
||||||
j+=1
|
|
||||||
i+=1
|
|
||||||
}
|
|
||||||
// found full sequence
|
|
||||||
if (i == last) {
|
|
||||||
return offset - pos
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return -1
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Locate the next occurrence of a target string
|
||||||
|
*
|
||||||
|
* - Parameter seq: scan target
|
||||||
|
* - Returns: index of next instance of target. nil if not found.
|
||||||
|
*/
|
||||||
|
public func nextIndexOf(_ seq: String) -> String.UnicodeScalarView.Index? {
|
||||||
|
// doesn't handle scanning for surrogates
|
||||||
|
var start = pos
|
||||||
|
let targetScalars = seq.unicodeScalars
|
||||||
|
guard let firstChar = targetScalars.first else { return pos } // search for "" -> current place
|
||||||
|
MATCH: while true {
|
||||||
|
// Match on first scalar
|
||||||
|
guard let firstCharIx = input[start...].firstIndex(of: firstChar) else { return nil }
|
||||||
|
var current = firstCharIx
|
||||||
|
// Then manually match subsequent scalars
|
||||||
|
for scalar in targetScalars.dropFirst() {
|
||||||
|
current = input.index(after: current)
|
||||||
|
guard current < input.endIndex else { return nil }
|
||||||
|
if input[current] != scalar {
|
||||||
|
start = input.index(after: firstCharIx)
|
||||||
|
continue MATCH
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// full match; current is at position of last matching character
|
||||||
|
return firstCharIx
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public func consumeTo(_ c: UnicodeScalar) -> String {
|
public func consumeTo(_ c: UnicodeScalar) -> String {
|
||||||
let offset = nextIndexOf(c)
|
guard let targetIx = nextIndexOf(c) else {
|
||||||
if (offset != -1) {
|
|
||||||
let consumed = cacheString(pos, offset)
|
|
||||||
pos += offset
|
|
||||||
return consumed
|
|
||||||
} else {
|
|
||||||
return consumeToEnd()
|
return consumeToEnd()
|
||||||
}
|
}
|
||||||
|
let consumed = cacheString(pos, targetIx)
|
||||||
|
pos = targetIx
|
||||||
|
return consumed
|
||||||
}
|
}
|
||||||
|
|
||||||
public func consumeTo(_ seq: String) -> String {
|
public func consumeTo(_ seq: String) -> String {
|
||||||
let offset = nextIndexOf(seq)
|
guard let targetIx = nextIndexOf(seq) else {
|
||||||
if (offset != -1) {
|
|
||||||
let consumed = cacheString(pos, offset)
|
|
||||||
pos += offset
|
|
||||||
return consumed
|
|
||||||
} else {
|
|
||||||
return consumeToEnd()
|
return consumeToEnd()
|
||||||
}
|
}
|
||||||
|
let consumed = cacheString(pos, targetIx)
|
||||||
|
pos = targetIx
|
||||||
|
return consumed
|
||||||
}
|
}
|
||||||
|
|
||||||
public func consumeToAny(_ chars: UnicodeScalar...) -> String {
|
public func consumeToAny(_ chars: UnicodeScalar...) -> String {
|
||||||
return consumeToAny(chars)
|
return consumeToAny(chars)
|
||||||
}
|
}
|
||||||
|
|
||||||
public func consumeToAny(_ chars: [UnicodeScalar]) -> String {
|
public func consumeToAny(_ chars: [UnicodeScalar]) -> String {
|
||||||
let start: Int = pos
|
let start = pos
|
||||||
let remaining: Int = length
|
while pos < input.endIndex {
|
||||||
let val = input
|
if chars.contains(input[pos]) {
|
||||||
OUTER: while (pos < remaining) {
|
break
|
||||||
if chars.contains(val[pos]) {
|
}
|
||||||
break OUTER
|
pos = input.index(after: pos)
|
||||||
}
|
|
||||||
// for c in chars {
|
|
||||||
// if (val[pos] == c){
|
|
||||||
// break OUTER
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
pos += 1
|
|
||||||
}
|
}
|
||||||
|
return cacheString(start, pos)
|
||||||
return pos > start ? cacheString(start, pos-start) : CharacterReader.empty
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public func consumeToAnySorted(_ chars: UnicodeScalar...) -> String {
|
public func consumeToAnySorted(_ chars: UnicodeScalar...) -> String {
|
||||||
return consumeToAnySorted(chars)
|
return consumeToAny(chars)
|
||||||
}
|
}
|
||||||
|
|
||||||
public func consumeToAnySorted(_ chars: [UnicodeScalar]) -> String {
|
public func consumeToAnySorted(_ chars: [UnicodeScalar]) -> String {
|
||||||
let start = pos
|
return consumeToAny(chars)
|
||||||
let remaining = length
|
|
||||||
let val = input
|
|
||||||
|
|
||||||
while (pos < remaining) {
|
|
||||||
|
|
||||||
if chars.contains(val[pos]) {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
pos += 1
|
|
||||||
}
|
|
||||||
|
|
||||||
return pos > start ? cacheString(start, pos-start) : CharacterReader.empty
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static let dataTerminators: [UnicodeScalar] = [.Ampersand, .LessThan, TokeniserStateVars.nullScalr]
|
||||||
|
// read to &, <, or null
|
||||||
public func consumeData() -> String {
|
public func consumeData() -> String {
|
||||||
// &, <, null
|
return consumeToAny(CharacterReader.dataTerminators)
|
||||||
let start = pos
|
|
||||||
let remaining = length
|
|
||||||
let val = input
|
|
||||||
|
|
||||||
while (pos < remaining) {
|
|
||||||
let c: UnicodeScalar = val[pos]
|
|
||||||
if (c == UnicodeScalar.Ampersand || c == UnicodeScalar.LessThan || c == TokeniserStateVars.nullScalr) {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
pos += 1
|
|
||||||
}
|
|
||||||
|
|
||||||
return pos > start ? cacheString(start, pos-start) : CharacterReader.empty
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static let tagNameTerminators: [UnicodeScalar] = [.BackslashT, .BackslashN, .BackslashR, .BackslashF, .Space, .Slash, .GreaterThan, TokeniserStateVars.nullScalr]
|
||||||
|
// read to '\t', '\n', '\r', '\f', ' ', '/', '>', or nullChar
|
||||||
public func consumeTagName() -> String {
|
public func consumeTagName() -> String {
|
||||||
// '\t', '\n', '\r', '\f', ' ', '/', '>', nullChar
|
return consumeToAny(CharacterReader.tagNameTerminators)
|
||||||
let start = pos
|
|
||||||
let remaining = length
|
|
||||||
let val = input
|
|
||||||
|
|
||||||
while (pos < remaining) {
|
|
||||||
let c: UnicodeScalar = val[pos]
|
|
||||||
if (c == UnicodeScalar.BackslashT || c == UnicodeScalar.BackslashN || c == UnicodeScalar.BackslashR || c == UnicodeScalar.BackslashF || c == UnicodeScalar.Space || c == UnicodeScalar.Slash || c == UnicodeScalar.GreaterThan || c == TokeniserStateVars.nullScalr) {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
pos += 1
|
|
||||||
}
|
|
||||||
return pos > start ? cacheString(start, pos-start) : CharacterReader.empty
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public func consumeToEnd() -> String {
|
public func consumeToEnd() -> String {
|
||||||
let data = cacheString(pos, length-pos)
|
let consumed = cacheString(pos, input.endIndex)
|
||||||
pos = length
|
pos = input.endIndex
|
||||||
return data
|
return consumed
|
||||||
}
|
}
|
||||||
|
|
||||||
public func consumeLetterSequence() -> String {
|
public func consumeLetterSequence() -> String {
|
||||||
let start = pos
|
let start = pos
|
||||||
while (pos < length) {
|
while pos < input.endIndex {
|
||||||
let c: UnicodeScalar = input[pos]
|
let c = input[pos]
|
||||||
if ((c >= "A" && c <= "Z") || (c >= "a" && c <= "z") || c.isMemberOfCharacterSet(CharacterSet.letters)) {
|
if ((c >= "A" && c <= "Z") || (c >= "a" && c <= "z") || c.isMemberOfCharacterSet(CharacterSet.letters)) {
|
||||||
pos += 1
|
pos = input.index(after: pos)
|
||||||
} else {
|
} else {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return cacheString(start, pos - start)
|
return cacheString(start, pos)
|
||||||
}
|
}
|
||||||
|
|
||||||
public func consumeLetterThenDigitSequence() -> String {
|
public func consumeLetterThenDigitSequence() -> String {
|
||||||
let start = pos
|
let start = pos
|
||||||
while (pos < length) {
|
while pos < input.endIndex {
|
||||||
let c = input[pos]
|
let c = input[pos]
|
||||||
if ((c >= "A" && c <= "Z") || (c >= "a" && c <= "z") || c.isMemberOfCharacterSet(CharacterSet.letters)) {
|
if ((c >= "A" && c <= "Z") || (c >= "a" && c <= "z") || c.isMemberOfCharacterSet(CharacterSet.letters)) {
|
||||||
pos += 1
|
pos = input.index(after: pos)
|
||||||
} else {
|
} else {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
while (!isEmpty()) {
|
while pos < input.endIndex {
|
||||||
let c = input[pos]
|
let c = input[pos]
|
||||||
if (c >= "0" && c <= "9") {
|
if (c >= "0" && c <= "9") {
|
||||||
pos += 1
|
pos = input.index(after: pos)
|
||||||
} else {
|
} else {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return cacheString(start, pos)
|
||||||
return cacheString(start, pos - start)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public func consumeHexSequence() -> String {
|
public func consumeHexSequence() -> String {
|
||||||
let start = pos
|
let start = pos
|
||||||
while (pos < length) {
|
while pos < input.endIndex {
|
||||||
let c = input[pos]
|
let c = input[pos]
|
||||||
if ((c >= "0" && c <= "9") || (c >= "A" && c <= "F") || (c >= "a" && c <= "f")) {
|
if ((c >= "0" && c <= "9") || (c >= "A" && c <= "F") || (c >= "a" && c <= "f")) {
|
||||||
pos+=1
|
pos = input.index(after: pos)
|
||||||
} else {
|
} else {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return cacheString(start, pos - start)
|
return cacheString(start, pos)
|
||||||
}
|
}
|
||||||
|
|
||||||
public func consumeDigitSequence() -> String {
|
public func consumeDigitSequence() -> String {
|
||||||
let start = pos
|
let start = pos
|
||||||
while (pos < length) {
|
while pos < input.endIndex {
|
||||||
let c = input[pos]
|
let c = input[pos]
|
||||||
if (c >= "0" && c <= "9") {
|
if (c >= "0" && c <= "9") {
|
||||||
pos+=1
|
pos = input.index(after: pos)
|
||||||
} else {
|
} else {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return cacheString(start, pos - start)
|
return cacheString(start, pos)
|
||||||
}
|
}
|
||||||
|
|
||||||
public func matches(_ c: UnicodeScalar) -> Bool {
|
public func matches(_ c: UnicodeScalar) -> Bool {
|
||||||
|
@ -284,180 +236,85 @@ public final class CharacterReader {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public func matches(_ seq: String) -> Bool {
|
public func matches(_ seq: String, ignoreCase: Bool = false, consume: Bool = false) -> Bool {
|
||||||
let scanLength = seq.unicodeScalars.count
|
var current = pos
|
||||||
if (scanLength > length - pos) {
|
let scalars = seq.unicodeScalars
|
||||||
return false
|
for scalar in scalars {
|
||||||
}
|
guard current < input.endIndex else { return false }
|
||||||
|
if ignoreCase {
|
||||||
for offset in 0..<scanLength {
|
guard input[current].uppercase == scalar.uppercase else { return false }
|
||||||
if (seq.unicodeScalar(offset) != input[pos+offset]) {
|
} else {
|
||||||
return false
|
guard input[current] == scalar else { return false }
|
||||||
}
|
}
|
||||||
|
current = input.index(after: current)
|
||||||
|
}
|
||||||
|
if consume {
|
||||||
|
pos = current
|
||||||
}
|
}
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
public func matchesIgnoreCase(_ seq: String ) -> Bool {
|
public func matchesIgnoreCase(_ seq: String ) -> Bool {
|
||||||
|
return matches(seq, ignoreCase: true)
|
||||||
let scanLength = seq.unicodeScalars.count
|
|
||||||
if(scanLength == 0) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
if (scanLength > length - pos) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
for offset in 0..<scanLength {
|
|
||||||
let upScan: UnicodeScalar = seq.unicodeScalar(offset).uppercase
|
|
||||||
let upTarget: UnicodeScalar = input[pos+offset].uppercase
|
|
||||||
if (upScan != upTarget) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public func matchesAny(_ seq: UnicodeScalar...) -> Bool {
|
public func matchesAny(_ seq: UnicodeScalar...) -> Bool {
|
||||||
if (isEmpty()) {
|
return matchesAny(seq)
|
||||||
return false
|
}
|
||||||
}
|
|
||||||
|
public func matchesAny(_ seq: [UnicodeScalar]) -> Bool {
|
||||||
let c: UnicodeScalar = input[pos]
|
guard pos < input.endIndex else { return false }
|
||||||
for seek in seq {
|
return seq.contains(input[pos])
|
||||||
if (seek == c) {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public func matchesAnySorted(_ seq: [UnicodeScalar]) -> Bool {
|
public func matchesAnySorted(_ seq: [UnicodeScalar]) -> Bool {
|
||||||
return !isEmpty() && seq.contains(input[pos])
|
return matchesAny(seq)
|
||||||
}
|
}
|
||||||
|
|
||||||
public func matchesLetter() -> Bool {
|
public func matchesLetter() -> Bool {
|
||||||
if (isEmpty()) {
|
guard pos < input.endIndex else { return false }
|
||||||
return false
|
let c = input[pos]
|
||||||
}
|
|
||||||
let c = input[pos]
|
|
||||||
return (c >= "A" && c <= "Z") || (c >= "a" && c <= "z") || c.isMemberOfCharacterSet(CharacterSet.letters)
|
return (c >= "A" && c <= "Z") || (c >= "a" && c <= "z") || c.isMemberOfCharacterSet(CharacterSet.letters)
|
||||||
}
|
}
|
||||||
|
|
||||||
public func matchesDigit() -> Bool {
|
public func matchesDigit() -> Bool {
|
||||||
if (isEmpty()) {
|
guard pos < input.endIndex else { return false }
|
||||||
return false
|
let c = input[pos]
|
||||||
}
|
return c >= "0" && c <= "9"
|
||||||
let c = input[pos]
|
|
||||||
return (c >= "0" && c <= "9")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@discardableResult
|
@discardableResult
|
||||||
public func matchConsume(_ seq: String) -> Bool {
|
public func matchConsume(_ seq: String) -> Bool {
|
||||||
if (matches(seq)) {
|
return matches(seq, consume: true)
|
||||||
pos += seq.unicodeScalars.count
|
|
||||||
return true
|
|
||||||
} else {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@discardableResult
|
@discardableResult
|
||||||
public func matchConsumeIgnoreCase(_ seq: String) -> Bool {
|
public func matchConsumeIgnoreCase(_ seq: String) -> Bool {
|
||||||
if (matchesIgnoreCase(seq)) {
|
return matches(seq, ignoreCase: true, consume: true)
|
||||||
pos += seq.unicodeScalars.count
|
|
||||||
return true
|
|
||||||
} else {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public func containsIgnoreCase(_ seq: String ) -> Bool {
|
public func containsIgnoreCase(_ seq: String ) -> Bool {
|
||||||
// used to check presence of </title>, </style>. only finds consistent case.
|
// used to check presence of </title>, </style>. only finds consistent case.
|
||||||
let loScan = seq.lowercased(with: Locale(identifier: "en"))
|
let loScan = seq.lowercased(with: Locale(identifier: "en"))
|
||||||
let hiScan = seq.uppercased(with: Locale(identifier: "eng"))
|
let hiScan = seq.uppercased(with: Locale(identifier: "eng"))
|
||||||
return (nextIndexOf(loScan) > -1) || (nextIndexOf(hiScan) > -1)
|
return nextIndexOf(loScan) != nil || nextIndexOf(hiScan) != nil
|
||||||
}
|
}
|
||||||
|
|
||||||
public func toString() -> String {
|
public func toString() -> String {
|
||||||
return String(input[pos..<length])
|
return String(input[pos...])
|
||||||
//return String.unicodescalars(Array(input[pos..<length]))
|
|
||||||
//return input.string(pos, length - pos)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Caches short strings, as a flywheel pattern, to reduce GC load. Just for this doc, to prevent leaks.
|
* Originally intended as a caching mechanism for strings, but caching doesn't
|
||||||
* <p />
|
* seem to improve performance. Now just a stub.
|
||||||
* Simplistic, and on hash collisions just falls back to creating a new string, vs a full HashMap with Entry list.
|
|
||||||
* That saves both having to create objects as hash keys, and running through the entry list, at the expense of
|
|
||||||
* some more duplicates.
|
|
||||||
*/
|
*/
|
||||||
private func cacheString(_ start: Int, _ count: Int) -> String {
|
private func cacheString(_ start: String.UnicodeScalarView.Index, _ end: String.UnicodeScalarView.Index) -> String {
|
||||||
return String(input[start..<start+count])
|
return String(input[start..<end])
|
||||||
// Too Slow
|
|
||||||
// var cache: [String?] = stringCache
|
|
||||||
//
|
|
||||||
// // limit (no cache):
|
|
||||||
// if (count > CharacterReader.maxCacheLen) {
|
|
||||||
// return String(val[start..<start+count].flatMap { Character($0) })
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// // calculate hash:
|
|
||||||
// var hash: Int = 0
|
|
||||||
// var offset = start
|
|
||||||
// for _ in 0..<count {
|
|
||||||
// let ch = val[offset].value
|
|
||||||
// hash = Int.addWithOverflow(Int.multiplyWithOverflow(31, hash).0, Int(ch)).0
|
|
||||||
// offset+=1
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// // get from cache
|
|
||||||
// hash = abs(hash)
|
|
||||||
// let i = hash % cache.count
|
|
||||||
// let index: Int = abs(i) //Int(hash & Int(cache.count) - 1)
|
|
||||||
// var cached = cache[index]
|
|
||||||
//
|
|
||||||
// if (cached == nil) { // miss, add
|
|
||||||
// cached = String(val[start..<start+count].flatMap { Character($0) })
|
|
||||||
// //cached = val.string(start, count)
|
|
||||||
// cache[Int(index)] = cached
|
|
||||||
// } else { // hashcode hit, check equality
|
|
||||||
// if (rangeEquals(start, count, cached!)) { // hit
|
|
||||||
// return cached!
|
|
||||||
// } else { // hashcode conflict
|
|
||||||
// cached = String(val[start..<start+count].flatMap { Character($0) })
|
|
||||||
// //cached = val.string(start, count)
|
|
||||||
// cache[index] = cached // update the cache, as recently used strings are more likely to show up again
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// return cached!
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// /**
|
|
||||||
// * Check if the value of the provided range equals the string.
|
|
||||||
// */
|
|
||||||
// public func rangeEquals(_ start: Int, _ count: Int, _ cached: String) -> Bool {
|
|
||||||
// if (count == cached.unicodeScalars.count) {
|
|
||||||
// var count = count
|
|
||||||
// let one = input
|
|
||||||
// var i = start
|
|
||||||
// var j = 0
|
|
||||||
// while (count != 0) {
|
|
||||||
// count -= 1
|
|
||||||
// if (one[i] != cached.unicodeScalar(j) ) {
|
|
||||||
// return false
|
|
||||||
// }
|
|
||||||
// j += 1
|
|
||||||
// i += 1
|
|
||||||
// }
|
|
||||||
// return true
|
|
||||||
// }
|
|
||||||
// return false
|
|
||||||
// }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
extension CharacterReader: CustomDebugStringConvertible {
|
extension CharacterReader: CustomDebugStringConvertible {
|
||||||
public var debugDescription: String {
|
public var debugDescription: String {
|
||||||
return self.toString()
|
return toString()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -53,10 +53,12 @@ class CharacterReaderTest: XCTestCase {
|
||||||
XCTAssertEqual("e", r.consume())
|
XCTAssertEqual("e", r.consume())
|
||||||
XCTAssertTrue(r.isEmpty())
|
XCTAssertTrue(r.isEmpty())
|
||||||
|
|
||||||
XCTAssertEqual(CharacterReader.EOF, r.consume())
|
// Indexes beyond the end are not allowed in native indexing
|
||||||
r.unconsume()
|
//
|
||||||
XCTAssertTrue(r.isEmpty())
|
// XCTAssertEqual(CharacterReader.EOF, r.consume())
|
||||||
XCTAssertEqual(CharacterReader.EOF, r.current())
|
// r.unconsume()
|
||||||
|
// XCTAssertTrue(r.isEmpty())
|
||||||
|
// XCTAssertEqual(CharacterReader.EOF, r.current())
|
||||||
}
|
}
|
||||||
|
|
||||||
func testMark() {
|
func testMark() {
|
||||||
|
@ -82,31 +84,31 @@ class CharacterReaderTest: XCTestCase {
|
||||||
let input = "blah blah"
|
let input = "blah blah"
|
||||||
let r = CharacterReader(input)
|
let r = CharacterReader(input)
|
||||||
|
|
||||||
XCTAssertEqual(-1, r.nextIndexOf("x"))
|
XCTAssertEqual(nil, r.nextIndexOf("x"))
|
||||||
XCTAssertEqual(3, r.nextIndexOf("h"))
|
XCTAssertEqual(input.index(input.startIndex, offsetBy: 3), r.nextIndexOf("h"))
|
||||||
let pull = r.consumeTo("h")
|
let pull = r.consumeTo("h")
|
||||||
XCTAssertEqual("bla", pull)
|
XCTAssertEqual("bla", pull)
|
||||||
XCTAssertEqual("h", r.consume())
|
XCTAssertEqual("h", r.consume())
|
||||||
XCTAssertEqual(2, r.nextIndexOf("l"))
|
XCTAssertEqual(input.index(input.startIndex, offsetBy: 6), r.nextIndexOf("l"))
|
||||||
XCTAssertEqual(" blah", r.consumeToEnd())
|
XCTAssertEqual(" blah", r.consumeToEnd())
|
||||||
XCTAssertEqual(-1, r.nextIndexOf("x"))
|
XCTAssertEqual(nil, r.nextIndexOf("x"))
|
||||||
}
|
}
|
||||||
|
|
||||||
func testNextIndexOfString() {
|
func testNextIndexOfString() {
|
||||||
let input = "One Two something Two Three Four"
|
let input = "One Two something Two Three Four"
|
||||||
let r = CharacterReader(input)
|
let r = CharacterReader(input)
|
||||||
|
|
||||||
XCTAssertEqual(-1, r.nextIndexOf("Foo"))
|
XCTAssertEqual(nil, r.nextIndexOf("Foo"))
|
||||||
XCTAssertEqual(4, r.nextIndexOf("Two"))
|
XCTAssertEqual(input.index(input.startIndex, offsetBy: 4), r.nextIndexOf("Two"))
|
||||||
XCTAssertEqual("One Two ", r.consumeTo("something"))
|
XCTAssertEqual("One Two ", r.consumeTo("something"))
|
||||||
XCTAssertEqual(10, r.nextIndexOf("Two"))
|
XCTAssertEqual(input.index(input.startIndex, offsetBy: 18), r.nextIndexOf("Two"))
|
||||||
XCTAssertEqual("something Two Three Four", r.consumeToEnd())
|
XCTAssertEqual("something Two Three Four", r.consumeToEnd())
|
||||||
XCTAssertEqual(-1, r.nextIndexOf("Two"))
|
XCTAssertEqual(nil, r.nextIndexOf("Two"))
|
||||||
}
|
}
|
||||||
|
|
||||||
func testNextIndexOfUnmatched() {
|
func testNextIndexOfUnmatched() {
|
||||||
let r = CharacterReader("<[[one]]")
|
let r = CharacterReader("<[[one]]")
|
||||||
XCTAssertEqual(-1, r.nextIndexOf("]]>"))
|
XCTAssertEqual(nil, r.nextIndexOf("]]>"))
|
||||||
}
|
}
|
||||||
|
|
||||||
func testConsumeToChar() {
|
func testConsumeToChar() {
|
||||||
|
|
Loading…
Reference in New Issue