From cb49124b9c93e62ca474df72940492f722145d35 Mon Sep 17 00:00:00 2001 From: Simon Fairbairn Date: Fri, 20 Dec 2019 16:38:47 +1300 Subject: [PATCH] Adds better support for different repeating tag combinations and improves character style management --- README.md | 3 +- ...8451F51C-30BE-4B7B-ACFD-E9C42A8D0DC4.plist | 16 +- SwiftyMarkdown/SwiftyMarkdown.swift | 7 + SwiftyMarkdown/SwiftyTokeniser.swift | 213 +++++++++++++----- .../SwiftyMarkdownCharacterTests.swift | 56 ++++- .../SwiftyMarkdownPerformanceTests.swift | 8 + 6 files changed, 237 insertions(+), 66 deletions(-) diff --git a/README.md b/README.md index 1897f39..9ae5128 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ if let url = Bundle.main.url(forResource: "file", withExtension: "md"), md = Swi } ``` -## Supported Features +## Supported Markdown Features *italics* or _italics_ **bold** or __bold__ @@ -57,7 +57,6 @@ if let url = Bundle.main.url(forResource: "file", withExtension: "md"), md = Swi Indented code blocks - ## Customisation ```swift diff --git a/SwiftyMarkdown.xcodeproj/xcshareddata/xcbaselines/F4CE988A1C8A921300D735C1.xcbaseline/8451F51C-30BE-4B7B-ACFD-E9C42A8D0DC4.plist b/SwiftyMarkdown.xcodeproj/xcshareddata/xcbaselines/F4CE988A1C8A921300D735C1.xcbaseline/8451F51C-30BE-4B7B-ACFD-E9C42A8D0DC4.plist index 837c83b..a7cbb00 100644 --- a/SwiftyMarkdown.xcodeproj/xcshareddata/xcbaselines/F4CE988A1C8A921300D735C1.xcbaseline/8451F51C-30BE-4B7B-ACFD-E9C42A8D0DC4.plist +++ b/SwiftyMarkdown.xcodeproj/xcshareddata/xcbaselines/F4CE988A1C8A921300D735C1.xcbaseline/8451F51C-30BE-4B7B-ACFD-E9C42A8D0DC4.plist @@ -11,9 +11,9 @@ com.apple.XCTPerformanceMetric_WallClockTime baselineAverage - 0.01 + 0.1 baselineIntegrationDisplayName - 20 Dec 2019 at 10:47:22 + Local Baseline maxPercentRelativeStandardDeviation 5 @@ -23,13 +23,23 @@ com.apple.XCTPerformanceMetric_WallClockTime baselineAverage - 0.001 + 0.01 baselineIntegrationDisplayName Local Baseline maxPercentRelativeStandardDeviation 5 + testThatVeryLongStringsAreProcessedQuickly() + + com.apple.XCTPerformanceMetric_WallClockTime + + baselineAverage + 0.0392 + baselineIntegrationDisplayName + Local Baseline + + diff --git a/SwiftyMarkdown/SwiftyMarkdown.swift b/SwiftyMarkdown/SwiftyMarkdown.swift index fbf2724..c62686c 100644 --- a/SwiftyMarkdown/SwiftyMarkdown.swift +++ b/SwiftyMarkdown/SwiftyMarkdown.swift @@ -19,6 +19,13 @@ enum CharacterStyle : CharacterStyling { case code case link case image + + func isEqualTo(_ other: CharacterStyling) -> Bool { + guard let other = other as? CharacterStyle else { + return false + } + return other == self + } } enum MarkdownLineStyle : LineStyling { diff --git a/SwiftyMarkdown/SwiftyTokeniser.swift b/SwiftyMarkdown/SwiftyTokeniser.swift index 94ce65f..d1dee34 100644 --- a/SwiftyMarkdown/SwiftyTokeniser.swift +++ b/SwiftyMarkdown/SwiftyTokeniser.swift @@ -16,7 +16,7 @@ extension OSLog { // Tag definition public protocol CharacterStyling { - + func isEqualTo( _ other : CharacterStyling ) -> Bool } @@ -87,7 +87,7 @@ public struct Token { get { switch self.type { case .repeatingTag: - if count == 0 { + if count <= 0 { return "" } else { let range = inputString.startIndex.. [Token] { guard rules.count > 0 else { return [Token(type: .string, inputString: inputString)] @@ -138,11 +150,13 @@ public class SwiftyTokeniser { var currentTokens : [Token] = [] var mutableRules = self.rules + + + while !mutableRules.isEmpty { let nextRule = mutableRules.removeFirst() os_log("------------------------------", log: .tokenising, type: .info) os_log("RULE: %@", log: OSLog.tokenising, type:.info , nextRule.description) - os_log("Applying rule to : %@", log: OSLog.tokenising, type:.info , currentTokens.oslogDisplay ) if currentTokens.isEmpty { // This means it's the first time through @@ -159,13 +173,10 @@ public class SwiftyTokeniser { isOuter = false } if nextToken.type == .closeTag { - let ref = UUID().uuidString outerStringTokens.append(Token(type: .replacement, inputString: ref)) - innerStringTokens.append(nextToken) self.replacements[ref] = self.handleReplacementTokens(innerStringTokens, with: nextRule) - innerStringTokens.removeAll() isOuter = true continue @@ -187,7 +198,12 @@ public class SwiftyTokeniser { finalTokens.append(repToken) continue } - repToken.characterStyles.append(contentsOf: token.characterStyles) + for style in token.characterStyles { + if !repToken.characterStyles.contains(where: { $0.isEqualTo(style)}) { + repToken.characterStyles.append(contentsOf: token.characterStyles) + } + } + finalTokens.append(repToken) } } @@ -199,23 +215,38 @@ public class SwiftyTokeniser { // The one string token might then be exploded into multiple more tokens } - - - os_log("Final output: %@", log: .tokenising, type: .info, currentTokens.oslogDisplay) os_log("=====RULE PROCESSING COMPLETE=====", log: .tokenising, type: .info) os_log("==================================", log: .tokenising, type: .info) return currentTokens } - func scanReplacements(_ replacements : [Token], in token : Token ) -> [Token] { - guard !token.outputString.isEmpty && !replacements.isEmpty else { - return [token] + + + /// In order to reinsert the original replacements into the new string token, the replacements + /// need to be searched for in the incoming string one by one. + /// + /// Using the `newToken(fromSubstring:isReplacement:)` function ensures that any metadata and character styles + /// are passed over into the newly created tokens. + /// + /// E.g. A string token that has an `outputString` of "This string AAAAA-BBBBB-CCCCC replacements", with + /// a characterStyle of `bold` for the entire string, needs to be separated into the following tokens: + /// + /// - `string`: "This string " + /// - `replacement`: "AAAAA-BBBBB-CCCCC" + /// - `string`: " replacements" + /// + /// Each of these need to have a character style of `bold`. + /// + /// - Parameters: + /// - replacements: An array of `replacement` tokens + /// - token: The new `string` token that may contain replacement IDs contained in the `replacements` array + func reinsertReplacements(_ replacements : [Token], from stringToken : Token ) -> [Token] { + guard !stringToken.outputString.isEmpty && !replacements.isEmpty else { + return [stringToken] } - - var outputTokens : [Token] = [] - let scanner = Scanner(string: token.outputString) + let scanner = Scanner(string: stringToken.outputString) scanner.charactersToBeSkipped = nil var repTokens = replacements while !scanner.isAtEnd { @@ -228,12 +259,12 @@ public class SwiftyTokeniser { if #available(iOS 13.0, *) { if let nextString = scanner.scanUpToString(testString) { outputString = nextString - outputTokens.append(token.newToken(fromSubstring: outputString, isReplacement: false)) + outputTokens.append(stringToken.newToken(fromSubstring: outputString, isReplacement: false)) if let outputToken = scanner.scanString(testString) { - outputTokens.append(token.newToken(fromSubstring: outputToken, isReplacement: true)) + outputTokens.append(stringToken.newToken(fromSubstring: outputToken, isReplacement: true)) } } else if let outputToken = scanner.scanString(testString) { - outputTokens.append(token.newToken(fromSubstring: outputToken, isReplacement: true)) + outputTokens.append(stringToken.newToken(fromSubstring: outputToken, isReplacement: true)) } } else { var oldString : NSString? = nil @@ -241,15 +272,15 @@ public class SwiftyTokeniser { scanner.scanUpTo(testString, into: &oldString) if let nextString = oldString { outputString = nextString as String - outputTokens.append(token.newToken(fromSubstring: outputString, isReplacement: false)) + outputTokens.append(stringToken.newToken(fromSubstring: outputString, isReplacement: false)) scanner.scanString(testString, into: &tokenString) if let outputToken = tokenString as String? { - outputTokens.append(token.newToken(fromSubstring: outputToken, isReplacement: true)) + outputTokens.append(stringToken.newToken(fromSubstring: outputToken, isReplacement: true)) } } else { scanner.scanString(testString, into: &tokenString) if let outputToken = tokenString as String? { - outputTokens.append(token.newToken(fromSubstring: outputToken, isReplacement: true)) + outputTokens.append(stringToken.newToken(fromSubstring: outputToken, isReplacement: true)) } } } @@ -257,6 +288,44 @@ public class SwiftyTokeniser { return outputTokens } + + /// This function is necessary because a previously tokenised string might have + /// + /// Consider a previously tokenised string, where AAAAA-BBBBB-CCCCC represents a replaced \[link\](url) instance. + /// + /// The incoming tokens will look like this: + /// + /// - `string`: "A \*\*Bold" + /// - `replacement` : "AAAAA-BBBBB-CCCCC" + /// - `string`: " with a trailing string**" + /// + /// However, because the scanner can only tokenise individual strings, passing in the string values + /// of these tokens individually and applying the styles will not correctly detect the starting and + /// ending `repeatingTag` instances. (e.g. the scanner will see "A \*\*Bold", and then "AAAAA-BBBBB-CCCCC", + /// and finally " with a trailing string\*\*") + /// + /// The strings need to be combined, so that they form a single string: + /// A \*\*Bold AAAAA-BBBBB-CCCCC with a trailing string\*\*. + /// This string is then parsed and tokenised so that it looks like this: + /// + /// - `string`: "A " + /// - `repeatingTag`: "\*\*" + /// - `string`: "Bold AAAAA-BBBBB-CCCCC with a trailing string" + /// - `repeatingTag`: "\*\*" + /// + /// Finally, the replacements from the original incoming token array are searched for and pulled out + /// of this new string, so the final result looks like this: + /// + /// - `string`: "A " + /// - `repeatingTag`: "\*\*" + /// - `string`: "Bold " + /// - `replacement`: "AAAAA-BBBBB-CCCCC" + /// - `string`: " with a trailing string" + /// - `repeatingTag`: "\*\*" + /// + /// - Parameters: + /// - tokens: The tokens to be combined, scanned, re-tokenised, and merged + /// - rule: The character rule currently being applied func scanReplacementTokens( _ tokens : [Token], with rule : CharacterRule ) -> [Token] { guard tokens.count > 0 else { return [] @@ -267,6 +336,9 @@ public class SwiftyTokeniser { let nextTokens = self.scan(combinedString, with: rule) var replacedTokens = self.applyStyles(to: nextTokens, usingRule: rule) + /// It's necessary here to check to see if the first token (which will always represent the styles + /// to be applied from previous scans) has any existing metadata or character styles and apply them + /// to *all* the string and replacement tokens found by the new scan. for idx in 0.. [Token] { - // Online combine string and replacements that are next to each other. - os_log("Handling replacements: %@", log: .tokenising, type: .info, incomingTokens.oslogDisplay) - + // Only combine string and replacements that are next to each other. var newTokenSet : [Token] = [] var currentTokenSet : [Token] = [] for i in 0.. 0 else { + return + } + + let startIdx = index + var endIdx : Int? = nil + + let maxCount = (theToken.count > rule.maxTags) ? rule.maxTags : theToken.count + if let nextTokenIdx = tokens.firstIndex(where: { $0.inputString.first == theToken.inputString.first && $0.type == theToken.type && $0.count >= 1 && $0.id != theToken.id && !$0.isProcessed }) { + endIdx = nextTokenIdx + } + guard let existentEnd = endIdx else { + return + } + + + let styles : [CharacterStyling] = rule.styles[maxCount] ?? [] + for i in startIdx.. rule.maxTags) ? rule.maxTags : tokens[existentEnd].count + tokens[index].count = theToken.count - maxEnd + tokens[existentEnd].count = tokens[existentEnd].count - maxEnd + if maxEnd < rule.maxTags { + self.handleClosingTagFromRepeatingTag(withIndex: index, in: &tokens, following: rule) + } else { + tokens[existentEnd].isProcessed = true + tokens[index].isProcessed = true + } + + + } func applyStyles( to tokens : [Token], usingRule rule : CharacterRule ) -> [Token] { var mutableTokens : [Token] = tokens @@ -375,34 +499,7 @@ public class SwiftyTokeniser { case .escape: os_log("Found escape: %@", log: .tokenising, type: .info, token.inputString ) case .repeatingTag: - let theToken = mutableTokens[idx] - os_log("Found repeating tag with tag count: %i, tags: %@, current rule open tag: %@", log: .tokenising, type: .info, theToken.count, theToken.inputString, rule.openTag ) - - guard theToken.count > 0 else { - continue - } - - let startIdx = idx - var endIdx : Int? = nil - - if let nextTokenIdx = mutableTokens.firstIndex(where: { $0.inputString == theToken.inputString && $0.type == theToken.type && $0.count == theToken.count && $0.id != theToken.id }) { - endIdx = nextTokenIdx - } - guard let existentEnd = endIdx else { - continue - } - - let styles : [CharacterStyling] = rule.styles[theToken.count] ?? [] - for i in startIdx..