201 lines
6.3 KiB
Swift
201 lines
6.3 KiB
Swift
//
|
|
// CharacterExt.swift
|
|
//  SwiftSoup
|
|
//
|
|
// Created by Nabil Chatbi on 08/10/16.
|
|
// Copyright © 2016 Nabil Chatbi.. All rights reserved.
|
|
//
|
|
|
|
import Foundation
|
|
|
|
// Character sets used by the category checks of the `Character` extension in this file.
// They are file-level constants so each set is created once and then reused.
private let uppercaseSet: CharacterSet = .uppercaseLetters
private let lowercaseSet: CharacterSet = .lowercaseLetters
private let alphaSet: CharacterSet = .letters
private let alphaNumericSet: CharacterSet = .alphanumerics
private let symbolSet: CharacterSet = .symbols
private let digitSet: CharacterSet = .decimalDigits
|
|
|
|
extension Character {

    /// The form feed control character (`\f`, U+000C).
    public static let BackslashF: Character = "\u{000C}"

    /// First code point of the supplementary planes (U+10000).
    /// See http://www.unicode.org/glossary/#supplementary_code_point
    public static let MIN_SUPPLEMENTARY_CODE_POINT: UInt32 = 0x010000

    /// The first `UnicodeScalar` of `self`.
    var unicodeScalar: UnicodeScalar {
        let scalars = String(self).unicodeScalars
        return scalars[scalars.startIndex]
    }

    /// The only Unicode scalar of `self`, or `nil` when `self` is a grapheme cluster
    /// made of several scalars (for example `"\r\n"` or a letter plus a combining mark).
    private var soleUnicodeScalar: UnicodeScalar? {
        let scalars = String(self).unicodeScalars
        return scalars.count == 1 ? scalars.first : nil
    }

    /// True for the ASCII space and the control characters \t, \n, \r, \r\n, \f, \v.
    var isWhitespace: Bool {
        switch self {
        case " ", "\t", "\n", "\r", "\r\n", "\u{000B}", Character.BackslashF:
            return true
        default:
            return false
        }
    }

    /// True for everything `isWhitespace` accepts, plus the Unicode space, separator and
    /// zero-width characters listed in the cases below.
    var isUnicodeSpace: Bool {
        switch self {
        case " ", "\t", "\n", "\r", "\r\n", Character.BackslashF:
            return true
        case "\u{000B}", "\u{0085}":
            // Vertical tab, next line (NEL).
            return true
        case "\u{00A0}", "\u{1680}", "\u{180E}":
            // No-break space, ogham space mark, mongolian vowel separator.
            return true
        case "\u{2000}"..."\u{200D}":
            // En quad, em quad, en space, em space, three-per-em space, four-per-em space,
            // six-per-em space, figure space, punctuation space, thin space, hair space,
            // zero width space, zero width non-joiner, zero width joiner.
            return true
        case "\u{2028}", "\u{2029}":
            // Line separator, paragraph separator.
            return true
        case "\u{202F}", "\u{205F}", "\u{2060}", "\u{3000}", "\u{FEFF}":
            // Narrow no-break space, medium mathematical space, word joiner,
            // ideographic space, zero width no-break space.
            return true
        default:
            return false
        }
    }

    /// `true` if `self`, once normalized, is a single Unicode scalar in the categories of
    /// Uppercase and Titlecase Letters.
    var isUppercase: Bool {
        return isMemberOfCharacterSet(uppercaseSet)
    }

    /// `true` if `self`, once normalized, is a single Unicode scalar in the category of
    /// Lowercase Letters.
    var isLowercase: Bool {
        return isMemberOfCharacterSet(lowercaseSet)
    }

    /// `true` if `self`, once normalized, is a single Unicode scalar in the categories of
    /// Letters and Marks.
    var isAlpha: Bool {
        return isMemberOfCharacterSet(alphaSet)
    }

    /// `true` if `self`, once normalized, is a single Unicode scalar in the categories of
    /// Letters, Marks, and Numbers.
    var isAlphaNumeric: Bool {
        return isMemberOfCharacterSet(alphaNumericSet)
    }

    /// `true` if `self`, once normalized, is a single Unicode scalar in the category of
    /// Symbols. These characters include, for example, the dollar sign ($) and the plus (+) sign.
    var isSymbol: Bool {
        return isMemberOfCharacterSet(symbolSet)
    }

    /// `true` if `self`, once normalized, is a single Unicode scalar in the category of
    /// Decimal Numbers.
    var isDigit: Bool {
        return isMemberOfCharacterSet(digitSet)
    }

    /// `true` if `self` is an ASCII decimal digit, i.e. between "0" and "9".
    var isDecimalDigit: Bool {
        // Compare scalars rather than characters: a `Character` range would also accept
        // clusters such as a digit followed by a combining mark.
        guard let scalar = soleUnicodeScalar else { return false }
        switch scalar {
        case "0"..."9":
            return true
        default:
            return false
        }
    }

    /// `true` if `self` is an ASCII hexadecimal digit, i.e. "0"..."9", "a"..."f", "A"..."F".
    var isHexadecimalDigit: Bool {
        guard let scalar = soleUnicodeScalar else { return false }
        switch scalar {
        case "0"..."9", "a"..."f", "A"..."F":
            return true
        default:
            return false
        }
    }

    /// `true` if `self` is an ASCII octal digit, i.e. between '0' and '7'.
    var isOctalDigit: Bool {
        guard let scalar = soleUnicodeScalar else { return false }
        switch scalar {
        case "0"..."7":
            return true
        default:
            return false
        }
    }

    /// Lowercase `self`.
    ///
    /// Only the first character of the lowercased string is returned.
    var lowercase: Character {
        let lowered = String(self).lowercased()
        return lowered[lowered.startIndex]
    }

    /// Returns `true` when every Unicode scalar of `self` is a member of `set`.
    ///
    /// - parameter set: The `CharacterSet` used to test for membership.
    /// - returns: `false` as soon as one scalar of `self` is not contained in `set`.
    public func isChar(inSet set: CharacterSet) -> Bool {
        // Walk Unicode scalars, not UTF-16 code units: a surrogate code unit is not a valid
        // scalar, so converting it traps for characters outside the Basic Multilingual Plane.
        for scalar in String(self).unicodeScalars where !set.contains(scalar) {
            return false
        }
        return true
    }

    /// Uppercase `self`.
    ///
    /// Only the first character of the uppercased string is returned.
    var uppercase: Character {
        let raised = String(self).uppercased()
        return raised[raised.startIndex]
    }

    /// Return `true` if `self`, normalized to its precomposed canonical form, is a single
    /// Unicode scalar that is a member of the supplied character set.
    ///
    /// - parameter set: The `CharacterSet` used to test for membership.
    /// - returns: `true` if the normalized `self` is a single scalar contained in `set`;
    ///   `false` when normalization leaves more than one scalar.
    func isMemberOfCharacterSet(_ set: CharacterSet) -> Bool {
        let scalars = String(self).precomposedStringWithCanonicalMapping.unicodeScalars
        guard scalars.count == 1, let scalar = scalars.first else { return false }
        return set.contains(scalar)
    }

    /// Builds the `Character` whose Unicode scalar value is `value`.
    ///
    /// - parameter value: A valid Unicode scalar value.
    /// - returns: The character made of that single scalar.
    /// - note: Traps when `value` is not a valid Unicode scalar value.
    public static func convertFromIntegerLiteral(value: IntegerLiteralType) -> Character {
        guard let scalar = UnicodeScalar(value) else {
            preconditionFailure("Invalid Unicode scalar value: \(value)")
        }
        return Character(scalar)
    }

    /// The numeric value of the first Unicode scalar of `self`.
    func unicodeScalarCodePoint() -> UInt32 {
        return unicodeScalar.value
    }

    /// Returns 2 for supplementary code points (at or above `MIN_SUPPLEMENTARY_CODE_POINT`)
    /// and 1 otherwise, i.e. the number of UTF-16 code units needed to encode `codePoint`.
    static func charCount(codePoint: UInt32) -> Int {
        return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1
    }

    /// Static convenience for `char.isLetter()`.
    static func isLetter(_ char: Character) -> Bool {
        return char.isLetter()
    }

    /// `true` if `self`, once normalized, is a single Unicode scalar in the categories of
    /// Letters and Marks.
    func isLetter() -> Bool {
        return isMemberOfCharacterSet(alphaSet)
    }

    /// Static convenience for `char.isLetterOrDigit()`.
    static func isLetterOrDigit(_ char: Character) -> Bool {
        return char.isLetterOrDigit()
    }

    /// `true` if `self` satisfies `isLetter()` or `isDigit`.
    func isLetterOrDigit() -> Bool {
        return isLetter() || isDigit
    }
}
|
|
|
|
|