Use Swift built-in Unicode Codecs
This commit is contained in:
parent
4063a440e5
commit
29d0f2b091
|
@ -11,10 +11,10 @@
|
|||
0E0F160A1D197D6000C92580 /* AnyIterator.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E0F16081D197D6000C92580 /* AnyIterator.swift */; };
|
||||
0E0F160B1D197D6000C92580 /* AnyIterator.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E0F16081D197D6000C92580 /* AnyIterator.swift */; };
|
||||
0E0F160C1D197D6000C92580 /* AnyIterator.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E0F16081D197D6000C92580 /* AnyIterator.swift */; };
|
||||
0E0F160E1D197DB800C92580 /* String.Encoding+endian.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E0F160D1D197DB800C92580 /* String.Encoding+endian.swift */; };
|
||||
0E0F160F1D197DB800C92580 /* String.Encoding+endian.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E0F160D1D197DB800C92580 /* String.Encoding+endian.swift */; };
|
||||
0E0F16101D197DB800C92580 /* String.Encoding+endian.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E0F160D1D197DB800C92580 /* String.Encoding+endian.swift */; };
|
||||
0E0F16111D197DB800C92580 /* String.Encoding+endian.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E0F160D1D197DB800C92580 /* String.Encoding+endian.swift */; };
|
||||
0E0F160E1D197DB800C92580 /* Endian.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E0F160D1D197DB800C92580 /* Endian.swift */; };
|
||||
0E0F160F1D197DB800C92580 /* Endian.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E0F160D1D197DB800C92580 /* Endian.swift */; };
|
||||
0E0F16101D197DB800C92580 /* Endian.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E0F160D1D197DB800C92580 /* Endian.swift */; };
|
||||
0E0F16111D197DB800C92580 /* Endian.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E0F160D1D197DB800C92580 /* Endian.swift */; };
|
||||
0E7E8C8C1D0BC7BB0057A1C1 /* CSV.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 0E7E8C811D0BC7BB0057A1C1 /* CSV.framework */; };
|
||||
0E7E8CA11D0BC7F10057A1C1 /* CSV.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E7E8C9D1D0BC7F10057A1C1 /* CSV.swift */; };
|
||||
0E7E8CA21D0BC7F10057A1C1 /* CSVError.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E7E8C9E1D0BC7F10057A1C1 /* CSVError.swift */; };
|
||||
|
@ -83,7 +83,7 @@
|
|||
|
||||
/* Begin PBXFileReference section */
|
||||
0E0F16081D197D6000C92580 /* AnyIterator.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = AnyIterator.swift; sourceTree = "<group>"; };
|
||||
0E0F160D1D197DB800C92580 /* String.Encoding+endian.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "String.Encoding+endian.swift"; sourceTree = "<group>"; };
|
||||
0E0F160D1D197DB800C92580 /* Endian.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Endian.swift; sourceTree = "<group>"; };
|
||||
0E7E8C811D0BC7BB0057A1C1 /* CSV.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = CSV.framework; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
0E7E8C8B1D0BC7BB0057A1C1 /* CSVTests-iOS.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = "CSVTests-iOS.xctest"; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
0E7E8C9D1D0BC7F10057A1C1 /* CSV.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = CSV.swift; sourceTree = "<group>"; };
|
||||
|
@ -194,8 +194,8 @@
|
|||
0E9317D81D0DB30800AC20A0 /* CSV+subscript.swift */,
|
||||
0E7E8C9E1D0BC7F10057A1C1 /* CSVError.swift */,
|
||||
0E7E8C9F1D0BC7F10057A1C1 /* CSVVersion.h */,
|
||||
0E0F160D1D197DB800C92580 /* Endian.swift */,
|
||||
0E7E8CAC1D0BC8610057A1C1 /* Info.plist */,
|
||||
0E0F160D1D197DB800C92580 /* String.Encoding+endian.swift */,
|
||||
0EA2AB801D183BA9003EC967 /* UnicodeIterator.swift */,
|
||||
);
|
||||
path = Sources;
|
||||
|
@ -502,7 +502,7 @@
|
|||
0EA2AB821D183BA9003EC967 /* UnicodeIterator.swift in Sources */,
|
||||
0E9317DA1D0DB30800AC20A0 /* CSV+subscript.swift in Sources */,
|
||||
0E7E8CA11D0BC7F10057A1C1 /* CSV.swift in Sources */,
|
||||
0E0F160F1D197DB800C92580 /* String.Encoding+endian.swift in Sources */,
|
||||
0E0F160F1D197DB800C92580 /* Endian.swift in Sources */,
|
||||
0E7E8CA21D0BC7F10057A1C1 /* CSVError.swift in Sources */,
|
||||
0EA2AB7D1D183B45003EC967 /* BinaryReader.swift in Sources */,
|
||||
);
|
||||
|
@ -527,7 +527,7 @@
|
|||
0EA2AB841D183BA9003EC967 /* UnicodeIterator.swift in Sources */,
|
||||
0E9317DC1D0DB30800AC20A0 /* CSV+subscript.swift in Sources */,
|
||||
0E7E8CBE1D0BC9D70057A1C1 /* CSV.swift in Sources */,
|
||||
0E0F16111D197DB800C92580 /* String.Encoding+endian.swift in Sources */,
|
||||
0E0F16111D197DB800C92580 /* Endian.swift in Sources */,
|
||||
0E7E8CBF1D0BC9D70057A1C1 /* CSVError.swift in Sources */,
|
||||
0EA2AB7F1D183B45003EC967 /* BinaryReader.swift in Sources */,
|
||||
);
|
||||
|
@ -542,7 +542,7 @@
|
|||
0EA2AB811D183BA9003EC967 /* UnicodeIterator.swift in Sources */,
|
||||
0E9317D91D0DB30800AC20A0 /* CSV+subscript.swift in Sources */,
|
||||
0E7E8CE01D0BCA8E0057A1C1 /* CSV.swift in Sources */,
|
||||
0E0F160E1D197DB800C92580 /* String.Encoding+endian.swift in Sources */,
|
||||
0E0F160E1D197DB800C92580 /* Endian.swift in Sources */,
|
||||
0E7E8CE11D0BCA8E0057A1C1 /* CSVError.swift in Sources */,
|
||||
0EA2AB7C1D183B45003EC967 /* BinaryReader.swift in Sources */,
|
||||
);
|
||||
|
@ -567,7 +567,7 @@
|
|||
0EA2AB831D183BA9003EC967 /* UnicodeIterator.swift in Sources */,
|
||||
0E9317DB1D0DB30800AC20A0 /* CSV+subscript.swift in Sources */,
|
||||
0E7E8D001D0BCDCF0057A1C1 /* CSV.swift in Sources */,
|
||||
0E0F16101D197DB800C92580 /* String.Encoding+endian.swift in Sources */,
|
||||
0E0F16101D197DB800C92580 /* Endian.swift in Sources */,
|
||||
0E7E8D011D0BCDCF0057A1C1 /* CSVError.swift in Sources */,
|
||||
0EA2AB7E1D183B45003EC967 /* BinaryReader.swift in Sources */,
|
||||
);
|
||||
|
|
|
@ -8,26 +8,32 @@
|
|||
|
||||
import Foundation
|
||||
|
||||
internal func readBOM(buffer: UnsafePointer<UInt8>, length: Int) -> (String.Encoding, Int)? {
|
||||
internal let utf8BOM: [UInt8] = [0xef, 0xbb, 0xbf]
|
||||
internal let utf16BigEndianBOM: [UInt8] = [0xfe, 0xff]
|
||||
internal let utf16LittleEndianBOM: [UInt8] = [0xff, 0xfe]
|
||||
internal let utf32BigEndianBOM: [UInt8] = [0x00, 0x00, 0xfe, 0xff]
|
||||
internal let utf32LittleEndianBOM: [UInt8] = [0xff, 0xfe, 0x00, 0x00]
|
||||
|
||||
internal func readBOM(buffer: UnsafePointer<UInt8>, length: Int) -> (Endian, Int)? {
|
||||
if length >= 4 {
|
||||
if memcmp(buffer, utf32BigEndianBOM, 4) == 0 {
|
||||
return (String.Encoding.utf32BigEndian, 4)
|
||||
return (.big, 4)
|
||||
}
|
||||
if memcmp(buffer, utf32LittleEndianBOM, 4) == 0 {
|
||||
return (String.Encoding.utf32LittleEndian, 4)
|
||||
return (.little, 4)
|
||||
}
|
||||
}
|
||||
if length >= 3 {
|
||||
if memcmp(buffer, utf8BOM, 3) == 0 {
|
||||
return (String.Encoding.utf8, 3)
|
||||
return (.unknown, 3)
|
||||
}
|
||||
}
|
||||
if length >= 2 {
|
||||
if memcmp(buffer, utf16BigEndianBOM, 2) == 0 {
|
||||
return (String.Encoding.utf16BigEndian, 2)
|
||||
return (.big, 2)
|
||||
}
|
||||
if memcmp(buffer, utf16LittleEndianBOM, 2) == 0 {
|
||||
return (String.Encoding.utf16LittleEndian, 2)
|
||||
return (.little, 2)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
|
@ -36,15 +42,15 @@ internal func readBOM(buffer: UnsafePointer<UInt8>, length: Int) -> (String.Enco
|
|||
internal class BinaryReader {
|
||||
|
||||
private let stream: InputStream
|
||||
private let encoding: String.Encoding
|
||||
private let endian: Endian
|
||||
private let closeOnDeinit: Bool
|
||||
|
||||
private var buffer = [UInt8].init(repeating: 0, count: 4)
|
||||
private let bufferSize = 4
|
||||
private var bufferOffset = 0
|
||||
|
||||
internal init(stream: InputStream, encoding: String.Encoding = .utf8, closeOnDeinit: Bool = true) {
|
||||
var encoding = encoding
|
||||
internal init(stream: InputStream, endian: Endian = .unknown, closeOnDeinit: Bool = true) {
|
||||
var endian = endian
|
||||
|
||||
if stream.streamStatus == .notOpen {
|
||||
stream.open()
|
||||
|
@ -52,12 +58,12 @@ internal class BinaryReader {
|
|||
|
||||
let readCount = stream.read(&buffer, maxLength: bufferSize)
|
||||
if let (e, l) = readBOM(buffer: &buffer, length: readCount) {
|
||||
encoding = e
|
||||
endian = e
|
||||
bufferOffset = l
|
||||
}
|
||||
|
||||
self.stream = stream
|
||||
self.encoding = encoding
|
||||
self.endian = endian
|
||||
self.closeOnDeinit = closeOnDeinit
|
||||
}
|
||||
|
||||
|
@ -116,7 +122,7 @@ internal class BinaryReader {
|
|||
throw NSError(domain: "", code: 0, userInfo: nil)
|
||||
}
|
||||
let tmp = UnsafeMutablePointer<UInt16>(buffer)
|
||||
switch encoding.endian {
|
||||
switch endian {
|
||||
case .big:
|
||||
return CFSwapInt16BigToHost(tmp[0])
|
||||
case .little:
|
||||
|
@ -139,7 +145,7 @@ internal class BinaryReader {
|
|||
throw NSError(domain: "", code: 0, userInfo: nil)
|
||||
}
|
||||
let tmp = UnsafeMutablePointer<UInt32>(buffer)
|
||||
switch encoding.endian {
|
||||
switch endian {
|
||||
case .big:
|
||||
return CFSwapInt32BigToHost(tmp[0])
|
||||
case .little:
|
||||
|
|
|
@ -11,31 +11,18 @@ import Foundation
|
|||
extension CSV {
|
||||
|
||||
public init(
|
||||
fileAtPath path: String,
|
||||
encoding: String.Encoding = defaultEncoding,
|
||||
stream: InputStream,
|
||||
hasHeaderRow: Bool = defaultHasHeaderRow,
|
||||
delimiter: UnicodeScalar = defaultDelimiter)
|
||||
throws
|
||||
{
|
||||
guard let stream = InputStream(fileAtPath: path) else {
|
||||
throw CSVError.StreamError
|
||||
}
|
||||
try self.init(stream: stream, encoding: encoding, hasHeaderRow: hasHeaderRow, delimiter: delimiter)
|
||||
try self.init(stream: stream, codecType: UTF8.self, hasHeaderRow: hasHeaderRow, delimiter: delimiter)
|
||||
}
|
||||
|
||||
public init(
|
||||
url: URL,
|
||||
encoding: String.Encoding = defaultEncoding,
|
||||
hasHeaderRow: Bool = defaultHasHeaderRow,
|
||||
delimiter: UnicodeScalar = defaultDelimiter)
|
||||
throws
|
||||
{
|
||||
guard let stream = InputStream(url: url) else {
|
||||
throw CSVError.StreamError
|
||||
}
|
||||
try self.init(stream: stream, encoding: encoding, hasHeaderRow: hasHeaderRow, delimiter: delimiter)
|
||||
}
|
||||
}
|
||||
|
||||
extension CSV {
|
||||
|
||||
public init(
|
||||
string: String,
|
||||
hasHeaderRow: Bool = defaultHasHeaderRow,
|
||||
|
|
|
@ -12,16 +12,9 @@ private let LF = "\n".unicodeScalars.first!
|
|||
private let CR = "\r".unicodeScalars.first!
|
||||
private let DQUOTE = "\"".unicodeScalars.first!
|
||||
|
||||
internal let defaultEncoding: String.Encoding = .utf8
|
||||
internal let defaultHasHeaderRow = false
|
||||
internal let defaultDelimiter = ",".unicodeScalars.first!
|
||||
|
||||
internal let utf8BOM: [UInt8] = [0xef, 0xbb, 0xbf]
|
||||
internal let utf16BigEndianBOM: [UInt8] = [0xfe, 0xff]
|
||||
internal let utf16LittleEndianBOM: [UInt8] = [0xff, 0xfe]
|
||||
internal let utf32BigEndianBOM: [UInt8] = [0x00, 0x00, 0xfe, 0xff]
|
||||
internal let utf32LittleEndianBOM: [UInt8] = [0xff, 0xfe, 0x00, 0x00]
|
||||
|
||||
public struct CSV: IteratorProtocol, Sequence {
|
||||
|
||||
private var iterator: AnyIterator<UnicodeScalar>
|
||||
|
@ -37,6 +30,14 @@ public struct CSV: IteratorProtocol, Sequence {
|
|||
public var headerRow: [String]? { return _headerRow }
|
||||
private var _headerRow: [String]? = nil
|
||||
|
||||
/**
|
||||
Create CSV instance with an iterator of `UnicodeScalar` values.
|
||||
|
||||
- parameter iterator: An iterator that yields the `UnicodeScalar` values of the CSV text.
|
||||
- parameter hasHeaderRow: `true` if the CSV has a header row, otherwise `false`.
|
||||
- parameter delimiter: The `UnicodeScalar` used as the field delimiter.
|
||||
- throws: `CSVError.headerReadError` if `hasHeaderRow` is `true` and no header row could be read.
|
||||
*/
|
||||
internal init<T: IteratorProtocol where T.Element == UnicodeScalar>(
|
||||
iterator: T,
|
||||
hasHeaderRow: Bool,
|
||||
|
@ -48,12 +49,12 @@ public struct CSV: IteratorProtocol, Sequence {
|
|||
|
||||
if hasHeaderRow {
|
||||
guard let headerRow = next() else {
|
||||
throw CSVError.HeaderReadError
|
||||
throw CSVError.headerReadError
|
||||
}
|
||||
_headerRow = headerRow
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public init<T: UnicodeCodec where T.CodeUnit == UInt8>(
|
||||
stream: InputStream,
|
||||
codecType: T.Type,
|
||||
|
@ -61,51 +62,37 @@ public struct CSV: IteratorProtocol, Sequence {
|
|||
delimiter: UnicodeScalar = defaultDelimiter)
|
||||
throws
|
||||
{
|
||||
let reader = BinaryReader(stream: stream, encoding: .utf8, closeOnDeinit: true)
|
||||
let reader = BinaryReader(stream: stream, endian: .unknown, closeOnDeinit: true)
|
||||
let iterator = UnicodeIterator(input: reader.makeUInt8Iterator(), inputEncoding: codecType)
|
||||
try self.init(iterator: iterator, hasHeaderRow: hasHeaderRow, delimiter: delimiter)
|
||||
}
|
||||
|
||||
/**
|
||||
Create CSV instance with `NSInputStream`.
|
||||
|
||||
- parameter stream: An `NSInputStream` object. If the stream is not open, initializer opens automatically.
|
||||
- parameter encoding: The character encoding for `stream`. Default: `NSUTF8StringEncoding`.
|
||||
- parameter hasHeaderRow: `true` if the CSV has a header row, otherwise `false`. Default: `false`.
|
||||
- parameter delimiter: Default: `","`.
|
||||
*/
|
||||
public init(
|
||||
|
||||
public init<T: UnicodeCodec where T.CodeUnit == UInt16>(
|
||||
stream: InputStream,
|
||||
encoding: String.Encoding = defaultEncoding,
|
||||
codecType: T.Type,
|
||||
endian: Endian = .big,
|
||||
hasHeaderRow: Bool = defaultHasHeaderRow,
|
||||
delimiter: UnicodeScalar = defaultDelimiter)
|
||||
throws
|
||||
{
|
||||
let reader = BinaryReader(stream: stream, encoding: encoding, closeOnDeinit: true)
|
||||
|
||||
switch encoding {
|
||||
case String.Encoding.utf32,
|
||||
String.Encoding.utf32BigEndian,
|
||||
String.Encoding.utf32LittleEndian:
|
||||
let iterator = UnicodeIterator(input: reader.makeUInt32Iterator(), inputEncoding: UTF32.self)
|
||||
try self.init(iterator: iterator, hasHeaderRow: hasHeaderRow, delimiter: delimiter)
|
||||
|
||||
case String.Encoding.utf16,
|
||||
String.Encoding.utf16BigEndian,
|
||||
String.Encoding.utf16LittleEndian:
|
||||
let iterator = UnicodeIterator(input: reader.makeUInt16Iterator(), inputEncoding: UTF16.self)
|
||||
try self.init(iterator: iterator, hasHeaderRow: hasHeaderRow, delimiter: delimiter)
|
||||
|
||||
case String.Encoding.utf8,
|
||||
String.Encoding.ascii:
|
||||
let iterator = UnicodeIterator(input: reader.makeUInt8Iterator(), inputEncoding: UTF8.self)
|
||||
try self.init(iterator: iterator, hasHeaderRow: hasHeaderRow, delimiter: delimiter)
|
||||
|
||||
default:
|
||||
throw CSVError.StringEncodingMismatch
|
||||
}
|
||||
let reader = BinaryReader(stream: stream, endian: endian, closeOnDeinit: true)
|
||||
let iterator = UnicodeIterator(input: reader.makeUInt16Iterator(), inputEncoding: codecType)
|
||||
try self.init(iterator: iterator, hasHeaderRow: hasHeaderRow, delimiter: delimiter)
|
||||
}
|
||||
|
||||
|
||||
/**
 Create CSV instance that decodes 32-bit code units read from an `InputStream`.

 - parameter stream: The stream to read from. `BinaryReader` opens it if its status is `.notOpen`.
 - parameter codecType: A `UnicodeCodec` type whose `CodeUnit` is `UInt32` (for example `UTF32.self`).
 - parameter endian: Byte order of the code units. Default: `.big`. A byte-order mark detected at the start of the stream overrides this value.
 - parameter hasHeaderRow: `true` if the CSV has a header row, otherwise `false`. Default: `defaultHasHeaderRow`.
 - parameter delimiter: Default: `defaultDelimiter`.
 - throws: `CSVError.headerReadError` if `hasHeaderRow` is `true` and no header row could be read.
 */
public init<T: UnicodeCodec where T.CodeUnit == UInt32>(
    stream: InputStream,
    codecType: T.Type,
    endian: Endian = .big,
    hasHeaderRow: Bool = defaultHasHeaderRow,
    delimiter: UnicodeScalar = defaultDelimiter)
    throws
{
    // Raw bytes -> UInt32 code units (byte order handled by the reader).
    let binaryReader = BinaryReader(stream: stream, endian: endian, closeOnDeinit: true)
    let codeUnits = binaryReader.makeUInt32Iterator()

    // Code units -> Unicode scalars, then hand off to the scalar-based initializer.
    let scalars = UnicodeIterator(input: codeUnits, inputEncoding: codecType)
    try self.init(iterator: scalars, hasHeaderRow: hasHeaderRow, delimiter: delimiter)
}
|
||||
|
||||
// MARK: IteratorProtocol
|
||||
|
||||
/// Advances and returns the next element of the underlying sequence, or
|
||||
|
|
|
@ -9,9 +9,9 @@
|
|||
import Foundation
|
||||
|
||||
public enum CSVError: ErrorProtocol {
|
||||
case ParameterError
|
||||
case StreamError
|
||||
case HeaderReadError
|
||||
case MemoryAllocationFailed
|
||||
case StringEncodingMismatch
|
||||
// case parameterError
|
||||
// case streamError
|
||||
case headerReadError
|
||||
// case memoryAllocationFailed
|
||||
// case stringEncodingMismatch
|
||||
}
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
//
|
||||
// Endian.swift
|
||||
// CSV
|
||||
//
|
||||
// Created by Yasuhiro Hatta on 2016/06/21.
|
||||
// Copyright © 2016年 yaslab. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// Byte order used when multi-byte code units are read from a stream.
///
/// - `big`: most significant byte first.
/// - `little`: least significant byte first.
/// - `unknown`: no byte order is known. This is the default that `BinaryReader`
///   starts with, and what `readBOM` reports for a UTF-8 byte-order mark.
public enum Endian {
    case big, little, unknown
}
|
|
@ -1,31 +0,0 @@
|
|||
//
|
||||
// String.Encoding+endian.swift
|
||||
// CSV
|
||||
//
|
||||
// Created by Yasuhiro Hatta on 2016/06/21.
|
||||
// Copyright © 2016年 yaslab. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
internal enum Endian {
|
||||
case big
|
||||
case little
|
||||
case unknown
|
||||
}
|
||||
|
||||
extension String.Encoding {
|
||||
|
||||
internal var endian: Endian {
|
||||
switch self {
|
||||
case String.Encoding.utf16: return .big
|
||||
case String.Encoding.utf16BigEndian: return .big
|
||||
case String.Encoding.utf16LittleEndian: return .little
|
||||
case String.Encoding.utf32: return .big
|
||||
case String.Encoding.utf32BigEndian: return .big
|
||||
case String.Encoding.utf32LittleEndian: return .little
|
||||
default: return .unknown
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -13,69 +13,60 @@ class CSVReaderTests: XCTestCase {
|
|||
|
||||
func test1Line() {
|
||||
let csv = "abab,cdcd,efef"
|
||||
let encoding = String.Encoding.utf8
|
||||
let records = parse(csv: csv, encoding: encoding)
|
||||
let records = parse(csv: csv)
|
||||
XCTAssertEqual(records[0], ["abab", "cdcd", "efef"])
|
||||
}
|
||||
|
||||
func testQuoted() {
|
||||
let csv = "abab,\"cdcd\",efef"
|
||||
let encoding = String.Encoding.utf8
|
||||
let records = parse(csv: csv, encoding: encoding)
|
||||
let records = parse(csv: csv)
|
||||
XCTAssertEqual(records[0], ["abab", "cdcd", "efef"])
|
||||
}
|
||||
|
||||
func testLF() {
|
||||
let csv = "abab,cdcd,efef\nzxcv,asdf,qwer"
|
||||
let encoding = String.Encoding.utf8
|
||||
let records = parse(csv: csv, encoding: encoding)
|
||||
let records = parse(csv: csv)
|
||||
XCTAssertEqual(records[0], ["abab", "cdcd", "efef"])
|
||||
XCTAssertEqual(records[1], ["zxcv", "asdf", "qwer"])
|
||||
}
|
||||
|
||||
func testCommaInQuotationMarks() {
|
||||
let csv = "abab,\"cd,cd\",efef"
|
||||
let encoding = String.Encoding.utf8
|
||||
let records = parse(csv: csv, encoding: encoding)
|
||||
let records = parse(csv: csv)
|
||||
XCTAssertEqual(records[0], ["abab", "cd,cd", "efef"])
|
||||
}
|
||||
|
||||
func testCRLF() {
|
||||
let csv = "abab,cdcd,efef\r\nzxcv,asdf,qwer"
|
||||
let encoding = String.Encoding.utf8
|
||||
let records = parse(csv: csv, encoding: encoding)
|
||||
let records = parse(csv: csv)
|
||||
XCTAssertEqual(records[0], ["abab", "cdcd", "efef"])
|
||||
XCTAssertEqual(records[1], ["zxcv", "asdf", "qwer"])
|
||||
}
|
||||
|
||||
func testEscapedQuotationMark() {
|
||||
func testEscapedQuotationMark1() {
|
||||
let csv = "abab,\"\"\"cdcd\",efef\r\nzxcv,asdf,qwer"
|
||||
let encoding = String.Encoding.utf8
|
||||
let records = parse(csv: csv, encoding: encoding)
|
||||
let records = parse(csv: csv)
|
||||
XCTAssertEqual(records[0], ["abab", "\"cdcd", "efef"])
|
||||
XCTAssertEqual(records[1], ["zxcv", "asdf", "qwer"])
|
||||
}
|
||||
|
||||
func testQuotationMark2() {
|
||||
func testEscapedQuotationMark2() {
|
||||
let csv = "abab,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\""
|
||||
let encoding = String.Encoding.utf8
|
||||
let records = parse(csv: csv, encoding: encoding)
|
||||
let records = parse(csv: csv)
|
||||
XCTAssertEqual(records[0], ["abab", "cdcd", "efef"])
|
||||
XCTAssertEqual(records[1], ["zxcv", "asdf", "qw\"er"])
|
||||
}
|
||||
|
||||
func testEmptyField() {
|
||||
let csv = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
|
||||
let encoding = String.Encoding.utf8
|
||||
let records = parse(csv: csv, encoding: encoding)
|
||||
let records = parse(csv: csv)
|
||||
XCTAssertEqual(records[0], ["abab", "", "cdcd", "efef"])
|
||||
XCTAssertEqual(records[1], ["zxcv", "asdf", "qw\"er", ""])
|
||||
}
|
||||
|
||||
func testLastCR() {
|
||||
let csv = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\",\r"
|
||||
let encoding = String.Encoding.utf8
|
||||
let records = parse(csv: csv, encoding: encoding)
|
||||
let records = parse(csv: csv)
|
||||
XCTAssertEqual(records.count, 2)
|
||||
XCTAssertEqual(records[0], ["abab", "", "cdcd", "efef"])
|
||||
XCTAssertEqual(records[1], ["zxcv", "asdf", "qw\"er", ""])
|
||||
|
@ -83,8 +74,7 @@ class CSVReaderTests: XCTestCase {
|
|||
|
||||
func testLastCRLF() {
|
||||
let csv = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\",\r\n"
|
||||
let encoding = String.Encoding.utf8
|
||||
let records = parse(csv: csv, encoding: encoding)
|
||||
let records = parse(csv: csv)
|
||||
XCTAssertEqual(records.count, 2)
|
||||
XCTAssertEqual(records[0], ["abab", "", "cdcd", "efef"])
|
||||
XCTAssertEqual(records[1], ["zxcv", "asdf", "qw\"er", ""])
|
||||
|
@ -92,8 +82,7 @@ class CSVReaderTests: XCTestCase {
|
|||
|
||||
func testLastLF() {
|
||||
let csv = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\",\n"
|
||||
let encoding = String.Encoding.utf8
|
||||
let records = parse(csv: csv, encoding: encoding)
|
||||
let records = parse(csv: csv)
|
||||
XCTAssertEqual(records.count, 2)
|
||||
XCTAssertEqual(records[0], ["abab", "", "cdcd", "efef"])
|
||||
XCTAssertEqual(records[1], ["zxcv", "asdf", "qw\"er", ""])
|
||||
|
@ -101,8 +90,7 @@ class CSVReaderTests: XCTestCase {
|
|||
|
||||
func testLFInQuotationMarks() {
|
||||
let csv = "abab,,\"\rcdcd\n\",efef\r\nzxcv,asdf,\"qw\"\"er\",\n"
|
||||
let encoding = String.Encoding.utf8
|
||||
let records = parse(csv: csv, encoding: encoding)
|
||||
let records = parse(csv: csv)
|
||||
XCTAssertEqual(records.count, 2)
|
||||
XCTAssertEqual(records[0], ["abab", "", "\rcdcd\n", "efef"])
|
||||
XCTAssertEqual(records[1], ["zxcv", "asdf", "qw\"er", ""])
|
||||
|
@ -110,8 +98,7 @@ class CSVReaderTests: XCTestCase {
|
|||
|
||||
func testLineBreakLF() {
|
||||
let csv = "qwe,asd\nzxc,rty"
|
||||
let encoding = String.Encoding.utf8
|
||||
let records = parse(csv: csv, encoding: encoding)
|
||||
let records = parse(csv: csv)
|
||||
XCTAssertEqual(records.count, 2)
|
||||
XCTAssertEqual(records[0], ["qwe", "asd"])
|
||||
XCTAssertEqual(records[1], ["zxc", "rty"])
|
||||
|
@ -119,8 +106,7 @@ class CSVReaderTests: XCTestCase {
|
|||
|
||||
func testLineBreakCR() {
|
||||
let csv = "qwe,asd\rzxc,rty"
|
||||
let encoding = String.Encoding.utf8
|
||||
let records = parse(csv: csv, encoding: encoding)
|
||||
let records = parse(csv: csv)
|
||||
XCTAssertEqual(records.count, 2)
|
||||
XCTAssertEqual(records[0], ["qwe", "asd"])
|
||||
XCTAssertEqual(records[1], ["zxc", "rty"])
|
||||
|
@ -128,8 +114,7 @@ class CSVReaderTests: XCTestCase {
|
|||
|
||||
func testLineBreakCRLF() {
|
||||
let csv = "qwe,asd\r\nzxc,rty"
|
||||
let encoding = String.Encoding.utf8
|
||||
let records = parse(csv: csv, encoding: encoding)
|
||||
let records = parse(csv: csv)
|
||||
XCTAssertEqual(records.count, 2)
|
||||
XCTAssertEqual(records[0], ["qwe", "asd"])
|
||||
XCTAssertEqual(records[1], ["zxc", "rty"])
|
||||
|
@ -137,8 +122,7 @@ class CSVReaderTests: XCTestCase {
|
|||
|
||||
func testLineBreakLFLF() {
|
||||
let csv = "qwe,asd\n\nzxc,rty"
|
||||
let encoding = String.Encoding.utf8
|
||||
let records = parse(csv: csv, encoding: encoding)
|
||||
let records = parse(csv: csv)
|
||||
XCTAssertEqual(records.count, 3)
|
||||
XCTAssertEqual(records[0], ["qwe", "asd"])
|
||||
XCTAssertEqual(records[1], [""])
|
||||
|
@ -147,8 +131,7 @@ class CSVReaderTests: XCTestCase {
|
|||
|
||||
func testLineBreakCRCR() {
|
||||
let csv = "qwe,asd\r\rzxc,rty"
|
||||
let encoding = String.Encoding.utf8
|
||||
let records = parse(csv: csv, encoding: encoding)
|
||||
let records = parse(csv: csv)
|
||||
XCTAssertEqual(records.count, 3)
|
||||
XCTAssertEqual(records[0], ["qwe", "asd"])
|
||||
XCTAssertEqual(records[1], [""])
|
||||
|
@ -157,125 +140,159 @@ class CSVReaderTests: XCTestCase {
|
|||
|
||||
func testLineBreakCRLFCRLF() {
|
||||
let csv = "qwe,asd\r\n\r\nzxc,rty"
|
||||
let encoding = String.Encoding.utf8
|
||||
let records = parse(csv: csv, encoding: encoding)
|
||||
let records = parse(csv: csv)
|
||||
XCTAssertEqual(records.count, 3)
|
||||
XCTAssertEqual(records[0], ["qwe", "asd"])
|
||||
XCTAssertEqual(records[1], [""])
|
||||
XCTAssertEqual(records[2], ["zxc", "rty"])
|
||||
}
|
||||
|
||||
func testEncodingWithoutBOM() {
|
||||
var index = 0
|
||||
let csv = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
|
||||
for encoding in allEncodings() {
|
||||
print("index: \(index)")
|
||||
let records = parse(csv: csv, encoding: encoding)
|
||||
XCTAssertEqual(records[0], ["abab", "", "cdcd", "efef"])
|
||||
XCTAssertEqual(records[1], ["zxcv", "asdf", "qw\"er", ""])
|
||||
index += 1
|
||||
}
|
||||
}
|
||||
// func testEncodingWithoutBOM() {
|
||||
// var index = 0
|
||||
// let csv = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
|
||||
// for encoding in allEncodings() {
|
||||
// print("index: \(index)")
|
||||
// let records = parse(csv: csv, encoding: encoding)
|
||||
// XCTAssertEqual(records[0], ["abab", "", "cdcd", "efef"])
|
||||
// XCTAssertEqual(records[1], ["zxcv", "asdf", "qw\"er", ""])
|
||||
// index += 1
|
||||
// }
|
||||
// }
|
||||
|
||||
func testUTF8WithBOM() {
|
||||
let csv = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
|
||||
let csvString = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
|
||||
let encoding = String.Encoding.utf8
|
||||
var mutableData = Data()
|
||||
mutableData.append(utf8BOM, count: utf8BOM.count)
|
||||
mutableData.append(csv.data(using: encoding)!)
|
||||
let records = parse(data: mutableData, encoding: encoding)
|
||||
mutableData.append(csvString.data(using: encoding)!)
|
||||
let stream = InputStream(data: mutableData)
|
||||
let csv = try! CSV(stream: stream, codecType: UTF8.self)
|
||||
let records = getRecords(csv: csv)
|
||||
XCTAssertEqual(records[0], ["abab", "", "cdcd", "efef"])
|
||||
XCTAssertEqual(records[1], ["zxcv", "asdf", "qw\"er", ""])
|
||||
}
|
||||
|
||||
func testUTF16WithNativeEndianBOM() {
|
||||
let csv = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
|
||||
let csvString = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
|
||||
let encoding = String.Encoding.utf16
|
||||
let records = parse(csv: csv, encoding: encoding)
|
||||
var mutableData = Data()
|
||||
mutableData.append(csvString.data(using: encoding)!)
|
||||
let stream = InputStream(data: mutableData)
|
||||
let csv = try! CSV(stream: stream, codecType: UTF16.self)
|
||||
let records = getRecords(csv: csv)
|
||||
XCTAssertEqual(records[0], ["abab", "", "cdcd", "efef"])
|
||||
XCTAssertEqual(records[1], ["zxcv", "asdf", "qw\"er", ""])
|
||||
}
|
||||
|
||||
func testUTF16WithBigEndianBOM() {
|
||||
let csv = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
|
||||
let encoding = String.Encoding.utf16
|
||||
let csvString = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
|
||||
let encoding = String.Encoding.utf16BigEndian
|
||||
var mutableData = Data()
|
||||
mutableData.append(utf16BigEndianBOM, count: utf16BigEndianBOM.count)
|
||||
mutableData.append(csv.data(using: String.Encoding.utf16BigEndian)!)
|
||||
let records = parse(data: mutableData, encoding: encoding)
|
||||
mutableData.append(csvString.data(using: encoding)!)
|
||||
let stream = InputStream(data: mutableData)
|
||||
let csv = try! CSV(stream: stream, codecType: UTF16.self, endian: .unknown)
|
||||
let records = getRecords(csv: csv)
|
||||
XCTAssertEqual(records[0], ["abab", "", "cdcd", "efef"])
|
||||
XCTAssertEqual(records[1], ["zxcv", "asdf", "qw\"er", ""])
|
||||
}
|
||||
|
||||
func testUTF16WithLittleEndianBOM() {
|
||||
let csv = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
|
||||
let encoding = String.Encoding.utf16
|
||||
let csvString = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
|
||||
let encoding = String.Encoding.utf16LittleEndian
|
||||
var mutableData = Data()
|
||||
mutableData.append(utf16LittleEndianBOM, count: utf16LittleEndianBOM.count)
|
||||
mutableData.append(csv.data(using: String.Encoding.utf16LittleEndian)!)
|
||||
let records = parse(data: mutableData, encoding: encoding)
|
||||
mutableData.append(csvString.data(using: encoding)!)
|
||||
let stream = InputStream(data: mutableData)
|
||||
let csv = try! CSV(stream: stream, codecType: UTF16.self, endian: .unknown)
|
||||
let records = getRecords(csv: csv)
|
||||
XCTAssertEqual(records[0], ["abab", "", "cdcd", "efef"])
|
||||
XCTAssertEqual(records[1], ["zxcv", "asdf", "qw\"er", ""])
|
||||
}
|
||||
|
||||
func testUTF32WithNativeEndianBOM() {
|
||||
let csv = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
|
||||
let csvString = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
|
||||
let encoding = String.Encoding.utf32
|
||||
let records = parse(csv: csv, encoding: encoding)
|
||||
var mutableData = Data()
|
||||
mutableData.append(csvString.data(using: encoding)!)
|
||||
let stream = InputStream(data: mutableData)
|
||||
let csv = try! CSV(stream: stream, codecType: UTF32.self)
|
||||
let records = getRecords(csv: csv)
|
||||
XCTAssertEqual(records[0], ["abab", "", "cdcd", "efef"])
|
||||
XCTAssertEqual(records[1], ["zxcv", "asdf", "qw\"er", ""])
|
||||
}
|
||||
|
||||
func testUTF32WithBigEndianBOM() {
|
||||
let csv = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
|
||||
let encoding = String.Encoding.utf32
|
||||
let csvString = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
|
||||
let encoding = String.Encoding.utf32BigEndian
|
||||
var mutableData = Data()
|
||||
mutableData.append(utf32BigEndianBOM, count: utf32BigEndianBOM.count)
|
||||
mutableData.append(csv.data(using: String.Encoding.utf32BigEndian)!)
|
||||
let records = parse(data: mutableData, encoding: encoding)
|
||||
mutableData.append(csvString.data(using: encoding)!)
|
||||
let stream = InputStream(data: mutableData)
|
||||
let csv = try! CSV(stream: stream, codecType: UTF32.self, endian: .unknown)
|
||||
let records = getRecords(csv: csv)
|
||||
XCTAssertEqual(records[0], ["abab", "", "cdcd", "efef"])
|
||||
XCTAssertEqual(records[1], ["zxcv", "asdf", "qw\"er", ""])
|
||||
}
|
||||
|
||||
func testUTF32WithLittleEndianBOM() {
|
||||
let csv = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
|
||||
let encoding = String.Encoding.utf32
|
||||
let csvString = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
|
||||
let encoding = String.Encoding.utf32LittleEndian
|
||||
var mutableData = Data()
|
||||
mutableData.append(utf32LittleEndianBOM, count: utf32LittleEndianBOM.count)
|
||||
mutableData.append(csv.data(using: String.Encoding.utf32LittleEndian)!)
|
||||
let records = parse(data: mutableData, encoding: encoding)
|
||||
mutableData.append(csvString.data(using: encoding)!)
|
||||
let stream = InputStream(data: mutableData)
|
||||
let csv = try! CSV(stream: stream, codecType: UTF32.self, endian: .unknown)
|
||||
let records = getRecords(csv: csv)
|
||||
XCTAssertEqual(records[0], ["abab", "", "cdcd", "efef"])
|
||||
XCTAssertEqual(records[1], ["zxcv", "asdf", "qw\"er", ""])
|
||||
}
|
||||
|
||||
func allEncodings() -> [String.Encoding] {
|
||||
return [
|
||||
// multi-byte character encodings
|
||||
//String.Encoding.shiftJIS,
|
||||
//String.Encoding.japaneseEUC,
|
||||
String.Encoding.utf8,
|
||||
// wide character encodings
|
||||
String.Encoding.utf16BigEndian,
|
||||
String.Encoding.utf16LittleEndian,
|
||||
String.Encoding.utf32BigEndian,
|
||||
String.Encoding.utf32LittleEndian,
|
||||
]
|
||||
}
|
||||
|
||||
func parse(csv: String, encoding: String.Encoding) -> [[String]] {
|
||||
let data = csv.data(using: encoding)!
|
||||
return parse(data: data, encoding: encoding)
|
||||
}
|
||||
|
||||
func parse(data: Data, encoding: String.Encoding) -> [[String]] {
|
||||
let stream = InputStream(data: data)
|
||||
let reader = try! CSV(stream: stream, encoding: encoding)
|
||||
// func allEncodings() -> [String.Encoding] {
|
||||
// return [
|
||||
// // multi-byte character encodings
|
||||
// //String.Encoding.shiftJIS,
|
||||
// //String.Encoding.japaneseEUC,
|
||||
// String.Encoding.utf8,
|
||||
// // wide character encodings
|
||||
// String.Encoding.utf16BigEndian,
|
||||
// String.Encoding.utf16LittleEndian,
|
||||
// String.Encoding.utf32BigEndian,
|
||||
// String.Encoding.utf32LittleEndian,
|
||||
// ]
|
||||
// }
|
||||
|
||||
/// Parses `csv` with `CSV(string:)` and returns all of its rows in order.
/// Traps via `try!` if the initializer throws, which fails the calling test.
func parse(csv: String) -> [[String]] {
    let parsed = try! CSV(string: csv)
    // `CSV` is a `Sequence` of rows; collect them all at once.
    return Array(parsed)
}
|
||||
|
||||
/// Drains `csv` and returns every row it yields, in order.
func getRecords(csv: CSV) -> [[String]] {
    var rowIterator = csv.makeIterator()
    var rows: [[String]] = []
    while let row = rowIterator.next() {
        rows.append(row)
    }
    return rows
}
|
||||
|
||||
// func parse(csv: String, encoding: String.Encoding) -> [[String]] {
|
||||
// let data = csv.data(using: encoding)!
|
||||
// return parse(data: data, encoding: encoding)
|
||||
// }
|
||||
//
|
||||
// func parse(data: Data, encoding: String.Encoding) -> [[String]] {
|
||||
// let stream = InputStream(data: data)
|
||||
// let reader = try! CSV(stream: stream, encoding: encoding)
|
||||
// var records = [[String]]()
|
||||
// for row in reader {
|
||||
// records.append(row)
|
||||
// }
|
||||
// return records
|
||||
// }
|
||||
|
||||
static var allTests : [(String, (CSVReaderTests) -> () throws -> Void)] {
|
||||
return [
|
||||
|
|
Loading…
Reference in New Issue