Use Swift built-in Unicode Codecs

This commit is contained in:
Yasuhiro Hatta 2016-06-26 02:35:13 +09:00
parent 4063a440e5
commit 29d0f2b091
8 changed files with 197 additions and 216 deletions

View File

@ -11,10 +11,10 @@
0E0F160A1D197D6000C92580 /* AnyIterator.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E0F16081D197D6000C92580 /* AnyIterator.swift */; };
0E0F160B1D197D6000C92580 /* AnyIterator.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E0F16081D197D6000C92580 /* AnyIterator.swift */; };
0E0F160C1D197D6000C92580 /* AnyIterator.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E0F16081D197D6000C92580 /* AnyIterator.swift */; };
0E0F160E1D197DB800C92580 /* String.Encoding+endian.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E0F160D1D197DB800C92580 /* String.Encoding+endian.swift */; };
0E0F160F1D197DB800C92580 /* String.Encoding+endian.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E0F160D1D197DB800C92580 /* String.Encoding+endian.swift */; };
0E0F16101D197DB800C92580 /* String.Encoding+endian.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E0F160D1D197DB800C92580 /* String.Encoding+endian.swift */; };
0E0F16111D197DB800C92580 /* String.Encoding+endian.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E0F160D1D197DB800C92580 /* String.Encoding+endian.swift */; };
0E0F160E1D197DB800C92580 /* Endian.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E0F160D1D197DB800C92580 /* Endian.swift */; };
0E0F160F1D197DB800C92580 /* Endian.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E0F160D1D197DB800C92580 /* Endian.swift */; };
0E0F16101D197DB800C92580 /* Endian.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E0F160D1D197DB800C92580 /* Endian.swift */; };
0E0F16111D197DB800C92580 /* Endian.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E0F160D1D197DB800C92580 /* Endian.swift */; };
0E7E8C8C1D0BC7BB0057A1C1 /* CSV.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 0E7E8C811D0BC7BB0057A1C1 /* CSV.framework */; };
0E7E8CA11D0BC7F10057A1C1 /* CSV.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E7E8C9D1D0BC7F10057A1C1 /* CSV.swift */; };
0E7E8CA21D0BC7F10057A1C1 /* CSVError.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E7E8C9E1D0BC7F10057A1C1 /* CSVError.swift */; };
@ -83,7 +83,7 @@
/* Begin PBXFileReference section */
0E0F16081D197D6000C92580 /* AnyIterator.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = AnyIterator.swift; sourceTree = "<group>"; };
0E0F160D1D197DB800C92580 /* String.Encoding+endian.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "String.Encoding+endian.swift"; sourceTree = "<group>"; };
0E0F160D1D197DB800C92580 /* Endian.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Endian.swift; sourceTree = "<group>"; };
0E7E8C811D0BC7BB0057A1C1 /* CSV.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = CSV.framework; sourceTree = BUILT_PRODUCTS_DIR; };
0E7E8C8B1D0BC7BB0057A1C1 /* CSVTests-iOS.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = "CSVTests-iOS.xctest"; sourceTree = BUILT_PRODUCTS_DIR; };
0E7E8C9D1D0BC7F10057A1C1 /* CSV.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = CSV.swift; sourceTree = "<group>"; };
@ -194,8 +194,8 @@
0E9317D81D0DB30800AC20A0 /* CSV+subscript.swift */,
0E7E8C9E1D0BC7F10057A1C1 /* CSVError.swift */,
0E7E8C9F1D0BC7F10057A1C1 /* CSVVersion.h */,
0E0F160D1D197DB800C92580 /* Endian.swift */,
0E7E8CAC1D0BC8610057A1C1 /* Info.plist */,
0E0F160D1D197DB800C92580 /* String.Encoding+endian.swift */,
0EA2AB801D183BA9003EC967 /* UnicodeIterator.swift */,
);
path = Sources;
@ -502,7 +502,7 @@
0EA2AB821D183BA9003EC967 /* UnicodeIterator.swift in Sources */,
0E9317DA1D0DB30800AC20A0 /* CSV+subscript.swift in Sources */,
0E7E8CA11D0BC7F10057A1C1 /* CSV.swift in Sources */,
0E0F160F1D197DB800C92580 /* String.Encoding+endian.swift in Sources */,
0E0F160F1D197DB800C92580 /* Endian.swift in Sources */,
0E7E8CA21D0BC7F10057A1C1 /* CSVError.swift in Sources */,
0EA2AB7D1D183B45003EC967 /* BinaryReader.swift in Sources */,
);
@ -527,7 +527,7 @@
0EA2AB841D183BA9003EC967 /* UnicodeIterator.swift in Sources */,
0E9317DC1D0DB30800AC20A0 /* CSV+subscript.swift in Sources */,
0E7E8CBE1D0BC9D70057A1C1 /* CSV.swift in Sources */,
0E0F16111D197DB800C92580 /* String.Encoding+endian.swift in Sources */,
0E0F16111D197DB800C92580 /* Endian.swift in Sources */,
0E7E8CBF1D0BC9D70057A1C1 /* CSVError.swift in Sources */,
0EA2AB7F1D183B45003EC967 /* BinaryReader.swift in Sources */,
);
@ -542,7 +542,7 @@
0EA2AB811D183BA9003EC967 /* UnicodeIterator.swift in Sources */,
0E9317D91D0DB30800AC20A0 /* CSV+subscript.swift in Sources */,
0E7E8CE01D0BCA8E0057A1C1 /* CSV.swift in Sources */,
0E0F160E1D197DB800C92580 /* String.Encoding+endian.swift in Sources */,
0E0F160E1D197DB800C92580 /* Endian.swift in Sources */,
0E7E8CE11D0BCA8E0057A1C1 /* CSVError.swift in Sources */,
0EA2AB7C1D183B45003EC967 /* BinaryReader.swift in Sources */,
);
@ -567,7 +567,7 @@
0EA2AB831D183BA9003EC967 /* UnicodeIterator.swift in Sources */,
0E9317DB1D0DB30800AC20A0 /* CSV+subscript.swift in Sources */,
0E7E8D001D0BCDCF0057A1C1 /* CSV.swift in Sources */,
0E0F16101D197DB800C92580 /* String.Encoding+endian.swift in Sources */,
0E0F16101D197DB800C92580 /* Endian.swift in Sources */,
0E7E8D011D0BCDCF0057A1C1 /* CSVError.swift in Sources */,
0EA2AB7E1D183B45003EC967 /* BinaryReader.swift in Sources */,
);

View File

@ -8,26 +8,32 @@
import Foundation
internal func readBOM(buffer: UnsafePointer<UInt8>, length: Int) -> (String.Encoding, Int)? {
internal let utf8BOM: [UInt8] = [0xef, 0xbb, 0xbf]
internal let utf16BigEndianBOM: [UInt8] = [0xfe, 0xff]
internal let utf16LittleEndianBOM: [UInt8] = [0xff, 0xfe]
internal let utf32BigEndianBOM: [UInt8] = [0x00, 0x00, 0xfe, 0xff]
internal let utf32LittleEndianBOM: [UInt8] = [0xff, 0xfe, 0x00, 0x00]
internal func readBOM(buffer: UnsafePointer<UInt8>, length: Int) -> (Endian, Int)? {
if length >= 4 {
if memcmp(buffer, utf32BigEndianBOM, 4) == 0 {
return (String.Encoding.utf32BigEndian, 4)
return (.big, 4)
}
if memcmp(buffer, utf32LittleEndianBOM, 4) == 0 {
return (String.Encoding.utf32LittleEndian, 4)
return (.little, 4)
}
}
if length >= 3 {
if memcmp(buffer, utf8BOM, 3) == 0 {
return (String.Encoding.utf8, 3)
return (.unknown, 3)
}
}
if length >= 2 {
if memcmp(buffer, utf16BigEndianBOM, 2) == 0 {
return (String.Encoding.utf16BigEndian, 2)
return (.big, 2)
}
if memcmp(buffer, utf16LittleEndianBOM, 2) == 0 {
return (String.Encoding.utf16LittleEndian, 2)
return (.little, 2)
}
}
return nil
@ -36,15 +42,15 @@ internal func readBOM(buffer: UnsafePointer<UInt8>, length: Int) -> (String.Enco
internal class BinaryReader {
private let stream: InputStream
private let encoding: String.Encoding
private let endian: Endian
private let closeOnDeinit: Bool
private var buffer = [UInt8].init(repeating: 0, count: 4)
private let bufferSize = 4
private var bufferOffset = 0
internal init(stream: InputStream, encoding: String.Encoding = .utf8, closeOnDeinit: Bool = true) {
var encoding = encoding
internal init(stream: InputStream, endian: Endian = .unknown, closeOnDeinit: Bool = true) {
var endian = endian
if stream.streamStatus == .notOpen {
stream.open()
@ -52,12 +58,12 @@ internal class BinaryReader {
let readCount = stream.read(&buffer, maxLength: bufferSize)
if let (e, l) = readBOM(buffer: &buffer, length: readCount) {
encoding = e
endian = e
bufferOffset = l
}
self.stream = stream
self.encoding = encoding
self.endian = endian
self.closeOnDeinit = closeOnDeinit
}
@ -116,7 +122,7 @@ internal class BinaryReader {
throw NSError(domain: "", code: 0, userInfo: nil)
}
let tmp = UnsafeMutablePointer<UInt16>(buffer)
switch encoding.endian {
switch endian {
case .big:
return CFSwapInt16BigToHost(tmp[0])
case .little:
@ -139,7 +145,7 @@ internal class BinaryReader {
throw NSError(domain: "", code: 0, userInfo: nil)
}
let tmp = UnsafeMutablePointer<UInt32>(buffer)
switch encoding.endian {
switch endian {
case .big:
return CFSwapInt32BigToHost(tmp[0])
case .little:

View File

@ -11,31 +11,18 @@ import Foundation
extension CSV {
public init(
fileAtPath path: String,
encoding: String.Encoding = defaultEncoding,
stream: InputStream,
hasHeaderRow: Bool = defaultHasHeaderRow,
delimiter: UnicodeScalar = defaultDelimiter)
throws
{
guard let stream = InputStream(fileAtPath: path) else {
throw CSVError.StreamError
}
try self.init(stream: stream, encoding: encoding, hasHeaderRow: hasHeaderRow, delimiter: delimiter)
try self.init(stream: stream, codecType: UTF8.self, hasHeaderRow: hasHeaderRow, delimiter: delimiter)
}
public init(
url: URL,
encoding: String.Encoding = defaultEncoding,
hasHeaderRow: Bool = defaultHasHeaderRow,
delimiter: UnicodeScalar = defaultDelimiter)
throws
{
guard let stream = InputStream(url: url) else {
throw CSVError.StreamError
}
try self.init(stream: stream, encoding: encoding, hasHeaderRow: hasHeaderRow, delimiter: delimiter)
}
}
extension CSV {
public init(
string: String,
hasHeaderRow: Bool = defaultHasHeaderRow,

View File

@ -12,16 +12,9 @@ private let LF = "\n".unicodeScalars.first!
private let CR = "\r".unicodeScalars.first!
private let DQUOTE = "\"".unicodeScalars.first!
internal let defaultEncoding: String.Encoding = .utf8
internal let defaultHasHeaderRow = false
internal let defaultDelimiter = ",".unicodeScalars.first!
internal let utf8BOM: [UInt8] = [0xef, 0xbb, 0xbf]
internal let utf16BigEndianBOM: [UInt8] = [0xfe, 0xff]
internal let utf16LittleEndianBOM: [UInt8] = [0xff, 0xfe]
internal let utf32BigEndianBOM: [UInt8] = [0x00, 0x00, 0xfe, 0xff]
internal let utf32LittleEndianBOM: [UInt8] = [0xff, 0xfe, 0x00, 0x00]
public struct CSV: IteratorProtocol, Sequence {
private var iterator: AnyIterator<UnicodeScalar>
@ -37,6 +30,14 @@ public struct CSV: IteratorProtocol, Sequence {
public var headerRow: [String]? { return _headerRow }
private var _headerRow: [String]? = nil
/**
Create CSV instance from an iterator of Unicode scalars.
- parameter iterator: An iterator whose elements are the `UnicodeScalar` values of the CSV text.
- parameter hasHeaderRow: `true` if the CSV has a header row, otherwise `false`.
- parameter delimiter: The field delimiter.
- throws: `CSVError.headerReadError` if `hasHeaderRow` is `true` and no header row can be read.
*/
internal init<T: IteratorProtocol where T.Element == UnicodeScalar>(
iterator: T,
hasHeaderRow: Bool,
@ -48,12 +49,12 @@ public struct CSV: IteratorProtocol, Sequence {
if hasHeaderRow {
guard let headerRow = next() else {
throw CSVError.HeaderReadError
throw CSVError.headerReadError
}
_headerRow = headerRow
}
}
public init<T: UnicodeCodec where T.CodeUnit == UInt8>(
stream: InputStream,
codecType: T.Type,
@ -61,51 +62,37 @@ public struct CSV: IteratorProtocol, Sequence {
delimiter: UnicodeScalar = defaultDelimiter)
throws
{
let reader = BinaryReader(stream: stream, encoding: .utf8, closeOnDeinit: true)
let reader = BinaryReader(stream: stream, endian: .unknown, closeOnDeinit: true)
let iterator = UnicodeIterator(input: reader.makeUInt8Iterator(), inputEncoding: codecType)
try self.init(iterator: iterator, hasHeaderRow: hasHeaderRow, delimiter: delimiter)
}
/**
Create CSV instance with `NSInputStream`.
- parameter stream: An `NSInputStream` object. If the stream is not open, initializer opens automatically.
- parameter encoding: The character encoding for `stream`. Default: `NSUTF8StringEncoding`.
- parameter hasHeaderRow: `true` if the CSV has a header row, otherwise `false`. Default: `false`.
- parameter delimiter: Default: `","`.
*/
public init(
public init<T: UnicodeCodec where T.CodeUnit == UInt16>(
stream: InputStream,
encoding: String.Encoding = defaultEncoding,
codecType: T.Type,
endian: Endian = .big,
hasHeaderRow: Bool = defaultHasHeaderRow,
delimiter: UnicodeScalar = defaultDelimiter)
throws
{
let reader = BinaryReader(stream: stream, encoding: encoding, closeOnDeinit: true)
switch encoding {
case String.Encoding.utf32,
String.Encoding.utf32BigEndian,
String.Encoding.utf32LittleEndian:
let iterator = UnicodeIterator(input: reader.makeUInt32Iterator(), inputEncoding: UTF32.self)
try self.init(iterator: iterator, hasHeaderRow: hasHeaderRow, delimiter: delimiter)
case String.Encoding.utf16,
String.Encoding.utf16BigEndian,
String.Encoding.utf16LittleEndian:
let iterator = UnicodeIterator(input: reader.makeUInt16Iterator(), inputEncoding: UTF16.self)
try self.init(iterator: iterator, hasHeaderRow: hasHeaderRow, delimiter: delimiter)
case String.Encoding.utf8,
String.Encoding.ascii:
let iterator = UnicodeIterator(input: reader.makeUInt8Iterator(), inputEncoding: UTF8.self)
try self.init(iterator: iterator, hasHeaderRow: hasHeaderRow, delimiter: delimiter)
default:
throw CSVError.StringEncodingMismatch
}
let reader = BinaryReader(stream: stream, endian: endian, closeOnDeinit: true)
let iterator = UnicodeIterator(input: reader.makeUInt16Iterator(), inputEncoding: codecType)
try self.init(iterator: iterator, hasHeaderRow: hasHeaderRow, delimiter: delimiter)
}
/// Creates a CSV instance that reads `stream` as 32-bit code units.
///
/// - parameter stream: The input stream to read. It is handed to a `BinaryReader` created with `closeOnDeinit: true`.
/// - parameter codecType: The `UnicodeCodec` type (with `UInt32` code units) used to decode the stream.
/// - parameter endian: Byte order passed to the `BinaryReader`. Default: `.big`.
///   `BinaryReader` replaces this value when it detects a byte-order mark at the start of the stream.
/// - parameter hasHeaderRow: `true` if the CSV has a header row, otherwise `false`. Default: `defaultHasHeaderRow`.
/// - parameter delimiter: The field delimiter. Default: `defaultDelimiter`.
/// - throws: Any error thrown by the iterator-based initializer.
public init<T: UnicodeCodec where T.CodeUnit == UInt32>(
    stream: InputStream,
    codecType: T.Type,
    endian: Endian = .big,
    hasHeaderRow: Bool = defaultHasHeaderRow,
    delimiter: UnicodeScalar = defaultDelimiter)
    throws
{
    // Raw bytes -> UInt32 code units (byte order handled by the reader).
    let binaryReader = BinaryReader(stream: stream, endian: endian, closeOnDeinit: true)
    // Code units -> Unicode scalars via the requested codec.
    let scalars = UnicodeIterator(input: binaryReader.makeUInt32Iterator(), inputEncoding: codecType)
    try self.init(iterator: scalars, hasHeaderRow: hasHeaderRow, delimiter: delimiter)
}
// MARK: IteratorProtocol
/// Advances and returns the next element of the underlying sequence, or

View File

@ -9,9 +9,9 @@
import Foundation
public enum CSVError: ErrorProtocol {
case ParameterError
case StreamError
case HeaderReadError
case MemoryAllocationFailed
case StringEncodingMismatch
// case parameterError
// case streamError
case headerReadError
// case memoryAllocationFailed
// case stringEncodingMismatch
}

15
Sources/Endian.swift Normal file
View File

@ -0,0 +1,15 @@
//
// Endian.swift
// CSV
//
// Created by Yasuhiro Hatta on 2016/06/21.
// Copyright © 2016 yaslab. All rights reserved.
//
import Foundation
/// Byte order of the multi-byte code units in an input stream.
///
/// `big` means most significant byte first, `little` means least significant
/// byte first, and `unknown` means the byte order is not specified.
public enum Endian {
    case big, little, unknown
}

View File

@ -1,31 +0,0 @@
//
// String.Encoding+endian.swift
// CSV
//
// Created by Yasuhiro Hatta on 2016/06/21.
// Copyright © 2016 yaslab. All rights reserved.
//
import Foundation
/// Byte order of the multi-byte code units in an input stream.
///
/// `big` means most significant byte first, `little` means least significant
/// byte first, and `unknown` means the byte order is not specified.
internal enum Endian {
    case big, little, unknown
}
extension String.Encoding {
    /// The byte order associated with this encoding.
    ///
    /// The UTF-16 and UTF-32 encodings without an explicit byte order, and
    /// their big-endian variants, map to `.big`; the little-endian variants
    /// map to `.little`; every other encoding maps to `.unknown`.
    internal var endian: Endian {
        switch self {
        case String.Encoding.utf16,
             String.Encoding.utf16BigEndian,
             String.Encoding.utf32,
             String.Encoding.utf32BigEndian:
            return .big
        case String.Encoding.utf16LittleEndian,
             String.Encoding.utf32LittleEndian:
            return .little
        default:
            return .unknown
        }
    }
}

View File

@ -13,69 +13,60 @@ class CSVReaderTests: XCTestCase {
func test1Line() {
let csv = "abab,cdcd,efef"
let encoding = String.Encoding.utf8
let records = parse(csv: csv, encoding: encoding)
let records = parse(csv: csv)
XCTAssertEqual(records[0], ["abab", "cdcd", "efef"])
}
func testQuoted() {
let csv = "abab,\"cdcd\",efef"
let encoding = String.Encoding.utf8
let records = parse(csv: csv, encoding: encoding)
let records = parse(csv: csv)
XCTAssertEqual(records[0], ["abab", "cdcd", "efef"])
}
func testLF() {
let csv = "abab,cdcd,efef\nzxcv,asdf,qwer"
let encoding = String.Encoding.utf8
let records = parse(csv: csv, encoding: encoding)
let records = parse(csv: csv)
XCTAssertEqual(records[0], ["abab", "cdcd", "efef"])
XCTAssertEqual(records[1], ["zxcv", "asdf", "qwer"])
}
func testCommaInQuotationMarks() {
let csv = "abab,\"cd,cd\",efef"
let encoding = String.Encoding.utf8
let records = parse(csv: csv, encoding: encoding)
let records = parse(csv: csv)
XCTAssertEqual(records[0], ["abab", "cd,cd", "efef"])
}
func testCRLF() {
let csv = "abab,cdcd,efef\r\nzxcv,asdf,qwer"
let encoding = String.Encoding.utf8
let records = parse(csv: csv, encoding: encoding)
let records = parse(csv: csv)
XCTAssertEqual(records[0], ["abab", "cdcd", "efef"])
XCTAssertEqual(records[1], ["zxcv", "asdf", "qwer"])
}
func testEscapedQuotationMark() {
func testEscapedQuotationMark1() {
let csv = "abab,\"\"\"cdcd\",efef\r\nzxcv,asdf,qwer"
let encoding = String.Encoding.utf8
let records = parse(csv: csv, encoding: encoding)
let records = parse(csv: csv)
XCTAssertEqual(records[0], ["abab", "\"cdcd", "efef"])
XCTAssertEqual(records[1], ["zxcv", "asdf", "qwer"])
}
func testQuotationMark2() {
func testEscapedQuotationMark2() {
let csv = "abab,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\""
let encoding = String.Encoding.utf8
let records = parse(csv: csv, encoding: encoding)
let records = parse(csv: csv)
XCTAssertEqual(records[0], ["abab", "cdcd", "efef"])
XCTAssertEqual(records[1], ["zxcv", "asdf", "qw\"er"])
}
func testEmptyField() {
let csv = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
let encoding = String.Encoding.utf8
let records = parse(csv: csv, encoding: encoding)
let records = parse(csv: csv)
XCTAssertEqual(records[0], ["abab", "", "cdcd", "efef"])
XCTAssertEqual(records[1], ["zxcv", "asdf", "qw\"er", ""])
}
func testLastCR() {
let csv = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\",\r"
let encoding = String.Encoding.utf8
let records = parse(csv: csv, encoding: encoding)
let records = parse(csv: csv)
XCTAssertEqual(records.count, 2)
XCTAssertEqual(records[0], ["abab", "", "cdcd", "efef"])
XCTAssertEqual(records[1], ["zxcv", "asdf", "qw\"er", ""])
@ -83,8 +74,7 @@ class CSVReaderTests: XCTestCase {
func testLastCRLF() {
let csv = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\",\r\n"
let encoding = String.Encoding.utf8
let records = parse(csv: csv, encoding: encoding)
let records = parse(csv: csv)
XCTAssertEqual(records.count, 2)
XCTAssertEqual(records[0], ["abab", "", "cdcd", "efef"])
XCTAssertEqual(records[1], ["zxcv", "asdf", "qw\"er", ""])
@ -92,8 +82,7 @@ class CSVReaderTests: XCTestCase {
func testLastLF() {
let csv = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\",\n"
let encoding = String.Encoding.utf8
let records = parse(csv: csv, encoding: encoding)
let records = parse(csv: csv)
XCTAssertEqual(records.count, 2)
XCTAssertEqual(records[0], ["abab", "", "cdcd", "efef"])
XCTAssertEqual(records[1], ["zxcv", "asdf", "qw\"er", ""])
@ -101,8 +90,7 @@ class CSVReaderTests: XCTestCase {
func testLFInQuotationMarks() {
let csv = "abab,,\"\rcdcd\n\",efef\r\nzxcv,asdf,\"qw\"\"er\",\n"
let encoding = String.Encoding.utf8
let records = parse(csv: csv, encoding: encoding)
let records = parse(csv: csv)
XCTAssertEqual(records.count, 2)
XCTAssertEqual(records[0], ["abab", "", "\rcdcd\n", "efef"])
XCTAssertEqual(records[1], ["zxcv", "asdf", "qw\"er", ""])
@ -110,8 +98,7 @@ class CSVReaderTests: XCTestCase {
func testLineBreakLF() {
let csv = "qwe,asd\nzxc,rty"
let encoding = String.Encoding.utf8
let records = parse(csv: csv, encoding: encoding)
let records = parse(csv: csv)
XCTAssertEqual(records.count, 2)
XCTAssertEqual(records[0], ["qwe", "asd"])
XCTAssertEqual(records[1], ["zxc", "rty"])
@ -119,8 +106,7 @@ class CSVReaderTests: XCTestCase {
func testLineBreakCR() {
let csv = "qwe,asd\rzxc,rty"
let encoding = String.Encoding.utf8
let records = parse(csv: csv, encoding: encoding)
let records = parse(csv: csv)
XCTAssertEqual(records.count, 2)
XCTAssertEqual(records[0], ["qwe", "asd"])
XCTAssertEqual(records[1], ["zxc", "rty"])
@ -128,8 +114,7 @@ class CSVReaderTests: XCTestCase {
func testLineBreakCRLF() {
let csv = "qwe,asd\r\nzxc,rty"
let encoding = String.Encoding.utf8
let records = parse(csv: csv, encoding: encoding)
let records = parse(csv: csv)
XCTAssertEqual(records.count, 2)
XCTAssertEqual(records[0], ["qwe", "asd"])
XCTAssertEqual(records[1], ["zxc", "rty"])
@ -137,8 +122,7 @@ class CSVReaderTests: XCTestCase {
func testLineBreakLFLF() {
let csv = "qwe,asd\n\nzxc,rty"
let encoding = String.Encoding.utf8
let records = parse(csv: csv, encoding: encoding)
let records = parse(csv: csv)
XCTAssertEqual(records.count, 3)
XCTAssertEqual(records[0], ["qwe", "asd"])
XCTAssertEqual(records[1], [""])
@ -147,8 +131,7 @@ class CSVReaderTests: XCTestCase {
func testLineBreakCRCR() {
let csv = "qwe,asd\r\rzxc,rty"
let encoding = String.Encoding.utf8
let records = parse(csv: csv, encoding: encoding)
let records = parse(csv: csv)
XCTAssertEqual(records.count, 3)
XCTAssertEqual(records[0], ["qwe", "asd"])
XCTAssertEqual(records[1], [""])
@ -157,125 +140,159 @@ class CSVReaderTests: XCTestCase {
func testLineBreakCRLFCRLF() {
let csv = "qwe,asd\r\n\r\nzxc,rty"
let encoding = String.Encoding.utf8
let records = parse(csv: csv, encoding: encoding)
let records = parse(csv: csv)
XCTAssertEqual(records.count, 3)
XCTAssertEqual(records[0], ["qwe", "asd"])
XCTAssertEqual(records[1], [""])
XCTAssertEqual(records[2], ["zxc", "rty"])
}
func testEncodingWithoutBOM() {
var index = 0
let csv = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
for encoding in allEncodings() {
print("index: \(index)")
let records = parse(csv: csv, encoding: encoding)
XCTAssertEqual(records[0], ["abab", "", "cdcd", "efef"])
XCTAssertEqual(records[1], ["zxcv", "asdf", "qw\"er", ""])
index += 1
}
}
// func testEncodingWithoutBOM() {
// var index = 0
// let csv = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
// for encoding in allEncodings() {
// print("index: \(index)")
// let records = parse(csv: csv, encoding: encoding)
// XCTAssertEqual(records[0], ["abab", "", "cdcd", "efef"])
// XCTAssertEqual(records[1], ["zxcv", "asdf", "qw\"er", ""])
// index += 1
// }
// }
func testUTF8WithBOM() {
let csv = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
let csvString = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
let encoding = String.Encoding.utf8
var mutableData = Data()
mutableData.append(utf8BOM, count: utf8BOM.count)
mutableData.append(csv.data(using: encoding)!)
let records = parse(data: mutableData, encoding: encoding)
mutableData.append(csvString.data(using: encoding)!)
let stream = InputStream(data: mutableData)
let csv = try! CSV(stream: stream, codecType: UTF8.self)
let records = getRecords(csv: csv)
XCTAssertEqual(records[0], ["abab", "", "cdcd", "efef"])
XCTAssertEqual(records[1], ["zxcv", "asdf", "qw\"er", ""])
}
func testUTF16WithNativeEndianBOM() {
let csv = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
let csvString = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
let encoding = String.Encoding.utf16
let records = parse(csv: csv, encoding: encoding)
var mutableData = Data()
mutableData.append(csvString.data(using: encoding)!)
let stream = InputStream(data: mutableData)
let csv = try! CSV(stream: stream, codecType: UTF16.self)
let records = getRecords(csv: csv)
XCTAssertEqual(records[0], ["abab", "", "cdcd", "efef"])
XCTAssertEqual(records[1], ["zxcv", "asdf", "qw\"er", ""])
}
func testUTF16WithBigEndianBOM() {
let csv = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
let encoding = String.Encoding.utf16
let csvString = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
let encoding = String.Encoding.utf16BigEndian
var mutableData = Data()
mutableData.append(utf16BigEndianBOM, count: utf16BigEndianBOM.count)
mutableData.append(csv.data(using: String.Encoding.utf16BigEndian)!)
let records = parse(data: mutableData, encoding: encoding)
mutableData.append(csvString.data(using: encoding)!)
let stream = InputStream(data: mutableData)
let csv = try! CSV(stream: stream, codecType: UTF16.self, endian: .unknown)
let records = getRecords(csv: csv)
XCTAssertEqual(records[0], ["abab", "", "cdcd", "efef"])
XCTAssertEqual(records[1], ["zxcv", "asdf", "qw\"er", ""])
}
func testUTF16WithLittleEndianBOM() {
let csv = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
let encoding = String.Encoding.utf16
let csvString = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
let encoding = String.Encoding.utf16LittleEndian
var mutableData = Data()
mutableData.append(utf16LittleEndianBOM, count: utf16LittleEndianBOM.count)
mutableData.append(csv.data(using: String.Encoding.utf16LittleEndian)!)
let records = parse(data: mutableData, encoding: encoding)
mutableData.append(csvString.data(using: encoding)!)
let stream = InputStream(data: mutableData)
let csv = try! CSV(stream: stream, codecType: UTF16.self, endian: .unknown)
let records = getRecords(csv: csv)
XCTAssertEqual(records[0], ["abab", "", "cdcd", "efef"])
XCTAssertEqual(records[1], ["zxcv", "asdf", "qw\"er", ""])
}
func testUTF32WithNativeEndianBOM() {
let csv = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
let csvString = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
let encoding = String.Encoding.utf32
let records = parse(csv: csv, encoding: encoding)
var mutableData = Data()
mutableData.append(csvString.data(using: encoding)!)
let stream = InputStream(data: mutableData)
let csv = try! CSV(stream: stream, codecType: UTF32.self)
let records = getRecords(csv: csv)
XCTAssertEqual(records[0], ["abab", "", "cdcd", "efef"])
XCTAssertEqual(records[1], ["zxcv", "asdf", "qw\"er", ""])
}
func testUTF32WithBigEndianBOM() {
let csv = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
let encoding = String.Encoding.utf32
let csvString = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
let encoding = String.Encoding.utf32BigEndian
var mutableData = Data()
mutableData.append(utf32BigEndianBOM, count: utf32BigEndianBOM.count)
mutableData.append(csv.data(using: String.Encoding.utf32BigEndian)!)
let records = parse(data: mutableData, encoding: encoding)
mutableData.append(csvString.data(using: encoding)!)
let stream = InputStream(data: mutableData)
let csv = try! CSV(stream: stream, codecType: UTF32.self, endian: .unknown)
let records = getRecords(csv: csv)
XCTAssertEqual(records[0], ["abab", "", "cdcd", "efef"])
XCTAssertEqual(records[1], ["zxcv", "asdf", "qw\"er", ""])
}
func testUTF32WithLittleEndianBOM() {
let csv = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
let encoding = String.Encoding.utf32
let csvString = "abab,,cdcd,efef\r\nzxcv,asdf,\"qw\"\"er\","
let encoding = String.Encoding.utf32LittleEndian
var mutableData = Data()
mutableData.append(utf32LittleEndianBOM, count: utf32LittleEndianBOM.count)
mutableData.append(csv.data(using: String.Encoding.utf32LittleEndian)!)
let records = parse(data: mutableData, encoding: encoding)
mutableData.append(csvString.data(using: encoding)!)
let stream = InputStream(data: mutableData)
let csv = try! CSV(stream: stream, codecType: UTF32.self, endian: .unknown)
let records = getRecords(csv: csv)
XCTAssertEqual(records[0], ["abab", "", "cdcd", "efef"])
XCTAssertEqual(records[1], ["zxcv", "asdf", "qw\"er", ""])
}
func allEncodings() -> [String.Encoding] {
return [
// multi-byte character encodings
//String.Encoding.shiftJIS,
//String.Encoding.japaneseEUC,
String.Encoding.utf8,
// wide character encodings
String.Encoding.utf16BigEndian,
String.Encoding.utf16LittleEndian,
String.Encoding.utf32BigEndian,
String.Encoding.utf32LittleEndian,
]
}
func parse(csv: String, encoding: String.Encoding) -> [[String]] {
let data = csv.data(using: encoding)!
return parse(data: data, encoding: encoding)
}
func parse(data: Data, encoding: String.Encoding) -> [[String]] {
let stream = InputStream(data: data)
let reader = try! CSV(stream: stream, encoding: encoding)
// func allEncodings() -> [String.Encoding] {
// return [
// // multi-byte character encodings
// //String.Encoding.shiftJIS,
// //String.Encoding.japaneseEUC,
// String.Encoding.utf8,
// // wide character encodings
// String.Encoding.utf16BigEndian,
// String.Encoding.utf16LittleEndian,
// String.Encoding.utf32BigEndian,
// String.Encoding.utf32LittleEndian,
// ]
// }
/// Parses `csv` with `CSV(string:)` and returns all of its rows in order.
///
/// Uses `try!`, so a failure to create the `CSV` instance crashes the test.
func parse(csv: String) -> [[String]] {
    let reader = try! CSV(string: csv)
    // `CSV` is a `Sequence` of rows; collect them all.
    return Array(reader)
}
/// Iterates `csv` to the end and returns every row it yields, in order.
func getRecords(csv: CSV) -> [[String]] {
    return Array(csv)
}
// func parse(csv: String, encoding: String.Encoding) -> [[String]] {
// let data = csv.data(using: encoding)!
// return parse(data: data, encoding: encoding)
// }
//
// func parse(data: Data, encoding: String.Encoding) -> [[String]] {
// let stream = InputStream(data: data)
// let reader = try! CSV(stream: stream, encoding: encoding)
// var records = [[String]]()
// for row in reader {
// records.append(row)
// }
// return records
// }
static var allTests : [(String, (CSVReaderTests) -> () throws -> Void)] {
return [