Code refactoring

This commit is contained in:
Yasuhiro Hatta 2016-06-22 00:30:17 +09:00
parent db64ab561f
commit 58f9399e57
14 changed files with 315 additions and 495 deletions

32
CSV.xcodeproj/project.pbxproj Normal file → Executable file
View File

@ -7,26 +7,30 @@
objects = {
/* Begin PBXBuildFile section */
0E0F16091D197D6000C92580 /* AnyIterator.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E0F16081D197D6000C92580 /* AnyIterator.swift */; };
0E0F160A1D197D6000C92580 /* AnyIterator.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E0F16081D197D6000C92580 /* AnyIterator.swift */; };
0E0F160B1D197D6000C92580 /* AnyIterator.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E0F16081D197D6000C92580 /* AnyIterator.swift */; };
0E0F160C1D197D6000C92580 /* AnyIterator.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E0F16081D197D6000C92580 /* AnyIterator.swift */; };
0E0F160E1D197DB800C92580 /* String.Encoding+endian.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E0F160D1D197DB800C92580 /* String.Encoding+endian.swift */; };
0E0F160F1D197DB800C92580 /* String.Encoding+endian.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E0F160D1D197DB800C92580 /* String.Encoding+endian.swift */; };
0E0F16101D197DB800C92580 /* String.Encoding+endian.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E0F160D1D197DB800C92580 /* String.Encoding+endian.swift */; };
0E0F16111D197DB800C92580 /* String.Encoding+endian.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E0F160D1D197DB800C92580 /* String.Encoding+endian.swift */; };
0E7E8C8C1D0BC7BB0057A1C1 /* CSV.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 0E7E8C811D0BC7BB0057A1C1 /* CSV.framework */; };
0E7E8CA01D0BC7F10057A1C1 /* ByteOrder.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E7E8C9C1D0BC7F10057A1C1 /* ByteOrder.swift */; };
0E7E8CA11D0BC7F10057A1C1 /* CSV.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E7E8C9D1D0BC7F10057A1C1 /* CSV.swift */; };
0E7E8CA21D0BC7F10057A1C1 /* CSVError.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E7E8C9E1D0BC7F10057A1C1 /* CSVError.swift */; };
0E7E8CA31D0BC7F10057A1C1 /* CSVVersion.h in Headers */ = {isa = PBXBuildFile; fileRef = 0E7E8C9F1D0BC7F10057A1C1 /* CSVVersion.h */; settings = {ATTRIBUTES = (Public, ); }; };
0E7E8CA91D0BC8050057A1C1 /* CSVReaderTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E7E8CA61D0BC8050057A1C1 /* CSVReaderTests.swift */; };
0E7E8CAA1D0BC8050057A1C1 /* CSVTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E7E8CA71D0BC8050057A1C1 /* CSVTests.swift */; };
0E7E8CBD1D0BC9D70057A1C1 /* ByteOrder.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E7E8C9C1D0BC7F10057A1C1 /* ByteOrder.swift */; };
0E7E8CBE1D0BC9D70057A1C1 /* CSV.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E7E8C9D1D0BC7F10057A1C1 /* CSV.swift */; };
0E7E8CBF1D0BC9D70057A1C1 /* CSVError.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E7E8C9E1D0BC7F10057A1C1 /* CSVError.swift */; };
0E7E8CC01D0BC9D70057A1C1 /* CSVVersion.h in Headers */ = {isa = PBXBuildFile; fileRef = 0E7E8C9F1D0BC7F10057A1C1 /* CSVVersion.h */; settings = {ATTRIBUTES = (Public, ); }; };
0E7E8CD01D0BCA2A0057A1C1 /* CSV.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 0E7E8CC61D0BCA2A0057A1C1 /* CSV.framework */; };
0E7E8CDD1D0BCA840057A1C1 /* CSVReaderTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E7E8CA61D0BC8050057A1C1 /* CSVReaderTests.swift */; };
0E7E8CDE1D0BCA840057A1C1 /* CSVTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E7E8CA71D0BC8050057A1C1 /* CSVTests.swift */; };
0E7E8CDF1D0BCA8E0057A1C1 /* ByteOrder.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E7E8C9C1D0BC7F10057A1C1 /* ByteOrder.swift */; };
0E7E8CE01D0BCA8E0057A1C1 /* CSV.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E7E8C9D1D0BC7F10057A1C1 /* CSV.swift */; };
0E7E8CE11D0BCA8E0057A1C1 /* CSVError.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E7E8C9E1D0BC7F10057A1C1 /* CSVError.swift */; };
0E7E8CE21D0BCA8E0057A1C1 /* CSVVersion.h in Headers */ = {isa = PBXBuildFile; fileRef = 0E7E8C9F1D0BC7F10057A1C1 /* CSVVersion.h */; settings = {ATTRIBUTES = (Public, ); }; };
0E7E8CF21D0BCD0B0057A1C1 /* CSV.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 0E7E8CE81D0BCD0B0057A1C1 /* CSV.framework */; };
0E7E8CFF1D0BCDCF0057A1C1 /* ByteOrder.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E7E8C9C1D0BC7F10057A1C1 /* ByteOrder.swift */; };
0E7E8D001D0BCDCF0057A1C1 /* CSV.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E7E8C9D1D0BC7F10057A1C1 /* CSV.swift */; };
0E7E8D011D0BCDCF0057A1C1 /* CSVError.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E7E8C9E1D0BC7F10057A1C1 /* CSVError.swift */; };
0E7E8D021D0BCDCF0057A1C1 /* CSVVersion.h in Headers */ = {isa = PBXBuildFile; fileRef = 0E7E8C9F1D0BC7F10057A1C1 /* CSVVersion.h */; settings = {ATTRIBUTES = (Public, ); }; };
@ -78,9 +82,10 @@
/* End PBXContainerItemProxy section */
/* Begin PBXFileReference section */
0E0F16081D197D6000C92580 /* AnyIterator.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = AnyIterator.swift; sourceTree = "<group>"; };
0E0F160D1D197DB800C92580 /* String.Encoding+endian.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "String.Encoding+endian.swift"; sourceTree = "<group>"; };
0E7E8C811D0BC7BB0057A1C1 /* CSV.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = CSV.framework; sourceTree = BUILT_PRODUCTS_DIR; };
0E7E8C8B1D0BC7BB0057A1C1 /* CSVTests-iOS.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = "CSVTests-iOS.xctest"; sourceTree = BUILT_PRODUCTS_DIR; };
0E7E8C9C1D0BC7F10057A1C1 /* ByteOrder.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ByteOrder.swift; sourceTree = "<group>"; };
0E7E8C9D1D0BC7F10057A1C1 /* CSV.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = CSV.swift; sourceTree = "<group>"; };
0E7E8C9E1D0BC7F10057A1C1 /* CSVError.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = CSVError.swift; sourceTree = "<group>"; };
0E7E8C9F1D0BC7F10057A1C1 /* CSVVersion.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CSVVersion.h; sourceTree = "<group>"; };
@ -182,14 +187,15 @@
0E7E8C9B1D0BC7F10057A1C1 /* Sources */ = {
isa = PBXGroup;
children = (
0E7E8C9C1D0BC7F10057A1C1 /* ByteOrder.swift */,
0E0F16081D197D6000C92580 /* AnyIterator.swift */,
0EA2AB7B1D183B45003EC967 /* BinaryReader.swift */,
0E7E8C9D1D0BC7F10057A1C1 /* CSV.swift */,
0E9317D31D0DB2F200AC20A0 /* CSV+init.swift */,
0E9317D81D0DB30800AC20A0 /* CSV+subscript.swift */,
0E7E8C9E1D0BC7F10057A1C1 /* CSVError.swift */,
0E7E8C9F1D0BC7F10057A1C1 /* CSVVersion.h */,
0E7E8CAC1D0BC8610057A1C1 /* Info.plist */,
0EA2AB7B1D183B45003EC967 /* BinaryReader.swift */,
0E0F160D1D197DB800C92580 /* String.Encoding+endian.swift */,
0EA2AB801D183BA9003EC967 /* UnicodeIterator.swift */,
);
path = Sources;
@ -492,11 +498,12 @@
buildActionMask = 2147483647;
files = (
0E9317D51D0DB2F200AC20A0 /* CSV+init.swift in Sources */,
0E0F160A1D197D6000C92580 /* AnyIterator.swift in Sources */,
0EA2AB821D183BA9003EC967 /* UnicodeIterator.swift in Sources */,
0E9317DA1D0DB30800AC20A0 /* CSV+subscript.swift in Sources */,
0E7E8CA11D0BC7F10057A1C1 /* CSV.swift in Sources */,
0E0F160F1D197DB800C92580 /* String.Encoding+endian.swift in Sources */,
0E7E8CA21D0BC7F10057A1C1 /* CSVError.swift in Sources */,
0E7E8CA01D0BC7F10057A1C1 /* ByteOrder.swift in Sources */,
0EA2AB7D1D183B45003EC967 /* BinaryReader.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
@ -516,11 +523,12 @@
buildActionMask = 2147483647;
files = (
0E9317D71D0DB2F200AC20A0 /* CSV+init.swift in Sources */,
0E0F160C1D197D6000C92580 /* AnyIterator.swift in Sources */,
0EA2AB841D183BA9003EC967 /* UnicodeIterator.swift in Sources */,
0E9317DC1D0DB30800AC20A0 /* CSV+subscript.swift in Sources */,
0E7E8CBE1D0BC9D70057A1C1 /* CSV.swift in Sources */,
0E0F16111D197DB800C92580 /* String.Encoding+endian.swift in Sources */,
0E7E8CBF1D0BC9D70057A1C1 /* CSVError.swift in Sources */,
0E7E8CBD1D0BC9D70057A1C1 /* ByteOrder.swift in Sources */,
0EA2AB7F1D183B45003EC967 /* BinaryReader.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
@ -530,11 +538,12 @@
buildActionMask = 2147483647;
files = (
0E9317D41D0DB2F200AC20A0 /* CSV+init.swift in Sources */,
0E0F16091D197D6000C92580 /* AnyIterator.swift in Sources */,
0EA2AB811D183BA9003EC967 /* UnicodeIterator.swift in Sources */,
0E9317D91D0DB30800AC20A0 /* CSV+subscript.swift in Sources */,
0E7E8CE01D0BCA8E0057A1C1 /* CSV.swift in Sources */,
0E0F160E1D197DB800C92580 /* String.Encoding+endian.swift in Sources */,
0E7E8CE11D0BCA8E0057A1C1 /* CSVError.swift in Sources */,
0E7E8CDF1D0BCA8E0057A1C1 /* ByteOrder.swift in Sources */,
0EA2AB7C1D183B45003EC967 /* BinaryReader.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
@ -554,11 +563,12 @@
buildActionMask = 2147483647;
files = (
0E9317D61D0DB2F200AC20A0 /* CSV+init.swift in Sources */,
0E0F160B1D197D6000C92580 /* AnyIterator.swift in Sources */,
0EA2AB831D183BA9003EC967 /* UnicodeIterator.swift in Sources */,
0E9317DB1D0DB30800AC20A0 /* CSV+subscript.swift in Sources */,
0E7E8D001D0BCDCF0057A1C1 /* CSV.swift in Sources */,
0E0F16101D197DB800C92580 /* String.Encoding+endian.swift in Sources */,
0E7E8D011D0BCDCF0057A1C1 /* CSVError.swift in Sources */,
0E7E8CFF1D0BCDCF0057A1C1 /* ByteOrder.swift in Sources */,
0EA2AB7E1D183B45003EC967 /* BinaryReader.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;

23
Sources/AnyIterator.swift Normal file
View File

@ -0,0 +1,23 @@
//
// AnyIterator.swift
// CSV
//
// Created by Yasuhiro Hatta on 2016/06/21.
// Copyright © 2016 yaslab. All rights reserved.
//
import Foundation
struct AnyIterator<T>: IteratorProtocol {
private var _base_next: (() -> T?)
init<U: IteratorProtocol where U.Element == T>(base: inout U) {
_base_next = { base.next() }
}
mutating func next() -> T? {
return _base_next()
}
}

131
Sources/BinaryReader.swift Normal file → Executable file
View File

@ -8,45 +8,90 @@
import Foundation
class BinaryReader {
enum Endian {
case big
case little
internal func readBOM(buffer: UnsafePointer<UInt8>, length: Int) -> (String.Encoding, Int)? {
if length >= 4 {
if memcmp(buffer, utf32BigEndianBOM, 4) == 0 {
return (String.Encoding.utf32BigEndian, 4)
}
if memcmp(buffer, utf32LittleEndianBOM, 4) == 0 {
return (String.Encoding.utf32LittleEndian, 4)
}
}
if length >= 3 {
if memcmp(buffer, utf8BOM, 3) == 0 {
return (String.Encoding.utf8, 3)
}
}
if length >= 2 {
if memcmp(buffer, utf16BigEndianBOM, 2) == 0 {
return (String.Encoding.utf16BigEndian, 2)
}
if memcmp(buffer, utf16LittleEndianBOM, 2) == 0 {
return (String.Encoding.utf16LittleEndian, 2)
}
}
return nil
}
internal class BinaryReader {
private let stream: InputStream
private let encoding: String.Encoding
private let closeOnDeinit: Bool
private var buffer = [UInt8].init(repeating: 0, count: 4)
private let bufferSize = 4
private var bufferOffset = 0
let stream: InputStream
let endian: Endian
let closeOnDeinit: Bool
init(stream: InputStream, endian: Endian = .big, closeOnDeinit: Bool = true) {
self.stream = stream
self.endian = endian
self.closeOnDeinit = closeOnDeinit
init(stream: InputStream, encoding: String.Encoding = .utf8, closeOnDeinit: Bool = true) {
var encoding = encoding
if stream.streamStatus == .notOpen {
stream.open()
}
let readCount = stream.read(&buffer, maxLength: bufferSize)
if let (e, l) = readBOM(buffer: &buffer, length: readCount) {
encoding = e
bufferOffset = l
}
self.stream = stream
self.encoding = encoding
self.closeOnDeinit = closeOnDeinit
}
deinit {
if closeOnDeinit && stream.streamStatus == .open {
if closeOnDeinit && stream.streamStatus != .closed {
stream.close()
}
}
private func readStream(_ buffer: UnsafeMutablePointer<UInt8>, maxLength: Int) -> Int {
var i = 0
while bufferOffset < bufferSize {
buffer[i] = self.buffer[bufferOffset]
i += 1
bufferOffset += 1
if i >= maxLength {
return i
}
}
return stream.read(buffer + i, maxLength: maxLength - i)
}
func readUInt8() throws -> UInt8 {
// if stream.streamStatus == .Closed {
// // ObjectDisposedException
// throw NSError(domain: "", code: 0, userInfo: nil)
// }
// if stream.streamStatus == .AtEnd {
// // EndOfStreamException
// throw NSError(domain: "", code: 0, userInfo: nil)
// }
// if stream.streamStatus == .Closed {
// // ObjectDisposedException
// throw NSError(domain: "", code: 0, userInfo: nil)
// }
// if stream.streamStatus == .AtEnd {
// // EndOfStreamException
// throw NSError(domain: "", code: 0, userInfo: nil)
// }
let bufferSize = 1
var buffer = [UInt8](repeating: 0, count: bufferSize)
let length = stream.read(&buffer, maxLength: bufferSize)
let length = readStream(&buffer, maxLength: bufferSize)
if length < 0 {
// IOException
throw NSError(domain: "", code: 0, userInfo: nil)
@ -61,7 +106,7 @@ class BinaryReader {
func readUInt16() throws -> UInt16 {
let bufferSize = 2
var buffer = [UInt8](repeating: 0, count: bufferSize)
let length = stream.read(&buffer, maxLength: bufferSize)
let length = readStream(&buffer, maxLength: bufferSize)
if length < 0 {
// IOException
throw NSError(domain: "", code: 0, userInfo: nil)
@ -71,18 +116,20 @@ class BinaryReader {
throw NSError(domain: "", code: 0, userInfo: nil)
}
let tmp = UnsafeMutablePointer<UInt16>(buffer)
switch endian {
switch encoding.endian {
case .big:
return CFSwapInt16BigToHost(tmp[0])
case .little:
return CFSwapInt16LittleToHost(tmp[0])
default:
throw NSError(domain: "", code: 0, userInfo: nil)
}
}
func readUInt32() throws -> UInt32 {
let bufferSize = 4
var buffer = [UInt8](repeating: 0, count: bufferSize)
let length = stream.read(&buffer, maxLength: bufferSize)
let length = readStream(&buffer, maxLength: bufferSize)
if length < 0 {
// IOException
throw NSError(domain: "", code: 0, userInfo: nil)
@ -92,11 +139,13 @@ class BinaryReader {
throw NSError(domain: "", code: 0, userInfo: nil)
}
let tmp = UnsafeMutablePointer<UInt32>(buffer)
switch endian {
switch encoding.endian {
case .big:
return CFSwapInt32BigToHost(tmp[0])
case .little:
return CFSwapInt32LittleToHost(tmp[0])
default:
throw NSError(domain: "", code: 0, userInfo: nil)
}
}
@ -108,7 +157,7 @@ extension BinaryReader {
let reader: BinaryReader
private init(reader: BinaryReader) {
init(reader: BinaryReader) {
self.reader = reader
}
@ -126,11 +175,11 @@ extension BinaryReader {
extension BinaryReader {
struct UInt16Iterator: IteratorProtocol {
struct UInt16Iterator: Sequence, IteratorProtocol {
let reader: BinaryReader
private init(reader: BinaryReader) {
init(reader: BinaryReader) {
self.reader = reader
}
@ -145,3 +194,25 @@ extension BinaryReader {
}
}
extension BinaryReader {
struct UInt32Iterator: Sequence, IteratorProtocol {
let reader: BinaryReader
init(reader: BinaryReader) {
self.reader = reader
}
mutating func next() -> UInt32? {
return try? reader.readUInt32()
}
}
func makeUInt32Iterator() -> UInt32Iterator {
return UInt32Iterator(reader: self)
}
}

View File

@ -1,37 +0,0 @@
//
// ByteOrder.swift
// CSV
//
// Created by Yasuhiro Hatta on 2016/06/11.
//
//
import CoreFoundation
internal func ReadBigInt16(base: UnsafePointer<Void>, byteOffset: Int) -> UInt16 {
let bytes = UnsafePointer<UInt8>(base).advanced(by: byteOffset)
let int16Array = UnsafePointer<UInt16>(bytes)
return CFSwapInt16BigToHost(int16Array[0])
}
internal func ReadBigInt32(base: UnsafePointer<Void>, byteOffset: Int) -> UInt32 {
let bytes = UnsafePointer<UInt8>(base).advanced(by: byteOffset)
let int32Array = UnsafePointer<UInt32>(bytes)
return CFSwapInt32BigToHost(int32Array[0])
}
internal func ReadLittleInt16(base: UnsafePointer<Void>, byteOffset: Int) -> UInt16 {
let bytes = UnsafePointer<UInt8>(base).advanced(by: byteOffset)
let int16Array = UnsafePointer<UInt16>(bytes)
return CFSwapInt16LittleToHost(int16Array[0])
}
internal func ReadLittleInt32(base: UnsafePointer<Void>, byteOffset: Int) -> UInt32 {
let bytes = UnsafePointer<UInt8>(base).advanced(by: byteOffset)
let int32Array = UnsafePointer<UInt32>(bytes)
return CFSwapInt32LittleToHost(int32Array[0])
}
internal func IsBigEndian() -> Bool {
return CFByteOrderGetCurrent() == CFByteOrder(CFByteOrderBigEndian.rawValue)
}

51
Sources/CSV+init.swift Normal file → Executable file
View File

@ -9,62 +9,41 @@
import Foundation
extension CSV {
public convenience init(
public init(
path: String,
hasHeaderRow: Bool = defaultHasHeaderRow,
encoding: String.Encoding = defaultEncoding,
delimiter: UnicodeScalar = defaultDelimiter,
bufferSize: Int = defaultBufferSize)
hasHeaderRow: Bool = defaultHasHeaderRow,
delimiter: UnicodeScalar = defaultDelimiter)
throws
{
guard let stream = InputStream(fileAtPath: path) else {
throw CSVError.StreamError
}
try self.init(
stream: stream,
hasHeaderRow: hasHeaderRow,
encoding: encoding,
delimiter: delimiter,
bufferSize: bufferSize)
try self.init(stream: stream, encoding: encoding, hasHeaderRow: hasHeaderRow, delimiter: delimiter)
}
public convenience init(
public init(
url: URL,
hasHeaderRow: Bool = defaultHasHeaderRow,
encoding: String.Encoding = defaultEncoding,
delimiter: UnicodeScalar = defaultDelimiter,
bufferSize: Int = defaultBufferSize)
hasHeaderRow: Bool = defaultHasHeaderRow,
delimiter: UnicodeScalar = defaultDelimiter)
throws
{
guard let stream = InputStream(url: url) else {
throw CSVError.StreamError
}
try self.init(
stream: stream,
hasHeaderRow: hasHeaderRow,
encoding: encoding,
delimiter: delimiter,
bufferSize: bufferSize)
try self.init(stream: stream, encoding: encoding, hasHeaderRow: hasHeaderRow, delimiter: delimiter)
}
public convenience init(
public init(
string: String,
hasHeaderRow: Bool = defaultHasHeaderRow,
delimiter: UnicodeScalar = defaultDelimiter,
bufferSize: Int = defaultBufferSize)
delimiter: UnicodeScalar = defaultDelimiter)
throws
{
let encoding = defaultEncoding
guard let data = string.data(using: encoding) else {
throw CSVError.StringEncodingMismatch
}
try self.init(
stream: InputStream(data: data),
hasHeaderRow: hasHeaderRow,
encoding: encoding,
delimiter: delimiter,
bufferSize: bufferSize)
var iterator = string.unicodeScalars.makeIterator()
try self.init(iterator: &iterator, hasHeaderRow: hasHeaderRow, delimiter: delimiter)
}
}

5
Sources/CSV+subscript.swift Normal file → Executable file
View File

@ -12,15 +12,12 @@ extension CSV {
public subscript(key: String) -> String? {
get {
guard let headerRow = headerRow else {
guard let headerRow = headerRow, currentRow = currentRow else {
return nil
}
guard let index = headerRow.index(of: key) else {
return nil
}
guard let currentRow = currentRow else {
return nil
}
if index >= currentRow.count {
return nil
}

396
Sources/CSV.swift Normal file → Executable file
View File

@ -3,19 +3,18 @@
// CSV
//
// Created by Yasuhiro Hatta on 2016/06/11.
//
// Copyright © 2016 yaslab. All rights reserved.
//
import Foundation
private let LF = UnicodeScalar(UInt32(0x0a)) //'\n'
private let CR = UnicodeScalar(UInt32(0x0d)) //'\r'
private let DQUOTE = UnicodeScalar(UInt32(0x22)) //'"'
private let LF = "\n".unicodeScalars.first!
private let CR = "\r".unicodeScalars.first!
private let DQUOTE = "\"".unicodeScalars.first!
internal let defaultHasHeaderRow = false
internal let defaultEncoding: String.Encoding = .utf8
internal let defaultDelimiter = UnicodeScalar(UInt32(0x2c)) //','
internal let defaultBufferSize = 8192
internal let defaultHasHeaderRow = false
internal let defaultDelimiter = ",".unicodeScalars.first!
internal let utf8BOM: [UInt8] = [0xef, 0xbb, 0xbf]
internal let utf16BigEndianBOM: [UInt8] = [0xfe, 0xff]
@ -23,22 +22,13 @@ internal let utf16LittleEndianBOM: [UInt8] = [0xff, 0xfe]
internal let utf32BigEndianBOM: [UInt8] = [0x00, 0x00, 0xfe, 0xff]
internal let utf32LittleEndianBOM: [UInt8] = [0xff, 0xfe, 0x00, 0x00]
public class CSV: Sequence, IteratorProtocol {
public struct CSV: IteratorProtocol, Sequence {
internal let stream: InputStream
internal let encoding: String.Encoding
internal let delimiter: UnicodeScalar
internal let bufferSize: Int
internal var buffer: UnsafeMutablePointer<UInt8>!
internal var bufferOffset: Int
internal var lastReadCount: Int
internal let charWidth: Int
internal var fieldBuffer: Data
internal var closed: Bool = false
private var iterator: AnyIterator<UnicodeScalar>
private var back: UnicodeScalar? = nil
private var innerStream: InputStream? = nil
private let delimiter: UnicodeScalar
internal var currentRow: [String]? = nil
@ -48,342 +38,94 @@ public class CSV: Sequence, IteratorProtocol {
public var headerRow: [String]? { return _headerRow }
private var _headerRow: [String]? = nil
/**
The value is set when an error occurs.
*/
public private(set) var lastError: CSVError? = nil
internal init<T: IteratorProtocol where T.Element == UnicodeScalar>(
iterator: inout T,
hasHeaderRow: Bool,
delimiter: UnicodeScalar)
throws
{
self.iterator = AnyIterator(base: &iterator)
self.delimiter = delimiter
if hasHeaderRow {
guard let headerRow = next() else {
throw CSVError.HeaderReadError
}
_headerRow = headerRow
}
}
/**
Create CSV instance with `NSInputStream`.
- parameter stream: An `NSInputStream` object. If the stream is not open, initializer opens automatically.
- parameter hasHeaderRow: `true` if the CSV has a header row, otherwise `false`. Default: `false`.
- parameter encoding: The character encoding for `stream`. Default: `NSUTF8StringEncoding`.
- parameter hasHeaderRow: `true` if the CSV has a header row, otherwise `false`. Default: `false`.
- parameter delimiter: Default: `","`.
- parameter bufferSize: Size in bytes to be read at a time from the stream. Default: `8192`.
*/
public init(
stream: InputStream,
hasHeaderRow: Bool = defaultHasHeaderRow,
encoding: String.Encoding = defaultEncoding,
delimiter: UnicodeScalar = defaultDelimiter,
bufferSize: Int = defaultBufferSize)
hasHeaderRow: Bool = defaultHasHeaderRow,
delimiter: UnicodeScalar = defaultDelimiter)
throws
{
self.stream = stream
var bs = bufferSize
if bs < 0 {
throw CSVError.ParameterError
}
if bs < 8 {
bs = 8
}
let mod = bs % 4
if mod != 0 {
bs += 4 - mod
}
self.bufferSize = bs
self.delimiter = UnicodeScalar(UInt32(delimiter))
let b = malloc(bufferSize)
if b == nil {
throw CSVError.MemoryAllocationFailed
}
self.buffer = UnsafeMutablePointer<UInt8>(b)
self.bufferOffset = 0
self.fieldBuffer = Data()
if stream.streamStatus == .notOpen {
stream.open()
}
if stream.streamStatus != .open {
throw CSVError.StreamError
}
self.lastReadCount = stream.read(self.buffer, maxLength: bufferSize)
var e = encoding
let reader = BinaryReader(stream: stream, encoding: encoding, closeOnDeinit: true)
switch encoding {
case String.Encoding.utf16,
String.Encoding.utf16BigEndian,
String.Encoding.utf16LittleEndian:
charWidth = 2
if encoding == .utf16 {
let nativeEndian: String.Encoding = IsBigEndian()
? .utf16BigEndian
: .utf16LittleEndian
e = nativeEndian
if lastReadCount >= charWidth {
if memcmp(buffer, utf16BigEndianBOM, charWidth) == 0 {
e = .utf16BigEndian
self.bufferOffset += charWidth
}
else if memcmp(buffer, utf16LittleEndianBOM, charWidth) == 0 {
e = .utf16LittleEndian
self.bufferOffset += charWidth
}
}
}
case String.Encoding.utf32,
String.Encoding.utf32BigEndian,
String.Encoding.utf32LittleEndian:
var iterator = UnicodeIterator(input: reader.makeUInt32Iterator(), inputEncoding: UTF32.self)
try self.init(iterator: &iterator, hasHeaderRow: hasHeaderRow, delimiter: delimiter)
charWidth = 4
if encoding == .utf32 {
let nativeEndian: String.Encoding = IsBigEndian()
? .utf32BigEndian
: .utf32LittleEndian
e = nativeEndian
if lastReadCount >= charWidth {
if memcmp(buffer, utf32BigEndianBOM, charWidth) == 0 {
e = .utf32BigEndian
self.bufferOffset += charWidth
}
else if memcmp(buffer, utf32LittleEndianBOM, charWidth) == 0 {
e = .utf32LittleEndian
self.bufferOffset += charWidth
}
}
}
case String.Encoding.utf16,
String.Encoding.utf16BigEndian,
String.Encoding.utf16LittleEndian:
var iterator = UnicodeIterator(input: reader.makeUInt16Iterator(), inputEncoding: UTF16.self)
try self.init(iterator: &iterator, hasHeaderRow: hasHeaderRow, delimiter: delimiter)
case String.Encoding.utf8,
String.Encoding.ascii:
var iterator = UnicodeIterator(input: reader.makeUInt8Iterator(), inputEncoding: UTF8.self)
try self.init(iterator: &iterator, hasHeaderRow: hasHeaderRow, delimiter: delimiter)
default:
charWidth = 1
if encoding == .utf8 {
let bomSize = 3
if lastReadCount >= bomSize {
if memcmp(buffer, utf8BOM, charWidth) == 0 {
self.bufferOffset += bomSize
}
}
}
throw CSVError.StringEncodingMismatch
}
self.encoding = e
if hasHeaderRow {
guard let nextRow = next() else {
throw CSVError.HeaderReadError
}
_headerRow = nextRow
currentRow = nil
}
}
deinit {
close()
innerStream = stream
}
/**
Close stream.
*/
public func close() {
if !closed {
stream.close()
if buffer != nil {
free(buffer)
buffer = nil
}
closed = true
}
}
// MARK: GeneratorType
// public mutating func close() {
// if !closed {
// if let stream = innerStream {
// stream.close()
// }
// closed = true
// }
// }
public func next() -> [String]? {
fieldBuffer.count = 0
currentRow = nil
if closed {
return nil
}
if lastReadCount <= 0 {
return nil
}
// MARK: IteratorProtocol
var fields = [String]()
var fieldStart = bufferOffset
var charLength = 0
var escaping = false
var quotationCount = 0
var prev = UnicodeScalar(0)
while true {
if bufferOffset >= lastReadCount {
if charLength > 0 {
fieldBuffer.append(buffer + fieldStart, count: charWidth * charLength)
}
bufferOffset = 0
fieldStart = 0
charLength = 0
lastReadCount = stream.read(buffer, maxLength: bufferSize)
if lastReadCount < 0 {
// bad end
lastError = CSVError.StreamError
return nil
}
if lastReadCount == 0 {
// true end
break
}
}
var c = UnicodeScalar(0)
switch encoding {
case String.Encoding.utf16BigEndian:
let _c = ReadBigInt16(base: buffer, byteOffset: bufferOffset)
c = UnicodeScalar(UInt32(_c))
case String.Encoding.utf16LittleEndian:
let _c = ReadLittleInt16(base: buffer, byteOffset: bufferOffset)
c = UnicodeScalar(UInt32(_c))
case String.Encoding.utf32BigEndian:
let _c = ReadBigInt32(base: buffer, byteOffset: bufferOffset)
c = UnicodeScalar(UInt32(_c))
case String.Encoding.utf32LittleEndian:
let _c = ReadLittleInt32(base: buffer, byteOffset: bufferOffset)
c = UnicodeScalar(UInt32(_c))
default: // multi-byte character encodings
let _c = (buffer + bufferOffset)[0]
c = UnicodeScalar(UInt32(_c))
}
if c == DQUOTE {
quotationCount += 1
}
if c == DQUOTE && charLength == 0 {
escaping = true
}
if escaping && prev == DQUOTE && (c == delimiter || c == CR || c == LF) && (quotationCount % 2 == 0) {
escaping = false
}
if !escaping && prev == CR && c != LF {
fieldBuffer.append(buffer + fieldStart, count: charWidth * charLength)
break
}
prev = c
bufferOffset += charWidth
if !escaping {
if c == CR {
continue
}
if c == LF {
fieldBuffer.append(buffer + fieldStart, count: charWidth * charLength)
break
}
}
//
if !escaping && c == delimiter {
fieldBuffer.append(buffer + fieldStart, count: charWidth * charLength)
guard let field = getField(quotationCount: quotationCount) else {
return nil
}
fields.append(field)
// reset
fieldBuffer.count = 0
quotationCount = 0
charLength = 0
fieldStart = bufferOffset
}
else {
charLength += 1
}
}
guard let field = getField(quotationCount: quotationCount) else {
return nil
}
//
if isBufferEOF && fields.count == 0 && field.isEmpty {
return nil
}
fields.append(field)
currentRow = fields
return fields
}
// MARK: Utility
private var isBufferEOF: Bool {
if stream.hasBytesAvailable {
return false
}
return bufferOffset >= (lastReadCount - 1)
}
private func getField(quotationCount: Int) -> String? {
guard var field = String(data: fieldBuffer, encoding: encoding) else {
lastError = CSVError.StringEncodingMismatch
return nil
}
if quotationCount >= 2
&& field.hasPrefix("\"")
&& field.hasSuffix("\"")
{
let start = field.index(field.startIndex, offsetBy: 1)
let end = field.index(field.endIndex, offsetBy: -1)
field = field[start..<end]
}
if quotationCount >= 4 {
field = field.replacingOccurrences(of: "\"\"", with: "\"")
}
return field
}
}
public struct CSVState<T: IteratorProtocol where T.Element == UnicodeScalar>: IteratorProtocol {
private var it: T
private let delimiter: UnicodeScalar
private var back: T.Element? = nil
public init(it: inout T, delimiter: UnicodeScalar) {
self.it = it
self.delimiter = delimiter
}
public mutating func next() -> [String]? {
return readRow()
}
mutating func moveNext() -> T.Element? {
internal mutating func moveNext() -> UnicodeScalar? {
if back != nil {
defer { back = nil }
return back
}
return it.next()
return iterator.next()
}
mutating func readRow() -> [String]? {
internal mutating func readRow() -> [String]? {
currentRow = nil
var next = moveNext()
if next == nil {
return nil
@ -409,21 +151,25 @@ public struct CSVState<T: IteratorProtocol where T.Element == UnicodeScalar>: It
}
next = moveNext()
}
currentRow = row
return row
}
mutating func readField(quoted: Bool) -> (String, Bool) {
internal mutating func readField(quoted: Bool) -> (String, Bool) {
var next = moveNext()
var field = ""
//var end = false
while let c = next {
if quoted {
switch c {
case DQUOTE:
let n = moveNext()
if n == DQUOTE {
if n == nil {
// END ROW
return (field, true)
}
else if n == DQUOTE {
// ESC
field.append(c)
}

View File

@ -3,7 +3,7 @@
// CSV
//
// Created by Yasuhiro Hatta on 2016/06/11.
//
// Copyright © 2016 yaslab. All rights reserved.
//
import Foundation

2
Sources/CSVVersion.h Normal file → Executable file
View File

@ -3,7 +3,7 @@
// CSV
//
// Created by Yasuhiro Hatta on 2016/06/11.
//
// Copyright © 2016 yaslab. All rights reserved.
//
@import Foundation;

View File

@ -0,0 +1,31 @@
//
// String.Encoding+endian.swift
// CSV
//
// Created by Yasuhiro Hatta on 2016/06/21.
// Copyright © 2016 yaslab. All rights reserved.
//
import Foundation
enum Endian {
case big
case little
case unknown
}
extension String.Encoding {
var endian: Endian {
switch self {
case String.Encoding.utf16: return .big
case String.Encoding.utf16BigEndian: return .big
case String.Encoding.utf16LittleEndian: return .little
case String.Encoding.utf32: return .big
case String.Encoding.utf32BigEndian: return .big
case String.Encoding.utf32LittleEndian: return .little
default: return .unknown
}
}
}

0
Sources/UnicodeIterator.swift Normal file → Executable file
View File

View File

@ -251,8 +251,8 @@ class CSVReaderTests: XCTestCase {
func allEncodings() -> [String.Encoding] {
return [
// multi-byte character encodings
String.Encoding.shiftJIS,
String.Encoding.japaneseEUC,
//String.Encoding.shiftJIS,
//String.Encoding.japaneseEUC,
String.Encoding.utf8,
// wide character encodings
String.Encoding.utf16BigEndian,

94
Tests/CSV/CSVTests.swift Normal file → Executable file
View File

@ -97,53 +97,53 @@ class CSVTests: XCTestCase {
XCTAssertEqual(i, 3)
}
func testBufferSizeMod0() {
let csvString = "0,1,2,3,4,5,6,7,8,9\n"
let csv = try! CSV(string: csvString, bufferSize: 12)
XCTAssertEqual(csv.bufferSize, 12)
}
func testBufferSizeMod1() {
let csvString = "0,1,2,3,4,5,6,7,8,9\n"
let csv = try! CSV(string: csvString, bufferSize: 13)
XCTAssertEqual(csv.bufferSize, 16)
}
func testBufferSizeMod2() {
let csvString = "0,1,2,3,4,5,6,7,8,9\n"
let csv = try! CSV(string: csvString, bufferSize: 14)
XCTAssertEqual(csv.bufferSize, 16)
}
func testBufferSizeMod3() {
let csvString = "0,1,2,3,4,5,6,7,8,9\n"
let csv = try! CSV(string: csvString, bufferSize: 15)
XCTAssertEqual(csv.bufferSize, 16)
}
func testBufferSizeMod4() {
let csvString = "0,1,2,3,4,5,6,7,8,9\n"
let csv = try! CSV(string: csvString, bufferSize: 16)
XCTAssertEqual(csv.bufferSize, 16)
}
func testBigDataAndSmallBufferSize() {
let line = "0,1,2,3,4,5,6,7,8,9\n"
var csv = ""
for _ in 0..<10000 {
csv += line
}
var i = 0
for row in try! CSV(string: csv, bufferSize: 10) {
XCTAssertEqual(row, ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"])
i += 1
}
XCTAssertEqual(i, 10000)
}
// func testBufferSizeMod0() {
// let csvString = "0,1,2,3,4,5,6,7,8,9\n"
// let csv = try! CSV(string: csvString, bufferSize: 12)
// XCTAssertEqual(csv.bufferSize, 12)
// }
//
// func testBufferSizeMod1() {
// let csvString = "0,1,2,3,4,5,6,7,8,9\n"
// let csv = try! CSV(string: csvString, bufferSize: 13)
// XCTAssertEqual(csv.bufferSize, 16)
// }
//
// func testBufferSizeMod2() {
// let csvString = "0,1,2,3,4,5,6,7,8,9\n"
// let csv = try! CSV(string: csvString, bufferSize: 14)
// XCTAssertEqual(csv.bufferSize, 16)
// }
//
// func testBufferSizeMod3() {
// let csvString = "0,1,2,3,4,5,6,7,8,9\n"
// let csv = try! CSV(string: csvString, bufferSize: 15)
// XCTAssertEqual(csv.bufferSize, 16)
// }
//
// func testBufferSizeMod4() {
// let csvString = "0,1,2,3,4,5,6,7,8,9\n"
// let csv = try! CSV(string: csvString, bufferSize: 16)
// XCTAssertEqual(csv.bufferSize, 16)
// }
//
// func testBigDataAndSmallBufferSize() {
// let line = "0,1,2,3,4,5,6,7,8,9\n"
// var csv = ""
// for _ in 0..<10000 {
// csv += line
// }
// var i = 0
// for row in try! CSV(string: csv, bufferSize: 10) {
// XCTAssertEqual(row, ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"])
// i += 1
// }
// XCTAssertEqual(i, 10000)
// }
func testSubscript() {
let csvString = "id,name\n001,hoge\n002,fuga"
let csv = try! CSV(string: csvString, hasHeaderRow: true)
var csv = try! CSV(string: csvString, hasHeaderRow: true)
var i = 0
while csv.next() != nil {
switch i {
@ -163,11 +163,11 @@ class CSVTests: XCTestCase {
func testCSVState1() {
var it = "あ,い1,\"\",えお\n,,x,".unicodeScalars.makeIterator()
var state = CSVState(it: &it, delimiter: defaultDelimiter)
var csv = try! CSV(iterator: &it, hasHeaderRow: defaultHasHeaderRow, delimiter: defaultDelimiter)
var rows = [[String]]()
while let row = state.next() {
while let row = csv.next() {
rows.append(row)
}
XCTAssertEqual(rows.count, 2)

2
Tests/CSV/ReadmeTests.swift Normal file → Executable file
View File

@ -41,7 +41,7 @@ class ReadmeTests: XCTestCase {
}
func testGetTheFieldValueUsingSubscript() {
let csv = try! CSV(
var csv = try! CSV(
string: "id,name\n1,foo",
hasHeaderRow: true) // It must be true.