diff --git a/CSV.xcodeproj/project.pbxproj b/CSV.xcodeproj/project.pbxproj index ddec1b3..f644c8f 100644 --- a/CSV.xcodeproj/project.pbxproj +++ b/CSV.xcodeproj/project.pbxproj @@ -43,6 +43,14 @@ 0E9317DE1D0DBCC500AC20A0 /* ReadmeTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E9317DD1D0DBCC500AC20A0 /* ReadmeTests.swift */; }; 0E9317DF1D0DBCC500AC20A0 /* ReadmeTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E9317DD1D0DBCC500AC20A0 /* ReadmeTests.swift */; }; 0E9317E01D0DBCC500AC20A0 /* ReadmeTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E9317DD1D0DBCC500AC20A0 /* ReadmeTests.swift */; }; + 0EA2AB7C1D183B45003EC967 /* BinaryReader.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EA2AB7B1D183B45003EC967 /* BinaryReader.swift */; }; + 0EA2AB7D1D183B45003EC967 /* BinaryReader.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EA2AB7B1D183B45003EC967 /* BinaryReader.swift */; }; + 0EA2AB7E1D183B45003EC967 /* BinaryReader.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EA2AB7B1D183B45003EC967 /* BinaryReader.swift */; }; + 0EA2AB7F1D183B45003EC967 /* BinaryReader.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EA2AB7B1D183B45003EC967 /* BinaryReader.swift */; }; + 0EA2AB811D183BA9003EC967 /* UnicodeIterator.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EA2AB801D183BA9003EC967 /* UnicodeIterator.swift */; }; + 0EA2AB821D183BA9003EC967 /* UnicodeIterator.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EA2AB801D183BA9003EC967 /* UnicodeIterator.swift */; }; + 0EA2AB831D183BA9003EC967 /* UnicodeIterator.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EA2AB801D183BA9003EC967 /* UnicodeIterator.swift */; }; + 0EA2AB841D183BA9003EC967 /* UnicodeIterator.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EA2AB801D183BA9003EC967 /* UnicodeIterator.swift */; }; /* End PBXBuildFile section */ /* Begin PBXContainerItemProxy section */ @@ -88,6 +96,8 @@ 0E9317D31D0DB2F200AC20A0 /* CSV+init.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "CSV+init.swift"; sourceTree = ""; }; 0E9317D81D0DB30800AC20A0 /* CSV+subscript.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "CSV+subscript.swift"; sourceTree = ""; }; 0E9317DD1D0DBCC500AC20A0 /* ReadmeTests.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ReadmeTests.swift; sourceTree = ""; }; + 0EA2AB7B1D183B45003EC967 /* BinaryReader.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BinaryReader.swift; sourceTree = ""; }; + 0EA2AB801D183BA9003EC967 /* UnicodeIterator.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = UnicodeIterator.swift; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -179,6 +189,8 @@ 0E7E8C9E1D0BC7F10057A1C1 /* CSVError.swift */, 0E7E8C9F1D0BC7F10057A1C1 /* CSVVersion.h */, 0E7E8CAC1D0BC8610057A1C1 /* Info.plist */, + 0EA2AB7B1D183B45003EC967 /* BinaryReader.swift */, + 0EA2AB801D183BA9003EC967 /* UnicodeIterator.swift */, ); path = Sources; sourceTree = ""; @@ -480,10 +492,12 @@ buildActionMask = 2147483647; files = ( 0E9317D51D0DB2F200AC20A0 /* CSV+init.swift in Sources */, + 0EA2AB821D183BA9003EC967 /* UnicodeIterator.swift in Sources */, 0E9317DA1D0DB30800AC20A0 /* CSV+subscript.swift in Sources */, 0E7E8CA11D0BC7F10057A1C1 /* CSV.swift in Sources */, 0E7E8CA21D0BC7F10057A1C1 /* CSVError.swift in Sources */, 0E7E8CA01D0BC7F10057A1C1 /* ByteOrder.swift in Sources */, + 0EA2AB7D1D183B45003EC967 /* BinaryReader.swift in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -502,10 +516,12 @@ buildActionMask = 2147483647; files = ( 0E9317D71D0DB2F200AC20A0 /* CSV+init.swift in Sources */, + 0EA2AB841D183BA9003EC967 /* UnicodeIterator.swift in Sources */, 0E9317DC1D0DB30800AC20A0 /* CSV+subscript.swift in Sources */, 0E7E8CBE1D0BC9D70057A1C1 /* CSV.swift in Sources */, 0E7E8CBF1D0BC9D70057A1C1 /* CSVError.swift in Sources */, 0E7E8CBD1D0BC9D70057A1C1 /* ByteOrder.swift in Sources */, + 0EA2AB7F1D183B45003EC967 /* BinaryReader.swift in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -514,10 +530,12 @@ buildActionMask = 2147483647; files = ( 0E9317D41D0DB2F200AC20A0 /* CSV+init.swift in Sources */, + 0EA2AB811D183BA9003EC967 /* UnicodeIterator.swift in Sources */, 0E9317D91D0DB30800AC20A0 /* CSV+subscript.swift in Sources */, 0E7E8CE01D0BCA8E0057A1C1 /* CSV.swift in Sources */, 0E7E8CE11D0BCA8E0057A1C1 /* CSVError.swift in Sources */, 0E7E8CDF1D0BCA8E0057A1C1 /* ByteOrder.swift in Sources */, + 0EA2AB7C1D183B45003EC967 /* BinaryReader.swift in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -536,10 +554,12 @@ buildActionMask = 2147483647; files = ( 0E9317D61D0DB2F200AC20A0 /* CSV+init.swift in Sources */, + 0EA2AB831D183BA9003EC967 /* UnicodeIterator.swift in Sources */, 0E9317DB1D0DB30800AC20A0 /* CSV+subscript.swift in Sources */, 0E7E8D001D0BCDCF0057A1C1 /* CSV.swift in Sources */, 0E7E8D011D0BCDCF0057A1C1 /* CSVError.swift in Sources */, 0E7E8CFF1D0BCDCF0057A1C1 /* ByteOrder.swift in Sources */, + 0EA2AB7E1D183B45003EC967 /* BinaryReader.swift in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/Sources/BinaryReader.swift b/Sources/BinaryReader.swift new file mode 100644 index 0000000..92c04dd --- /dev/null +++ b/Sources/BinaryReader.swift @@ -0,0 +1,147 @@ +// +// BinaryReader.swift +// CSV +// +// Created by Yasuhiro Hatta on 2016/06/20. +// Copyright © 2016年 yaslab. All rights reserved. +// + +import Foundation + +class BinaryReader { + + enum Endian { + case big + case little + } + + let stream: InputStream + let endian: Endian + let closeOnDeinit: Bool + + init(stream: InputStream, endian: Endian = .big, closeOnDeinit: Bool = true) { + self.stream = stream + self.endian = endian + self.closeOnDeinit = closeOnDeinit + + if stream.streamStatus == .notOpen { + stream.open() + } + } + + deinit { + if closeOnDeinit && stream.streamStatus == .open { + stream.close() + } + } + + func readUInt8() throws -> UInt8 { + // if stream.streamStatus == .Closed { + // // ObjectDisposedException + // throw NSError(domain: "", code: 0, userInfo: nil) + // } + // if stream.streamStatus == .AtEnd { + // // EndOfStreamException + // throw NSError(domain: "", code: 0, userInfo: nil) + // } + let bufferSize = 1 + var buffer = [UInt8](repeating: 0, count: bufferSize) + let length = stream.read(&buffer, maxLength: bufferSize) + if length < 0 { + // IOException + throw NSError(domain: "", code: 0, userInfo: nil) + } + if length != bufferSize { + // EndOfStreamException + throw NSError(domain: "", code: 0, userInfo: nil) + } + return buffer[0] + } + + func readUInt16() throws -> UInt16 { + let bufferSize = 2 + var buffer = [UInt8](repeating: 0, count: bufferSize) + let length = stream.read(&buffer, maxLength: bufferSize) + if length < 0 { + // IOException + throw NSError(domain: "", code: 0, userInfo: nil) + } + if length != bufferSize { + // EndOfStreamException + throw NSError(domain: "", code: 0, userInfo: nil) + } + let tmp = UnsafeMutablePointer(buffer) + switch endian { + case .big: + return CFSwapInt16BigToHost(tmp[0]) + case .little: + return CFSwapInt16LittleToHost(tmp[0]) + } + } + + func readUInt32() throws -> UInt32 { + let bufferSize = 4 + var buffer = [UInt8](repeating: 0, count: bufferSize) + let length = stream.read(&buffer, maxLength: bufferSize) + if length < 0 { + // IOException + throw NSError(domain: "", code: 0, userInfo: nil) + } + if length != 4 { + // EndOfStreamException + throw NSError(domain: "", code: 0, userInfo: nil) + } + let tmp = UnsafeMutablePointer(buffer) + switch endian { + case .big: + return CFSwapInt32BigToHost(tmp[0]) + case .little: + return CFSwapInt32LittleToHost(tmp[0]) + } + } + +} + +extension BinaryReader { + + struct UInt8Iterator: Sequence, IteratorProtocol { + + let reader: BinaryReader + + private init(reader: BinaryReader) { + self.reader = reader + } + + mutating func next() -> UInt8? { + return try? reader.readUInt8() + } + + } + + func makeUInt8Iterator() -> UInt8Iterator { + return UInt8Iterator(reader: self) + } + +} + +extension BinaryReader { + + struct UInt16Iterator: IteratorProtocol { + + let reader: BinaryReader + + private init(reader: BinaryReader) { + self.reader = reader + } + + mutating func next() -> UInt16? { + return try? reader.readUInt16() + } + + } + + func makeUInt16Iterator() -> UInt16Iterator { + return UInt16Iterator(reader: self) + } + +} diff --git a/Sources/CSV+init.swift b/Sources/CSV+init.swift index 471646e..624e949 100644 --- a/Sources/CSV+init.swift +++ b/Sources/CSV+init.swift @@ -14,7 +14,7 @@ extension CSV { path: String, hasHeaderRow: Bool = defaultHasHeaderRow, encoding: String.Encoding = defaultEncoding, - delimiter: CChar = defaultDelimiter, + delimiter: UnicodeScalar = defaultDelimiter, bufferSize: Int = defaultBufferSize) throws { @@ -33,7 +33,7 @@ extension CSV { url: URL, hasHeaderRow: Bool = defaultHasHeaderRow, encoding: String.Encoding = defaultEncoding, - delimiter: CChar = defaultDelimiter, + delimiter: UnicodeScalar = defaultDelimiter, bufferSize: Int = defaultBufferSize) throws { @@ -51,7 +51,7 @@ extension CSV { public convenience init( string: String, hasHeaderRow: Bool = defaultHasHeaderRow, - delimiter: CChar = defaultDelimiter, + delimiter: UnicodeScalar = defaultDelimiter, bufferSize: Int = defaultBufferSize) throws { diff --git a/Sources/CSV.swift b/Sources/CSV.swift index 795c566..608c5e2 100644 --- a/Sources/CSV.swift +++ b/Sources/CSV.swift @@ -8,13 +8,13 @@ import Foundation -private let LF: UInt32 = 0x0a //'\n' -private let CR: UInt32 = 0x0d //'\r' -private let DQUOTE: UInt32 = 0x22 //'"' +private let LF = UnicodeScalar(UInt32(0x0a)) //'\n' +private let CR = UnicodeScalar(UInt32(0x0d)) //'\r' +private let DQUOTE = UnicodeScalar(UInt32(0x22)) //'"' internal let defaultHasHeaderRow = false internal let defaultEncoding: String.Encoding = .utf8 -internal let defaultDelimiter: CChar = 0x2c //',' +internal let defaultDelimiter = UnicodeScalar(UInt32(0x2c)) //',' internal let defaultBufferSize = 8192 internal let utf8BOM: [UInt8] = [0xef, 0xbb, 0xbf] @@ -27,7 +27,7 @@ public class CSV: Sequence, IteratorProtocol { internal let stream: InputStream internal let encoding: String.Encoding - internal let delimiter: UInt32 + internal let delimiter: UnicodeScalar internal let bufferSize: Int internal var buffer: UnsafeMutablePointer! @@ -66,7 +66,7 @@ public class CSV: Sequence, IteratorProtocol { stream: InputStream, hasHeaderRow: Bool = defaultHasHeaderRow, encoding: String.Encoding = defaultEncoding, - delimiter: CChar = defaultDelimiter, + delimiter: UnicodeScalar = defaultDelimiter, bufferSize: Int = defaultBufferSize) throws { @@ -85,7 +85,7 @@ public class CSV: Sequence, IteratorProtocol { } self.bufferSize = bs - self.delimiter = UInt32(delimiter) + self.delimiter = UnicodeScalar(UInt32(delimiter)) let b = malloc(bufferSize) if b == nil { @@ -213,7 +213,7 @@ public class CSV: Sequence, IteratorProtocol { var escaping = false var quotationCount = 0 - var prev: UInt32 = 0 + var prev = UnicodeScalar(0) while true { if bufferOffset >= lastReadCount { @@ -236,26 +236,28 @@ public class CSV: Sequence, IteratorProtocol { } } - var c: UInt32 = 0 + var c = UnicodeScalar(0) switch encoding { case String.Encoding.utf16BigEndian: let _c = ReadBigInt16(base: buffer, byteOffset: bufferOffset) - c = UInt32(_c) + c = UnicodeScalar(UInt32(_c)) case String.Encoding.utf16LittleEndian: let _c = ReadLittleInt16(base: buffer, byteOffset: bufferOffset) - c = UInt32(_c) + c = UnicodeScalar(UInt32(_c)) case String.Encoding.utf32BigEndian: - c = ReadBigInt32(base: buffer, byteOffset: bufferOffset) + let _c = ReadBigInt32(base: buffer, byteOffset: bufferOffset) + c = UnicodeScalar(UInt32(_c)) case String.Encoding.utf32LittleEndian: - c = ReadLittleInt32(base: buffer, byteOffset: bufferOffset) + let _c = ReadLittleInt32(base: buffer, byteOffset: bufferOffset) + c = UnicodeScalar(UInt32(_c)) default: // multi-byte character encodings let _c = (buffer + bufferOffset)[0] - c = UInt32(_c) + c = UnicodeScalar(UInt32(_c)) } if c == DQUOTE { @@ -356,3 +358,121 @@ public class CSV: Sequence, IteratorProtocol { } } + +public struct CSVState: IteratorProtocol { + + private var it: T + private let delimiter: UnicodeScalar + + private var back: T.Element? = nil + + public init(it: inout T, delimiter: UnicodeScalar) { + self.it = it + self.delimiter = delimiter + } + + public mutating func next() -> [String]? { + return readRow() + } + + mutating func moveNext() -> T.Element? { + if back != nil { + defer { back = nil } + return back + } + return it.next() + } + + mutating func readRow() -> [String]? { + var next = moveNext() + if next == nil { + return nil + } + + var row = [String]() + while true { + var field: String + var end: Bool + if next == nil { + (field, end) = ("", true) + } + else if next == DQUOTE { + (field, end) = readField(quoted: true) + } + else { + back = next + (field, end) = readField(quoted: false) + } + row.append(field) + if end { + break + } + next = moveNext() + } + return row + } + + mutating func readField(quoted: Bool) -> (String, Bool) { + var next = moveNext() + + var field = "" + //var end = false + + while let c = next { + if quoted { + switch c { + case DQUOTE: + let n = moveNext() + if n == DQUOTE { + // ESC + field.append(c) + } + else if n == delimiter { + // END FIELD + return (field, false) + } + else if n == CR || n == LF { + if n == CR { + let nn = moveNext() + if nn != LF { + back = nn + } + } + // END ROW + return (field, true) + } + else { + // ERROR?? + field.append(c) + } + default: + field.append(c) + } + } + else { + switch c { + case CR: + let nn = moveNext() + if nn != LF { + back = nn + } + // END ROW + return (field, true) + case LF: + // END ROW + return (field, true) + case delimiter: + // END FIELD + return (field, false) + default: + field.append(c) + } + } + + next = moveNext() + } + + return (field, true) + } + +} diff --git a/Sources/UnicodeIterator.swift b/Sources/UnicodeIterator.swift new file mode 100644 index 0000000..4c7106b --- /dev/null +++ b/Sources/UnicodeIterator.swift @@ -0,0 +1,33 @@ +// +// UnicodeIterator.swift +// CSV +// +// Created by Yasuhiro Hatta on 2016/06/20. +// Copyright © 2016年 yaslab. All rights reserved. +// + +import Foundation + +struct UnicodeIterator< + Input: IteratorProtocol, + InputEncoding: UnicodeCodec + where InputEncoding.CodeUnit == Input.Element> + : IteratorProtocol { + + var input: Input + var inputEncoding: InputEncoding + + init(input: Input, inputEncoding: InputEncoding.Type) { + self.input = input + self.inputEncoding = inputEncoding.init() + } + + mutating func next() -> UnicodeScalar? { + switch inputEncoding.decode(&input) { + case .scalarValue(let c): return c + case .emptyInput: return nil + case .error: return nil + } + } + +} diff --git a/Tests/CSV/CSVTests.swift b/Tests/CSV/CSVTests.swift index fd97491..7290937 100644 --- a/Tests/CSV/CSVTests.swift +++ b/Tests/CSV/CSVTests.swift @@ -161,4 +161,18 @@ class CSVTests: XCTestCase { XCTAssertEqual(i, 2) } + func testCSVState1() { + var it = "あ,い1,\"う\",えお\n,,x,".unicodeScalars.makeIterator() + var state = CSVState(it: &it, delimiter: defaultDelimiter) + + var rows = [[String]]() + + while let row = state.next() { + rows.append(row) + } + XCTAssertEqual(rows.count, 2) + XCTAssertEqual(rows[0], ["あ", "い1", "う", "えお"]) + XCTAssertEqual(rows[1], ["", "", "x", ""]) + } + }