Add new algorithm

This commit is contained in:
Yasuhiro Hatta 2016-06-21 01:23:20 +09:00
parent 69ea597f6d
commit db64ab561f
6 changed files with 351 additions and 17 deletions

View File

@ -43,6 +43,14 @@
0E9317DE1D0DBCC500AC20A0 /* ReadmeTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E9317DD1D0DBCC500AC20A0 /* ReadmeTests.swift */; };
0E9317DF1D0DBCC500AC20A0 /* ReadmeTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E9317DD1D0DBCC500AC20A0 /* ReadmeTests.swift */; };
0E9317E01D0DBCC500AC20A0 /* ReadmeTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0E9317DD1D0DBCC500AC20A0 /* ReadmeTests.swift */; };
0EA2AB7C1D183B45003EC967 /* BinaryReader.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EA2AB7B1D183B45003EC967 /* BinaryReader.swift */; };
0EA2AB7D1D183B45003EC967 /* BinaryReader.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EA2AB7B1D183B45003EC967 /* BinaryReader.swift */; };
0EA2AB7E1D183B45003EC967 /* BinaryReader.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EA2AB7B1D183B45003EC967 /* BinaryReader.swift */; };
0EA2AB7F1D183B45003EC967 /* BinaryReader.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EA2AB7B1D183B45003EC967 /* BinaryReader.swift */; };
0EA2AB811D183BA9003EC967 /* UnicodeIterator.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EA2AB801D183BA9003EC967 /* UnicodeIterator.swift */; };
0EA2AB821D183BA9003EC967 /* UnicodeIterator.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EA2AB801D183BA9003EC967 /* UnicodeIterator.swift */; };
0EA2AB831D183BA9003EC967 /* UnicodeIterator.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EA2AB801D183BA9003EC967 /* UnicodeIterator.swift */; };
0EA2AB841D183BA9003EC967 /* UnicodeIterator.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0EA2AB801D183BA9003EC967 /* UnicodeIterator.swift */; };
/* End PBXBuildFile section */
/* Begin PBXContainerItemProxy section */
@ -88,6 +96,8 @@
0E9317D31D0DB2F200AC20A0 /* CSV+init.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "CSV+init.swift"; sourceTree = "<group>"; };
0E9317D81D0DB30800AC20A0 /* CSV+subscript.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "CSV+subscript.swift"; sourceTree = "<group>"; };
0E9317DD1D0DBCC500AC20A0 /* ReadmeTests.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ReadmeTests.swift; sourceTree = "<group>"; };
0EA2AB7B1D183B45003EC967 /* BinaryReader.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BinaryReader.swift; sourceTree = "<group>"; };
0EA2AB801D183BA9003EC967 /* UnicodeIterator.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = UnicodeIterator.swift; sourceTree = "<group>"; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
@ -179,6 +189,8 @@
0E7E8C9E1D0BC7F10057A1C1 /* CSVError.swift */,
0E7E8C9F1D0BC7F10057A1C1 /* CSVVersion.h */,
0E7E8CAC1D0BC8610057A1C1 /* Info.plist */,
0EA2AB7B1D183B45003EC967 /* BinaryReader.swift */,
0EA2AB801D183BA9003EC967 /* UnicodeIterator.swift */,
);
path = Sources;
sourceTree = "<group>";
@ -480,10 +492,12 @@
buildActionMask = 2147483647;
files = (
0E9317D51D0DB2F200AC20A0 /* CSV+init.swift in Sources */,
0EA2AB821D183BA9003EC967 /* UnicodeIterator.swift in Sources */,
0E9317DA1D0DB30800AC20A0 /* CSV+subscript.swift in Sources */,
0E7E8CA11D0BC7F10057A1C1 /* CSV.swift in Sources */,
0E7E8CA21D0BC7F10057A1C1 /* CSVError.swift in Sources */,
0E7E8CA01D0BC7F10057A1C1 /* ByteOrder.swift in Sources */,
0EA2AB7D1D183B45003EC967 /* BinaryReader.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@ -502,10 +516,12 @@
buildActionMask = 2147483647;
files = (
0E9317D71D0DB2F200AC20A0 /* CSV+init.swift in Sources */,
0EA2AB841D183BA9003EC967 /* UnicodeIterator.swift in Sources */,
0E9317DC1D0DB30800AC20A0 /* CSV+subscript.swift in Sources */,
0E7E8CBE1D0BC9D70057A1C1 /* CSV.swift in Sources */,
0E7E8CBF1D0BC9D70057A1C1 /* CSVError.swift in Sources */,
0E7E8CBD1D0BC9D70057A1C1 /* ByteOrder.swift in Sources */,
0EA2AB7F1D183B45003EC967 /* BinaryReader.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@ -514,10 +530,12 @@
buildActionMask = 2147483647;
files = (
0E9317D41D0DB2F200AC20A0 /* CSV+init.swift in Sources */,
0EA2AB811D183BA9003EC967 /* UnicodeIterator.swift in Sources */,
0E9317D91D0DB30800AC20A0 /* CSV+subscript.swift in Sources */,
0E7E8CE01D0BCA8E0057A1C1 /* CSV.swift in Sources */,
0E7E8CE11D0BCA8E0057A1C1 /* CSVError.swift in Sources */,
0E7E8CDF1D0BCA8E0057A1C1 /* ByteOrder.swift in Sources */,
0EA2AB7C1D183B45003EC967 /* BinaryReader.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@ -536,10 +554,12 @@
buildActionMask = 2147483647;
files = (
0E9317D61D0DB2F200AC20A0 /* CSV+init.swift in Sources */,
0EA2AB831D183BA9003EC967 /* UnicodeIterator.swift in Sources */,
0E9317DB1D0DB30800AC20A0 /* CSV+subscript.swift in Sources */,
0E7E8D001D0BCDCF0057A1C1 /* CSV.swift in Sources */,
0E7E8D011D0BCDCF0057A1C1 /* CSVError.swift in Sources */,
0E7E8CFF1D0BCDCF0057A1C1 /* ByteOrder.swift in Sources */,
0EA2AB7E1D183B45003EC967 /* BinaryReader.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};

147
Sources/BinaryReader.swift Normal file
View File

@ -0,0 +1,147 @@
//
// BinaryReader.swift
// CSV
//
// Created by Yasuhiro Hatta on 2016/06/20.
// Copyright © 2016 yaslab. All rights reserved.
//
import Foundation
/// Reads fixed-width unsigned integers from an `InputStream`.
///
/// Multi-byte values are decoded according to `endian`. The reader opens the
/// stream on creation when it has not been opened yet and, when
/// `closeOnDeinit` is `true`, closes it again when the reader is deallocated.
class BinaryReader {

    /// Byte order used to decode multi-byte integers.
    enum Endian {
        case big
        case little
    }

    let stream: InputStream
    let endian: Endian
    let closeOnDeinit: Bool

    /// Creates a reader on top of `stream`.
    ///
    /// - Parameters:
    ///   - stream: The stream to read from. It is opened here if its status
    ///     is still `.notOpen`.
    ///   - endian: Byte order of multi-byte values in the stream.
    ///   - closeOnDeinit: Whether the reader closes the stream when it is
    ///     deallocated.
    init(stream: InputStream, endian: Endian = .big, closeOnDeinit: Bool = true) {
        self.stream = stream
        self.endian = endian
        self.closeOnDeinit = closeOnDeinit

        if stream.streamStatus == .notOpen {
            stream.open()
        }
    }

    deinit {
        // A stream that was read to its end or that failed no longer reports
        // `.open`, but it still has to be closed. Only streams that were never
        // opened or are already closed are left alone.
        let status = stream.streamStatus
        if closeOnDeinit && status != .notOpen && status != .closed {
            stream.close()
        }
    }

    /// Reads exactly `count` bytes from the stream.
    ///
    /// `InputStream.read` may deliver fewer bytes than requested without the
    /// stream being exhausted, so reading continues until the request is
    /// satisfied.
    ///
    /// - Throws: `NSError` when the stream reports a read error, or when it
    ///   ends before `count` bytes were delivered.
    private func readBytes(count: Int) throws -> [UInt8] {
        var bytes = [UInt8]()
        while bytes.count < count {
            let wanted = count - bytes.count
            var chunk = [UInt8](repeating: 0, count: wanted)
            let length = stream.read(&chunk, maxLength: wanted)
            if length < 0 {
                // I/O error reported by the stream.
                throw NSError(domain: "", code: 0, userInfo: nil)
            }
            if length == 0 {
                // End of stream before the value was complete.
                throw NSError(domain: "", code: 0, userInfo: nil)
            }
            bytes += chunk[0..<length]
        }
        return bytes
    }

    /// Reads a single byte.
    ///
    /// - Throws: `NSError` on a read error or at the end of the stream.
    func readUInt8() throws -> UInt8 {
        let bytes = try readBytes(count: 1)
        return bytes[0]
    }

    /// Reads two bytes and combines them according to `endian`.
    ///
    /// - Throws: `NSError` on a read error or when fewer than two bytes remain.
    func readUInt16() throws -> UInt16 {
        let bytes = try readBytes(count: 2)
        // Assemble the value arithmetically; reinterpreting the byte buffer
        // as a wider integer pointer has no alignment guarantee.
        switch endian {
        case .big:
            return (UInt16(bytes[0]) << 8) | UInt16(bytes[1])
        case .little:
            return (UInt16(bytes[1]) << 8) | UInt16(bytes[0])
        }
    }

    /// Reads four bytes and combines them according to `endian`.
    ///
    /// - Throws: `NSError` on a read error or when fewer than four bytes
    ///   remain.
    func readUInt32() throws -> UInt32 {
        let bytes = try readBytes(count: 4)
        let b0 = UInt32(bytes[0])
        let b1 = UInt32(bytes[1])
        let b2 = UInt32(bytes[2])
        let b3 = UInt32(bytes[3])
        switch endian {
        case .big:
            return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3
        case .little:
            return (b3 << 24) | (b2 << 16) | (b1 << 8) | b0
        }
    }

}
extension BinaryReader {

    /// Iterates over the reader's stream one byte at a time.
    ///
    /// Iteration ends as soon as `readUInt8()` throws, which covers both the
    /// end of the stream and read errors; the error itself is discarded.
    struct UInt8Iterator: Sequence, IteratorProtocol {

        let reader: BinaryReader

        private init(reader: BinaryReader) {
            self.reader = reader
        }

        /// Returns the next byte, or `nil` once reading fails.
        mutating func next() -> UInt8? {
            do {
                return try reader.readUInt8()
            } catch {
                return nil
            }
        }

    }

    /// Creates a byte iterator that reads from this reader.
    func makeUInt8Iterator() -> UInt8Iterator {
        let iterator = UInt8Iterator(reader: self)
        return iterator
    }

}
extension BinaryReader {

    /// Iterates over the reader's stream one 16-bit value at a time, decoded
    /// with the reader's byte order.
    ///
    /// Conforms to `Sequence` as well as `IteratorProtocol`, matching
    /// `UInt8Iterator`, so it can be used directly in `for`-`in` loops.
    /// Iteration ends as soon as `readUInt16()` throws; the error itself is
    /// discarded.
    struct UInt16Iterator: Sequence, IteratorProtocol {

        let reader: BinaryReader

        private init(reader: BinaryReader) {
            self.reader = reader
        }

        /// Returns the next 16-bit value, or `nil` once reading fails.
        mutating func next() -> UInt16? {
            return try? reader.readUInt16()
        }

    }

    /// Creates a 16-bit iterator that reads from this reader.
    func makeUInt16Iterator() -> UInt16Iterator {
        return UInt16Iterator(reader: self)
    }

}

View File

@ -14,7 +14,7 @@ extension CSV {
path: String,
hasHeaderRow: Bool = defaultHasHeaderRow,
encoding: String.Encoding = defaultEncoding,
delimiter: CChar = defaultDelimiter,
delimiter: UnicodeScalar = defaultDelimiter,
bufferSize: Int = defaultBufferSize)
throws
{
@ -33,7 +33,7 @@ extension CSV {
url: URL,
hasHeaderRow: Bool = defaultHasHeaderRow,
encoding: String.Encoding = defaultEncoding,
delimiter: CChar = defaultDelimiter,
delimiter: UnicodeScalar = defaultDelimiter,
bufferSize: Int = defaultBufferSize)
throws
{
@ -51,7 +51,7 @@ extension CSV {
public convenience init(
string: String,
hasHeaderRow: Bool = defaultHasHeaderRow,
delimiter: CChar = defaultDelimiter,
delimiter: UnicodeScalar = defaultDelimiter,
bufferSize: Int = defaultBufferSize)
throws
{

View File

@ -8,13 +8,13 @@
import Foundation
private let LF: UInt32 = 0x0a //'\n'
private let CR: UInt32 = 0x0d //'\r'
private let DQUOTE: UInt32 = 0x22 //'"'
private let LF = UnicodeScalar(UInt32(0x0a)) //'\n'
private let CR = UnicodeScalar(UInt32(0x0d)) //'\r'
private let DQUOTE = UnicodeScalar(UInt32(0x22)) //'"'
internal let defaultHasHeaderRow = false
internal let defaultEncoding: String.Encoding = .utf8
internal let defaultDelimiter: CChar = 0x2c //','
internal let defaultDelimiter = UnicodeScalar(UInt32(0x2c)) //','
internal let defaultBufferSize = 8192
internal let utf8BOM: [UInt8] = [0xef, 0xbb, 0xbf]
@ -27,7 +27,7 @@ public class CSV: Sequence, IteratorProtocol {
internal let stream: InputStream
internal let encoding: String.Encoding
internal let delimiter: UInt32
internal let delimiter: UnicodeScalar
internal let bufferSize: Int
internal var buffer: UnsafeMutablePointer<UInt8>!
@ -66,7 +66,7 @@ public class CSV: Sequence, IteratorProtocol {
stream: InputStream,
hasHeaderRow: Bool = defaultHasHeaderRow,
encoding: String.Encoding = defaultEncoding,
delimiter: CChar = defaultDelimiter,
delimiter: UnicodeScalar = defaultDelimiter,
bufferSize: Int = defaultBufferSize)
throws
{
@ -85,7 +85,7 @@ public class CSV: Sequence, IteratorProtocol {
}
self.bufferSize = bs
self.delimiter = UInt32(delimiter)
self.delimiter = UnicodeScalar(UInt32(delimiter))
let b = malloc(bufferSize)
if b == nil {
@ -213,7 +213,7 @@ public class CSV: Sequence, IteratorProtocol {
var escaping = false
var quotationCount = 0
var prev: UInt32 = 0
var prev = UnicodeScalar(0)
while true {
if bufferOffset >= lastReadCount {
@ -236,26 +236,28 @@ public class CSV: Sequence, IteratorProtocol {
}
}
var c: UInt32 = 0
var c = UnicodeScalar(0)
switch encoding {
case String.Encoding.utf16BigEndian:
let _c = ReadBigInt16(base: buffer, byteOffset: bufferOffset)
c = UInt32(_c)
c = UnicodeScalar(UInt32(_c))
case String.Encoding.utf16LittleEndian:
let _c = ReadLittleInt16(base: buffer, byteOffset: bufferOffset)
c = UInt32(_c)
c = UnicodeScalar(UInt32(_c))
case String.Encoding.utf32BigEndian:
c = ReadBigInt32(base: buffer, byteOffset: bufferOffset)
let _c = ReadBigInt32(base: buffer, byteOffset: bufferOffset)
c = UnicodeScalar(UInt32(_c))
case String.Encoding.utf32LittleEndian:
c = ReadLittleInt32(base: buffer, byteOffset: bufferOffset)
let _c = ReadLittleInt32(base: buffer, byteOffset: bufferOffset)
c = UnicodeScalar(UInt32(_c))
default: // multi-byte character encodings
let _c = (buffer + bufferOffset)[0]
c = UInt32(_c)
c = UnicodeScalar(UInt32(_c))
}
if c == DQUOTE {
@ -356,3 +358,121 @@ public class CSV: Sequence, IteratorProtocol {
}
}
/// Incremental CSV parser over an iterator of Unicode scalars.
///
/// Every call to `next()` consumes the scalars of one row and returns its
/// fields. Fields are separated by `delimiter`; rows end at LF, CR, CRLF or
/// the end of the input. A field that starts with a double quote is read in
/// quoted mode, where delimiters and line breaks are literal and a doubled
/// double quote stands for a single one.
public struct CSVState<T: IteratorProtocol where T.Element == UnicodeScalar>: IteratorProtocol {

    private var it: T
    private let delimiter: UnicodeScalar

    /// One-scalar pushback slot: a scalar that was read ahead and has to be
    /// delivered again by the next `moveNext()`.
    private var back: T.Element? = nil

    /// Creates a parser.
    ///
    /// - Parameters:
    ///   - it: Source of scalars. Its current value is stored in the parser;
    ///     the caller's variable is never written back.
    ///   - delimiter: Scalar that separates fields.
    public init(it: inout T, delimiter: UnicodeScalar) {
        self.it = it
        self.delimiter = delimiter
    }

    /// Returns the fields of the next row, or `nil` when the input is
    /// exhausted.
    public mutating func next() -> [String]? {
        return readRow()
    }

    /// Returns the pushed-back scalar if there is one, otherwise the next
    /// scalar from the underlying iterator.
    mutating func moveNext() -> T.Element? {
        if let pending = back {
            back = nil
            return pending
        }
        return it.next()
    }

    /// Reads one row.
    ///
    /// - Returns: The row's fields, or `nil` if no scalar is left.
    mutating func readRow() -> [String]? {
        var next = moveNext()
        if next == nil {
            return nil
        }

        var row = [String]()
        while true {
            let result: (String, Bool)
            if next == nil {
                // The input ended right after a delimiter: the row ends with
                // an empty field.
                result = ("", true)
            }
            else if next == DQUOTE {
                result = readField(quoted: true)
            }
            else {
                // The scalar belongs to the field itself; hand it back.
                back = next
                result = readField(quoted: false)
            }
            row.append(result.0)
            if result.1 {
                break
            }
            next = moveNext()
        }

        return row
    }

    /// Reads one field.
    ///
    /// - Parameter quoted: `true` when the opening double quote has already
    ///   been consumed.
    /// - Returns: The field's text and whether the field ended its row.
    mutating func readField(quoted: Bool) -> (String, Bool) {
        var field = ""

        while let c = moveNext() {
            if quoted {
                if c != DQUOTE {
                    field.append(c)
                    continue
                }

                // A double quote inside a quoted field: what it means depends
                // on the scalar that follows.
                guard let n = moveNext() else {
                    // Closing quote was the last scalar of the input.
                    return (field, true)
                }
                if n == DQUOTE {
                    // Escaped double quote.
                    field.append(c)
                }
                else if n == delimiter {
                    // End of field.
                    return (field, false)
                }
                else if n == CR || n == LF {
                    if n == CR {
                        skipLineFeedAfterCarriageReturn()
                    }
                    // End of row.
                    return (field, true)
                }
                else {
                    // Malformed input (stray quote inside a quoted field):
                    // keep both scalars literally instead of losing one.
                    field.append(c)
                    field.append(n)
                }
            }
            else {
                switch c {
                case CR:
                    skipLineFeedAfterCarriageReturn()
                    // End of row.
                    return (field, true)
                case LF:
                    // End of row.
                    return (field, true)
                case delimiter:
                    // End of field.
                    return (field, false)
                default:
                    field.append(c)
                }
            }
        }

        // Input exhausted in the middle of the field.
        return (field, true)
    }

    /// After a CR, consumes a directly following LF so that CRLF counts as a
    /// single line break; any other scalar is pushed back.
    private mutating func skipLineFeedAfterCarriageReturn() {
        let following = moveNext()
        if following != LF {
            back = following
        }
    }

}

View File

@ -0,0 +1,33 @@
//
// UnicodeIterator.swift
// CSV
//
// Created by Yasuhiro Hatta on 2016/06/20.
// Copyright © 2016 yaslab. All rights reserved.
//
import Foundation
/// Adapts an iterator of code units into an iterator of Unicode scalars by
/// decoding the units with `InputEncoding`.
struct UnicodeIterator<
    Input: IteratorProtocol,
    InputEncoding: UnicodeCodec
    where InputEncoding.CodeUnit == Input.Element>
    : IteratorProtocol {

    var input: Input
    var inputEncoding: InputEncoding

    /// Creates an iterator.
    ///
    /// - Parameters:
    ///   - input: Source of code units.
    ///   - inputEncoding: Codec type; a fresh instance of it is created to
    ///     decode `input`.
    init(input: Input, inputEncoding: InputEncoding.Type) {
        self.inputEncoding = inputEncoding.init()
        self.input = input
    }

    /// Returns the next decoded scalar.
    ///
    /// Returns `nil` both when the input is exhausted and when the codec
    /// reports a decoding error; the two cases are not distinguished.
    mutating func next() -> UnicodeScalar? {
        if case .scalarValue(let scalar) = inputEncoding.decode(&input) {
            return scalar
        }
        return nil
    }

}

View File

@ -161,4 +161,18 @@ class CSVTests: XCTestCase {
XCTAssertEqual(i, 2)
}
/// Parses two rows containing non-ASCII fields, a quoted empty field, empty
/// unquoted fields and a trailing delimiter at the end of the input.
func testCSVState1() {
    var it = "あ,い1,\"\",えお\n,,x,".unicodeScalars.makeIterator()
    var state = CSVState(it: &it, delimiter: defaultDelimiter)

    var rows = [[String]]()
    while let row = state.next() {
        rows.append(row)
    }

    XCTAssertEqual(rows.count, 2)
    // The first field of the input is "あ", not an empty string.
    XCTAssertEqual(rows[0], ["あ", "い1", "", "えお"])
    XCTAssertEqual(rows[1], ["", "", "x", ""])
}
}