CSV.swift/Sources/CSV/CSVReader.swift

399 lines
13 KiB
Swift

//
// CSVReader.swift
// CSV
//
// Created by Yasuhiro Hatta on 2016/06/11.
// Copyright © 2016 yaslab. All rights reserved.
//
import Foundation
/// Line feed (U+000A). The parser treats it as a row terminator.
let LF: UnicodeScalar = "\n"
/// Carriage return (U+000D). The parser treats it as a row terminator,
/// optionally followed by `LF`.
let CR: UnicodeScalar = "\r"
/// Double quote (U+0022). Opens and closes quoted fields.
let DQUOTE: UnicodeScalar = "\""
/// A single double-quote character as a `String`.
let DQUOTE_STR: String = "\""
/// Two consecutive double-quote characters as a `String`.
let DQUOTE2_STR: String = "\"\""
/// Reads CSV text one row at a time from a sequence of Unicode scalars.
///
/// Rows are produced by `next()`. The most recently read row is kept in
/// `currentRow`, and the header row (when requested) in `headerRow`.
public class CSVReader {

    /// Parsing options used by a `CSVReader`.
    public struct Configuration {

        /// `true` if the CSV has a header row, otherwise `false`. Default: `false`.
        public var hasHeaderRow: Bool

        /// `true` to skip whitespace around fields while parsing, otherwise `false`.
        /// Default: `false`.
        public var trimFields: Bool

        /// The scalar that separates fields within a row. Default: `","`.
        public var delimiter: UnicodeScalar

        /// The characters treated as whitespace when `trimFields` is `true`.
        /// The initializer removes `delimiter` from the set it is given.
        public var whitespaces: CharacterSet

        /// Creates a configuration.
        ///
        /// - parameter hasHeaderRow: Whether the first row is a header row.
        /// - parameter trimFields: Whether whitespace around fields is skipped.
        /// - parameter delimiter: The field separator.
        /// - parameter whitespaces: The characters to treat as whitespace. If it
        ///   contains `delimiter`, that scalar is removed so a delimiter is
        ///   never skipped as whitespace.
        internal init(
            hasHeaderRow: Bool,
            trimFields: Bool,
            delimiter: UnicodeScalar,
            whitespaces: CharacterSet
        ) {
            var trimmable = whitespaces
            _ = trimmable.remove(delimiter)

            self.hasHeaderRow = hasHeaderRow
            self.trimFields = trimFields
            self.delimiter = delimiter
            self.whitespaces = trimmable
        }
    }

    /// The type-erased source of Unicode scalars being parsed.
    fileprivate var iterator: AnyIterator<UnicodeScalar>

    /// The options this reader was created with.
    public let configuration: Configuration

    /// The most recent error handed to this reader's error handler, if any.
    public fileprivate(set) var error: Error?

    /// A single pushed-back scalar that the next read returns before
    /// pulling from `iterator` again.
    fileprivate var back: UnicodeScalar?

    /// The number of rows read so far (incremented after each row).
    fileprivate var currentRowIndex: Int = 0

    /// The index of the field being read within the current row.
    fileprivate var currentFieldIndex: Int = 0

    /// CSV header row. To set a value for this property,
    /// you set `true` to `hasHeaderRow` in initializer.
    public private(set) var headerRow: [String]?

    /// The row most recently read from the input, or `nil` if none has been read.
    public fileprivate(set) var currentRow: [String]?

    /// Creates a reader over `iterator` and, if `configuration.hasHeaderRow`
    /// is `true`, immediately reads the first row into `headerRow`.
    ///
    /// - parameter iterator: The source of Unicode scalars to parse.
    /// - parameter configuration: The parsing options.
    /// - throws: `CSVError.cannotReadHeaderRow` if a header row was requested
    ///   but no row could be read.
    internal init<T: IteratorProtocol>(
        iterator: T,
        configuration: Configuration
    ) throws where T.Element == UnicodeScalar {
        self.iterator = AnyIterator(iterator)
        self.configuration = configuration

        if configuration.hasHeaderRow {
            guard let header = readRow() else {
                throw CSVError.cannotReadHeaderRow
            }
            headerRow = header
        }
    }
}
extension CSVReader {
/// Default for the `hasHeaderRow` initializer parameter.
public static let defaultHasHeaderRow = false
/// Default for the `trimFields` initializer parameter.
public static let defaultTrimFields = false
/// Default for the `delimiter` initializer parameter: a comma.
public static let defaultDelimiter: UnicodeScalar = ","
/// Default for the `whitespaces` initializer parameter.
public static let defaultWhitespaces = CharacterSet.whitespaces
/// Create an instance with `InputStream`, decoding 8-bit code units.
///
/// - parameter stream: An `InputStream` object. If the stream is not open,
///                     initializer opens automatically.
/// - parameter codecType: A `UnicodeCodec` type for `stream`.
/// - parameter hasHeaderRow: `true` if the CSV has a header row, otherwise `false`. Default: `false`.
/// - parameter trimFields: `true` to skip whitespace around fields, otherwise `false`. Default: `false`.
/// - parameter delimiter: Default: `","`.
/// - parameter whitespaces: The characters treated as whitespace when `trimFields` is `true`.
///                          Default: `.whitespaces`.
/// - throws: Any error thrown while creating the `BinaryReader`, or by the
///           designated initializer when the header row cannot be read.
public convenience init<T: UnicodeCodec>(
    stream: InputStream,
    codecType: T.Type,
    hasHeaderRow: Bool = defaultHasHeaderRow,
    trimFields: Bool = defaultTrimFields,
    delimiter: UnicodeScalar = defaultDelimiter,
    whitespaces: CharacterSet = defaultWhitespaces
) throws where T.CodeUnit == UInt8 {
    let options = Configuration(
        hasHeaderRow: hasHeaderRow,
        trimFields: trimFields,
        delimiter: delimiter,
        whitespaces: whitespaces
    )
    let binaryReader = try BinaryReader(stream: stream, endian: .unknown, closeOnDeinit: true)
    let codeUnits = binaryReader.makeUInt8Iterator()
    let scalars = UnicodeIterator(input: codeUnits, inputEncodingType: codecType)

    try self.init(iterator: scalars, configuration: options)

    // `self` cannot be captured before the delegated initializer returns,
    // so the handlers are installed afterwards.
    // NOTE(review): the header row (if requested) has already been read by
    // this point, so errors raised while reading it are presumably not
    // forwarded to `error` - confirm.
    codeUnits.errorHandler = { [unowned self] error in self.errorHandler(error: error) }
    scalars.errorHandler = { [unowned self] error in self.errorHandler(error: error) }
}
/// Create an instance with `InputStream`, decoding 16-bit code units.
///
/// - parameter stream: An `InputStream` object. If the stream is not open,
///                     initializer opens automatically.
/// - parameter codecType: A `UnicodeCodec` type for `stream`.
/// - parameter endian: Endian to use when reading a stream. Default: `.big`.
/// - parameter hasHeaderRow: `true` if the CSV has a header row, otherwise `false`. Default: `false`.
/// - parameter trimFields: `true` to skip whitespace around fields, otherwise `false`. Default: `false`.
/// - parameter delimiter: Default: `","`.
/// - parameter whitespaces: The characters treated as whitespace when `trimFields` is `true`.
///                          Default: `.whitespaces`.
/// - throws: Any error thrown while creating the `BinaryReader`, or by the
///           designated initializer when the header row cannot be read.
public convenience init<T: UnicodeCodec>(
    stream: InputStream,
    codecType: T.Type,
    endian: Endian = .big,
    hasHeaderRow: Bool = defaultHasHeaderRow,
    trimFields: Bool = defaultTrimFields,
    delimiter: UnicodeScalar = defaultDelimiter,
    whitespaces: CharacterSet = defaultWhitespaces
) throws where T.CodeUnit == UInt16 {
    let options = Configuration(
        hasHeaderRow: hasHeaderRow,
        trimFields: trimFields,
        delimiter: delimiter,
        whitespaces: whitespaces
    )
    let binaryReader = try BinaryReader(stream: stream, endian: endian, closeOnDeinit: true)
    let codeUnits = binaryReader.makeUInt16Iterator()
    let scalars = UnicodeIterator(input: codeUnits, inputEncodingType: codecType)

    try self.init(iterator: scalars, configuration: options)

    // `self` cannot be captured before the delegated initializer returns,
    // so the handlers are installed afterwards.
    codeUnits.errorHandler = { [unowned self] error in self.errorHandler(error: error) }
    scalars.errorHandler = { [unowned self] error in self.errorHandler(error: error) }
}
/// Create an instance with `InputStream`, decoding 32-bit code units.
///
/// - parameter stream: An `InputStream` object. If the stream is not open,
///                     initializer opens automatically.
/// - parameter codecType: A `UnicodeCodec` type for `stream`.
/// - parameter endian: Endian to use when reading a stream. Default: `.big`.
/// - parameter hasHeaderRow: `true` if the CSV has a header row, otherwise `false`. Default: `false`.
/// - parameter trimFields: `true` to skip whitespace around fields, otherwise `false`. Default: `false`.
/// - parameter delimiter: Default: `","`.
/// - parameter whitespaces: The characters treated as whitespace when `trimFields` is `true`.
///                          Default: `.whitespaces`.
/// - throws: Any error thrown while creating the `BinaryReader`, or by the
///           designated initializer when the header row cannot be read.
public convenience init<T: UnicodeCodec>(
    stream: InputStream,
    codecType: T.Type,
    endian: Endian = .big,
    hasHeaderRow: Bool = defaultHasHeaderRow,
    trimFields: Bool = defaultTrimFields,
    delimiter: UnicodeScalar = defaultDelimiter,
    whitespaces: CharacterSet = defaultWhitespaces
) throws where T.CodeUnit == UInt32 {
    let options = Configuration(
        hasHeaderRow: hasHeaderRow,
        trimFields: trimFields,
        delimiter: delimiter,
        whitespaces: whitespaces
    )
    let binaryReader = try BinaryReader(stream: stream, endian: endian, closeOnDeinit: true)
    let codeUnits = binaryReader.makeUInt32Iterator()
    let scalars = UnicodeIterator(input: codeUnits, inputEncodingType: codecType)

    try self.init(iterator: scalars, configuration: options)

    // `self` cannot be captured before the delegated initializer returns,
    // so the handlers are installed afterwards.
    codeUnits.errorHandler = { [unowned self] error in self.errorHandler(error: error) }
    scalars.errorHandler = { [unowned self] error in self.errorHandler(error: error) }
}
/// Create an instance with `InputStream`, decoding the stream as UTF-8.
///
/// - parameter stream: An `InputStream` object. If the stream is not open,
///                     initializer opens automatically.
/// - parameter hasHeaderRow: `true` if the CSV has a header row, otherwise `false`. Default: `false`.
/// - parameter trimFields: `true` to skip whitespace around fields, otherwise `false`. Default: `false`.
/// - parameter delimiter: Default: `","`.
/// - parameter whitespaces: The characters treated as whitespace when `trimFields` is `true`.
///                          Default: `.whitespaces`.
/// - throws: Whatever the `codecType`-based initializer it forwards to throws.
public convenience init(
    stream: InputStream,
    hasHeaderRow: Bool = defaultHasHeaderRow,
    trimFields: Bool = defaultTrimFields,
    delimiter: UnicodeScalar = defaultDelimiter,
    whitespaces: CharacterSet = defaultWhitespaces
) throws {
    // Forward to the 8-bit code unit initializer with the UTF-8 codec.
    try self.init(stream: stream,
                  codecType: UTF8.self,
                  hasHeaderRow: hasHeaderRow,
                  trimFields: trimFields,
                  delimiter: delimiter,
                  whitespaces: whitespaces)
}
/// Create an instance with CSV string.
///
/// - parameter string: A CSV string. Its Unicode scalars are parsed directly.
/// - parameter hasHeaderRow: `true` if the CSV has a header row, otherwise `false`. Default: `false`.
/// - parameter trimFields: `true` to skip whitespace around fields, otherwise `false`. Default: `false`.
/// - parameter delimiter: Default: `","`.
/// - parameter whitespaces: The characters treated as whitespace when `trimFields` is `true`.
///                          Default: `.whitespaces`.
/// - throws: Whatever the designated initializer throws when the header row
///           cannot be read.
public convenience init(
    string: String,
    hasHeaderRow: Bool = defaultHasHeaderRow,
    trimFields: Bool = defaultTrimFields,
    delimiter: UnicodeScalar = defaultDelimiter,
    whitespaces: CharacterSet = defaultWhitespaces
) throws {
    let options = Configuration(
        hasHeaderRow: hasHeaderRow,
        trimFields: trimFields,
        delimiter: delimiter,
        whitespaces: whitespaces
    )
    let scalars = string.unicodeScalars.makeIterator()
    try self.init(iterator: scalars, configuration: options)
}
/// Records `error` in the `error` property.
///
/// The stream-based initializers install this as the error callback of the
/// code unit iterator and of the Unicode iterator.
///
/// - parameter error: The error to record; it replaces any earlier one.
private func errorHandler(error: Error) {
    //configuration.fileInputErrorHandler?(error, currentRowIndex, currentFieldIndex)
    self.error = error
}
}
// MARK: - Parse CSV
extension CSVReader {
/// Reads the next row from the input.
///
/// Resets `currentFieldIndex`, then reads fields until one reports the end
/// of the row. On success `currentRowIndex` is incremented and the row is
/// stored in `currentRow`.
///
/// - returns: The fields of the row, or `nil` if no scalar is left to read.
fileprivate func readRow() -> [String]? {
    currentFieldIndex = 0

    // Nothing left to read: there is no further row.
    guard let first = moveNext() else {
        return nil
    }

    let shouldTrim = configuration.trimFields
    let blanks = configuration.whitespaces

    var fields: [String] = []
    var current: UnicodeScalar? = first
    var isLastField = false

    repeat {
        if shouldTrim {
            // Trim the leading spaces
            while let scalar = current, blanks.contains(scalar) {
                current = moveNext()
            }
        }

        var text: String
        if let scalar = current {
            if scalar == DQUOTE {
                // The opening quote is consumed; the field reader starts inside it.
                (text, isLastField) = readField(quoted: true)
            } else {
                // Hand the scalar back so the field reader sees it first.
                back = scalar
                (text, isLastField) = readField(quoted: false)
                if shouldTrim {
                    // Trim the trailing spaces
                    text = text.trimmingCharacters(in: blanks)
                }
            }
        } else {
            // Input ended right after a delimiter (or after leading
            // whitespace): the row ends with an empty field.
            text = ""
            isLastField = true
        }
        fields.append(text)

        if !isLastField {
            currentFieldIndex += 1
            current = moveNext()
        }
    } while !isLastField

    currentRowIndex += 1
    currentRow = fields
    return fields
}
/// Reads one field, starting at the next scalar returned by `moveNext()`.
///
/// For a quoted field the caller has already consumed the opening quote.
/// Inside quotes, `""` yields a literal `"`, and delimiters and line breaks
/// are kept as field content. A quote followed by a delimiter, a line
/// break or the end of input closes the field. When `trimFields` is on,
/// whitespace between a closing quote and what follows it is skipped.
///
/// A quote followed by any other scalar is malformed input. It is handled
/// leniently: the quote, any whitespace skipped after it, and the following
/// scalar are all kept as field content, so no input is lost.
///
/// - parameter quoted: `true` if the field was opened with a double quote.
/// - returns: The field text, and `true` if the field ended the row (line
///   break or end of input) or `false` if it ended at a delimiter.
private func readField(quoted: Bool) -> (String, Bool) {
    var fieldBuffer = String.UnicodeScalarView()

    // Called right after a CR was consumed: swallows a directly following
    // LF so that CR LF counts as one line break; anything else is pushed
    // back for the next read.
    func consumeLineFeedAfterCarriageReturn() {
        let following = moveNext()
        if following != LF {
            back = following
        }
    }

    while let c = moveNext() {
        if quoted {
            guard c == DQUOTE else {
                fieldBuffer.append(c)
                continue
            }

            var cNext = moveNext()
            // Whitespace skipped after the quote. It is remembered so it can
            // be restored if the quote turns out not to close the field.
            var skipped = String.UnicodeScalarView()
            if configuration.trimFields {
                // Trim the trailing spaces
                while let scalar = cNext, configuration.whitespaces.contains(scalar) {
                    skipped.append(scalar)
                    cNext = moveNext()
                }
            }

            guard let next = cNext else {
                // END ROW (end of input right after the closing quote)
                return (String(fieldBuffer), true)
            }

            if next == CR || next == LF {
                if next == CR {
                    consumeLineFeedAfterCarriageReturn()
                }
                // END ROW
                return (String(fieldBuffer), true)
            } else if next == configuration.delimiter {
                // END FIELD
                return (String(fieldBuffer), false)
            } else if next == DQUOTE {
                // ESC
                fieldBuffer.append(DQUOTE)
            } else {
                // Malformed: a quote inside a quoted field that is neither
                // doubled nor followed by a delimiter or line break. Keep
                // everything that was consumed instead of dropping it.
                fieldBuffer.append(c)
                fieldBuffer.append(contentsOf: skipped)
                fieldBuffer.append(next)
            }
        } else {
            if c == CR || c == LF {
                if c == CR {
                    consumeLineFeedAfterCarriageReturn()
                }
                // END ROW
                return (String(fieldBuffer), true)
            } else if c == configuration.delimiter {
                // END FIELD
                return (String(fieldBuffer), false)
            } else {
                fieldBuffer.append(c)
            }
        }
    }

    // END FILE
    return (String(fieldBuffer), true)
}
/// Returns the next scalar to parse.
///
/// A scalar stored in `back` takes priority and is cleared once returned;
/// otherwise the next scalar is pulled from `iterator`.
///
/// - returns: The next scalar, or `nil` when nothing is pushed back and
///   the iterator is exhausted.
private func moveNext() -> UnicodeScalar? {
    guard let pending = back else {
        return iterator.next()
    }
    back = nil
    return pending
}
}
//extension CSVReader {
//
// public func enumerateRows(_ block: ((CSVReader, inout Bool) throws -> Void)) throws {
// var stop = false
// while next() != nil {
// try block(self, &stop)
// if stop {
// break
// }
// }
// if let error = error {
// throw error
// }
// }
//
//}
extension CSVReader: IteratorProtocol {

    /// Advances to the next row and returns its fields.
    ///
    /// The result may be ignored when the row is read through `currentRow`
    /// or the header-based subscript instead.
    ///
    /// - returns: The fields of the next row, or `nil` when no input is left.
    @discardableResult
    public func next() -> [String]? {
        let row = readRow()
        return row
    }
}
extension CSVReader {

    /// Returns the field of the current row in the column whose header is `key`.
    ///
    /// Traps with `fatalError` if the reader has no header row, or if `key`
    /// is a known header but no row has been read yet.
    ///
    /// - parameter key: A header name. The first matching column is used.
    /// - returns: The field value, or `nil` if `key` is not in the header row
    ///   or the current row has no field at that column.
    public subscript(key: String) -> String? {
        guard let header = headerRow else {
            fatalError("CSVReader.headerRow must not be nil")
        }
        // An unknown key yields nil before the current row is inspected.
        guard let column = header.firstIndex(of: key) else {
            return nil
        }
        guard let row = currentRow else {
            fatalError("CSVReader.currentRow must not be nil")
        }
        // A row may have fewer fields than the header.
        return column < row.count ? row[column] : nil
    }
}