Organize data and decode with byte (UInt8) arrays instead of Data. Support quote escapes for newlines and commas

This commit is contained in:
Caleb Kleveter 2018-06-01 17:42:33 -05:00
parent 60eca9a4cc
commit 23bd35d662
No known key found for this signature in database
GPG Key ID: B38DBD5CF2C98D69
6 changed files with 135 additions and 93 deletions

View File

@ -1,72 +1,79 @@
import Foundation
extension CSV {
public static func parse(_ data: Data) -> [String: [String?]] {
let rows = data.split(separator: .newLine, omittingEmptySubsequences: false)
var cells = rows.map({ $0.split(separator: .comma, omittingEmptySubsequences: false) })
let rowLength = cells[0].count - 1
public static func parse(_ data: Data, stringEncoding: String.Encoding = .utf8) -> [String: [String?]] {
let end = data.endIndex
for count in 1...cells.count - 1 {
if cells[cells.count - count].count < rowLength {
_ = cells.removeLast()
} else {
break
var columns: [(title: String, cells: [String?])] = []
var columnIndex = 0
var iterator = data.startIndex
var inQuotes = false
var currentCell: Bytes = []
// Header pass: walk the bytes of the first row and collect one column per title.
// NOTE(review): titles are only committed when an unquoted comma or newline is seen, so input
// that ends before the first unquoted newline leaves its last title uncommitted.
header: while iterator < end {
let byte = data[iterator]
switch byte {
// A quote byte only toggles the quoted state; it is not copied into the title.
case .quote: inQuotes = !inQuotes
case .comma, .newLine:
// Inside quotes the delimiter is literal title content.
if inQuotes { currentCell.append(byte); break }
// A title that cannot be decoded with `stringEncoding` aborts the parse with an empty result.
guard let title = String(data: Data(currentCell), encoding: stringEncoding) else { return [:] }
columns.append((title, []))
currentCell = []
// An unquoted newline ends the header row; step past it so the next loop starts on the first data byte.
if byte == .newLine { iterator += 1; break header }
default: currentCell.append(byte)
}
iterator += 1
}
var columns: [String: [String?]] = [:]
(0...rowLength).forEach { (cellIndex) in
var column = cells.map({ (row) -> String? in
return row[cellIndex].count > 0 ? String(data: row[cellIndex], encoding: .utf8) : nil
})
let title = column.removeFirst()!
columns[title] = column
// Body pass: every unquoted comma or newline closes the current cell and stores it in the
// column at `columnIndex`; a newline also rewinds to the first column for the next row.
// NOTE(review): cells are only stored when a delimiter is seen, so a final row that is not
// terminated by a newline leaves its last cell unstored.
while iterator < end {
    let byte = data[iterator]
    switch byte {
    case .quote:
        // Quotes toggle the quoted state and are not copied into the cell.
        inQuotes = !inQuotes
    case .comma:
        if inQuotes { currentCell.append(.comma); break }
        // An empty cell is a missing value (nil); a non-empty cell is decoded to a string.
        // The condition used to be inverted, which discarded every populated cell.
        columns[columnIndex].cells.append(
            currentCell.isEmpty ? nil : String(data: Data(currentCell), encoding: stringEncoding)
        )
        columnIndex += 1
        currentCell = []
    case .newLine:
        if inQuotes { currentCell.append(.newLine); break }
        columns[columnIndex].cells.append(
            currentCell.isEmpty ? nil : String(data: Data(currentCell), encoding: stringEncoding)
        )
        columnIndex = 0
        currentCell = []
    default:
        currentCell.append(byte)
    }
    iterator += 1
}
return columns
// Collapse the ordered (title, cells) pairs into a dictionary keyed by title.
// When two columns share a title, the later one replaces the earlier one.
var cellsByTitle: [String: [String?]] = [:]
for (title, cells) in columns {
    cellsByTitle[title] = cells
}
return cellsByTitle
}
public static func parse(_ data: Data) -> [String: Column] {
let rows = data.split(separator: .newLine, omittingEmptySubsequences: false)
var cells = rows.map({ $0.split(separator: .comma, omittingEmptySubsequences: false) })
let rowLength = cells[0].count - 1
public static func parse(_ data: Data, stringEncoding: String.Encoding = .utf8) -> [String: Column] {
let elements: [String: [String?]] = self.parse(data, stringEncoding: stringEncoding)
for count in 1...cells.count - 1 {
if cells[cells.count - count].count < rowLength {
_ = cells.removeLast()
} else {
break
}
return elements.reduce(into: [:]) { columns, element in
columns[element.key] = Column(header: element.key, fields: element.value)
}
var columns: [String: Column] = [:]
(0...rowLength).forEach { (cellIndex) in
var column = cells.map({ (row) -> String? in
return row[cellIndex].count > 0 ? String(data: row[cellIndex], encoding: .utf8) : nil
})
let title = column.removeFirst()!
columns[title] = CSV.Column(header: title, fields: column)
}
return columns
}
public static func parse(_ data: Data) -> [Column] {
let rows = data.split(separator: .newLine, omittingEmptySubsequences: false)
var cells = rows.map({ $0.split(separator: .comma, omittingEmptySubsequences: false) })
let rowLength = cells[0].count - 1
public static func parse(_ data: Data, stringEncoding: String.Encoding = .utf8) -> [Column] {
let elements: [String: [String?]] = self.parse(data, stringEncoding: stringEncoding)
for count in 1...cells.count - 1 {
if cells[cells.count - count].count < rowLength {
_ = cells.removeLast()
} else {
break
}
}
return (0...rowLength).map { (cellIndex) -> CSV.Column in
var column = cells.map({ (row) -> String? in
return row[cellIndex].count > 0 ? String(data: row[cellIndex], encoding: .utf8) : nil
})
return CSV.Column(header: column.removeFirst()!, fields: column)
return elements.reduce(into: []) { columns, element in
columns.append(Column(header: element.key, fields: element.value))
}
}
}

View File

@ -1,6 +1,6 @@
import Foundation
extension Data {
extension Array where Element == UInt8 {
var int: Int? {
let count: Int = self.endIndex
var result: Int = 0

View File

@ -7,11 +7,11 @@ final class _CSVDecoder: Decoder {
let userInfo: [CodingUserInfoKey : Any]
let stringDecoding: String.Encoding
let csv: [String: [Data?]]?
let row: [String: Data]?
let cell: Data?
let csv: [String: [Bytes?]]?
let row: [String: Bytes]?
let cell: Bytes?
init(csv: [String: [Data?]], path: CodingPath = [], info: [CodingUserInfoKey : Any] = [:], stringDecoding: String.Encoding) {
init(csv: [String: [Bytes?]], path: CodingPath = [], info: [CodingUserInfoKey : Any] = [:], stringDecoding: String.Encoding) {
self.codingPath = path
self.userInfo = info
self.stringDecoding = stringDecoding
@ -20,7 +20,7 @@ final class _CSVDecoder: Decoder {
self.cell = nil
}
init(row: [String: Data], path: CodingPath = [], info: [CodingUserInfoKey : Any] = [:], stringDecoding: String.Encoding) {
init(row: [String: Bytes], path: CodingPath = [], info: [CodingUserInfoKey : Any] = [:], stringDecoding: String.Encoding) {
self.codingPath = path
self.userInfo = info
self.stringDecoding = stringDecoding
@ -29,7 +29,7 @@ final class _CSVDecoder: Decoder {
self.cell = nil
}
init(cell: Data?, path: CodingPath = [], info: [CodingUserInfoKey : Any] = [:], stringDecoding: String.Encoding) {
init(cell: Bytes?, path: CodingPath = [], info: [CodingUserInfoKey : Any] = [:], stringDecoding: String.Encoding) {
self.codingPath = path
self.userInfo = info
self.stringDecoding = stringDecoding
@ -77,34 +77,69 @@ final class _CSVDecoder: Decoder {
}
static func decode<T>(_ type: T.Type, from data: Data, stringDecoding: String.Encoding)throws -> [T] where T: Decodable {
let csv: [String: [Data?]] = try _CSVDecoder.organize(data, stringDecoding: stringDecoding)
let csv: [String: [Bytes?]] = try _CSVDecoder.organize(data, stringDecoding: stringDecoding)
let decoder = _CSVDecoder(csv: csv, stringDecoding: stringDecoding)
return try Array<T>(from: decoder)
}
static func organize(_ data: Data, stringDecoding: String.Encoding)throws -> [String: [Data?]] {
let rows = data.split(separator: .newLine, omittingEmptySubsequences: false)
var cells = rows.map({ $0.split(separator: .comma, omittingEmptySubsequences: false) })
let rowLength = cells[0].count - 1
static func organize(_ data: Data, stringDecoding: String.Encoding)throws -> [String: [Bytes?]] {
let end = data.endIndex
for count in 1...cells.count - 1 {
if cells[cells.count - count].count < rowLength {
_ = cells.removeLast()
} else {
break
var columns: [(title: String, cells: [Bytes?])] = []
var columnIndex = 0
var iterator = data.startIndex
var inQuotes = false
var currentCell: Bytes = []
// Header pass: walk the bytes of the first row and collect one column per title.
header: while iterator < end {
    let byte = data[iterator]
    switch byte {
    case .quote:
        // Quotes toggle the quoted state and are not copied into the title.
        inQuotes = !inQuotes
    case .comma, .newLine:
        // Inside quotes the delimiter is literal title content.
        if inQuotes { currentCell.append(byte); break }
        guard let title = String(data: Data(currentCell), encoding: stringDecoding) else {
            // Undecodable input is a recoverable data error: report it through the
            // function's `throws` channel instead of terminating the process.
            throw DecodingError.dataCorrupted(DecodingError.Context(
                codingPath: [],
                debugDescription: "Found column title with \(stringDecoding) incompatible character"
            ))
        }
        columns.append((title, []))
        currentCell = []
        // An unquoted newline ends the header row; step past it before leaving the loop.
        if byte == .newLine { iterator += 1; break header }
    default:
        currentCell.append(byte)
    }
    iterator += 1
}
var columns: [String: [Data?]] = [:]
try (0...rowLength).forEach { (cellIndex) in
var column = cells.map({ (row) -> Data? in
return row[cellIndex].count > 0 ? row[cellIndex] : nil
})
guard let title = String(data: column.removeFirst()!, encoding: stringDecoding) else {
throw DecodingError.dataCorrupted(DecodingError.Context(codingPath: [], debugDescription: "Found colunm title with \(stringDecoding) incompatible character"))
while iterator < end {
let byte = data[iterator]
switch byte {
case .quote: inQuotes = !inQuotes
case .comma:
    // A comma inside quotes is cell content, not a delimiter.
    if inQuotes { currentCell.append(.comma); break }
    // An empty cell is a missing value (nil); a non-empty cell keeps its bytes.
    // The condition used to be inverted, which discarded every populated cell.
    columns[columnIndex].cells.append(currentCell.isEmpty ? nil : currentCell)
    columnIndex += 1
    currentCell = []
case .newLine:
    // A newline inside quotes is cell content, not a row terminator.
    if inQuotes { currentCell.append(.newLine); break }
    columns[columnIndex].cells.append(currentCell.isEmpty ? nil : currentCell)
    // The row is complete; the next cell belongs to the first column again.
    columnIndex = 0
    currentCell = []
default: currentCell.append(byte)
}
columns[title] = column
iterator += 1
}
return columns
// Collapse the ordered (title, cells) pairs into a dictionary keyed by title.
// When two columns share a title, the later one replaces the earlier one.
var cellsByTitle: [String: [Bytes?]] = [:]
for (title, cells) in columns {
    cellsByTitle[title] = cells
}
return cellsByTitle
}
}

View File

@ -4,9 +4,9 @@ final class _CSVKeyedDecoder<K>: KeyedDecodingContainerProtocol where K: CodingK
let codingPath: [CodingKey]
let allKeys: [K]
let stringDecoding: String.Encoding
let row: [String: Data]
let row: [String: Bytes]
init(path: CodingPath, row: [String: Data], stringDecoding: String.Encoding) {
init(path: CodingPath, row: [String: Bytes], stringDecoding: String.Encoding) {
self.codingPath = path
self.allKeys = Array(row.keys).compactMap(K.init)
self.stringDecoding = stringDecoding
@ -19,12 +19,12 @@ final class _CSVKeyedDecoder<K>: KeyedDecodingContainerProtocol where K: CodingK
func decodeNil(forKey key: K) throws -> Bool {
let cell = row[key.stringValue]
return cell == nil || cell == Data([.N, .forwardSlash, .A]) || cell == Data([.N, .A])
return cell == nil || cell == [.N, .forwardSlash, .A] || cell == [.N, .A]
}
func decode(_ type: Bool.Type, forKey key: K) throws -> Bool {
guard let cell = row[key.stringValue] else { throw DecodingError.badKey(key, at: self.codingPath + [key]) }
guard let value = String(data: cell, encoding: self.stringDecoding) else {
guard let value = String(data: Data(cell), encoding: self.stringDecoding) else {
throw DecodingError.dataToStringFailed(path: self.codingPath + [key], encoding: self.stringDecoding)
}
switch value.lowercased() {
@ -36,7 +36,7 @@ final class _CSVKeyedDecoder<K>: KeyedDecodingContainerProtocol where K: CodingK
func decode(_ type: String.Type, forKey key: K) throws -> String {
guard let cell = row[key.stringValue] else { throw DecodingError.badKey(key, at: self.codingPath + [key]) }
guard let value = String(data: cell, encoding: self.stringDecoding) else {
guard let value = String(data: Data(cell), encoding: self.stringDecoding) else {
throw DecodingError.dataToStringFailed(path: self.codingPath + [key], encoding: self.stringDecoding)
}
return value

View File

@ -3,21 +3,21 @@ import Foundation
final class _CSVSingleValueDecoder: SingleValueDecodingContainer {
let codingPath: [CodingKey]
let stringDecoding: String.Encoding
let value: Data?
let value: Bytes?
init(value: Data?, path: CodingPath, stringDecoding: String.Encoding) {
init(value: Bytes?, path: CodingPath, stringDecoding: String.Encoding) {
self.codingPath = path
self.stringDecoding = stringDecoding
self.value = value
}
func decodeNil() -> Bool {
return value == nil || value == Data([.N, .forwardSlash, .A]) || value == Data([.N, .A])
return value == nil || value == [.N, .forwardSlash, .A] || value == [.N, .A]
}
func decode(_ type: Bool.Type) throws -> Bool {
guard let cell = self.value else { throw DecodingError.nilValue(type: type, at: self.codingPath) }
guard let value = String(data: cell, encoding: self.stringDecoding) else {
guard let value = String(data: Data(cell), encoding: self.stringDecoding) else {
throw DecodingError.dataToStringFailed(path: self.codingPath, encoding: self.stringDecoding)
}
switch value.lowercased() {
@ -29,7 +29,7 @@ final class _CSVSingleValueDecoder: SingleValueDecodingContainer {
func decode(_ type: String.Type) throws -> String {
guard let cell = self.value else { throw DecodingError.nilValue(type: type, at: self.codingPath) }
guard let value = String(data: cell, encoding: self.stringDecoding) else {
guard let value = String(data: Data(cell), encoding: self.stringDecoding) else {
throw DecodingError.dataToStringFailed(path: self.codingPath, encoding: self.stringDecoding)
}
return value

View File

@ -7,10 +7,10 @@ final class _CSVUnkeyedDecoder: UnkeyedDecodingContainer {
var currentIndex: Int
let stringDecoding: String.Encoding
let columns: [String: [Data?]]
let next: () -> [String: Data]?
let columns: [String: [Bytes?]]
let next: () -> [String: Bytes]?
init(columns: [String: [Data?]], path: CodingPath = [], stringDecoding: String.Encoding) {
init(columns: [String: [Bytes?]], path: CodingPath = [], stringDecoding: String.Encoding) {
self.codingPath = path
self.count = columns.first?.value.count
self.currentIndex = 0