Organize data and decode with byte (UInt8) arrays instead of Data. Support quote escapes for newlines and commas
This commit is contained in:
parent
60eca9a4cc
commit
23bd35d662
|
@ -1,72 +1,79 @@
|
|||
import Foundation
|
||||
|
||||
extension CSV {
|
||||
/// Parses CSV `data` into a dictionary that maps each column title to the cells of that column.
///
/// The first line is read as the header row. A comma or newline that appears between
/// double quotes is kept as part of the cell; the quote bytes themselves are dropped.
///
/// - Parameters:
///   - data: The raw CSV bytes.
///   - stringEncoding: The encoding used to convert header and cell bytes to strings.
/// - Returns: Column title mapped to that column's cells, top to bottom. Empty cells are `nil`.
///   An empty dictionary is returned if a column title cannot be decoded with `stringEncoding`.
public static func parse(_ data: Data, stringEncoding: String.Encoding = .utf8) -> [String: [String?]] {
    let end = data.endIndex

    var columns: [(title: String, cells: [String?])] = []
    var columnIndex = 0
    var iterator = data.startIndex
    var inQuotes = false
    var headerEnded = false
    var currentCell: Bytes = []

    // Header row: every unquoted comma/newline terminates one column title.
    header: while iterator < end {
        let byte = data[iterator]
        switch byte {
        case .quote: inQuotes = !inQuotes
        case .comma, .newLine:
            if inQuotes {
                currentCell.append(byte)
            } else {
                guard let title = String(data: Data(currentCell), encoding: stringEncoding) else { return [:] }
                columns.append((title, []))
                currentCell = []
                if byte == .newLine {
                    headerEnded = true
                    iterator += 1
                    break header
                }
            }
        default: currentCell.append(byte)
        }
        iterator += 1
    }

    // The data ended inside the header line (no newline): the last title is still pending.
    if !headerEnded && !(columns.isEmpty && currentCell.isEmpty) {
        guard let title = String(data: Data(currentCell), encoding: stringEncoding) else { return [:] }
        columns.append((title, []))
        currentCell = []
    }

    // Stores the pending cell in the current column. Cells beyond the header width are
    // ignored instead of indexing out of bounds.
    func endCell() {
        if columnIndex < columns.count {
            let cell = currentCell.isEmpty ? nil : String(data: Data(currentCell), encoding: stringEncoding)
            columns[columnIndex].cells.append(cell)
        }
        currentCell = []
    }

    while iterator < end {
        let byte = data[iterator]
        switch byte {
        case .quote: inQuotes = !inQuotes
        case .comma where !inQuotes:
            endCell()
            columnIndex += 1
        case .newLine where !inQuotes:
            endCell()
            columnIndex = 0
        default: currentCell.append(byte) // includes quoted commas and newlines
        }
        iterator += 1
    }

    // Flush the final row when the data does not end with a newline.
    if columnIndex > 0 || !currentCell.isEmpty { endCell() }

    // When two columns share a title, the later column wins.
    return columns.reduce(into: [:]) { result, column in
        result[column.title] = column.cells
    }
}
|
||||
|
||||
/// Parses CSV `data` into a dictionary that maps each column title to a `Column`.
///
/// This is a thin wrapper over the `[String: [String?]]` overload of `parse`.
///
/// - Parameters:
///   - data: The raw CSV bytes.
///   - stringEncoding: The encoding used to convert header and cell bytes to strings.
/// - Returns: One `Column` per parsed column, keyed by the column's title.
public static func parse(_ data: Data, stringEncoding: String.Encoding = .utf8) -> [String: Column] {
    let elements: [String: [String?]] = self.parse(data, stringEncoding: stringEncoding)

    var columns: [String: Column] = [:]
    columns.reserveCapacity(elements.count)
    for (title, fields) in elements {
        columns[title] = Column(header: title, fields: fields)
    }
    return columns
}
|
||||
|
||||
/// Parses CSV `data` into an array of `Column` values.
///
/// This is a thin wrapper over the `[String: [String?]]` overload of `parse`.
///
/// - Note: The columns are taken from a dictionary, so their order in the returned
///   array is unspecified and need not match the order of the header row.
/// - Parameters:
///   - data: The raw CSV bytes.
///   - stringEncoding: The encoding used to convert header and cell bytes to strings.
/// - Returns: One `Column` per parsed column.
public static func parse(_ data: Data, stringEncoding: String.Encoding = .utf8) -> [Column] {
    let elements: [String: [String?]] = self.parse(data, stringEncoding: stringEncoding)
    return elements.map { Column(header: $0.key, fields: $0.value) }
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
import Foundation
|
||||
|
||||
extension Data {
|
||||
extension Array where Element == UInt8 {
|
||||
var int: Int? {
|
||||
let count: Int = self.endIndex
|
||||
var result: Int = 0
|
||||
|
|
|
@ -7,11 +7,11 @@ final class _CSVDecoder: Decoder {
|
|||
let userInfo: [CodingUserInfoKey : Any]
|
||||
let stringDecoding: String.Encoding
|
||||
|
||||
let csv: [String: [Data?]]?
|
||||
let row: [String: Data]?
|
||||
let cell: Data?
|
||||
let csv: [String: [Bytes?]]?
|
||||
let row: [String: Bytes]?
|
||||
let cell: Bytes?
|
||||
|
||||
init(csv: [String: [Data?]], path: CodingPath = [], info: [CodingUserInfoKey : Any] = [:], stringDecoding: String.Encoding) {
|
||||
init(csv: [String: [Bytes?]], path: CodingPath = [], info: [CodingUserInfoKey : Any] = [:], stringDecoding: String.Encoding) {
|
||||
self.codingPath = path
|
||||
self.userInfo = info
|
||||
self.stringDecoding = stringDecoding
|
||||
|
@ -20,7 +20,7 @@ final class _CSVDecoder: Decoder {
|
|||
self.cell = nil
|
||||
}
|
||||
|
||||
init(row: [String: Data], path: CodingPath = [], info: [CodingUserInfoKey : Any] = [:], stringDecoding: String.Encoding) {
|
||||
init(row: [String: Bytes], path: CodingPath = [], info: [CodingUserInfoKey : Any] = [:], stringDecoding: String.Encoding) {
|
||||
self.codingPath = path
|
||||
self.userInfo = info
|
||||
self.stringDecoding = stringDecoding
|
||||
|
@ -29,7 +29,7 @@ final class _CSVDecoder: Decoder {
|
|||
self.cell = nil
|
||||
}
|
||||
|
||||
init(cell: Data?, path: CodingPath = [], info: [CodingUserInfoKey : Any] = [:], stringDecoding: String.Encoding) {
|
||||
init(cell: Bytes?, path: CodingPath = [], info: [CodingUserInfoKey : Any] = [:], stringDecoding: String.Encoding) {
|
||||
self.codingPath = path
|
||||
self.userInfo = info
|
||||
self.stringDecoding = stringDecoding
|
||||
|
@ -77,34 +77,69 @@ final class _CSVDecoder: Decoder {
|
|||
}
|
||||
|
||||
/// Decodes every row of the CSV `data` into an instance of `T`.
///
/// - Parameters:
///   - type: The type to decode each row to.
///   - data: The raw CSV bytes.
///   - stringDecoding: The encoding used to convert bytes to strings.
/// - Returns: The decoded values produced by `Array<T>.init(from:)` over the CSV decoder.
/// - Throws: Any error thrown while organizing the data or decoding the values.
static func decode<T>(_ type: T.Type, from data: Data, stringDecoding: String.Encoding) throws -> [T] where T: Decodable {
    let csv: [String: [Bytes?]] = try _CSVDecoder.organize(data, stringDecoding: stringDecoding)
    let decoder = _CSVDecoder(csv: csv, stringDecoding: stringDecoding)
    return try [T](from: decoder)
}
|
||||
|
||||
/// Splits CSV `data` into columns of raw cell bytes, keyed by column title.
///
/// The first line is read as the header row. A comma or newline that appears between
/// double quotes is kept as part of the cell; the quote bytes themselves are dropped.
///
/// - Parameters:
///   - data: The raw CSV bytes.
///   - stringDecoding: The encoding used to convert the column titles to strings.
/// - Returns: Column title mapped to that column's cells, top to bottom. Empty cells are `nil`.
/// - Throws: `DecodingError.dataCorrupted` if a column title cannot be decoded with `stringDecoding`.
static func organize(_ data: Data, stringDecoding: String.Encoding) throws -> [String: [Bytes?]] {
    let end = data.endIndex

    var columns: [(title: String, cells: [Bytes?])] = []
    var columnIndex = 0
    var iterator = data.startIndex
    var inQuotes = false
    var headerEnded = false
    var currentCell: Bytes = []

    // Converts the pending header bytes to a title and registers a new, empty column.
    func endTitle() throws {
        guard let title = String(data: Data(currentCell), encoding: stringDecoding) else {
            throw DecodingError.dataCorrupted(DecodingError.Context(
                codingPath: [],
                debugDescription: "Found column title with \(stringDecoding) incompatible character"
            ))
        }
        columns.append((title, []))
        currentCell = []
    }

    // Stores the pending cell in the current column. Cells beyond the header width are
    // ignored instead of indexing out of bounds.
    func endCell() {
        if columnIndex < columns.count {
            columns[columnIndex].cells.append(currentCell.isEmpty ? nil : currentCell)
        }
        currentCell = []
    }

    // Header row: every unquoted comma/newline terminates one column title.
    header: while iterator < end {
        let byte = data[iterator]
        switch byte {
        case .quote: inQuotes = !inQuotes
        case .comma, .newLine:
            if inQuotes {
                currentCell.append(byte)
            } else {
                try endTitle()
                if byte == .newLine {
                    headerEnded = true
                    iterator += 1
                    break header
                }
            }
        default: currentCell.append(byte)
        }
        iterator += 1
    }

    // The data ended inside the header line (no newline): the last title is still pending.
    if !headerEnded && !(columns.isEmpty && currentCell.isEmpty) {
        try endTitle()
    }

    while iterator < end {
        let byte = data[iterator]
        switch byte {
        case .quote: inQuotes = !inQuotes
        case .comma where !inQuotes:
            endCell()
            columnIndex += 1
        case .newLine where !inQuotes:
            endCell()
            columnIndex = 0
        default: currentCell.append(byte) // includes quoted commas and newlines
        }
        iterator += 1
    }

    // Flush the final row when the data does not end with a newline.
    if columnIndex > 0 || !currentCell.isEmpty { endCell() }

    // When two columns share a title, the later column wins.
    return columns.reduce(into: [:]) { result, column in
        result[column.title] = column.cells
    }
}
|
||||
}
|
||||
|
|
|
@ -4,9 +4,9 @@ final class _CSVKeyedDecoder<K>: KeyedDecodingContainerProtocol where K: CodingK
|
|||
let codingPath: [CodingKey]
|
||||
let allKeys: [K]
|
||||
let stringDecoding: String.Encoding
|
||||
let row: [String: Data]
|
||||
let row: [String: Bytes]
|
||||
|
||||
init(path: CodingPath, row: [String: Data], stringDecoding: String.Encoding) {
|
||||
init(path: CodingPath, row: [String: Bytes], stringDecoding: String.Encoding) {
|
||||
self.codingPath = path
|
||||
self.allKeys = Array(row.keys).compactMap(K.init)
|
||||
self.stringDecoding = stringDecoding
|
||||
|
@ -19,12 +19,12 @@ final class _CSVKeyedDecoder<K>: KeyedDecodingContainerProtocol where K: CodingK
|
|||
|
||||
/// Reports whether the cell stored for `key` should be treated as `nil`.
///
/// - Parameter key: The coding key whose `stringValue` is looked up in the row.
/// - Returns: `true` when the row has no cell for the key, or when the cell's bytes equal
///   `[.N, .forwardSlash, .A]` or `[.N, .A]`.
func decodeNil(forKey key: K) throws -> Bool {
    guard let cell = row[key.stringValue] else { return true }
    return cell == [.N, .forwardSlash, .A] || cell == [.N, .A]
}
|
||||
|
||||
func decode(_ type: Bool.Type, forKey key: K) throws -> Bool {
|
||||
guard let cell = row[key.stringValue] else { throw DecodingError.badKey(key, at: self.codingPath + [key]) }
|
||||
guard let value = String(data: cell, encoding: self.stringDecoding) else {
|
||||
guard let value = String(data: Data(cell), encoding: self.stringDecoding) else {
|
||||
throw DecodingError.dataToStringFailed(path: self.codingPath + [key], encoding: self.stringDecoding)
|
||||
}
|
||||
switch value.lowercased() {
|
||||
|
@ -36,7 +36,7 @@ final class _CSVKeyedDecoder<K>: KeyedDecodingContainerProtocol where K: CodingK
|
|||
|
||||
func decode(_ type: String.Type, forKey key: K) throws -> String {
|
||||
guard let cell = row[key.stringValue] else { throw DecodingError.badKey(key, at: self.codingPath + [key]) }
|
||||
guard let value = String(data: cell, encoding: self.stringDecoding) else {
|
||||
guard let value = String(data: Data(cell), encoding: self.stringDecoding) else {
|
||||
throw DecodingError.dataToStringFailed(path: self.codingPath + [key], encoding: self.stringDecoding)
|
||||
}
|
||||
return value
|
||||
|
|
|
@ -3,21 +3,21 @@ import Foundation
|
|||
final class _CSVSingleValueDecoder: SingleValueDecodingContainer {
|
||||
let codingPath: [CodingKey]
|
||||
let stringDecoding: String.Encoding
|
||||
let value: Data?
|
||||
let value: Bytes?
|
||||
|
||||
/// Creates a single-value container over the bytes of one cell.
///
/// - Parameters:
///   - value: The cell's bytes, or `nil` when there is no cell.
///   - path: The coding path stored as `codingPath`.
///   - stringDecoding: The encoding used when a cell is converted to a string.
init(value: Bytes?, path: CodingPath, stringDecoding: String.Encoding) {
    self.value = value
    self.codingPath = path
    self.stringDecoding = stringDecoding
}
|
||||
|
||||
/// Reports whether the stored cell should be treated as `nil`.
///
/// - Returns: `true` when there is no value, or when the value's bytes equal
///   `[.N, .forwardSlash, .A]` or `[.N, .A]`.
func decodeNil() -> Bool {
    guard let cell = value else { return true }
    return cell == [.N, .forwardSlash, .A] || cell == [.N, .A]
}
|
||||
|
||||
func decode(_ type: Bool.Type) throws -> Bool {
|
||||
guard let cell = self.value else { throw DecodingError.nilValue(type: type, at: self.codingPath) }
|
||||
guard let value = String(data: cell, encoding: self.stringDecoding) else {
|
||||
guard let value = String(data: Data(cell), encoding: self.stringDecoding) else {
|
||||
throw DecodingError.dataToStringFailed(path: self.codingPath, encoding: self.stringDecoding)
|
||||
}
|
||||
switch value.lowercased() {
|
||||
|
@ -29,7 +29,7 @@ final class _CSVSingleValueDecoder: SingleValueDecodingContainer {
|
|||
|
||||
func decode(_ type: String.Type) throws -> String {
|
||||
guard let cell = self.value else { throw DecodingError.nilValue(type: type, at: self.codingPath) }
|
||||
guard let value = String(data: cell, encoding: self.stringDecoding) else {
|
||||
guard let value = String(data: Data(cell), encoding: self.stringDecoding) else {
|
||||
throw DecodingError.dataToStringFailed(path: self.codingPath, encoding: self.stringDecoding)
|
||||
}
|
||||
return value
|
||||
|
|
|
@ -7,10 +7,10 @@ final class _CSVUnkeyedDecoder: UnkeyedDecodingContainer {
|
|||
var currentIndex: Int
|
||||
|
||||
let stringDecoding: String.Encoding
|
||||
let columns: [String: [Data?]]
|
||||
let next: () -> [String: Data]?
|
||||
let columns: [String: [Bytes?]]
|
||||
let next: () -> [String: Bytes]?
|
||||
|
||||
init(columns: [String: [Data?]], path: CodingPath = [], stringDecoding: String.Encoding) {
|
||||
init(columns: [String: [Bytes?]], path: CodingPath = [], stringDecoding: String.Encoding) {
|
||||
self.codingPath = path
|
||||
self.count = columns.first?.value.count
|
||||
self.currentIndex = 0
|
||||
|
|
Loading…
Reference in New Issue