Merge pull request #8 from yaslab/feature/trim-fields

Trim each field
This commit is contained in:
Yasuhiro Hatta 2016-10-17 00:37:26 +09:00 committed by GitHub
commit 078c5be1c6
7 changed files with 159 additions and 15 deletions

View File

@ -1,6 +1,6 @@
Pod::Spec.new do |s| Pod::Spec.new do |s|
s.name = 'CSV.swift' s.name = 'CSV.swift'
s.version = '1.0.0' s.version = '1.1.0'
s.license = 'MIT' s.license = 'MIT'
s.summary = 'CSV reading library written in Swift.' s.summary = 'CSV reading library written in Swift.'
s.homepage = 'https://github.com/yaslab/CSV.swift' s.homepage = 'https://github.com/yaslab/CSV.swift'

View File

@ -58,7 +58,7 @@ var csv = try! CSV(
string: "id,name\n1,foo", string: "id,name\n1,foo",
hasHeaderRow: true) // It must be true. hasHeaderRow: true) // It must be true.
while csv.next() != nil { while let _ = csv.next() {
print("\(csv["id"]!)") // => "1" print("\(csv["id"]!)") // => "1"
print("\(csv["name"]!)") // => "foo" print("\(csv["name"]!)") // => "foo"
} }
@ -83,13 +83,13 @@ let csv = try! CSV(
### CocoaPods ### CocoaPods
```ruby ```ruby
pod 'CSV.swift', '~> 1.0' pod 'CSV.swift', '~> 1.1'
``` ```
### Carthage ### Carthage
``` ```
github "yaslab/CSV.swift" ~> 1.0 github "yaslab/CSV.swift" ~> 1.1
``` ```
### Swift Package Manager ### Swift Package Manager
@ -100,7 +100,7 @@ import PackageDescription
let package = Package( let package = Package(
name: "PackageName", name: "PackageName",
dependencies: [ dependencies: [
.Package(url: "https://github.com/yaslab/CSV.swift", majorVersion: 1, minor: 0) .Package(url: "https://github.com/yaslab/CSV.swift", majorVersion: 1, minor: 1)
] ]
) )
``` ```

View File

@ -13,10 +13,11 @@ extension CSV {
public init( public init(
stream: InputStream, stream: InputStream,
hasHeaderRow: Bool = defaultHasHeaderRow, hasHeaderRow: Bool = defaultHasHeaderRow,
trimFields: Bool = defaultTrimFields,
delimiter: UnicodeScalar = defaultDelimiter) delimiter: UnicodeScalar = defaultDelimiter)
throws throws
{ {
try self.init(stream: stream, codecType: UTF8.self, hasHeaderRow: hasHeaderRow, delimiter: delimiter) try self.init(stream: stream, codecType: UTF8.self, hasHeaderRow: hasHeaderRow, trimFields: trimFields, delimiter: delimiter)
} }
} }
@ -26,11 +27,12 @@ extension CSV {
public init( public init(
string: String, string: String,
hasHeaderRow: Bool = defaultHasHeaderRow, hasHeaderRow: Bool = defaultHasHeaderRow,
trimFields: Bool = defaultTrimFields,
delimiter: UnicodeScalar = defaultDelimiter) delimiter: UnicodeScalar = defaultDelimiter)
throws throws
{ {
let iterator = string.unicodeScalars.makeIterator() let iterator = string.unicodeScalars.makeIterator()
try self.init(iterator: iterator, hasHeaderRow: hasHeaderRow, delimiter: delimiter) try self.init(iterator: iterator, hasHeaderRow: hasHeaderRow, trimFields: trimFields, delimiter: delimiter)
} }
} }

View File

@ -13,11 +13,13 @@ private let CR = UnicodeScalar("\r")!
private let DQUOTE = UnicodeScalar("\"")! private let DQUOTE = UnicodeScalar("\"")!
internal let defaultHasHeaderRow = false internal let defaultHasHeaderRow = false
internal let defaultTrimFields = false
internal let defaultDelimiter = UnicodeScalar(",")! internal let defaultDelimiter = UnicodeScalar(",")!
public struct CSV: IteratorProtocol, Sequence { public struct CSV: IteratorProtocol, Sequence {
private var iterator: AnyIterator<UnicodeScalar> private var iterator: AnyIterator<UnicodeScalar>
private let trimFields: Bool
private let delimiter: UnicodeScalar private let delimiter: UnicodeScalar
private var back: UnicodeScalar? = nil private var back: UnicodeScalar? = nil
@ -27,16 +29,24 @@ public struct CSV: IteratorProtocol, Sequence {
/// CSV header row. To populate this property, pass `true` for `hasHeaderRow` in the initializer. /// CSV header row. To populate this property, pass `true` for `hasHeaderRow` in the initializer.
public var headerRow: [String]? { return _headerRow } public var headerRow: [String]? { return _headerRow }
private var _headerRow: [String]? = nil private var _headerRow: [String]? = nil
private let whitespaces: CharacterSet
internal init<T: IteratorProtocol>( internal init<T: IteratorProtocol>(
iterator: T, iterator: T,
hasHeaderRow: Bool, hasHeaderRow: Bool,
trimFields: Bool,
delimiter: UnicodeScalar) delimiter: UnicodeScalar)
throws where T.Element == UnicodeScalar throws where T.Element == UnicodeScalar
{ {
self.iterator = AnyIterator(base: iterator) self.iterator = AnyIterator(base: iterator)
self.trimFields = trimFields
self.delimiter = delimiter self.delimiter = delimiter
var whitespaces = CharacterSet.whitespaces
whitespaces.remove(delimiter)
self.whitespaces = whitespaces
if hasHeaderRow { if hasHeaderRow {
guard let headerRow = next() else { guard let headerRow = next() else {
throw CSVError.cannotReadHeaderRow throw CSVError.cannotReadHeaderRow
@ -55,13 +65,14 @@ public struct CSV: IteratorProtocol, Sequence {
stream: InputStream, stream: InputStream,
codecType: T.Type, codecType: T.Type,
hasHeaderRow: Bool = defaultHasHeaderRow, hasHeaderRow: Bool = defaultHasHeaderRow,
trimFields: Bool = defaultTrimFields,
delimiter: UnicodeScalar = defaultDelimiter) delimiter: UnicodeScalar = defaultDelimiter)
throws throws
where T.CodeUnit == UInt8 where T.CodeUnit == UInt8
{ {
let reader = try BinaryReader(stream: stream, endian: .unknown, closeOnDeinit: true) let reader = try BinaryReader(stream: stream, endian: .unknown, closeOnDeinit: true)
let iterator = UnicodeIterator(input: reader.makeUInt8Iterator(), inputEncodingType: codecType) let iterator = UnicodeIterator(input: reader.makeUInt8Iterator(), inputEncodingType: codecType)
try self.init(iterator: iterator, hasHeaderRow: hasHeaderRow, delimiter: delimiter) try self.init(iterator: iterator, hasHeaderRow: hasHeaderRow, trimFields: trimFields, delimiter: delimiter)
} }
/// Create an instance with `InputStream`. /// Create an instance with `InputStream`.
@ -76,13 +87,14 @@ public struct CSV: IteratorProtocol, Sequence {
codecType: T.Type, codecType: T.Type,
endian: Endian = .big, endian: Endian = .big,
hasHeaderRow: Bool = defaultHasHeaderRow, hasHeaderRow: Bool = defaultHasHeaderRow,
trimFields: Bool = defaultTrimFields,
delimiter: UnicodeScalar = defaultDelimiter) delimiter: UnicodeScalar = defaultDelimiter)
throws throws
where T.CodeUnit == UInt16 where T.CodeUnit == UInt16
{ {
let reader = try BinaryReader(stream: stream, endian: endian, closeOnDeinit: true) let reader = try BinaryReader(stream: stream, endian: endian, closeOnDeinit: true)
let iterator = UnicodeIterator(input: reader.makeUInt16Iterator(), inputEncodingType: codecType) let iterator = UnicodeIterator(input: reader.makeUInt16Iterator(), inputEncodingType: codecType)
try self.init(iterator: iterator, hasHeaderRow: hasHeaderRow, delimiter: delimiter) try self.init(iterator: iterator, hasHeaderRow: hasHeaderRow, trimFields: trimFields, delimiter: delimiter)
} }
/// Create an instance with `InputStream`. /// Create an instance with `InputStream`.
@ -97,13 +109,14 @@ public struct CSV: IteratorProtocol, Sequence {
codecType: T.Type, codecType: T.Type,
endian: Endian = .big, endian: Endian = .big,
hasHeaderRow: Bool = defaultHasHeaderRow, hasHeaderRow: Bool = defaultHasHeaderRow,
trimFields: Bool = defaultTrimFields,
delimiter: UnicodeScalar = defaultDelimiter) delimiter: UnicodeScalar = defaultDelimiter)
throws throws
where T.CodeUnit == UInt32 where T.CodeUnit == UInt32
{ {
let reader = try BinaryReader(stream: stream, endian: endian, closeOnDeinit: true) let reader = try BinaryReader(stream: stream, endian: endian, closeOnDeinit: true)
let iterator = UnicodeIterator(input: reader.makeUInt32Iterator(), inputEncodingType: codecType) let iterator = UnicodeIterator(input: reader.makeUInt32Iterator(), inputEncodingType: codecType)
try self.init(iterator: iterator, hasHeaderRow: hasHeaderRow, delimiter: delimiter) try self.init(iterator: iterator, hasHeaderRow: hasHeaderRow, trimFields: trimFields, delimiter: delimiter)
} }
// MARK: IteratorProtocol // MARK: IteratorProtocol
@ -160,6 +173,13 @@ public struct CSV: IteratorProtocol, Sequence {
var field: String var field: String
var end: Bool var end: Bool
while true { while true {
if trimFields {
// Trim the leading spaces
while next != nil && whitespaces.contains(next!) {
next = moveNext()
}
}
if next == nil { if next == nil {
(field, end) = ("", true) (field, end) = ("", true)
} }
@ -169,6 +189,11 @@ public struct CSV: IteratorProtocol, Sequence {
else { else {
back = next back = next
(field, end) = readField(quoted: false) (field, end) = readField(quoted: false)
if trimFields {
// Trim the trailing spaces
field = field.trimmingCharacters(in: whitespaces)
}
} }
row.append(field) row.append(field)
if end { if end {
@ -188,7 +213,15 @@ public struct CSV: IteratorProtocol, Sequence {
while let c = next { while let c = next {
if quoted { if quoted {
if c == DQUOTE { if c == DQUOTE {
let cNext = moveNext() var cNext = moveNext()
if trimFields {
// Trim the trailing spaces
while cNext != nil && whitespaces.contains(cNext!) {
cNext = moveNext()
}
}
if cNext == nil || cNext == CR || cNext == LF { if cNext == nil || cNext == CR || cNext == LF {
if cNext == CR { if cNext == CR {
let cNextNext = moveNext() let cNextNext = moveNext()
@ -208,7 +241,7 @@ public struct CSV: IteratorProtocol, Sequence {
field.append(String(DQUOTE)) field.append(String(DQUOTE))
} }
else { else {
// ERROR?? // ERROR?
field.append(String(c)) field.append(String(c))
} }
} }
@ -239,6 +272,7 @@ public struct CSV: IteratorProtocol, Sequence {
next = moveNext() next = moveNext()
} }
// END FILE
return (field, true) return (field, true)
} }

View File

@ -15,7 +15,7 @@
<key>CFBundlePackageType</key> <key>CFBundlePackageType</key>
<string>FMWK</string> <string>FMWK</string>
<key>CFBundleShortVersionString</key> <key>CFBundleShortVersionString</key>
<string>1.0.0</string> <string>1.1.0</string>
<key>CFBundleSignature</key> <key>CFBundleSignature</key>
<string>????</string> <string>????</string>
<key>CFBundleVersion</key> <key>CFBundleVersion</key>

View File

@ -163,7 +163,7 @@ class CSVTests: XCTestCase {
func testCSVState1() { func testCSVState1() {
let it = "あ,い1,\"\",えお\n,,x,".unicodeScalars.makeIterator() let it = "あ,い1,\"\",えお\n,,x,".unicodeScalars.makeIterator()
var csv = try! CSV(iterator: it, hasHeaderRow: defaultHasHeaderRow, delimiter: defaultDelimiter) var csv = try! CSV(iterator: it, hasHeaderRow: defaultHasHeaderRow, trimFields: defaultTrimFields, delimiter: defaultDelimiter)
var rows = [[String]]() var rows = [[String]]()
@ -175,4 +175,112 @@ class CSVTests: XCTestCase {
XCTAssertEqual(rows[1], ["", "", "x", ""]) XCTAssertEqual(rows[1], ["", "", "x", ""])
} }
/// Fields with no surrounding whitespace must come back unchanged when `trimFields` is enabled.
func testTrimFields1() {
    let csvString = "abc,def,ghi"
    let csv = try! CSV(string: csvString, trimFields: true)
    let rows = Array(csv)
    // Without this check the loop below would pass vacuously if no row were parsed.
    XCTAssertFalse(rows.isEmpty, "expected at least one parsed row")
    for row in rows {
        XCTAssertEqual(row, ["abc", "def", "ghi"])
    }
}
/// Leading spaces before unquoted fields must be removed when `trimFields` is enabled.
func testTrimFields2() {
    let csvString = " abc, def, ghi"
    let csv = try! CSV(string: csvString, trimFields: true)
    let rows = Array(csv)
    // Without this check the loop below would pass vacuously if no row were parsed.
    XCTAssertFalse(rows.isEmpty, "expected at least one parsed row")
    for row in rows {
        XCTAssertEqual(row, ["abc", "def", "ghi"])
    }
}
/// Trailing spaces after unquoted fields must be removed when `trimFields` is enabled.
func testTrimFields3() {
    let csvString = "abc ,def ,ghi "
    let csv = try! CSV(string: csvString, trimFields: true)
    let rows = Array(csv)
    // Without this check the loop below would pass vacuously if no row were parsed.
    XCTAssertFalse(rows.isEmpty, "expected at least one parsed row")
    for row in rows {
        XCTAssertEqual(row, ["abc", "def", "ghi"])
    }
}
/// Spaces on both sides of unquoted fields must be removed when `trimFields` is enabled.
func testTrimFields4() {
    let csvString = " abc , def , ghi "
    let csv = try! CSV(string: csvString, trimFields: true)
    let rows = Array(csv)
    // Without this check the loop below would pass vacuously if no row were parsed.
    XCTAssertFalse(rows.isEmpty, "expected at least one parsed row")
    for row in rows {
        XCTAssertEqual(row, ["abc", "def", "ghi"])
    }
}
/// Quoted fields with no surrounding whitespace must come back unquoted and unchanged
/// when `trimFields` is enabled.
func testTrimFields5() {
    let csvString = "\"abc\",\"def\",\"ghi\""
    let csv = try! CSV(string: csvString, trimFields: true)
    let rows = Array(csv)
    // Without this check the loop below would pass vacuously if no row were parsed.
    XCTAssertFalse(rows.isEmpty, "expected at least one parsed row")
    for row in rows {
        XCTAssertEqual(row, ["abc", "def", "ghi"])
    }
}
/// Spaces before an opening quote must be skipped when `trimFields` is enabled.
func testTrimFields6() {
    let csvString = " \"abc\", \"def\", \"ghi\""
    let csv = try! CSV(string: csvString, trimFields: true)
    let rows = Array(csv)
    // Without this check the loop below would pass vacuously if no row were parsed.
    XCTAssertFalse(rows.isEmpty, "expected at least one parsed row")
    for row in rows {
        XCTAssertEqual(row, ["abc", "def", "ghi"])
    }
}
/// Spaces after a closing quote must be skipped when `trimFields` is enabled.
func testTrimFields7() {
    let csvString = "\"abc\" ,\"def\" ,\"ghi\" "
    let csv = try! CSV(string: csvString, trimFields: true)
    let rows = Array(csv)
    // Without this check the loop below would pass vacuously if no row were parsed.
    XCTAssertFalse(rows.isEmpty, "expected at least one parsed row")
    for row in rows {
        XCTAssertEqual(row, ["abc", "def", "ghi"])
    }
}
/// Spaces on both sides of quoted fields must be skipped when `trimFields` is enabled.
func testTrimFields8() {
    let csvString = " \"abc\" , \"def\" , \"ghi\" "
    let csv = try! CSV(string: csvString, trimFields: true)
    let rows = Array(csv)
    // Without this check the loop below would pass vacuously if no row were parsed.
    XCTAssertFalse(rows.isEmpty, "expected at least one parsed row")
    for row in rows {
        XCTAssertEqual(row, ["abc", "def", "ghi"])
    }
}
/// Whitespace inside the quotes is part of the field value and must be preserved
/// even when `trimFields` is enabled.
func testTrimFields9() {
    let csvString = "\" abc \",\" def \",\" ghi \""
    let csv = try! CSV(string: csvString, trimFields: true)
    let rows = Array(csv)
    // Without this check the loop below would pass vacuously if no row were parsed.
    XCTAssertFalse(rows.isEmpty, "expected at least one parsed row")
    for row in rows {
        XCTAssertEqual(row, [" abc ", " def ", " ghi "])
    }
}
/// Tabs around unquoted fields must be trimmed just like spaces when the delimiter is a comma.
func testTrimFields10() {
    let csvString = "\tabc,\t\tdef\t,ghi\t"
    let csv = try! CSV(string: csvString, trimFields: true)
    let rows = Array(csv)
    // Without this check the loop below would pass vacuously if no row were parsed.
    XCTAssertFalse(rows.isEmpty, "expected at least one parsed row")
    for row in rows {
        XCTAssertEqual(row, ["abc", "def", "ghi"])
    }
}
/// Unquoted fields on consecutive lines must each be trimmed when `trimFields` is enabled.
func testTrimFields11() {
    let csvString = " abc \n def "
    var csv = try! CSV(string: csvString, trimFields: true)

    // Report a missing row as a test failure instead of crashing on a force unwrap.
    guard let row1 = csv.next() else {
        XCTFail("expected a first row")
        return
    }
    XCTAssertEqual(row1, ["abc"])

    guard let row2 = csv.next() else {
        XCTFail("expected a second row")
        return
    }
    XCTAssertEqual(row2, ["def"])
}
/// For quoted fields on consecutive lines, whitespace outside the quotes is dropped while
/// whitespace inside the quotes is kept.
func testTrimFields12() {
    let csvString = " \"abc \" \n \" def\" "
    var csv = try! CSV(string: csvString, trimFields: true)

    // Report a missing row as a test failure instead of crashing on a force unwrap.
    guard let row1 = csv.next() else {
        XCTFail("expected a first row")
        return
    }
    XCTAssertEqual(row1, ["abc "])

    guard let row2 = csv.next() else {
        XCTFail("expected a second row")
        return
    }
    XCTAssertEqual(row2, [" def"])
}
/// With a tab delimiter, tabs must act as field separators (two adjacent tabs yield an empty
/// field) while the spaces around each field are still trimmed.
func testTrimFields13() {
    let csvString = " abc \t\tdef\t ghi "
    let csv = try! CSV(string: csvString, trimFields: true, delimiter: UnicodeScalar("\t")!)
    let rows = Array(csv)
    // Without this check the loop below would pass vacuously if no row were parsed.
    XCTAssertFalse(rows.isEmpty, "expected at least one parsed row")
    for row in rows {
        XCTAssertEqual(row, ["abc", "", "def", "ghi"])
    }
}
} }

View File

@ -46,7 +46,7 @@ class ReadmeTests: XCTestCase {
string: "id,name\n1,foo", string: "id,name\n1,foo",
hasHeaderRow: true) // It must be true. hasHeaderRow: true) // It must be true.
while csv.next() != nil { while let _ = csv.next() {
print("\(csv["id"]!)") // => "1" print("\(csv["id"]!)") // => "1"
print("\(csv["name"]!)") // => "foo" print("\(csv["name"]!)") // => "foo"
} }