Merge pull request #8 from yaslab/feature/trim-fields

Trim each field
This commit is contained in:
Yasuhiro Hatta 2016-10-17 00:37:26 +09:00 committed by GitHub
commit 078c5be1c6
7 changed files with 159 additions and 15 deletions

View File

@ -1,6 +1,6 @@
Pod::Spec.new do |s|
s.name = 'CSV.swift'
s.version = '1.0.0'
s.version = '1.1.0'
s.license = 'MIT'
s.summary = 'CSV reading library written in Swift.'
s.homepage = 'https://github.com/yaslab/CSV.swift'

View File

@ -58,7 +58,7 @@ var csv = try! CSV(
string: "id,name\n1,foo",
hasHeaderRow: true) // It must be true.
while csv.next() != nil {
while let _ = csv.next() {
print("\(csv["id"]!)") // => "1"
print("\(csv["name"]!)") // => "foo"
}
@ -83,13 +83,13 @@ let csv = try! CSV(
### CocoaPods
```ruby
pod 'CSV.swift', '~> 1.0'
pod 'CSV.swift', '~> 1.1'
```
### Carthage
```
github "yaslab/CSV.swift" ~> 1.0
github "yaslab/CSV.swift" ~> 1.1
```
### Swift Package Manager
@ -100,7 +100,7 @@ import PackageDescription
let package = Package(
name: "PackageName",
dependencies: [
.Package(url: "https://github.com/yaslab/CSV.swift", majorVersion: 1, minor: 0)
.Package(url: "https://github.com/yaslab/CSV.swift", majorVersion: 1, minor: 1)
]
)
```

View File

@ -13,10 +13,11 @@ extension CSV {
public init(
stream: InputStream,
hasHeaderRow: Bool = defaultHasHeaderRow,
trimFields: Bool = defaultTrimFields,
delimiter: UnicodeScalar = defaultDelimiter)
throws
{
try self.init(stream: stream, codecType: UTF8.self, hasHeaderRow: hasHeaderRow, delimiter: delimiter)
try self.init(stream: stream, codecType: UTF8.self, hasHeaderRow: hasHeaderRow, trimFields: trimFields, delimiter: delimiter)
}
}
@ -26,11 +27,12 @@ extension CSV {
public init(
string: String,
hasHeaderRow: Bool = defaultHasHeaderRow,
trimFields: Bool = defaultTrimFields,
delimiter: UnicodeScalar = defaultDelimiter)
throws
{
let iterator = string.unicodeScalars.makeIterator()
try self.init(iterator: iterator, hasHeaderRow: hasHeaderRow, delimiter: delimiter)
try self.init(iterator: iterator, hasHeaderRow: hasHeaderRow, trimFields: trimFields, delimiter: delimiter)
}
}

View File

@ -13,11 +13,13 @@ private let CR = UnicodeScalar("\r")!
private let DQUOTE = UnicodeScalar("\"")!
internal let defaultHasHeaderRow = false
internal let defaultTrimFields = false
internal let defaultDelimiter = UnicodeScalar(",")!
public struct CSV: IteratorProtocol, Sequence {
private var iterator: AnyIterator<UnicodeScalar>
private let trimFields: Bool
private let delimiter: UnicodeScalar
private var back: UnicodeScalar? = nil
@ -28,15 +30,23 @@ public struct CSV: IteratorProtocol, Sequence {
public var headerRow: [String]? { return _headerRow }
private var _headerRow: [String]? = nil
private let whitespaces: CharacterSet
internal init<T: IteratorProtocol>(
iterator: T,
hasHeaderRow: Bool,
trimFields: Bool,
delimiter: UnicodeScalar)
throws where T.Element == UnicodeScalar
{
self.iterator = AnyIterator(base: iterator)
self.trimFields = trimFields
self.delimiter = delimiter
var whitespaces = CharacterSet.whitespaces
whitespaces.remove(delimiter)
self.whitespaces = whitespaces
if hasHeaderRow {
guard let headerRow = next() else {
throw CSVError.cannotReadHeaderRow
@ -55,13 +65,14 @@ public struct CSV: IteratorProtocol, Sequence {
stream: InputStream,
codecType: T.Type,
hasHeaderRow: Bool = defaultHasHeaderRow,
trimFields: Bool = defaultTrimFields,
delimiter: UnicodeScalar = defaultDelimiter)
throws
where T.CodeUnit == UInt8
{
let reader = try BinaryReader(stream: stream, endian: .unknown, closeOnDeinit: true)
let iterator = UnicodeIterator(input: reader.makeUInt8Iterator(), inputEncodingType: codecType)
try self.init(iterator: iterator, hasHeaderRow: hasHeaderRow, delimiter: delimiter)
try self.init(iterator: iterator, hasHeaderRow: hasHeaderRow, trimFields: trimFields, delimiter: delimiter)
}
/// Create an instance with `InputStream`.
@ -76,13 +87,14 @@ public struct CSV: IteratorProtocol, Sequence {
codecType: T.Type,
endian: Endian = .big,
hasHeaderRow: Bool = defaultHasHeaderRow,
trimFields: Bool = defaultTrimFields,
delimiter: UnicodeScalar = defaultDelimiter)
throws
where T.CodeUnit == UInt16
{
let reader = try BinaryReader(stream: stream, endian: endian, closeOnDeinit: true)
let iterator = UnicodeIterator(input: reader.makeUInt16Iterator(), inputEncodingType: codecType)
try self.init(iterator: iterator, hasHeaderRow: hasHeaderRow, delimiter: delimiter)
try self.init(iterator: iterator, hasHeaderRow: hasHeaderRow, trimFields: trimFields, delimiter: delimiter)
}
/// Create an instance with `InputStream`.
@ -97,13 +109,14 @@ public struct CSV: IteratorProtocol, Sequence {
codecType: T.Type,
endian: Endian = .big,
hasHeaderRow: Bool = defaultHasHeaderRow,
trimFields: Bool = defaultTrimFields,
delimiter: UnicodeScalar = defaultDelimiter)
throws
where T.CodeUnit == UInt32
{
let reader = try BinaryReader(stream: stream, endian: endian, closeOnDeinit: true)
let iterator = UnicodeIterator(input: reader.makeUInt32Iterator(), inputEncodingType: codecType)
try self.init(iterator: iterator, hasHeaderRow: hasHeaderRow, delimiter: delimiter)
try self.init(iterator: iterator, hasHeaderRow: hasHeaderRow, trimFields: trimFields, delimiter: delimiter)
}
// MARK: IteratorProtocol
@ -160,6 +173,13 @@ public struct CSV: IteratorProtocol, Sequence {
var field: String
var end: Bool
while true {
if trimFields {
// Trim the leading spaces
while next != nil && whitespaces.contains(next!) {
next = moveNext()
}
}
if next == nil {
(field, end) = ("", true)
}
@ -169,6 +189,11 @@ public struct CSV: IteratorProtocol, Sequence {
else {
back = next
(field, end) = readField(quoted: false)
if trimFields {
// Trim the trailing spaces
field = field.trimmingCharacters(in: whitespaces)
}
}
row.append(field)
if end {
@ -188,7 +213,15 @@ public struct CSV: IteratorProtocol, Sequence {
while let c = next {
if quoted {
if c == DQUOTE {
let cNext = moveNext()
var cNext = moveNext()
if trimFields {
// Trim the trailing spaces
while cNext != nil && whitespaces.contains(cNext!) {
cNext = moveNext()
}
}
if cNext == nil || cNext == CR || cNext == LF {
if cNext == CR {
let cNextNext = moveNext()
@ -208,7 +241,7 @@ public struct CSV: IteratorProtocol, Sequence {
field.append(String(DQUOTE))
}
else {
// ERROR??
// ERROR?
field.append(String(c))
}
}
@ -239,6 +272,7 @@ public struct CSV: IteratorProtocol, Sequence {
next = moveNext()
}
// END FILE
return (field, true)
}

View File

@ -15,7 +15,7 @@
<key>CFBundlePackageType</key>
<string>FMWK</string>
<key>CFBundleShortVersionString</key>
<string>1.0.0</string>
<string>1.1.0</string>
<key>CFBundleSignature</key>
<string>????</string>
<key>CFBundleVersion</key>

View File

@ -163,7 +163,7 @@ class CSVTests: XCTestCase {
func testCSVState1() {
let it = "あ,い1,\"\",えお\n,,x,".unicodeScalars.makeIterator()
var csv = try! CSV(iterator: it, hasHeaderRow: defaultHasHeaderRow, delimiter: defaultDelimiter)
var csv = try! CSV(iterator: it, hasHeaderRow: defaultHasHeaderRow, trimFields: defaultTrimFields, delimiter: defaultDelimiter)
var rows = [[String]]()
@ -175,4 +175,112 @@ class CSVTests: XCTestCase {
XCTAssertEqual(rows[1], ["", "", "x", ""])
}
/// With `trimFields` enabled, unquoted fields that carry no surrounding
/// whitespace are expected to come back unchanged.
func testTrimFields1() {
    let csvString = "abc,def,ghi"
    let csv = try! CSV(string: csvString, trimFields: true)
    // Count the rows so the test cannot pass vacuously when nothing is parsed.
    var rowCount = 0
    for row in csv {
        XCTAssertEqual(row, ["abc", "def", "ghi"])
        rowCount += 1
    }
    XCTAssertEqual(rowCount, 1)
}
/// With `trimFields` enabled, leading spaces in front of unquoted fields
/// are expected to be removed.
func testTrimFields2() {
    let csvString = " abc, def, ghi"
    let csv = try! CSV(string: csvString, trimFields: true)
    // Count the rows so the test cannot pass vacuously when nothing is parsed.
    var rowCount = 0
    for row in csv {
        XCTAssertEqual(row, ["abc", "def", "ghi"])
        rowCount += 1
    }
    XCTAssertEqual(rowCount, 1)
}
/// With `trimFields` enabled, trailing spaces after unquoted fields are
/// expected to be removed.
func testTrimFields3() {
    let csvString = "abc ,def ,ghi "
    let csv = try! CSV(string: csvString, trimFields: true)
    // Count the rows so the test cannot pass vacuously when nothing is parsed.
    var rowCount = 0
    for row in csv {
        XCTAssertEqual(row, ["abc", "def", "ghi"])
        rowCount += 1
    }
    XCTAssertEqual(rowCount, 1)
}
/// With `trimFields` enabled, spaces on both sides of unquoted fields are
/// expected to be removed.
func testTrimFields4() {
    let csvString = " abc , def , ghi "
    let csv = try! CSV(string: csvString, trimFields: true)
    // Count the rows so the test cannot pass vacuously when nothing is parsed.
    var rowCount = 0
    for row in csv {
        XCTAssertEqual(row, ["abc", "def", "ghi"])
        rowCount += 1
    }
    XCTAssertEqual(rowCount, 1)
}
/// With `trimFields` enabled, quoted fields without any surrounding
/// whitespace are expected to yield their unquoted content.
func testTrimFields5() {
    let csvString = "\"abc\",\"def\",\"ghi\""
    let csv = try! CSV(string: csvString, trimFields: true)
    // Count the rows so the test cannot pass vacuously when nothing is parsed.
    var rowCount = 0
    for row in csv {
        XCTAssertEqual(row, ["abc", "def", "ghi"])
        rowCount += 1
    }
    XCTAssertEqual(rowCount, 1)
}
/// With `trimFields` enabled, spaces in front of the opening quote of a
/// quoted field are expected to be skipped.
func testTrimFields6() {
    let csvString = " \"abc\", \"def\", \"ghi\""
    let csv = try! CSV(string: csvString, trimFields: true)
    // Count the rows so the test cannot pass vacuously when nothing is parsed.
    var rowCount = 0
    for row in csv {
        XCTAssertEqual(row, ["abc", "def", "ghi"])
        rowCount += 1
    }
    XCTAssertEqual(rowCount, 1)
}
/// With `trimFields` enabled, spaces after the closing quote of a quoted
/// field are expected to be skipped.
func testTrimFields7() {
    let csvString = "\"abc\" ,\"def\" ,\"ghi\" "
    let csv = try! CSV(string: csvString, trimFields: true)
    // Count the rows so the test cannot pass vacuously when nothing is parsed.
    var rowCount = 0
    for row in csv {
        XCTAssertEqual(row, ["abc", "def", "ghi"])
        rowCount += 1
    }
    XCTAssertEqual(rowCount, 1)
}
/// With `trimFields` enabled, spaces on both sides of quoted fields
/// (outside the quotes) are expected to be skipped.
func testTrimFields8() {
    let csvString = " \"abc\" , \"def\" , \"ghi\" "
    let csv = try! CSV(string: csvString, trimFields: true)
    // Count the rows so the test cannot pass vacuously when nothing is parsed.
    var rowCount = 0
    for row in csv {
        XCTAssertEqual(row, ["abc", "def", "ghi"])
        rowCount += 1
    }
    XCTAssertEqual(rowCount, 1)
}
/// With `trimFields` enabled, whitespace located inside the quotes of a
/// quoted field is expected to be preserved.
func testTrimFields9() {
    let csvString = "\" abc \",\" def \",\" ghi \""
    let csv = try! CSV(string: csvString, trimFields: true)
    // Count the rows so the test cannot pass vacuously when nothing is parsed.
    var rowCount = 0
    for row in csv {
        XCTAssertEqual(row, [" abc ", " def ", " ghi "])
        rowCount += 1
    }
    XCTAssertEqual(rowCount, 1)
}
/// With `trimFields` enabled and the default delimiter, tab characters
/// around unquoted fields are expected to be removed.
func testTrimFields10() {
    let csvString = "\tabc,\t\tdef\t,ghi\t"
    let csv = try! CSV(string: csvString, trimFields: true)
    // Count the rows so the test cannot pass vacuously when nothing is parsed.
    var rowCount = 0
    for row in csv {
        XCTAssertEqual(row, ["abc", "def", "ghi"])
        rowCount += 1
    }
    XCTAssertEqual(rowCount, 1)
}
/// With `trimFields` enabled, unquoted fields on two separate lines are
/// expected to be trimmed independently, one row per line.
func testTrimFields11() {
    let csvString = " abc \n def "
    var csv = try! CSV(string: csvString, trimFields: true)

    // Report a missing row as a test failure instead of trapping on a
    // force-unwrap, which would abort the whole test run.
    guard let row1 = csv.next() else {
        XCTFail("Expected a first row")
        return
    }
    XCTAssertEqual(row1, ["abc"])

    guard let row2 = csv.next() else {
        XCTFail("Expected a second row")
        return
    }
    XCTAssertEqual(row2, ["def"])
}
/// With `trimFields` enabled, quoted fields on two separate lines are
/// expected to keep the whitespace inside their quotes while the
/// whitespace outside the quotes is dropped.
func testTrimFields12() {
    let csvString = " \"abc \" \n \" def\" "
    var csv = try! CSV(string: csvString, trimFields: true)

    // Report a missing row as a test failure instead of trapping on a
    // force-unwrap, which would abort the whole test run.
    guard let row1 = csv.next() else {
        XCTFail("Expected a first row")
        return
    }
    XCTAssertEqual(row1, ["abc "])

    guard let row2 = csv.next() else {
        XCTFail("Expected a second row")
        return
    }
    XCTAssertEqual(row2, [" def"])
}
/// With `trimFields` enabled and a tab delimiter, tabs are expected to
/// split fields (two adjacent tabs yield an empty field) while the
/// surrounding spaces are still trimmed.
func testTrimFields13() {
    let csvString = " abc \t\tdef\t ghi "
    let csv = try! CSV(string: csvString, trimFields: true, delimiter: UnicodeScalar("\t")!)
    // Count the rows so the test cannot pass vacuously when nothing is parsed.
    var rowCount = 0
    for row in csv {
        XCTAssertEqual(row, ["abc", "", "def", "ghi"])
        rowCount += 1
    }
    XCTAssertEqual(rowCount, 1)
}
}

View File

@ -46,7 +46,7 @@ class ReadmeTests: XCTestCase {
string: "id,name\n1,foo",
hasHeaderRow: true) // It must be true.
while csv.next() != nil {
while let _ = csv.next() {
print("\(csv["id"]!)") // => "1"
print("\(csv["name"]!)") // => "foo"
}