Swift: what is the right way to split up a [String] resulting in a [[String]] with a given subarray size?
IosSwiftAlgorithmIos Problem Overview
Starting with a large [String] and a given subarray size, what is the best way I could go about splitting up this array into smaller arrays? (The last array will be smaller than the given subarray size).
Concrete example:
> Split up ["1","2","3","4","5","6","7"] with max split size 2 > > The code would produce [["1","2"],["3","4"],["5","6"],["7"]]
Obviously I could do this a little more manually, but I feel like in swift something like map() or reduce() may do what I want really beautifully.
Ios Solutions
Solution 1 - Ios
In Swift 3/4 this would look like the following:
let numbers = ["1","2","3","4","5","6","7"]
let chunkSize = 2
let chunks = stride(from: 0, to: numbers.count, by: chunkSize).map {
Array(numbers[$0..<min($0 + chunkSize, numbers.count)])
}
// prints as [["1", "2"], ["3", "4"], ["5", "6"], ["7"]]
As an extension to Array:
extension Array {
func chunked(by chunkSize: Int) -> [[Element]] {
return stride(from: 0, to: self.count, by: chunkSize).map {
Array(self[$0..<Swift.min($0 + chunkSize, self.count)])
}
}
}
Or the slightly more verbose, yet more general:
let numbers = ["1","2","3","4","5","6","7"]
let chunkSize = 2
let chunks: [[String]] = stride(from: 0, to: numbers.count, by: chunkSize).map {
let end = numbers.endIndex
let chunkEnd = numbers.index($0, offsetBy: chunkSize, limitedBy: end) ?? end
return Array(numbers[$0..<chunkEnd])
}
This is more general because I am making fewer assumptions about the type of the index into the collection. In the previous implementation I assumed that they could be could be compared and added.
Note that in Swift 3 the functionality of advancing indices has been transferred from the indices themselves to the collection.
Solution 2 - Ios
With Swift 5, according to your needs, you can choose one of the five following ways in order to solve your problem.
AnyIterator
in a Collection
extension method
1. Using AnyIterator
is a good candidate to iterate over the indices of an object that conforms to Collection
protocol in order to return subsequences of this object. In a Collection
protocol extension, you can declare a chunked(by:)
method with the following implementation:
extension Collection {
func chunked(by distance: Int) -> [[Element]] {
precondition(distance > 0, "distance must be greater than 0") // prevents infinite loop
var index = startIndex
let iterator: AnyIterator<Array<Element>> = AnyIterator({
let newIndex = self.index(index, offsetBy: distance, limitedBy: self.endIndex) ?? self.endIndex
defer { index = newIndex }
let range = index ..< newIndex
return index != self.endIndex ? Array(self[range]) : nil
})
return Array(iterator)
}
}
Usage:
let array = ["1", "2", "3", "4", "5", "6", "7", "8", "9"]
let newArray = array.chunked(by: 2)
print(newArray) // prints: [["1", "2"], ["3", "4"], ["5", "6"], ["7", "8"], ["9"]]
stride(from:to:by:)
function in an Array
extension method
2. Using Array
indices are of type Int
and conform to Strideable
protocol. Therefore, you can use stride(from:to:by:)
and advanced(by:)
with them. In an Array
extension, you can declare a chunked(by:)
method with the following implementation:
extension Array {
func chunked(by distance: Int) -> [[Element]] {
let indicesSequence = stride(from: startIndex, to: endIndex, by: distance)
let array: [[Element]] = indicesSequence.map {
let newIndex = $0.advanced(by: distance) > endIndex ? endIndex : $0.advanced(by: distance)
//let newIndex = self.index($0, offsetBy: distance, limitedBy: self.endIndex) ?? self.endIndex // also works
return Array(self[$0 ..< newIndex])
}
return array
}
}
Usage:
let array = ["1", "2", "3", "4", "5", "6", "7", "8", "9"]
let newArray = array.chunked(by: 2)
print(newArray) // prints: [["1", "2"], ["3", "4"], ["5", "6"], ["7", "8"], ["9"]]
Array
extension method
3. Using a recursive approach in an Based on Nate Cook recursive code, you can declare a chunked(by:)
method in an Array
extension with the following implementation:
extension Array {
func chunked(by distance: Int) -> [[Element]] {
precondition(distance > 0, "distance must be greater than 0") // prevents infinite loop
if self.count <= distance {
return [self]
} else {
let head = [Array(self[0 ..< distance])]
let tail = Array(self[distance ..< self.count])
return head + tail.chunked(by: distance)
}
}
}
Usage:
let array = ["1", "2", "3", "4", "5", "6", "7", "8", "9"]
let newArray = array.chunked(by: 2)
print(newArray) // prints: [["1", "2"], ["3", "4"], ["5", "6"], ["7", "8"], ["9"]]
Collection
extension method
4. Using a for loop and batches in a Chris Eidhof and Florian Kugler show in Swift Talk #33 - Sequence & Iterator (Collections #2) video how to use a simple for loop to fill batches of sequence elements and append them on completion to an array. In a Sequence
extension, you can declare a chunked(by:)
method with the following implementation:
extension Collection {
func chunked(by distance: Int) -> [[Element]] {
var result: [[Element]] = []
var batch: [Element] = []
for element in self {
batch.append(element)
if batch.count == distance {
result.append(batch)
batch = []
}
}
if !batch.isEmpty {
result.append(batch)
}
return result
}
}
Usage:
let array = ["1", "2", "3", "4", "5", "6", "7", "8", "9"]
let newArray = array.chunked(by: 2)
print(newArray) // prints: [["1", "2"], ["3", "4"], ["5", "6"], ["7", "8"], ["9"]]
struct
that conforms to Sequence
and IteratorProtocol
protocols
5. Using a custom If you don't want to create extensions of Sequence
, Collection
or Array
, you can create a custom struct
that conforms to Sequence
and IteratorProtocol
protocols. This struct
should have the following implementation:
struct BatchSequence<T>: Sequence, IteratorProtocol {
private let array: [T]
private let distance: Int
private var index = 0
init(array: [T], distance: Int) {
precondition(distance > 0, "distance must be greater than 0") // prevents infinite loop
self.array = array
self.distance = distance
}
mutating func next() -> [T]? {
guard index < array.endIndex else { return nil }
let newIndex = index.advanced(by: distance) > array.endIndex ? array.endIndex : index.advanced(by: distance)
defer { index = newIndex }
return Array(array[index ..< newIndex])
}
}
Usage:
let array = ["1", "2", "3", "4", "5", "6", "7", "8", "9"]
let batchSequence = BatchSequence(array: array, distance: 2)
let newArray = Array(batchSequence)
print(newArray) // prints: [["1", "2"], ["3", "4"], ["5", "6"], ["7", "8"], ["9"]]
Solution 3 - Ios
I wouldn't call it beautiful, but here's a method using map
:
let numbers = ["1","2","3","4","5","6","7"]
let splitSize = 2
let chunks = numbers.startIndex.stride(to: numbers.count, by: splitSize).map {
numbers[$0 ..< $0.advancedBy(splitSize, limit: numbers.endIndex)]
}
The stride(to:by:)
method gives you the indices for the first element of each chunk, so you can map those indices to a slice of the source array using advancedBy(distance:limit:)
.
A more "functional" approach would simply be to recurse over the array, like so:
func chunkArray<T>(s: [T], splitSize: Int) -> [[T]] {
if countElements(s) <= splitSize {
return [s]
} else {
return [Array<T>(s[0..<splitSize])] + chunkArray(Array<T>(s[splitSize..<s.count]), splitSize)
}
}
Solution 4 - Ios
I like Nate Cook's answer, it looks like Swift has moved on since it was written, here's my take on this as an extension to Array:
extension Array {
func chunk(chunkSize : Int) -> Array<Array<Element>> {
return 0.stride(to: self.count, by: chunkSize)
.map { Array(self[$0..<$0.advancedBy(chunkSize, limit: self.count)]) }
}
}
Note, it returns [] for negative numbers and will result in a fatal error as written above. You'll have to put a guard in if you want to prevent that.
func testChunkByTwo() {
let input = [1,2,3,4,5,6,7]
let output = input.chunk(2)
let expectedOutput = [[1,2], [3,4], [5,6], [7]]
XCTAssertEqual(expectedOutput, output)
}
func testByOne() {
let input = [1,2,3,4,5,6,7]
let output = input.chunk(1)
let expectedOutput = [[1],[2],[3],[4],[5],[6],[7]]
XCTAssertEqual(expectedOutput, output)
}
func testNegative() {
let input = [1,2,3,4,5,6,7]
let output = input.chunk(-2)
let expectedOutput = []
XCTAssertEqual(expectedOutput, output)
}
Solution 5 - Ios
I don't think you'll want to use map or reduce. Map is for applying a function on each individual element in an array while reduce is for flattening an array. What you want to do is slice the array into subarrays of a certain size. This snippet uses slices.
var arr = ["1","2","3","4","5","6","7"]
var splitSize = 2
var newArr = [[String]]()
var i = 0
while i < arr.count {
var slice: Slice<String>!
if i + splitSize >= arr.count {
slice = arr[i..<arr.count]
}
else {
slice = arr[i..<i+splitSize]
}
newArr.append(Array(slice))
i += slice.count
}
println(newArr)
Solution 6 - Ios
Would be nice to express Tyler Cloutier's formulation as an extension on Array:
extension Array {
func chunked(by chunkSize:Int) -> [[Element]] {
let groups = stride(from: 0, to: self.count, by: chunkSize).map {
Array(self[$0..<[$0 + chunkSize, self.count].min()!])
}
return groups
}
}
This gives us a general way to partition an array into chunks.
Solution 7 - Ios
New in Swift 4, you can do this efficiently with reduce(into:)
. Here's an extension on Sequence:
extension Sequence {
func eachSlice(_ clump:Int) -> [[Self.Element]] {
return self.reduce(into:[]) { memo, cur in
if memo.count == 0 {
return memo.append([cur])
}
if memo.last!.count < clump {
memo.append(memo.removeLast() + [cur])
} else {
memo.append([cur])
}
}
}
}
Usage:
let result = [1,2,3,4,5,6,7,8,9].eachSlice(2)
// [[1, 2], [3, 4], [5, 6], [7, 8], [9]]
Solution 8 - Ios
The above is very cleaver, but it makes my head hurt. I had to revert back to a less swifty approach.
For Swift 2.0
var chunks = [[Int]]()
var temp = [Int]()
var splitSize = 3
var x = [1,2,3,4,5,6,7]
for (i, element) in x.enumerate() {
if temp.count < splitSize {
temp.append(element)
}
if temp.count == splitSize {
chunks.append(temp)
temp.removeAll()
}
}
if !temp.isEmpty {
chunks.append(temp)
}
Playground Result [[1, 2, 3], [4, 5, 6], [7]]
Solution 9 - Ios
I'll just throw my hat in the ring here with another implementation based on AnyGenerator.
extension Array {
func chunks(_ size: Int) -> AnyIterator<[Element]> {
if size == 0 {
return AnyIterator {
return nil
}
}
let indices = stride(from: startIndex, to: count, by: size)
var generator = indices.makeIterator()
return AnyIterator {
guard let i = generator.next() else {
return nil
}
var j = self.index(i, offsetBy: size)
repeat {
j = self.index(before: j)
} while j >= self.endIndex
return self[i...j].lazy.map { $0 }
}
}
}
I prefer this method since it relies exclusively on generators which can have a non-negligible, positive memory impact when dealing with large arrays.
For your specific example, here's how it would work:
let chunks = Array(["1","2","3","4","5","6","7"].chunks(2))
Result:
[["1", "2"], ["3", "4"], ["5", "6"], ["7"]]
Solution 10 - Ios
In Swift 4 or later you can also extend Collection
and return a collection of SubSequence
of it to be able to use it also with StringProtocol
types (String
or Substring
). This way it will return a collection of substrings instead of a collection of a bunch of characters:
Xcode 10.1 • Swift 4.2.1 or later
extension Collection {
func subSequences(limitedTo maxLength: Int) -> [SubSequence] {
precondition(maxLength > 0, "groups must be greater than zero")
var start = startIndex
var subSequences: [SubSequence] = []
while start < endIndex {
let end = index(start, offsetBy: maxLength, limitedBy: endIndex) ?? endIndex
defer { start = end }
subSequences.append(self[start..<end])
}
return subSequences
}
}
Or as suggested in comments by @Jessy using collection method
public func sequence<T, State>(state: State, next: @escaping (inout State) -> T?) -> UnfoldSequence<T, State>
extension Collection {
func subSequences(limitedTo maxLength: Int) -> [SubSequence] {
precondition(maxLength > 0, "groups must be greater than zero")
return .init(sequence(state: startIndex) { start in
guard start < self.endIndex else { return nil }
let end = self.index(start, offsetBy: maxLength, limitedBy: self.endIndex) ?? self.endIndex
defer { start = end }
return self[start..<end]
})
}
}
Usage
let array = ["1", "2", "3", "4", "5", "6", "7", "8", "9"]
let slices = array.subSequences(limitedTo: 2) // [ArraySlice(["1", "2"]), ArraySlice(["3", "4"]), ArraySlice(["5", "6"]), ArraySlice(["7", "8"]), ArraySlice(["9"])]
for slice in slices {
print(slice) // prints: [["1", "2"], ["3", "4"], ["5", "6"], ["7", "8"], ["9"]]
}
// To convert from ArraySlice<Element> to Array<element>
let arrays = slices.map(Array.init) // [["1", "2"], ["3", "4"], ["5", "6"], ["7", "8"], ["9"]]
extension Collection {
var singles: [SubSequence] { return subSequences(limitedTo: 1) }
var pairs: [SubSequence] { return subSequences(limitedTo: 2) }
var triples: [SubSequence] { return subSequences(limitedTo: 3) }
var quads: [SubSequence] { return subSequences(limitedTo: 4) }
}
Array or ArraySlice of Characters
let chars = ["a","b","c","d","e","f","g","h","i"]
chars.singles // [["a"], ["b"], ["c"], ["d"], ["e"], ["f"], ["g"], ["h"], ["i"]]
chars.pairs // [["a", "b"], ["c", "d"], ["e", "f"], ["g", "h"], ["i"]]
chars.triples // [["a", "b", "c"], ["d", "e", "f"], ["g", "h", "i"]]
chars.quads // [["a", "b", "c", "d"], ["e", "f", "g", "h"], ["i"]]
chars.dropFirst(2).quads // [["c", "d", "e", "f"], ["g", "h", "i"]]
StringProtocol Elements (String and SubString)
let str = "abcdefghi"
str.singles // ["a", "b", "c", "d", "e", "f", "g", "h", "i"]
str.pairs // ["ab", "cd", "ef", "gh", "i"]
str.triples // ["abc", "def", "ghi"]
str.quads // ["abcd", "efgh", "i"]
str.dropFirst(2).quads // ["cdef", "ghi"]
Solution 11 - Ios
Swift 5.1 - General solution for all kind of Collections:
extension Collection where Index == Int {
func chunked(by chunkSize: Int) -> [[Element]] {
stride(from: startIndex, to: endIndex, by: chunkSize).map { Array(self[$0..<Swift.min($0 + chunkSize, count)]) }
}
}
Solution 12 - Ios
Do you know that any solution with [a...b] swift style works 10 times slower then regular?
for y in 0..<rows {
var row = [Double]()
for x in 0..<cols {
row.append(stream[y * cols + x])
}
mat.append(row)
}
Try it and will see, here is my raw code for test:
let count = 1000000
let cols = 1000
let rows = count / cols
var stream = [Double].init(repeating: 0.5, count: count)
// Regular
var mat = [[Double]]()
let t1 = Date()
for y in 0..<rows {
var row = [Double]()
for x in 0..<cols {
row.append(stream[y * cols + x])
}
mat.append(row)
}
print("regular: \(Date().timeIntervalSince(t1))")
//Swift
let t2 = Date()
var mat2: [[Double]] = stride(from: 0, to: stream.count, by: cols).map {
let end = stream.endIndex
let chunkEnd = stream.index($0, offsetBy: cols, limitedBy: end) ?? end
return Array(stream[$0..<chunkEnd])
}
print("swift: \(Date().timeIntervalSince(t2))")
and out:
regular: 0.0449600219726562
swift: 0.49255496263504
Solution 13 - Ios
public extension Optional {
/// Wraps a value in an `Optional`, based on a condition.
/// - Parameters:
/// - wrapped: A non-optional value.
/// - getIsNil: The condition that will result in `nil`.
init(
_ wrapped: Wrapped,
nilWhen getIsNil: (Wrapped) throws -> Bool
) rethrows {
self = try getIsNil(wrapped) ? nil : wrapped
}
}
public extension Sequence {
/// Splits a `Sequence` into equal "chunks".
///
/// - Parameter maxArrayCount: The maximum number of elements in a chunk.
/// - Returns: `Array`s with `maxArrayCount` `counts`,
/// until the last chunk, which may be smaller.
subscript(maxArrayCount maxCount: Int) -> AnySequence<[Element]> {
.init(
sequence( state: makeIterator() ) { iterator in
Optional(
(0..<maxCount).compactMap { _ in iterator.next() },
nilWhen: \.isEmpty
)
}
)
}
}
// [ ["1", "2"], ["3", "4"], ["5", "6"], ["7"] ]"
(1...7).map(String.init)[maxArrayCount: 2]
public extension Collection {
/// Splits a `Collection` into equal "chunks".
///
/// - Parameter maxSubSequenceCount: The maximum number of elements in a chunk.
/// - Returns: `SubSequence`s with `maxSubSequenceLength` `counts`,
/// until the last chunk, which may be smaller.
subscript(maxSubSequenceCount maxCount: Int) -> AnySequence<SubSequence> {
.init(
sequence(state: startIndex) { startIndex in
guard startIndex < self.endIndex
else { return nil }
let endIndex =
self.index(startIndex, offsetBy: maxCount, limitedBy: self.endIndex)
?? self.endIndex
defer { startIndex = endIndex }
return self[startIndex..<endIndex]
}
)
}
}
// ["12", "34", "56", "7"]
(1...7).map(String.init).joined()[maxSubSequenceCount: 2]