Number of occurrences of substring in string in Swift
RegexSwiftRegex Problem Overview
My main string is "hello Swift Swift and Swift" and substring is Swift. I need to get the number of times the substring "Swift" occurs in the mentioned string.
This code can determine whether the pattern exists.
var string = "hello Swift Swift and Swift"
if string.rangeOfString("Swift") != nil {
println("exists")
}
Now I need to know the number of occurrence.
Regex Solutions
Solution 1 - Regex
A simple approach would be to split on "Swift"
, and subtract 1 from the number of parts:
let s = "hello Swift Swift and Swift"
let tok = s.components(separatedBy:"Swift")
print(tok.count-1)
This code prints 3.
Edit: Before Swift 3 syntax the code looked like this:
let tok = s.componentsSeparatedByString("Swift")
Solution 2 - Regex
Should you want to count characters rather than substrings:
extension String {
func count(of needle: Character) -> Int {
return reduce(0) {
$1 == needle ? $0 + 1 : $0
}
}
}
Solution 3 - Regex
Optimising dwsolbergs solution to count faster. Also faster than componentsSeparatedByString
.
extension String {
/// stringToFind must be at least 1 character.
func countInstances(of stringToFind: String) -> Int {
assert(!stringToFind.isEmpty)
var count = 0
var searchRange: Range<String.Index>?
while let foundRange = range(of: stringToFind, options: [], range: searchRange) {
count += 1
searchRange = Range(uncheckedBounds: (lower: foundRange.upperBound, upper: endIndex))
}
return count
}
}
Usage:
// return 2
"aaaa".countInstances(of: "aa")
- If you want to ignore accents, you may replace
options: []
withoptions: .diacriticInsensitive
like dwsolbergs did. - If you want to ignore case, you may replace
options: []
withoptions: .caseInsensitive
like ConfusionTowers suggested. - If you want to ignore both accents and case, you may replace
options: []
withoptions: [.caseInsensitive, .diacriticInsensitive]
like ConfusionTowers suggested. - If, on the other hand, you want the fastest comparison possible and you can guarantee some canonical form for composed character sequences, then you may consider option
.literal
and it will only perform exact matchs.
Solution 4 - Regex
I'd recommend an extension to string in Swift 3 such as:
extension String {
func countInstances(of stringToFind: String) -> Int {
var stringToSearch = self
var count = 0
while let foundRange = stringToSearch.range(of: stringToFind, options: .diacriticInsensitive) {
stringToSearch = stringToSearch.replacingCharacters(in: foundRange, with: "")
count += 1
}
return count
}
}
It's a loop that finds and removes each instance of the stringToFind, incrementing the count on each go-round. Once the searchString no longer contains any stringToFind, the loop breaks and the count returns.
Note that I'm using .diacriticInsensitive so it ignore accents (for example résume and resume would both be found). You might want to add or change the options depending on the types of strings you want to find.
Solution 5 - Regex
Swift 5 Extension
extension String {
func numberOfOccurrencesOf(string: String) -> Int {
return self.components(separatedBy:string).count - 1
}
}
Example use
let string = "hello Swift Swift and Swift"
let numberOfOccurrences = string.numberOfOccurrencesOf(string: "Swift")
// numberOfOccurrences = 3
Solution 6 - Regex
I needed a way to count substrings that may contain the start of the next matched substring. Leveraging dwsolbergs extension and Strings range(of:options:range:locale:) method I came up with this String extension
extension String
{
/**
Counts the occurrences of a given substring by calling Strings `range(of:options:range:locale:)` method multiple times.
- Parameter substring : The string to search for, optional for convenience
- Parameter allowOverlap : Bool flag indicating whether the matched substrings may overlap. Count of "🐼🐼" in "🐼🐼🐼🐼" is 2 if allowOverlap is **false**, and 3 if it is **true**
- Parameter options : String compare-options to use while counting
- Parameter range : An optional range to limit the search, default is **nil**, meaning search whole string
- Parameter locale : Locale to use while counting
- Returns : The number of occurrences of the substring in this String
*/
public func count(
occurrencesOf substring: String?,
allowOverlap: Bool = false,
options: String.CompareOptions = [],
range searchRange: Range<String.Index>? = nil,
locale: Locale? = nil) -> Int
{
guard let substring = substring, !substring.isEmpty else { return 0 }
var count = 0
let searchRange = searchRange ?? startIndex..<endIndex
var searchStartIndex = searchRange.lowerBound
let searchEndIndex = searchRange.upperBound
while let rangeFound = range(of: substring, options: options, range: searchStartIndex..<searchEndIndex, locale: locale)
{
count += 1
if allowOverlap
{
searchStartIndex = index(rangeFound.lowerBound, offsetBy: 1)
}
else
{
searchStartIndex = rangeFound.upperBound
}
}
return count
}
}
Solution 7 - Regex
why not just use some length maths??
extension String {
func occurences(of search:String) -> Int {
guard search.count > 0 else {
preconditionFailure()
}
let shrunk = self.replacingOccurrences(of: search, with: "")
return (self.count - shrunk.count)/search.count
}
}
Solution 8 - Regex
Try this
var mainString = "hello Swift Swift and Swift"
var count = 0
mainString.enumerateSubstrings(in: mainString.startIndex..<mainString.endIndex, options: .byWords) { (subString, subStringRange, enclosingRange, stop) in
if case let s? = subString{
if s.caseInsensitiveCompare("swift") == .orderedSame{
count += 1
}
}
}
print(count)
Solution 9 - Regex
For the sake of completeness – and because there is a regex
tag – this is a solution with Regular Expression
let string = "hello Swift Swift and Swift"
let regex = try! NSRegularExpression(pattern: "swift", options: .caseInsensitive)
let numberOfOccurrences = regex.numberOfMatches(in: string, range: NSRange(string.startIndex..., in: string))
The option .caseInsensitive
is optional.
Solution 10 - Regex
My solution, maybe it will be better to use String.Index
instead of Int
range but I think in such way it is a bit easier to read.
extension String {
func count(of char: Character, range: (Int, Int)? = nil) -> Int {
let range = range ?? (0, self.count)
return self.enumerated().reduce(0) {
guard ($1.0 >= range.0) && ($1.0 < range.1) else { return $0 }
return ($1.1 == char) ? $0 + 1 : $0
}
}
}
Solution 11 - Regex
Solution which uses a higher order functions
func subStringCount(str: String, substr: String) -> Int {
{ $0.isEmpty ? 0 : $0.count - 1 } ( str.components(separatedBy: substr))
}
Unit Tests
import XCTest
class HigherOrderFunctions: XCTestCase {
func testSubstringWhichIsPresentInString() {
XCTAssertEqual(subStringCount(str: "hello Swift Swift and Swift", substr: "Swift"), 3)
}
func testSubstringWhichIsNotPresentInString() {
XCTAssertEqual(subStringCount(str: "hello", substr: "Swift"), 0)
}
}