How to count of sub-string occurrences?
C#StringC# Problem Overview
Suppose I have a string like:
MyString = "OU=Level3,OU=Level2,OU=Level1,DC=domain,DC=com";
then I want to know how many time of occurrences of sub-string "OU=" in this string. With single char, maybe there is something like:
int count = MyString.Split("OU=").Length - 1;
but Split
only works for char
, not string
.
Also how to find the position of n occurrences? For example, the position of 2nd "OU="
in the string?
How to resolve this issue?
C# Solutions
Solution 1 - C#
Regex.Matches(input, "OU=").Count
Solution 2 - C#
You can find all the occurrences and their positions with IndexOf
:
string MyString = "OU=Level3,OU=Level2,OU=Level1,DC=domain,DC=com";
string stringToFind = "OU=";
List<int> positions = new List<int>();
int pos = 0;
while ((pos < MyString.Length) && (pos = MyString.IndexOf(stringToFind, pos)) != -1)
{
positions.Add(pos);
pos += stringToFind.Length();
}
Console.WriteLine("{0} occurrences", positions.Count);
foreach (var p in positions)
{
Console.WriteLine(p);
}
You can get the same result from a regular expression:
var matches = Regex.Matches(MyString, "OU=");
Console.WriteLine("{0} occurrences", matches.Count);
foreach (var m in matches)
{
Console.WriteLine(m.Index);
}
The primary differences:
- The Regex code is shorter
- The Regex code allocates a collection and multiple strings.
- The IndexOf code could be written to output the position immediately, without creating a collection.
- It's likely that the Regex code will be faster in isolation, but if used many times the combined overhead of the string allocations could cause a much higher load on the garbage collector.
If I were writing this in-line, as something that didn't get used often, I'd probably go with the regex solution. If I were to put it into a library as something to be used a lot, I'd probably go with the IndexOf solution.
Solution 3 - C#
(Clippy-mode:ON)
You look like you're parsing an LDAP query!
Would you like to parse it:
- manually? Goto "SplittingAndParsing"
- Automagically via Win32 calls? Goto "Using Win32 via PInvoke"
(Clippy-mode:OFF)
"SplittingAndParsing":
var MyString = "OU=Level3,OU=Level2,OU=Level1,DC=domain,DC=com";
var chunksAsKvps = MyString
.Split(',')
.Select(chunk =>
{
var bits = chunk.Split('=');
return new KeyValuePair<string,string>(bits[0], bits[1]);
});
var allOUs = chunksAsKvps
.Where(kvp => kvp.Key.Equals("OU", StringComparison.OrdinalIgnoreCase));
"Using Win32 via PInvoke":
Usage:
var parsedDn = Win32LDAP.ParseDN(str);
var allOUs2 = parsedDn
.Where(dn => dn.Key.Equals("OU", StringComparison.OrdinalIgnoreCase));
Utility Code:
// I don't remember where I got this from, honestly...I *think* it came
// from another SO user long ago, but those details I've lost to history...
public class Win32LDAP
{
#region Constants
public const int ERROR_SUCCESS = 0;
public const int ERROR_BUFFER_OVERFLOW = 111;
#endregion Constants
#region DN Parsing
[DllImport("ntdsapi.dll", CharSet = CharSet.Unicode)]
protected static extern int DsGetRdnW(
ref IntPtr ppDN,
ref int pcDN,
out IntPtr ppKey,
out int pcKey,
out IntPtr ppVal,
out int pcVal
);
public static KeyValuePair<string, string> GetName(string distinguishedName)
{
IntPtr pDistinguishedName = Marshal.StringToHGlobalUni(distinguishedName);
try
{
IntPtr pDN = pDistinguishedName, pKey, pVal;
int cDN = distinguishedName.Length, cKey, cVal;
int lastError = DsGetRdnW(ref pDN, ref cDN, out pKey, out cKey, out pVal, out cVal);
if(lastError == ERROR_SUCCESS)
{
string key, value;
if(cKey < 1)
{
key = string.Empty;
}
else
{
key = Marshal.PtrToStringUni(pKey, cKey);
}
if(cVal < 1)
{
value = string.Empty;
}
else
{
value = Marshal.PtrToStringUni(pVal, cVal);
}
return new KeyValuePair<string, string>(key, value);
}
else
{
throw new Win32Exception(lastError);
}
}
finally
{
Marshal.FreeHGlobal(pDistinguishedName);
}
}
public static IEnumerable<KeyValuePair<string, string>> ParseDN(string distinguishedName)
{
List<KeyValuePair<string, string>> components = new List<KeyValuePair<string, string>>();
IntPtr pDistinguishedName = Marshal.StringToHGlobalUni(distinguishedName);
try
{
IntPtr pDN = pDistinguishedName, pKey, pVal;
int cDN = distinguishedName.Length, cKey, cVal;
do
{
int lastError = DsGetRdnW(ref pDN, ref cDN, out pKey, out cKey, out pVal, out cVal);
if(lastError == ERROR_SUCCESS)
{
string key, value;
if(cKey < 0)
{
key = null;
}
else if(cKey == 0)
{
key = string.Empty;
}
else
{
key = Marshal.PtrToStringUni(pKey, cKey);
}
if(cVal < 0)
{
value = null;
}
else if(cVal == 0)
{
value = string.Empty;
}
else
{
value = Marshal.PtrToStringUni(pVal, cVal);
}
components.Add(new KeyValuePair<string, string>(key, value));
pDN = (IntPtr)(pDN.ToInt64() + UnicodeEncoding.CharSize); //skip over comma
cDN--;
}
else
{
throw new Win32Exception(lastError);
}
} while(cDN > 0);
return components;
}
finally
{
Marshal.FreeHGlobal(pDistinguishedName);
}
}
[DllImport("ntdsapi.dll", CharSet = CharSet.Unicode)]
protected static extern int DsQuoteRdnValueW(
int cUnquotedRdnValueLength,
string psUnquotedRdnValue,
ref int pcQuotedRdnValueLength,
IntPtr psQuotedRdnValue
);
public static string QuoteRDN(string rdn)
{
if (rdn == null) return null;
int initialLength = rdn.Length;
int quotedLength = 0;
IntPtr pQuotedRDN = IntPtr.Zero;
int lastError = DsQuoteRdnValueW(initialLength, rdn, ref quotedLength, pQuotedRDN);
switch (lastError)
{
case ERROR_SUCCESS:
{
return string.Empty;
}
case ERROR_BUFFER_OVERFLOW:
{
break; //continue
}
default:
{
throw new Win32Exception(lastError);
}
}
pQuotedRDN = Marshal.AllocHGlobal(quotedLength * UnicodeEncoding.CharSize);
try
{
lastError = DsQuoteRdnValueW(initialLength, rdn, ref quotedLength, pQuotedRDN);
switch(lastError)
{
case ERROR_SUCCESS:
{
return Marshal.PtrToStringUni(pQuotedRDN, quotedLength);
}
default:
{
throw new Win32Exception(lastError);
}
}
}
finally
{
if(pQuotedRDN != IntPtr.Zero)
{
Marshal.FreeHGlobal(pQuotedRDN);
}
}
}
[DllImport("ntdsapi.dll", CharSet = CharSet.Unicode)]
protected static extern int DsUnquoteRdnValueW(
int cQuotedRdnValueLength,
string psQuotedRdnValue,
ref int pcUnquotedRdnValueLength,
IntPtr psUnquotedRdnValue
);
public static string UnquoteRDN(string rdn)
{
if (rdn == null) return null;
int initialLength = rdn.Length;
int unquotedLength = 0;
IntPtr pUnquotedRDN = IntPtr.Zero;
int lastError = DsUnquoteRdnValueW(initialLength, rdn, ref unquotedLength, pUnquotedRDN);
switch (lastError)
{
case ERROR_SUCCESS:
{
return string.Empty;
}
case ERROR_BUFFER_OVERFLOW:
{
break; //continue
}
default:
{
throw new Win32Exception(lastError);
}
}
pUnquotedRDN = Marshal.AllocHGlobal(unquotedLength * UnicodeEncoding.CharSize);
try
{
lastError = DsUnquoteRdnValueW(initialLength, rdn, ref unquotedLength, pUnquotedRDN);
switch(lastError)
{
case ERROR_SUCCESS:
{
return Marshal.PtrToStringUni(pUnquotedRDN, unquotedLength);
}
default:
{
throw new Win32Exception(lastError);
}
}
}
finally
{
if(pUnquotedRDN != IntPtr.Zero)
{
Marshal.FreeHGlobal(pUnquotedRDN);
}
}
}
#endregion DN Parsing
}
public class DNComponent
{
public string Type { get; protected set; }
public string EscapedValue { get; protected set; }
public string UnescapedValue { get; protected set; }
public string WholeComponent { get; protected set; }
public DNComponent(string component, bool isEscaped)
{
string[] tokens = component.Split(new char[] { '=' }, 2);
setup(tokens[0], tokens[1], isEscaped);
}
public DNComponent(string key, string value, bool isEscaped)
{
setup(key, value, isEscaped);
}
private void setup(string key, string value, bool isEscaped)
{
Type = key;
if(isEscaped)
{
EscapedValue = value;
UnescapedValue = Win32LDAP.UnquoteRDN(value);
}
else
{
EscapedValue = Win32LDAP.QuoteRDN(value);
UnescapedValue = value;
}
WholeComponent = Type + "=" + EscapedValue;
}
public override bool Equals(object obj)
{
if (obj is DNComponent)
{
DNComponent dnObj = (DNComponent)obj;
return dnObj.WholeComponent.Equals(this.WholeComponent, StringComparison.CurrentCultureIgnoreCase);
}
return base.Equals(obj);
}
public override int GetHashCode()
{
return WholeComponent.GetHashCode();
}
}
public class DistinguishedName
{
public DNComponent[] Components
{
get
{
return components.ToArray();
}
}
private List<DNComponent> components;
private string cachedDN;
public DistinguishedName(string distinguishedName)
{
cachedDN = distinguishedName;
components = new List<DNComponent>();
foreach (KeyValuePair<string, string> kvp in Win32LDAP.ParseDN(distinguishedName))
{
components.Add(new DNComponent(kvp.Key, kvp.Value, true));
}
}
public DistinguishedName(IEnumerable<DNComponent> dnComponents)
{
components = new List<DNComponent>(dnComponents);
cachedDN = GetWholePath(",");
}
public bool Contains(DNComponent dnComponent)
{
return components.Contains(dnComponent);
}
public string GetDNSDomainName()
{
List<string> dcs = new List<string>();
foreach (DNComponent dnc in components)
{
if(dnc.Type.Equals("DC", StringComparison.CurrentCultureIgnoreCase))
{
dcs.Add(dnc.UnescapedValue);
}
}
return string.Join(".", dcs.ToArray());
}
public string GetDomainDN()
{
List<string> dcs = new List<string>();
foreach (DNComponent dnc in components)
{
if(dnc.Type.Equals("DC", StringComparison.CurrentCultureIgnoreCase))
{
dcs.Add(dnc.WholeComponent);
}
}
return string.Join(",", dcs.ToArray());
}
public string GetWholePath()
{
return GetWholePath(",");
}
public string GetWholePath(string separator)
{
List<string> parts = new List<string>();
foreach (DNComponent component in components)
{
parts.Add(component.WholeComponent);
}
return string.Join(separator, parts.ToArray());
}
public DistinguishedName GetParent()
{
if(components.Count == 1)
{
return null;
}
List<DNComponent> tempList = new List<DNComponent>(components);
tempList.RemoveAt(0);
return new DistinguishedName(tempList);
}
public override bool Equals(object obj)
{
if(obj is DistinguishedName)
{
DistinguishedName objDN = (DistinguishedName)obj;
if (this.Components.Length == objDN.Components.Length)
{
for (int i = 0; i < this.Components.Length; i++)
{
if (!this.Components[i].Equals(objDN.Components[i]))
{
return false;
}
}
return true;
}
return false;
}
return base.Equals(obj);
}
public override int GetHashCode()
{
return cachedDN.GetHashCode();
}
}
Solution 4 - C#
this extension needs less resources than regualr expressions.
public static int CountSubstring(this string text, string value)
{
int count = 0, minIndex = text.IndexOf(value, 0);
while (minIndex != -1)
{
minIndex = text.IndexOf(value, minIndex + value.Length);
count++;
}
return count;
}
usage:
MyString = "OU=Level3,OU=Level2,OU=Level1,DC=domain,DC=com";
int count = MyString.CountSubstring("OU=");
Solution 5 - C#
below should work
MyString = "OU=Level3,OU=Level2,OU=Level1,DC=domain,DC=com";
int count = Regex.Matches(MyString, "OU=").Count
Solution 6 - C#
int count = myString.Split(new []{','})
.Count(item => item.StartsWith(
"OU=", StringComparison.OrdinalIgnoreCase))
Solution 7 - C#
Here are two examples of how you can get the results that you are looking for
var MyString = "OU=Level3,OU=Level2,OU=Level1,DC=domain,DC=com";
This one you would see a list of the values separated but it would have DC just an idea to show that the split with String does work`
var split = MyString.Split(new string[] { "OU=", "," }, StringSplitOptions.RemoveEmptyEntries);
This one will Split and return you only the 3 items into a List
var lstSplit = MyString.Split(new[] { ',' })
.Where(splitItem => splitItem.StartsWith(
"OU=", StringComparison.OrdinalIgnoreCase)).ToList();
Solution 8 - C#
public static int CountOccurences(string needle, string haystack)
{
return (haystack.Length - haystack.Replace(needle, "").Length) / needle.Length;
}
Benchmarked it against other answers here (the regex one and the "IndexOf" one), works faster.