This is a useful little algorithm, and I particularly like it for its simplicity and for the fact that its heuristics work well in most cases (one limitation of Soundex is that it fails to match words that sound the same but start with a different letter, e.g. "site" and "cite" produce different Soundex codes). Soundex is useful when writing search functionality where you want to account for misspellings in the user's query. It's worth pointing out that SQL Server natively supports Soundex (see the SOUNDEX function in T-SQL, for example).
My C# implementation is below - I opted to implement it in a static class that exposes one public method "For". The example source code is available on GitHub - https://github.com/rsingh85/SoundexExample
/// <summary>
/// Computes four-character Soundex codes (one letter followed by three digits)
/// so that similar-sounding words can be matched.
/// </summary>
public static class Soundex
{
    // A Soundex code is always the leading letter plus three digits.
    private const int MaxSoundexCodeLength = 4;

    /// <summary>
    /// Returns the Soundex code for <paramref name="word"/>.
    /// </summary>
    /// <param name="word">
    /// The word to encode. May be null or empty. Every character that is not a
    /// letter (whitespace, digits, punctuation, underscores) is ignored.
    /// </param>
    /// <returns>
    /// A four-character code: the first letter (upper-cased) followed by up to
    /// three digits, right-padded with '0'. Returns "0000" when
    /// <paramref name="word"/> is null or contains no letters.
    /// </returns>
    public static string For(string word)
    {
        // Keep letters only, upper-cased with the invariant culture so the
        // result does not depend on the current thread culture.
        var letters = new StringBuilder();
        if (word != null)
        {
            foreach (var character in word)
            {
                if (char.IsLetter(character))
                {
                    letters.Append(char.ToUpperInvariant(character));
                }
            }
        }

        if (letters.Length == 0)
        {
            return new string('0', MaxSoundexCodeLength);
        }

        var soundexCode = new StringBuilder(MaxSoundexCodeLength);
        soundexCode.Append(letters[0]);

        // Code of the most recent letter that was not a transparent H/W.
        // Seeding it with the first letter's own code suppresses a repeat of
        // that code directly (or across H/W) after the first letter.
        var previousCode = GetCharNumberForLetter(letters[0]);

        for (var i = 1;
             i < letters.Length && soundexCode.Length < MaxSoundexCodeLength;
             i++)
        {
            var letter = letters[i];

            // H and W are transparent: consonants with the same code on either
            // side of them collapse into one digit, so previousCode is kept.
            if (letter == 'H' || letter == 'W')
            {
                continue;
            }

            var code = GetCharNumberForLetter(letter);

            // '0' marks a vowel-like letter: it emits nothing, but it does
            // separate equal codes so the next consonant is coded again.
            if (code != '0' && code != previousCode)
            {
                soundexCode.Append(code);
            }

            previousCode = code;
        }

        return soundexCode.ToString().PadRight(MaxSoundexCodeLength, '0');
    }

    /// <summary>
    /// Maps an upper-case letter to its Soundex digit character.
    /// </summary>
    /// <param name="letter">The upper-case letter to classify.</param>
    /// <returns>
    /// '1' to '6' for the coded consonant groups, or '0' for every other
    /// character (vowels, H, W, Y and anything outside A-Z).
    /// </returns>
    private static char GetCharNumberForLetter(char letter)
    {
        switch (letter)
        {
            case 'B':
            case 'F':
            case 'P':
            case 'V':
                return '1';
            case 'C':
            case 'G':
            case 'J':
            case 'K':
            case 'Q':
            case 'S':
            case 'X':
            case 'Z':
                return '2';
            case 'D':
            case 'T':
                return '3';
            case 'L':
                return '4';
            case 'M':
            case 'N':
                return '5';
            case 'R':
                return '6';
            default:
                return '0';
        }
    }
}
Example:
{
/// <summary>
/// Returns the four-character Soundex code (one letter followed by three
/// digits) for <paramref name="word"/>.
/// </summary>
/// <param name="word">
/// The word to encode. May be null or empty. Every character that is not a
/// letter (whitespace, digits, punctuation, underscores) is ignored.
/// </param>
/// <returns>
/// The first letter (upper-cased) followed by up to three digits, right-padded
/// with '0'. Returns "0000" when <paramref name="word"/> is null or contains
/// no letters.
/// </returns>
public static string For(string word)
{
    const int MaxSoundexCodeLength = 4;

    // Keep letters only, upper-cased with the invariant culture so the
    // result does not depend on the current thread culture.
    var letters = new StringBuilder();
    if (word != null)
    {
        foreach (var character in word)
        {
            if (char.IsLetter(character))
            {
                letters.Append(char.ToUpperInvariant(character));
            }
        }
    }

    if (letters.Length == 0)
    {
        return new string('0', MaxSoundexCodeLength);
    }

    var soundexCode = new StringBuilder(MaxSoundexCodeLength);
    soundexCode.Append(letters[0]);

    // Code of the most recent letter that was not a transparent H/W.
    // Seeding it with the first letter's own code suppresses a repeat of
    // that code directly (or across H/W) after the first letter.
    var previousCode = GetCharNumberForLetter(letters[0]);

    for (var i = 1;
         i < letters.Length && soundexCode.Length < MaxSoundexCodeLength;
         i++)
    {
        var letter = letters[i];

        // H and W are transparent: consonants with the same code on either
        // side of them collapse into one digit, so previousCode is kept.
        if (letter == 'H' || letter == 'W')
        {
            continue;
        }

        var code = GetCharNumberForLetter(letter);

        // '0' marks a vowel-like letter: it emits nothing, but it does
        // separate equal codes so the next consonant is coded again.
        if (code != '0' && code != previousCode)
        {
            soundexCode.Append(code);
        }

        previousCode = code;
    }

    return soundexCode.ToString().PadRight(MaxSoundexCodeLength, '0');
}
/// <summary>
/// Maps a letter to its Soundex digit character.
/// </summary>
/// <param name="letter">The letter to classify; only upper-case A-Z consonants are coded.</param>
/// <returns>
/// '1' to '6' for the coded consonant groups, or '0' for any other character.
/// </returns>
private static char GetCharNumberForLetter(char letter)
{
    switch (letter)
    {
        case 'B':
        case 'F':
        case 'P':
        case 'V':
            return '1';
        case 'C':
        case 'G':
        case 'J':
        case 'K':
        case 'Q':
        case 'S':
        case 'X':
        case 'Z':
            return '2';
        case 'D':
        case 'T':
            return '3';
        case 'L':
            return '4';
        case 'M':
        case 'N':
            return '5';
        case 'R':
            return '6';
        default:
            return '0';
    }
}
}
// "Ravi" and "Ravee" sound alike, so every call below prints R100.
foreach (var name in new[] { "Ravi", "Ravee", "Ravi", "Ravee" })
{
    Console.WriteLine(Soundex.For(name));
}
Examsvce give validate CIA VCE Exam Dumps Everyday to keep the Questions and Answers up-to-date. Click here for Details CIA VCE exams
ReplyDelete