C# 文字列マッチングのための Jaro-Winkler 類似度アルゴリズム

入力文字列と候補文字列(オプション)をより正確に照合するには、レーベンシュタイン距離や Jaro-Winkler 類似度などの文字列類似性アルゴリズムを使用できます。これらのアルゴリズムは、文字列間の編集距離や文字の順序などの要素を考慮して、文字列の類似性をより正確に計算します。以下に、まず使用例を、続いて実装コードを示します。

  // Usage example: argument 1 is the text to match, argument 2 is the array of sample (candidate) strings.
  // Calculate returns the candidate with the highest similarity score, which is then logged.
  string bestMatch = JaroWinklerDistanceCalculate.Calculate(str, nameArry);
  Debug.Log(bestMatch);
using System;
using System.Collections;
using System.Collections.Generic;

/// <summary>
/// Selects, from a list of candidate strings, the one most similar to a query string.
/// Similarity is the Jaro-Winkler score: 1.0 for identical strings, 0.0 when no
/// characters match within the allowed window.
/// </summary>
public class JaroWinklerDistanceCalculate
{
    // The Winkler prefix bonus only counts the first few shared characters. Without this
    // cap a long shared prefix pushes the score above 1.0, so a near-miss could outrank
    // an exact match (which always scores exactly 1.0).
    private const int MaxPrefixLength = 4;

    // Bonus weight per shared prefix character. MaxPrefixLength * PrefixScale must stay
    // at or below 1 so the final score remains within [0, 1].
    private const double PrefixScale = 0.1;

    /// <summary>
    /// Returns the entry of <paramref name="compareStrs"/> with the highest Jaro-Winkler
    /// similarity to <paramref name="compareStr"/>.
    /// </summary>
    /// <param name="compareStr">The text to match.</param>
    /// <param name="compareStrs">The candidate strings; null entries are ignored.</param>
    /// <returns>
    /// The best-scoring candidate (the earliest one on ties), or an empty string when the
    /// array is empty or no candidate has a similarity greater than zero.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="compareStr"/> or <paramref name="compareStrs"/> is null.
    /// </exception>
    public static string Calculate(string compareStr, string[] compareStrs)
    {
        if (compareStr == null)
        {
            throw new ArgumentNullException(nameof(compareStr));
        }
        if (compareStrs == null)
        {
            throw new ArgumentNullException(nameof(compareStrs));
        }

        double bestSimilarity = 0;
        string bestMatch = "";

        foreach (string candidate in compareStrs)
        {
            if (candidate == null)
            {
                // A null entry cannot be scored; skip it instead of failing the whole lookup.
                continue;
            }

            double similarity = JaroWinklerDistance(compareStr, candidate);

            // Strictly greater: the first candidate wins ties, and zero-score candidates
            // are never selected.
            if (similarity > bestSimilarity)
            {
                bestSimilarity = similarity;
                bestMatch = candidate;
            }
        }
        return bestMatch;
    }

    /// <summary>
    /// Jaro-Winkler similarity: the Jaro score boosted in proportion to the length of the
    /// common prefix, where the prefix is capped at <see cref="MaxPrefixLength"/>.
    /// </summary>
    private static double JaroWinklerDistance(string s1, string s2)
    {
        double jaro = JaroDistance(s1, s2);
        int prefixLength = Math.Min(GetCommonPrefixLength(s1, s2), MaxPrefixLength);
        return jaro + prefixLength * PrefixScale * (1 - jaro);
    }

    /// <summary>
    /// Jaro similarity of two strings, in the range [0, 1].
    /// </summary>
    private static double JaroDistance(string s1, string s2)
    {
        if (s1 == s2)
        {
            return 1.0;
        }

        // Two equal characters count as a match only if their positions differ by at most
        // this many places. Clamped so very short strings do not yield a negative window.
        int maxDistance = Math.Max(0, Math.Max(s1.Length, s2.Length) / 2 - 1);
        bool[] s1Matches = new bool[s1.Length];
        bool[] s2Matches = new bool[s2.Length];
        int matchingCharacters = 0;

        for (int i = 0; i < s1.Length; i++)
        {
            int start = Math.Max(0, i - maxDistance);
            int end = Math.Min(i + maxDistance + 1, s2.Length);
            for (int j = start; j < end; j++)
            {
                // Each character of s2 may be paired with at most one character of s1.
                if (s2Matches[j] || s1[i] != s2[j])
                {
                    continue;
                }
                s1Matches[i] = true;
                s2Matches[j] = true;
                matchingCharacters++;
                break;
            }
        }

        if (matchingCharacters == 0)
        {
            return 0.0;
        }

        // Walk the matched characters of both strings in order; every position where they
        // differ is half of a transposition.
        int transpositions = 0;
        int k = 0;
        for (int i = 0; i < s1.Length; i++)
        {
            if (!s1Matches[i])
            {
                continue;
            }
            // Both strings have the same number of matched characters, so k stays in range.
            while (!s2Matches[k])
            {
                k++;
            }
            if (s1[i] != s2[k])
            {
                transpositions++;
            }
            k++;
        }

        return (matchingCharacters / (double)s1.Length
                + matchingCharacters / (double)s2.Length
                + (matchingCharacters - transpositions / 2.0) / matchingCharacters) / 3.0;
    }

    /// <summary>
    /// Number of leading characters that are identical in both strings.
    /// </summary>
    private static int GetCommonPrefixLength(string s1, string s2)
    {
        int minLength = Math.Min(s1.Length, s2.Length);
        int commonPrefixLength = 0;
        while (commonPrefixLength < minLength && s1[commonPrefixLength] == s2[commonPrefixLength])
        {
            commonPrefixLength++;
        }
        return commonPrefixLength;
    }
}

おすすめ

転載: blog.csdn.net/kuilaurence/article/details/131704766