Levenshtein字符串编辑距离算法

Levenshtein(莱文斯坦)编辑距离算法实现


1.C++版本(含三个benchmark)

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

using namespace std;

/**
 * Returns the smallest of three integers.
 *
 * @param first  first candidate value
 * @param second second candidate value
 * @param third  third candidate value
 * @return the least of the three arguments
 */
int minimum(int first, int second, int third)
{
    // Reduce the first two candidates, then compare the survivor with the third.
    const int smallerOfFirstTwo = (first < second) ? first : second;
    return (smallerOfFirstTwo < third) ? smallerOfFirstTwo : third;
}

/**
 * Computes the Levenshtein (edit) distance between two strings: the minimum
 * number of single-character insertions, deletions and substitutions needed
 * to turn one string into the other. Characters are compared as individual
 * `char` values.
 *
 * Only two rows of the dynamic-programming table are kept, because each cell
 * depends solely on the current and the previous row.
 *
 * @param firstStr  first string to compare
 * @param secondStr second string to compare
 * @return the edit distance between firstStr and secondStr
 */
int getLevenshteinDistance(const std::string& firstStr, const std::string& secondStr)
{
    const std::size_t firstStrLen = firstStr.length();
    const std::size_t secondStrLen = secondStr.length();

    // previousRow[j]: distance between the first (i-1) chars of firstStr and
    // the first j chars of secondStr. It starts as row 0, i.e. j insertions.
    std::vector<int> previousRow(secondStrLen + 1);
    std::vector<int> currentRow(secondStrLen + 1);
    for (std::size_t secondIndex = 0; secondIndex <= secondStrLen; ++secondIndex)
    {
        previousRow[secondIndex] = static_cast<int>(secondIndex);
    }

    for (std::size_t firstIndex = 1; firstIndex <= firstStrLen; ++firstIndex)
    {
        // Column 0: turning i characters into the empty string takes i deletions.
        currentRow[0] = static_cast<int>(firstIndex);
        for (std::size_t secondIndex = 1; secondIndex <= secondStrLen; ++secondIndex)
        {
            const int lastCharCost =
                (firstStr[firstIndex - 1] == secondStr[secondIndex - 1]) ? 0 : 1;
            currentRow[secondIndex] = std::min({
                previousRow[secondIndex] + 1,                 // deletion
                currentRow[secondIndex - 1] + 1,              // insertion
                previousRow[secondIndex - 1] + lastCharCost   // match / substitution
            });
        }
        // The row just filled becomes the "previous" row for the next iteration.
        previousRow.swap(currentRow);
    }

    // After the final swap (or with an empty firstStr) the answer lives in previousRow.
    return previousRow[secondStrLen];
}

/**
 * Prints the edit distance for three sample word pairs, then waits for one
 * character on standard input before exiting.
 */
int main()
{
    // Sample inputs; each row is {first string, second string}.
    const std::string benchmarks[][2] = {
        {"sitting", "kitten"},
        {"Saturday", "Sunday"},
        {"levenshtein", "meilenstein"},
    };

    for (const auto& benchmark : benchmarks)
    {
        std::cout << "levenshtein distance of " << benchmark[0] << " and " << benchmark[1]
                  << " is:" << getLevenshteinDistance(benchmark[0], benchmark[1]) << std::endl;
    }

    // Wait for a character before returning (presumably to keep a console
    // window open long enough to read the results).
    char ch;
    std::cin >> ch;
    return 0;
}

2.awk版本1(表格法)

# Returns the smallest of three values.
#
# Parameters:
#   first, second, third - the values to compare
# Locals (declared after the extra spaces, never passed by callers):
#   former - the smaller of first and second
function minimum(first, second, third,    former)
{
    # Declaring former as a parameter keeps it local; otherwise awk would
    # create or overwrite a global variable of the same name on every call.
    former = (first < second) ? first : second;
    return (former < third) ? former : third;
}

# Computes the Levenshtein (edit) distance between firstStr and secondStr by
# filling a dynamic-programming table.
#
# Parameters:
#   firstStr, secondStr - the strings to compare
# Locals (declared after the extra spaces, never passed by callers):
#   firstStrLen, secondStrLen, firstIndex, secondIndex, lastCharCost,
#   levenshteinDistanceTable (array, rebuilt on every call)
#
# Returns the minimum number of single-character insertions, deletions and
# substitutions needed to turn one string into the other.
function getLevenshteinDistance(firstStr, secondStr,    firstStrLen, secondStrLen, firstIndex, secondIndex, lastCharCost, levenshteinDistanceTable)
{
    firstStrLen = length(firstStr);
    secondStrLen = length(secondStr);

    # Row 0 / column 0: distance to or from the empty string is the prefix length.
    for(secondIndex = 0; secondIndex <= secondStrLen; secondIndex++)
    {
        levenshteinDistanceTable[0, secondIndex] = secondIndex;
    }
    for(firstIndex = 0; firstIndex <= firstStrLen; firstIndex++)
    {
        levenshteinDistanceTable[firstIndex, 0] = firstIndex;
    }

    for(firstIndex = 1; firstIndex <= firstStrLen; firstIndex++)
    {
        for(secondIndex = 1; secondIndex <= secondStrLen; secondIndex++)
        {
            # Compare the characters literally. match() would interpret the
            # second character as a regular expression, so "." would match
            # anything and "(" or "[" would be invalid patterns.
            if(substr(firstStr, firstIndex, 1) == substr(secondStr, secondIndex, 1))
            {
                lastCharCost = 0;
            }
            else
            {
                lastCharCost = 1;
            }
            # Cheapest of: deletion, insertion, match/substitution.
            levenshteinDistanceTable[firstIndex, secondIndex] = minimum(levenshteinDistanceTable[firstIndex-1, secondIndex] + 1, levenshteinDistanceTable[firstIndex, secondIndex-1] + 1, levenshteinDistanceTable[firstIndex-1, secondIndex-1] + lastCharCost);
        }
    }
    return levenshteinDistanceTable[firstStrLen, secondStrLen];
}

3.awk版本2(递归法,存在重叠子问题重复计算问题,性能较低)

# Returns the smallest of three values.
#
# Parameters:
#   first, second, third - the values to compare
# Locals (declared after the extra spaces, never passed by callers):
#   former - the smaller of first and second
function minimum(first, second, third,    former)
{
    # Declaring former as a parameter keeps it local; otherwise awk would
    # create or overwrite a global variable of the same name on every call.
    former = (first < second) ? first : second;
    return (former < third) ? former : third;
}

# Computes the Levenshtein (edit) distance between the first firstLen
# characters of firstStr and the first secondLen characters of secondStr by
# plain recursion. Overlapping subproblems are recomputed, so the running
# time grows exponentially with the input lengths.
#
# Parameters:
#   firstStr, firstLen   - first string and the prefix length to consider
#   secondStr, secondLen - second string and the prefix length to consider
# Locals (declared after the extra spaces, never passed by callers):
#   lastCost, deleteCost, insertCost, replaceCost
#
# Returns the edit distance between the two prefixes.
function getLevenshteinDistance(firstStr, firstLen, secondStr, secondLen,    lastCost, deleteCost, insertCost, replaceCost)
{
    # An empty prefix is reached by inserting / deleting every remaining character.
    if(firstLen == 0)
    {
        return secondLen;
    }
    if(secondLen == 0)
    {
        return firstLen;
    }

    # Compare the last characters literally; match() would interpret the
    # second character as a regular expression.
    if(substr(firstStr, firstLen, 1) == substr(secondStr, secondLen, 1))
    {
        lastCost = 0;
    }
    else
    {
        lastCost = 1;
    }

    # These must be locals: as globals, each nested call would overwrite the
    # values this invocation still needs after the call returns.
    deleteCost = getLevenshteinDistance(firstStr, firstLen-1, secondStr, secondLen) + 1;
    insertCost = getLevenshteinDistance(firstStr, firstLen, secondStr, secondLen-1) + 1;
    replaceCost = getLevenshteinDistance(firstStr, firstLen-1, secondStr, secondLen-1) + lastCost;
    return minimum(deleteCost, insertCost, replaceCost);
}
发布了93 篇原创文章 · 获赞 22 · 访问量 13万+

猜你喜欢

转载自blog.csdn.net/liushaofang/article/details/79317350
今日推荐