|
各位专家:
小虫在网上找到一段可以计算两个单元格文字间Levenshtein edit distance距离的vba代码,但是该代码只能一次处理两个单元格,我如果想批量处理两列文本的相似度,不知道该如何修改该代码?
谢谢!
Public result As Double
Public Function Levenshtein_Distance(ByVal str1 As String, ByVal str2 As String) As Double
Dim d As Long
If Len(str1) > Len(str2) Then
d = Len(str1)
Else
d = Len(str2)
End If
result = 1 - (LD(str1, str2) / d)
End Function
Private Function Minimum(ByVal a As Integer, ByVal b As Integer, ByVal c As Integer) As Integer
Dim min As Integer
min = a
If b < min Then
min = b
End If
If c < min Then
min = c
End If
Minimum = min
End Function
Public Function LD(ByVal s As String, ByVal t As String) As Integer
Dim dis() As Integer
Dim m As Integer
Dim n As Integer
Dim i As Integer
Dim j As Integer
Dim s_i As String
Dim t_j As String
Dim cost As Integer
n = Len(s)
m = Len(t)
If n = 0 Then
LD = m
Exit Function
End If
If m = 0 Then
LD = n
Exit Function
End If
ReDim dis(0 To n, 0 To m)
For i = 0 To n
dis(i, 0) = i
Next i
For j = 0 To m
dis(0, j) = j
Next j
For i = 1 To n
s_i = Mid$(s, i, 1)
For j = 1 To m
t_j = Mid$(t, j, 1)
If s_i = t_j Then
cost = 0
Else
cost = 1
End If
dis(i, j) = Minimum(dis(i - 1, j) + 1, dis(i, j - 1) + 1, dis(i - 1, j - 1) + cost)
Next j
Next i
LD = dis(n, m)
Erase dis
End Function
Sub cc()
str1 = Sheets("sheet1").Range("A1").Value
str2 = Sheets("sheet1").Range("B1").Value
Levenshtein_Distance str1, str2
MsgBox result
[C1] = result
End Sub
|
|