|
请教一下,能否用VBA实现提出Word文件中出现频率较高的短语, 之前提出出现频率高的单词的方法已经有了,不知能否在此基础上改进一下,提出短语。(当然会后很多不是短语的组合,但其出现的频率也往往较低)
附上之前已发表的计算单词出现频率的程序(是从本版下载的,不知是哪位大侠编写的了)。
Sub ChineseCharCounting()
'统计汉字的字词频,并按降序排序
'中文词语的判断与Word的词典关联
Dim a As Byte
Dim n As Long
Dim TF As Boolean
Dim filetext As String
Dim d
Dim Wd As Range
Dim W As Range
Dim b
Dim e As Long
Dim c() As String
Dim i As Long
Dim temp As String
Dim st As Single
a = 1
st = Timer
Application.ScreenUpdating = False
n = ActiveDocument.Content.ComputeStatistics(wdStatisticFarEastCharacters)
If ActiveDocument.Content.text Like "*[【】〖〗《》〈〉〔〕]*" Then TF = True
With ActiveDocument.Content.Find
.text = "[【】〖〗《》〈〉〔〕]"
.MatchWildcards = True
.Execute Replace:=wdReplaceAll
End With
Set d = CreateObject("Scripting.Dictionary")
If a = 1 Then
For Each Wd In ActiveDocument.Words
With Wd
If .Start < e Then .Start = e
e = .End
If .text Like "*[一-龥]*" And Len(.text) > 1 Then
If .text Like "*[!一-龥]*" = False And .Words.Count = 1 Then
d(.text) = d(.text) + 1
Else
For i = 1 To Len(.text)
If Mid(.text, i, 1) Like "[!一-龥]" Then Exit For
Next
With .Duplicate
.End = .Start + i - 1
For Each W In .Words
With W
If Len(.text) > 1 Then
If Right(.text, 1) Like "[!一-龥]" Then .End = .End - 1
If .text Like "*[!一-龥]*" = False Then d(.text) = d(.text) + 1
End If
End With
Next
End With
End If
End If
End With
Next
Else
filetext = ActiveDocument.Content.text
For i = 1 To Len(filetext)
temp = Mid(filetext, i, 1)
If temp Like "[一-龥]" Then d(temp) = d(temp) + 1
Next
End If
b = d.keys
ReDim c(UBound(b))
For i = 0 To UBound(b)
c(i) = b(i) & vbTab & d(b(i))
Next
If TF = True Then ActiveDocument.Undo 1
With Documents.Add.Content
.text = "文档共有" & n & "个中文字符。共提取到" & d.Count _
& IIf(a = 6, "个中文词语", "个不同的汉字") & ",其出现次数分别为:" & vbCrLf & Join(c, vbCrLf)
.Parent.DefaultTabStop = .Characters.First.Font.Size * 6
.MoveStart wdParagraph
.Sort , 2, wdSortFieldNumeric, wdSortOrderDescending, 1, , , , , , wdSortSeparateByTabs
End With
MsgBox "提取完毕。用时" & Format(Timer - st, "0") & "秒。"
Application.ScreenUpdating = True
End Sub
|
|