|
4楼程序基本是按楼主1楼截图所编,确实有错漏。这样的文本处理复杂,主要是多重要求,文档格式需要归纳总结,分析逻辑结构,以我目前的分析,认为要求与文本层次结构似有冲突。可试试以下代码,是以另一种思路编写。按楼主附件文档简单测试基本通过。楼主可在此基础上修改,或解释其合理的编辑思路,处理要点,看看能否作进一步修改。
num是模块级数组变量,可在多个相关过程时方便调用,只是需要初始化,原主程序没有对其初始化。
- Private num(0 To 99) As String
- Sub test2()
- Dim i%, j%, k%, m%, n%, n1%, n2%, r%
- Dim data$, code$, info$(), info1$(), info2$(), data1&(), data2$()
- Dim RegExp As Object
- Dim Match As Object
- Dim RegExp2 As Object
- Dim Match2 As Object
-
- Call NumToCnNum
- data = IIf(Selection.Type = wdSelectionIP, ActiveDocument.Content.Text, Selection.Text)
- code = "斤两钱分厘枚粒片" '数量单位据此先后排序
- Set RegExp = CreateObject("VBScript.RegExp")
- Set RegExp2 = CreateObject("VBScript.RegExp")
- With RegExp
- .Global = True
- .Pattern = "([用方]:|;)([^。;:]+[十九八七六五四三二一半][斤两钱分厘枚粒片])(。|,[^。;:]+?。)"
- If .test(data) = False Then
- MsgBox "找不到匹配,程序退出!", vbCritical
- Exit Sub
- End If
-
- For Each Match In .Execute(data) '获取匹配方剂各药材配伍文本相关定位参数
- ReDim Preserve data1(5, m)
- With Match
- data1(0, m) = .firstindex
- data1(1, m) = .Length
- data1(2, m) = data1(0, m) + data1(1, m)
- data1(3, m) = .firstindex + Len(.submatches(0))
- data1(4, m) = Len(.submatches(2))
- data1(5, m) = data1(2, m) - data1(4, m) + 1
- End With
- m = m + 1
- Next
-
- With IIf(Selection.Type = wdSelectionIP, ActiveDocument.Content, Selection.Range)
- For i = 0 To UBound(data1, 2)
- If Selection.Type = wdSelectionIP Then
- .SetRange data1(0, i), data1(2, i)
- Else
- .SetRange Selection.Start + data1(0, i), Selection.Start + data1(2, i)
- End If
- .HighlightColorIndex = wdYellow '在原文档突出显示匹配文本
- Next
- End With
-
- For n = UBound(data1, 2) To 0 Step -1 '按倒序处理方剂药材配伍文本
- With RegExp2
- .Global = True
- For i = 1 To Len(code)
- .Pattern = "([^、:。]+?([十九八七六五四三二一半]+)" & Mid(code, i, 1) & ")[、。]"
- ReDim info(Len(code))
- If .test(Mid(data, data1(3, n), data1(1, n))) = True Then
- For Each Match2 In .Execute(Mid(data, data1(3, n), data1(1, n)))
- info(i - 1) = info(i - 1) & Match2.Value
- ReDim Preserve info1(3, n1)
- info1(0, n1) = Match2.submatches(0)
- info1(1, n1) = Match2.submatches(1)
- info1(2, n1) = Match2.firstindex
- info1(3, n1) = Match2.firstindex + Match2.Length
- n1 = n1 + 1
- Next
-
- For k = 99 To 0 Step -1 '对提取的药材以剂量大小进行排序
- For j = 0 To UBound(info1, 2)
- If info1(1, j) = num(k) Then
- ReDim Preserve info2(n2)
- info2(n2) = info1(0, j)
- n2 = n2 + 1
- End If
- Next
- Next
- ReDim Preserve data2(r)
- data2(r) = data2(r) & Join(info2, "、") '对匹配文本的中药材重新排序后的文本
- r = r + 1
-
- Erase info1
- Erase info2
- n1 = 0
- n2 = 0
- End If
- Next
- End With
-
- For i = 1 To Len(code) '处理判断为相同数量及单位的药材
- If Join(data2, "") <> Empty Then
- For j = 0 To UBound(data2)
- RegExp2.Pattern = "([^、。]+?)各?([十九八七六五四三二一半]+" & Mid(code, i, 1) & ")[、。]([^、。]+?)\2([、。])"
- If InStr(data2(j), Mid(code, i, 1)) Then
- Do While RegExp2.test(data2(j)) = True
- data2(j) = RegExp2.Replace(data2(j), "$1,$3各$2$4") '此处与原处理要求有所不同
- Loop
- End If
- Next
- End If
- Next
- If Join(data2, "") <> Empty Then
- data = Left(data, data1(3, n)) & "△" & Join(data2, "、") & Mid(data, data1(5, n), data1(4, n)) & "▲" & Mid(data, data1(2, n) + 1)
- End If
-
- Erase info
- Erase data2
- r = 0
- Next
-
- With Documents.Add(ActiveDocument.FullName).Content.Find
- .Parent.Text = data '将处理结果输出到以原文档为模板的新文档
- .Replacement.Highlight = True
- .Execute "△*▲", MatchWildcards:=True, Replace:=wdReplaceAll
- End With
- End With
- MsgBox "共处理了" & m & "条,处理条目见原文档、结果文本见新文档突出显示部分。"
- End Sub
- Sub NumToCnNum()
- '生成百位内汉字小写数字
- Const a As String = "一二三四五六七八九"
- Dim i%
-
- For i = 1 To 99
- Select Case i
- Case Is >= 20
- If i Mod 10 > 0 Then
- num(i) = Mid(a, Int(i / 10), 1) & "十" & Mid(a, i Mod 10, 1)
- Else
- num(i) = Mid(a, Int(i / 10), 1) & "十"
- End If
- Case Is > 10
- num(i) = "十" & Mid(a, Int(i Mod 10), 1)
- Case Is < 10
- num(i) = Mid(a, i, 1)
- Case Else
- num(i) = "十"
- End Select
- Next
- num(0) = "半"
- End Sub
复制代码 |
|