|
楼主 |
发表于 2018-12-3 18:24
|
显示全部楼层
本帖最后由 Moneky 于 2018-12-3 18:43 编辑
借助剪贴板和正则达成
原理:在word中选中表格→复制→读取剪贴板HTML格式→提取table代码→清洗冗余代码
- Declare Function OpenClipboard Lib "user32" (ByVal hwnd As Long) As Long
- Declare Function GetClipboardData Lib "user32" (ByVal wFormat As Long) As Long
- Declare Function CloseClipboard Lib "user32" () As Long
- Declare Function RegisterClipboardFormat Lib "user32" Alias "RegisterClipboardFormatA" (ByVal lpString As String) As Long
- Declare Sub CopyMemory Lib "kernel32" Alias "RtlMoveMemory" (Destination As Any, Source As Any, ByVal Length As Long)
- Declare Function GlobalLock Lib "kernel32" (ByVal hMem As Long) As Long
- Declare Function GlobalUnlock Lib "kernel32" (ByVal hMem As Long) As Long
- Declare Function GlobalSize Lib "kernel32" (ByVal hMem As Long) As Long
- Private Function GetTable2Html(bXXHtml As Boolean) As String
- Dim hMem As Long
- Dim lpData As Long
- Dim nClipSize As Long
- Dim bytClipData() As Byte
- Dim sClipString As String
- CF_HTML = RegisterClipboardFormat("HTML Format") '注册HTML Format
- If OpenClipboard(ByVal 0&) Then '如果OpenClipboard函数返回非0值,说明成功打开剪贴板
- hMem = GetClipboardData(CF_HTML) '获取剪贴板中以文本格式存在的内存对象的句柄
- If CBool(hMem) Then '如果剪贴板中对应的格式不存在,此时的hMem会是0(Null),这里用CBool把它转换成Boolean类型加以判断
- lpData = GlobalLock(hMem) '获取内存对象第一个字节的内存地址
- nClipSize = GlobalSize(hMem) '获取内存对象的字节长度
- ReDim bytClipData(1 To nClipSize) '修改缓冲字节数组的长度,确保能够容纳内存对象的全部数据
- CopyMemory bytClipData(1), ByVal lpData, nClipSize '复制内存对象的数据到字节数组中,注意Byval的用法
- sClipString = StrConv(bytClipData, vbUnicode) '将字节转化成字符串
- GetTable2Html = IIf(bXXHtml, xxHTML(sClipString), sClipString)
- Else
- GetTable2Html = "None"
- End If
- CloseClipboard
- End If
- End Function
- Private Function xxHTML(sHtml As String) As String '正则清洗
- Dim oReg As New RegExp, tmp As String
- With oReg
- .Pattern = "<table[\w\W]*/table>"
- .Global = True
- .MultiLine = True
- tmp = .Execute(sHtml)(0).Value
- .Pattern = "<span.*?>|</span>|<o:p>|</o:p>|style='[\s\S]*?'|class=[a-zA-Z]*"
- tmp = .Replace(tmp, "")
- xxHTML = tmp
- End With
- End Function
- Private Function Table2Html(vDoc As Document, id As Long, bXHtml As Boolean) As String
- ' document 表格id 是否清洗
- Dim vTable As Table
- Set vTable = vDoc.Tables(id)
- vTable.Select
- Selection.Copy
- Table2Html = GetTable2Html(bXHtml)
- End Function
- Sub test()
- Dim sHtml As String
- sHtml = Table2Html(ActiveDocument, 1, True)
- MsgBox sHtml
- Debug.Print sHtml
- End Sub
复制代码 |
|