|
这是从网上寻找的VB6调用OneNote2013 OCR识别图像的程序,但在2010中提示Function AddNodeInfo(ContentElement As MSXML2.IXMLDOMElement) As MSXML2.IXMLDOMElement
用户类型未定义,大神们能否修改为VBA可以用在2010版使用。
Private Declare PtrSafe Sub Sleep Lib "kernel32" (ByVal dwMilliseconds As Long)
Function GetTextFromSinglePicture(inPicPath As String) As String
'图片的信息编码,并输出到xml文本中
Dim xmlDoc As New MSXML2.DOMDocument60
Dim xmlNode As MSXML2.IXMLDOMNode
Dim xmlEle As MSXML2.IXMLDOMElement
Dim picBase64 As imageBase64
'创建临时的笔记本
Dim onenoteFullName As String
With New Scripting.FileSystemObject
onenoteFullName = .GetSpecialFolder(TemporaryFolder) & "\" & .GetBaseName(.GetTempName) & ".one"
'判断函数值是否正常
If .FileExists(inPicPath) = False Then
GetTextFromPicture = "! Error File Path !"
Exit Function
End If
End With
Dim onenoteApp As New OneNote.Application
If onenoteApp Is Nothing Then
GetTextFromPicture = "! Error in Openning OneNote !"
GoTo clear_variable_before_exit
End If
Dim sectionID As String
Dim pageID As String
Set xmlEle = CreateNotePageContentElement(2, inPicPath)
Set xmlEle = AddNodeInfo(xmlEle)
'创建临时的笔记本,获取sectionID
onenoteApp.OpenHierarchy onenoteFullName, "", sectionID, cftSection
'创建新的页面,获取pageID
onenoteApp.CreateNewPage sectionID, pageID, npsBlankPageNoTitle
'获取页面的XML格式
Dim pageXmlText As String
onenoteApp.GetPageContent pageID, pageXmlText, , xs2013
'导入到XML中进行处理,将图片形式导入到XML中
If xmlDoc.LoadXML(pageXmlText) = False Then
GetTextFromPicture = "! Error in Loading Xml !"
GoTo clear_variable_before_exit
End If
With xmlDoc.getElementsByTagName("one:Page").Item(0)
.appendChild xmlEle
End With
'更新Page内容
onenoteApp.UpdatePageContent xmlDoc.DocumentElement.XML, , xs2013
'OneNote识别图片需要时间,以下开始轮询结果,1秒*10次
Sleep 1000
Dim iCNT As Integer
iCNT = 10
re_getPageContent:
onenoteApp.GetPageContent pageID, pageXmlText, , xs2013
xmlDoc.LoadXML pageXmlText
Set xmlEle = xmlDoc.DocumentElement.getElementsByTagName("one:OCRText").Item(0)
If xmlEle Is Nothing Then
If iCNT > 0 Then
Sleep 1000
iCNT = iCNT - 1
GoTo re_getPageContent
Else
GetTextFromPicture = "! Waiting OneNote Time Expired !"
End If
Else
GetTextFromPicture = xmlEle.Text
End If
clear_variable_before_exit:
If Not onenoteApp Is Nothing Then
If Len(pageID) > 0 Then
onenoteApp.DeleteHierarchy pageID, , True
End If
Set onenoteApp = Nothing
End If
Kill onenoteFullName
End Function
'OneNote识别的VBA主要函数
Type imageBase64
base64Text As String
imageWidth As Long
imageHeight As Long
End Type
Function CreateNotePageContentElement(contentType As Integer, paraContent As String) As MSXML2.IXMLDOMElement
Dim xmlEle As MSXML2.IXMLDOMElement
Dim xmlNode As MSXML2.IXMLDOMElement
Dim ns As String
ns = "one:"
With New MSXML2.DOMDocument60
Select Case contentType
Case 1 '文本
Set xmlNode = .createElement(ns & "T")
xmlNode.Text = paraContent
Case 2 '图片
Dim picBase64 As imageBase64
picBase64 = getBase64(paraContent)
'创建一个图片XML信息
Set xmlNode = .createElement(ns & "Image")
xmlNode.setAttribute "format", "jpg"
xmlNode.setAttribute "originalPageNumber", 0
Set xmlEle = .createElement(ns & "Position")
xmlEle.setAttribute "x", 0
xmlEle.setAttribute "y", 0
xmlEle.setAttribute "z", 0
xmlNode.appendChild xmlEle
Set xmlEle = .createElement(ns & "Size")
xmlEle.setAttribute "width", picBase64.imageWidth
xmlEle.setAttribute "height", picBase64.imageHeight
xmlNode.appendChild xmlEle
Set xmlEle = .createElement(ns & "Data")
xmlEle.Text = picBase64.base64Text
xmlNode.appendChild xmlEle
End Select
End With
Set CreateNotePageContentElement = xmlNode
End Function
Function AddNodeInfo(ContentElement As MSXML2.IXMLDOMElement) As MSXML2.IXMLDOMElement
Dim xmlEle As MSXML2.IXMLDOMElement
Dim xmlNode As MSXML2.IXMLDOMElement
Dim ns As String
ns = "one:"
Set xmlNode = ContentElement
With New MSXML2.DOMDocument60
Set xmlEle = .createElement(ns & "OE")
xmlEle.appendChild xmlNode
Set xmlNode = xmlEle
Set xmlEle = .createElement(ns & "OEChildren")
xmlEle.appendChild xmlNode
Set xmlNode = xmlEle
Set xmlEle = .createElement(ns & "Outline")
xmlEle.appendChild xmlNode
Set xmlNode = xmlEle
End With
Set AddNodeInfo = xmlNode
End Function
'XML处理的函数
Function getBase64(inBmpFile As String) As imageBase64
Dim xmlEle As MSXML2.IXMLDOMElement
With New MSXML2.DOMDocument60
Set xmlEle = .createElement("Base64Data")
End With
xmlEle.DataType = "bin.base64"
With New ADODB.Stream
.Type = adTypeBinary
.Open
.LoadFromFile inBmpFile
xmlEle.nodeTypedValue = .Read()
.Close
End With
getBase64.base64Text = xmlEle.Text
With CreateObject("WIA.ImageFile")
.loadfile inBmpFile
getBase64.imageHeight = .Height
getBase64.imageWidth = .Width
End With
End Function
'图片处理为Base64编码的函数
Sub OCR_Pictures_To_Text()
Dim vFNi As Variant
Dim sFNi As Variant
Dim sFNo As String
Dim oTS As TextStream
vFNi = Application.GetOpenFilename("*.jpg,*.jpg", , , , True)
If VarType(vFNi) = vbBoolean Then Exit Sub
sFNo = Application.GetSaveAsFilename(, "*.txt,*.txt")
If sFNo = "False" Then Exit Sub
Dim sTmp As String
With New Scripting.FileSystemObject
Set oTS = .CreateTextFile(sFNo)
End With
For Each sFNi In vFNi
sTmp = GetTextFromPicture(CStr(sFNi))
While InStr(1, sTmp, " ") > 0
sTmp = Replace(sTmp, " ", "")
Wend
oTS.Write sTmp
Next
oTS.Close
MsgBox "OK"
End Sub
'OCR主程序
|
|