|
[广告] Excel易用宝 - 提升Excel的操作效率 · Excel / WPS表格插件 ★ 免费下载 ★ ★ 使用帮助★
' Reads the text of every page of a PDF and stores it on Sheet2:
' column F receives the page number and column G the cleaned page text,
' one row per page starting at row 2.
'
' Prerequisites (from the original author's notes):
'   * "PDF Pro professional edition" must be installed (presumably Adobe
'     Acrobat Pro - TODO confirm) and its type library must be referenced by
'     the VBA project; the Acrobat.* types below depend on that reference.
'   * Image-only and encrypted PDFs cannot be processed.
'
' This is a trimmed excerpt of a larger routine that parses bank receipt
' PDFs; further per-page processing of the text was removed by the author.
Dim sTime As Single
Dim watermarkfile As String
Dim AC_PD As Acrobat.AcroPDDoc
Dim AC_Hi As Acrobat.AcroHiliteList
Dim AC_PG As Acrobat.AcroPDPage
Dim AC_PGTxt As Acrobat.AcroPDTextSelect
Dim Ct_Page As Long
Dim i As Long, j As Long, ROW_DEL As Long
Dim Last_Txt As Long
Dim Col_Key As Variant
Dim Is_Open As Boolean
Dim Elapsed As Single
Dim T_Str As String
Dim T_Stryy As String
Dim lb As Long

sTime = Timer

' Last used row of Sheet3 column B (at least 2). Not read anywhere in this
' excerpt; kept because the original routine was trimmed and may rely on it.
lb = Sheet3.Range("B62222").End(xlUp).Row
lb = Application.WorksheetFunction.Max(lb, 2)

' Full path of the PDF to read, e.g. D:\BANK\bank.pdf, taken from Sheet1!A2.
watermarkfile = CStr(Sheet1.Range("A2").Value)

Set AC_PD = New Acrobat.AcroPDDoc
Set AC_Hi = New Acrobat.AcroHiliteList
' One highlight entry starting at offset 0 with length 32767; per the original
' author this caps how much text is selected on each page.
AC_Hi.Add 0, 32767

' Open reports success/failure; GetNumPages yields -1 when the page count is
' unavailable. Either condition means the file cannot be processed.
Is_Open = AC_PD.Open(watermarkfile)
If Is_Open Then
    Ct_Page = AC_PD.GetNumPages
Else
    Ct_Page = -1
End If

If Ct_Page = -1 Then
    ' Wrong path / unreadable file: report it and leave existing output untouched.
    MsgBox "请确认PDF文件 '" & watermarkfile & "'"
Else
    Application.ScreenUpdating = False

    ' Clear previous results. Output goes to F:G, so the range must include G;
    ' E is still cleared as before. The last row is the deepest of the three.
    ROW_DEL = 2
    For Each Col_Key In Array("E", "F", "G")
        ROW_DEL = Application.WorksheetFunction.Max(ROW_DEL, _
                  Sheet2.Cells(Sheet2.Rows.Count, Col_Key).End(xlUp).Row)
    Next Col_Key
    Sheet2.Range("E2:G" & ROW_DEL).Clear

    For i = 1 To Ct_Page
        T_Str = ""

        ' AcquirePage is zero-based.
        Set AC_PG = AC_PD.AcquirePage(i - 1)
        Set AC_PGTxt = Nothing
        If Not AC_PG Is Nothing Then
            Set AC_PGTxt = AC_PG.CreateWordHilite(AC_Hi)
        End If

        ' Concatenate every text element of the selection, in order.
        If Not AC_PGTxt Is Nothing Then
            Last_Txt = AC_PGTxt.GetNumText - 1
            For j = 0 To Last_Txt
                T_Str = T_Str & AC_PGTxt.GetText(j)
            Next j
        End If

        ' The original routine post-processed T_Str here (removed by the author).
        T_Stryy = T_Str
        ' Strip leading/trailing blanks and non-printable characters, then
        ' remove the remaining inner spaces.
        T_Stryy = Application.WorksheetFunction.Clean(Application.WorksheetFunction.Trim(T_Stryy))
        T_Stryy = Replace(T_Stryy, " ", "")

        Sheet2.Range("F" & i + 1).Value = i        ' PDF page number
        Sheet2.Range("G" & i + 1).Value = T_Stryy  ' unformatted text of that page
    Next i

    Application.ScreenUpdating = True

    Elapsed = Timer - sTime
    MsgBox "数据计算完毕!用时" & Round(Elapsed, 2) & "秒。" & Round(Elapsed / 60, 4) & "分钟。"
End If

' Release the document exactly once, then drop all Acrobat references.
If Is_Open Then AC_PD.Close
Set AC_PGTxt = Nothing
Set AC_PG = Nothing
Set AC_Hi = Nothing
Set AC_PD = Nothing
|
|