|
Public Function GetPDFtext(filePath As String, fileName As String)
    ' Reads a PDF through the Adobe Acrobat OLE automation objects and writes
    ' the text of each page to the active worksheet, one row per page:
    '   column D = 1-based page number
    '   column E = raw concatenated text of the page
    '   column F = the same text after Excel's TRIM and CLEAN
    ' The row for page i is (i + NUM).
    '
    ' Parameters:
    '   filePath - folder containing the PDF, without trailing backslash,
    '              e.g. D:\OK
    '   fileName - file name of the PDF, e.g. a.pdf
    '
    ' Returns:
    '   Nothing is assigned to the function name, so callers receive an
    '   empty Variant. If the file cannot be opened or reports -1 pages the
    '   function returns without touching the worksheet.
    '
    ' Errors:
    '   Runtime errors are re-raised to the caller after the document has
    '   been closed and the Acrobat object references released.
    '
    ' NOTE(review): markfile and NUM are not declared in this procedure; they
    ' are presumably module-level or global names defined elsewhere - confirm.
    Dim AC_PD As Acrobat.AcroPDDoc             ' the PDF document
    Dim AC_Hi As Acrobat.AcroHiliteList        ' highlight list used to select text
    Dim AC_PG As Acrobat.AcroPDPage            ' current page
    Dim AC_PGTxt As Acrobat.AcroPDTextSelect   ' text selection of the current page
    Dim Ct_Page As Long
    Dim i As Long, j As Long
    Dim T_Str As String
    Dim isOpen As Boolean
    Dim errNum As Long, errSrc As String, errDesc As String

    On Error GoTo CleanFail

    Set AC_PD = New Acrobat.AcroPDDoc
    Set AC_Hi = New Acrobat.AcroHiliteList
    ' One highlight entry starting at offset 0 with length 32767; this caps how
    ' much text is selected per page (presumably counted in words, since the
    ' list is passed to CreateWordHilite - confirm against the Acrobat docs).
    AC_Hi.Add 0, 32767

    markfile = filePath & "\" & fileName       ' full path of the PDF to read

    ' Open returns False when the file is missing, locked or not a valid PDF.
    isOpen = AC_PD.Open(markfile)
    If Not isOpen Then GoTo CleanExit

    Ct_Page = AC_PD.GetNumPages
    If Ct_Page = -1 Then GoTo CleanExit        ' Acrobat could not report a page count

    For i = 1 To Ct_Page
        T_Str = ""
        Set AC_PGTxt = Nothing
        Set AC_PG = AC_PD.AcquirePage(i - 1)   ' Acrobat page indexes are 0-based
        ' Guard against a page that could not be acquired; the row is still
        ' written, with empty text.
        If Not AC_PG Is Nothing Then
            Set AC_PGTxt = AC_PG.CreateWordHilite(AC_Hi)
        End If
        If Not AC_PGTxt Is Nothing Then
            For j = 0 To AC_PGTxt.GetNumText - 1
                T_Str = T_Str & AC_PGTxt.GetText(j)
            Next j
        End If
        ActiveSheet.Range("D" & i + NUM) = i
        ActiveSheet.Range("E" & i + NUM) = T_Str
        ActiveSheet.Range("F" & i + NUM) = Application.WorksheetFunction.Clean(WorksheetFunction.Trim(T_Str))
    Next i

CleanExit:
    ' Single cleanup path for success, early exit and failure. Errors raised
    ' while cleaning up are ignored so they cannot mask the original error.
    On Error Resume Next
    If isOpen Then AC_PD.Close
    Set AC_PGTxt = Nothing
    Set AC_PG = Nothing
    Set AC_Hi = Nothing
    Set AC_PD = Nothing
    On Error GoTo 0
    If errNum <> 0 Then Err.Raise errNum, errSrc, errDesc
    Exit Function

CleanFail:
    ' Capture the error first: Resume clears the Err object.
    errNum = Err.Number
    errSrc = Err.Source
    errDesc = Err.Description
    Resume CleanExit
End Function
评分
-
1
查看全部评分
-
|