|
本帖最后由 bluesky_0 于 2022-11-25 08:55 编辑
Private Sub CommandButton3_Click()
'读取PDF文件内容,不支持图片.仅支持源文件是文档文件转化为PDF的
sTime = Timer
'PDF_File As String
Dim AC_PD As New Acrobat.AcroPDDoc
Dim AC_Hi As Acrobat.AcroHiliteList
Dim AC_PG As Acrobat.AcroPDPage
Dim AC_PGTxt As Acrobat.AcroPDTextSelect
Dim WS_PDF As Worksheet
Dim RW_Ct As Long
Dim Col_Num As Integer
Dim Li_Row As Long
Dim Yes_Fir As Boolean
Dim Ct_Page As Long
Dim i As Long, j As Long, k As Long, ROW_DEL As Long
Dim T_Str As String
Dim Hld_Txt As Variant
'Application.ScreenUpdating = False
Set AC_PD = New Acrobat.AcroPDDoc 'PDF文件
Set AC_Hi = New Acrobat.AcroHiliteList 'PDF文本字符
AC_Hi.Add 0, 32767 '限制文本字符个数
With AC_PD
watermarkfile = ThisWorkbook.Path & "\" & Sheet1.Range("A" & 2) '需要操作的文件名
.Open watermarkfile '打开PDF文件
Ct_Page = .GetNumPages '得到PDF文件页数
If Ct_Page = -1 Then 'pdf文件页数不对
MsgBox "请确认PDF文件 '" & PDF_File & "'"
.Close
Set WS_PDF = Nothing
Set AC_PGTxt = Nothing
Set AC_PG = Nothing
Set AC_Hi = Nothing
Set AC_PD = Nothing
End If
ROW_DEL = Sheet2.Range("E62222").End(xlUp).Row
ROW_DEL = Application.WorksheetFunction.Max(ROW_DEL, 2)
Sheet2.Range("E2:G" & ROW_DEL).Clear '清除读取区域的旧数据
'MsgBox ROW_DEL
For i = 1 To Ct_Page '从PDF第一页 到最后一页
T_Str = ""
Set AC_PG = .AcquirePage(i - 1) '得到当前页
Set AC_PGTxt = AC_PG.CreateWordHilite(AC_Hi) '得到当期文字列表
If Not AC_PGTxt Is Nothing Then
With AC_PGTxt
III = .GetNumText
For j = .GetNumText - 8 To .GetNumText - 1
T_Str = T_Str & .GetText(j) '获得文本
Next j
End With
End If
T_Str = Right(T_Str, 13)
Sheet2.Range("E" & i + 1).Value = T_Str
Sheet2.Range("F" & i + 1).Value = i
Next i
.Close
End With
'Sheet2.Range("G" & 2).Value = "=B2=E2"
'Sheet2.Range("G2:G" & i).FillDown
Application.ScreenUpdating = True
Set WS_PDF = Nothing
Set AC_PGTxt = Nothing
Set AC_PG = Nothing
Set AC_Hi = Nothing
Set AC_PD = Nothing
MsgBox "数据计算完毕!用时" & Round(Timer - sTime, 2) & "秒。" & Round((Timer - sTime) / 60, 4) & "分钟。"
End Sub
xlsm文件附件:
|
评分
-
1
查看全部评分
-
|