hoangson8768
Thành viên mới
- Tham gia
- 20/6/10
- Bài viết
- 48
- Được thích
- 1
Em có đoạn code convert từ file PDF sang excel sử dụng phần mềm Acrobat nhưng do Acrobat không đáp ứng được yêu cầu của em và em muốn sử dụng phần mềm ABBYY FineReader 12 rất hiệu quả. Nhưng em không biết sửa trên code VBA như thế nào các thầy có thể giúp em sửa code này để sử dụng phần mềm ABBYY Finereader 12 với ạ.
Sub Imp_Into_XL(PDF_File As String, Each_Sheet As Boolean)
'This procedure get the PDF data into excel by following way
'1.Open PDF file
'2.Looping through pages
'3.get the each PDF page data into individual _
sheets or single sheet as defined in Each_Sheet Parameter
Dim AC_PD As Acrobat.AcroPDDoc 'access pdf file
Dim AC_Hi As Acrobat.AcroHiliteList 'set selection word count
Dim AC_PG As Acrobat.AcroPDPage 'get the particular page
Dim AC_PGTxt As Acrobat.AcroPDTextSelect 'get the text of selection area
Dim WS_PDF As Worksheet
Dim RW_Ct As Long 'row count
Dim Col_Num As Integer 'column count
Dim Li_Row As Long 'Maximum rows limit for one column
Dim Yes_Fir As Boolean 'to identify beginning of page
Li_Row = Rows.Count
Dim Ct_Page As Long 'count pages in pdf file
Dim i As Long, j As Long, k As Long 'looping variables
Dim T_Str As String
Dim Hld_Txt As Variant 'get PDF total text into array
RW_Ct = 0 'set the intial value
Col_Num = 1 'set the intial value
Application.ScreenUpdating = False
Set AC_PD = New Acrobat.AcroPDDoc
Set AC_Hi = New Acrobat.AcroHiliteList
'set maximum selection area of PDF page
AC_Hi.Add 0, 32767
With AC_PD
'open PDF file
.Open PDF_File
'get the number of pages of PDF file
Ct_Page = .GetNumPages
'if get pages is failed exit sub
If Ct_Page = -1 Then
MsgBox "Pages Cannot determine in PDF file '" & PDF_File & "'"
.Close
GoTo h_end
End If
'add sheet only one time if Data retrive in one sheet
If Each_Sheet = False Then
Set WS_PDF = Worksheets.Add(, Worksheets(Sheets.Count))
WS_PDF.Name = "PDF2Text"
End If
'looping through sheets
For i = 1 To Ct_Page
T_Str = ""
'get the page
Set AC_PG = .AcquirePage(i - 1)
'get the full page selection
Set AC_PGTxt = AC_PG.CreateWordHilite(AC_Hi)
'if text selected successfully get the all the text into T_Str string
If Not AC_PGTxt Is Nothing Then
With AC_PGTxt
For j = 0 To .GetNumText - 1
T_Str = T_Str & .GetText(j)
Next j
End With
End If
If Each_Sheet = True Then
'add each sheet for each page
Set WS_PDF = Worksheets.Add(, Worksheets(Sheets.Count))
End If
'transfer PDF data into sheet
With WS_PDF
If Each_Sheet = True Then
.Name = "Page-" & i
'get the PDF data into each sheet for each PDF page
'if text accessed successfully then split T_Str by VbCrLf
'and get into array Hld_Txt and looping through array and fill sheet with PDF data
If T_Str <> "" Then
Hld_Txt = Split(T_Str, vbCrLf)
For k = 0 To UBound(Hld_Txt)
T_Str = CStr(Hld_Txt(k))
If Left(T_Str, 1) = "=" Then T_Str = "'" & T_Str
.Cells(k + 1, 1).Value = T_Str
Next k
Else
'information if text not retrive from PDF page
.Cells(1, 1).Value = "No text found in page " & i
End If
Else
'get the pdf data into single sheet
If T_Str <> "" Then
Hld_Txt = Split(T_Str, vbCrLf)
Yes_Fir = True
For k = 0 To UBound(Hld_Txt)
RW_Ct = RW_Ct + 1
'check begining of page if yes enter PDF page number for any idenfication
If Yes_Fir Then
RW_Ct = RW_Ct + 1
.Cells(RW_Ct, Col_Num).Value = "Text In Page - " & i
RW_Ct = RW_Ct + 2
Yes_Fir = False
End If
'check for maximum rows if exceeds start from next column
If RW_Ct > Li_Row Then
RW_Ct = 1
Col_Num = Col_Num + 1
End If
T_Str = CStr(Hld_Txt(k))
If Left(T_Str, 1) = "=" Then T_Str = "'" & T_Str
.Cells(RW_Ct, Col_Num).Value = T_Str
Next k
Else
RW_Ct = RW_Ct + 1
.Cells(RW_Ct, Col_Num).Value = "No text found in page " & i
RW_Ct = RW_Ct + 1
End If
End If
End With
Next i
.Close
End With
Application.ScreenUpdating = True
MsgBox "Imported"
h_end:
Set WS_PDF = Nothing
Set AC_PGTxt = Nothing
Set AC_PG = Nothing
Set AC_Hi = Nothing
Set AC_PD = Nothing
End Sub
Sub Imp_Into_XL(PDF_File As String, Each_Sheet As Boolean)
'This procedure get the PDF data into excel by following way
'1.Open PDF file
'2.Looping through pages
'3.get the each PDF page data into individual _
sheets or single sheet as defined in Each_Sheet Parameter
Dim AC_PD As Acrobat.AcroPDDoc 'access pdf file
Dim AC_Hi As Acrobat.AcroHiliteList 'set selection word count
Dim AC_PG As Acrobat.AcroPDPage 'get the particular page
Dim AC_PGTxt As Acrobat.AcroPDTextSelect 'get the text of selection area
Dim WS_PDF As Worksheet
Dim RW_Ct As Long 'row count
Dim Col_Num As Integer 'column count
Dim Li_Row As Long 'Maximum rows limit for one column
Dim Yes_Fir As Boolean 'to identify beginning of page
Li_Row = Rows.Count
Dim Ct_Page As Long 'count pages in pdf file
Dim i As Long, j As Long, k As Long 'looping variables
Dim T_Str As String
Dim Hld_Txt As Variant 'get PDF total text into array
RW_Ct = 0 'set the intial value
Col_Num = 1 'set the intial value
Application.ScreenUpdating = False
Set AC_PD = New Acrobat.AcroPDDoc
Set AC_Hi = New Acrobat.AcroHiliteList
'set maximum selection area of PDF page
AC_Hi.Add 0, 32767
With AC_PD
'open PDF file
.Open PDF_File
'get the number of pages of PDF file
Ct_Page = .GetNumPages
'if get pages is failed exit sub
If Ct_Page = -1 Then
MsgBox "Pages Cannot determine in PDF file '" & PDF_File & "'"
.Close
GoTo h_end
End If
'add sheet only one time if Data retrive in one sheet
If Each_Sheet = False Then
Set WS_PDF = Worksheets.Add(, Worksheets(Sheets.Count))
WS_PDF.Name = "PDF2Text"
End If
'looping through sheets
For i = 1 To Ct_Page
T_Str = ""
'get the page
Set AC_PG = .AcquirePage(i - 1)
'get the full page selection
Set AC_PGTxt = AC_PG.CreateWordHilite(AC_Hi)
'if text selected successfully get the all the text into T_Str string
If Not AC_PGTxt Is Nothing Then
With AC_PGTxt
For j = 0 To .GetNumText - 1
T_Str = T_Str & .GetText(j)
Next j
End With
End If
If Each_Sheet = True Then
'add each sheet for each page
Set WS_PDF = Worksheets.Add(, Worksheets(Sheets.Count))
End If
'transfer PDF data into sheet
With WS_PDF
If Each_Sheet = True Then
.Name = "Page-" & i
'get the PDF data into each sheet for each PDF page
'if text accessed successfully then split T_Str by VbCrLf
'and get into array Hld_Txt and looping through array and fill sheet with PDF data
If T_Str <> "" Then
Hld_Txt = Split(T_Str, vbCrLf)
For k = 0 To UBound(Hld_Txt)
T_Str = CStr(Hld_Txt(k))
If Left(T_Str, 1) = "=" Then T_Str = "'" & T_Str
.Cells(k + 1, 1).Value = T_Str
Next k
Else
'information if text not retrive from PDF page
.Cells(1, 1).Value = "No text found in page " & i
End If
Else
'get the pdf data into single sheet
If T_Str <> "" Then
Hld_Txt = Split(T_Str, vbCrLf)
Yes_Fir = True
For k = 0 To UBound(Hld_Txt)
RW_Ct = RW_Ct + 1
'check begining of page if yes enter PDF page number for any idenfication
If Yes_Fir Then
RW_Ct = RW_Ct + 1
.Cells(RW_Ct, Col_Num).Value = "Text In Page - " & i
RW_Ct = RW_Ct + 2
Yes_Fir = False
End If
'check for maximum rows if exceeds start from next column
If RW_Ct > Li_Row Then
RW_Ct = 1
Col_Num = Col_Num + 1
End If
T_Str = CStr(Hld_Txt(k))
If Left(T_Str, 1) = "=" Then T_Str = "'" & T_Str
.Cells(RW_Ct, Col_Num).Value = T_Str
Next k
Else
RW_Ct = RW_Ct + 1
.Cells(RW_Ct, Col_Num).Value = "No text found in page " & i
RW_Ct = RW_Ct + 1
End If
End If
End With
Next i
.Close
End With
Application.ScreenUpdating = True
MsgBox "Imported"
h_end:
Set WS_PDF = Nothing
Set AC_PGTxt = Nothing
Set AC_PG = Nothing
Set AC_Hi = Nothing
Set AC_PD = Nothing
End Sub