Code convert từ PDF sang excel sử dụng phần mềm ABBYY FineReader 12

Liên hệ QC

hoangson8768

Thành viên mới
Tham gia
20/6/10
Bài viết
48
Được thích
1
Em có đoạn code convert từ file PDF sang excel sử dụng phần mềm Acrobat nhưng do Acrobat không đáp ứng được yêu cầu của em và em muốn sử dụng phần mềm ABBYY FineReader 12 rất hiệu quả. Nhưng em không biết sửa trên code VBA như thế nào các thầy có thể giúp em sửa code này để sử dụng phần mềm ABBYY Finereader 12 với ạ.-=.,,

Sub Imp_Into_XL(PDF_File As String, Each_Sheet As Boolean)

'This procedure get the PDF data into excel by following way

'1.Open PDF file
'2.Looping through pages
'3.get the each PDF page data into individual _
sheets or single sheet as defined in Each_Sheet Parameter


Dim AC_PD As Acrobat.AcroPDDoc 'access pdf file
Dim AC_Hi As Acrobat.AcroHiliteList 'set selection word count
Dim AC_PG As Acrobat.AcroPDPage 'get the particular page
Dim AC_PGTxt As Acrobat.AcroPDTextSelect 'get the text of selection area

Dim WS_PDF As Worksheet
Dim RW_Ct As Long 'row count
Dim Col_Num As Integer 'column count
Dim Li_Row As Long 'Maximum rows limit for one column
Dim Yes_Fir As Boolean 'to identify beginning of page

Li_Row = Rows.Count

Dim Ct_Page As Long 'count pages in pdf file
Dim i As Long, j As Long, k As Long 'looping variables
Dim T_Str As String

Dim Hld_Txt As Variant 'get PDF total text into array

RW_Ct = 0 'set the intial value
Col_Num = 1 'set the intial value

Application.ScreenUpdating = False

Set AC_PD = New Acrobat.AcroPDDoc
Set AC_Hi = New Acrobat.AcroHiliteList

'set maximum selection area of PDF page

AC_Hi.Add 0, 32767

With AC_PD

'open PDF file

.Open PDF_File

'get the number of pages of PDF file

Ct_Page = .GetNumPages

'if get pages is failed exit sub

If Ct_Page = -1 Then
MsgBox "Pages Cannot determine in PDF file '" & PDF_File & "'"
.Close
GoTo h_end
End If

'add sheet only one time if Data retrive in one sheet

If Each_Sheet = False Then
Set WS_PDF = Worksheets.Add(, Worksheets(Sheets.Count))
WS_PDF.Name = "PDF2Text"
End If

'looping through sheets

For i = 1 To Ct_Page

T_Str = ""
'get the page
Set AC_PG = .AcquirePage(i - 1)

'get the full page selection
Set AC_PGTxt = AC_PG.CreateWordHilite(AC_Hi)

'if text selected successfully get the all the text into T_Str string

If Not AC_PGTxt Is Nothing Then

With AC_PGTxt

For j = 0 To .GetNumText - 1
T_Str = T_Str & .GetText(j)
Next j

End With

End If


If Each_Sheet = True Then

'add each sheet for each page

Set WS_PDF = Worksheets.Add(, Worksheets(Sheets.Count))

End If

'transfer PDF data into sheet

With WS_PDF

If Each_Sheet = True Then

.Name = "Page-" & i

'get the PDF data into each sheet for each PDF page

'if text accessed successfully then split T_Str by VbCrLf
'and get into array Hld_Txt and looping through array and fill sheet with PDF data

If T_Str <> "" Then
Hld_Txt = Split(T_Str, vbCrLf)

For k = 0 To UBound(Hld_Txt)
T_Str = CStr(Hld_Txt(k))
If Left(T_Str, 1) = "=" Then T_Str = "'" & T_Str
.Cells(k + 1, 1).Value = T_Str
Next k
Else

'information if text not retrive from PDF page

.Cells(1, 1).Value = "No text found in page " & i
End If

Else

'get the pdf data into single sheet

If T_Str <> "" Then
Hld_Txt = Split(T_Str, vbCrLf)

Yes_Fir = True

For k = 0 To UBound(Hld_Txt)

RW_Ct = RW_Ct + 1

'check begining of page if yes enter PDF page number for any idenfication

If Yes_Fir Then
RW_Ct = RW_Ct + 1
.Cells(RW_Ct, Col_Num).Value = "Text In Page - " & i
RW_Ct = RW_Ct + 2
Yes_Fir = False
End If

'check for maximum rows if exceeds start from next column

If RW_Ct > Li_Row Then
RW_Ct = 1
Col_Num = Col_Num + 1
End If

T_Str = CStr(Hld_Txt(k))
If Left(T_Str, 1) = "=" Then T_Str = "'" & T_Str
.Cells(RW_Ct, Col_Num).Value = T_Str

Next k

Else

RW_Ct = RW_Ct + 1
.Cells(RW_Ct, Col_Num).Value = "No text found in page " & i
RW_Ct = RW_Ct + 1

End If

End If

End With
Next i

.Close

End With

Application.ScreenUpdating = True

MsgBox "Imported"

h_end:

Set WS_PDF = Nothing
Set AC_PGTxt = Nothing
Set AC_PG = Nothing
Set AC_Hi = Nothing
Set AC_PD = Nothing

End Sub
 
Em có đoạn code convert từ file PDF sang excel sử dụng phần mềm Acrobat nhưng do Acrobat không đáp ứng được yêu cầu của em và em muốn sử dụng phần mềm ABBYY FineReader 12 rất hiệu quả. Nhưng em không biết sửa trên code VBA như thế nào các thầy có thể giúp em sửa code này để sử dụng phần mềm ABBYY Finereader 12 với ạ.-=.,,

...........
Dim AC_PD As Acrobat.AcroPDDoc 'access pdf file
Dim AC_Hi As Acrobat.AcroHiliteList 'set selection word count
Dim AC_PG As Acrobat.AcroPDPage 'get the particular page
Dim AC_PGTxt As Acrobat.AcroPDTextSelect 'get the text of selection area

Rất tiếc, theo tôi giacatdu thì không thể được, vì các dòng code tôi trích nên lại đó, thì code trên là viết hướng tới cho acrobat - bạn hãy cài phần mềm đó.
 
Upvote 0
Em có đoạn code convert từ file PDF sang excel sử dụng phần mềm Acrobat nhưng do Acrobat không đáp ứng được yêu cầu của em và em muốn sử dụng phần mềm ABBYY FineReader 12 rất hiệu quả. Nhưng em không biết sửa trên code VBA như thế nào các thầy có thể giúp em sửa code này để sử dụng phần mềm ABBYY Finereader 12 với ạ.-=.,,

Sub Imp_Into_XL(PDF_File As String, Each_Sheet As Boolean)

'This procedure get the PDF data into excel by following way

'1.Open PDF file
'2.Looping through pages
'3.get the each PDF page data into individual _
sheets or single sheet as defined in Each_Sheet Parameter


Dim AC_PD As Acrobat.AcroPDDoc 'access pdf file
Dim AC_Hi As Acrobat.AcroHiliteList 'set selection word count
Dim AC_PG As Acrobat.AcroPDPage 'get the particular page
Dim AC_PGTxt As Acrobat.AcroPDTextSelect 'get the text of selection area

Dim WS_PDF As Worksheet
Dim RW_Ct As Long 'row count
Dim Col_Num As Integer 'column count
Dim Li_Row As Long 'Maximum rows limit for one column
Dim Yes_Fir As Boolean 'to identify beginning of page

Li_Row = Rows.Count

Dim Ct_Page As Long 'count pages in pdf file
Dim i As Long, j As Long, k As Long 'looping variables
Dim T_Str As String

Dim Hld_Txt As Variant 'get PDF total text into array

RW_Ct = 0 'set the intial value
Col_Num = 1 'set the intial value

Application.ScreenUpdating = False

Set AC_PD = New Acrobat.AcroPDDoc
Set AC_Hi = New Acrobat.AcroHiliteList

'set maximum selection area of PDF page

AC_Hi.Add 0, 32767

With AC_PD

'open PDF file

.Open PDF_File

'get the number of pages of PDF file

Ct_Page = .GetNumPages

'if get pages is failed exit sub

If Ct_Page = -1 Then
MsgBox "Pages Cannot determine in PDF file '" & PDF_File & "'"
.Close
GoTo h_end
End If

'add sheet only one time if Data retrive in one sheet

If Each_Sheet = False Then
Set WS_PDF = Worksheets.Add(, Worksheets(Sheets.Count))
WS_PDF.Name = "PDF2Text"
End If

'looping through sheets

For i = 1 To Ct_Page

T_Str = ""
'get the page
Set AC_PG = .AcquirePage(i - 1)

'get the full page selection
Set AC_PGTxt = AC_PG.CreateWordHilite(AC_Hi)

'if text selected successfully get the all the text into T_Str string

If Not AC_PGTxt Is Nothing Then

With AC_PGTxt

For j = 0 To .GetNumText - 1
T_Str = T_Str & .GetText(j)
Next j

End With

End If


If Each_Sheet = True Then

'add each sheet for each page

Set WS_PDF = Worksheets.Add(, Worksheets(Sheets.Count))

End If

'transfer PDF data into sheet

With WS_PDF

If Each_Sheet = True Then

.Name = "Page-" & i

'get the PDF data into each sheet for each PDF page

'if text accessed successfully then split T_Str by VbCrLf
'and get into array Hld_Txt and looping through array and fill sheet with PDF data

If T_Str <> "" Then
Hld_Txt = Split(T_Str, vbCrLf)

For k = 0 To UBound(Hld_Txt)
T_Str = CStr(Hld_Txt(k))
If Left(T_Str, 1) = "=" Then T_Str = "'" & T_Str
.Cells(k + 1, 1).Value = T_Str
Next k
Else

'information if text not retrive from PDF page

.Cells(1, 1).Value = "No text found in page " & i
End If

Else

'get the pdf data into single sheet

If T_Str <> "" Then
Hld_Txt = Split(T_Str, vbCrLf)

Yes_Fir = True

For k = 0 To UBound(Hld_Txt)

RW_Ct = RW_Ct + 1

'check begining of page if yes enter PDF page number for any idenfication

If Yes_Fir Then
RW_Ct = RW_Ct + 1
.Cells(RW_Ct, Col_Num).Value = "Text In Page - " & i
RW_Ct = RW_Ct + 2
Yes_Fir = False
End If

'check for maximum rows if exceeds start from next column

If RW_Ct > Li_Row Then
RW_Ct = 1
Col_Num = Col_Num + 1
End If

T_Str = CStr(Hld_Txt(k))
If Left(T_Str, 1) = "=" Then T_Str = "'" & T_Str
.Cells(RW_Ct, Col_Num).Value = T_Str

Next k

Else

RW_Ct = RW_Ct + 1
.Cells(RW_Ct, Col_Num).Value = "No text found in page " & i
RW_Ct = RW_Ct + 1

End If

End If

End With
Next i

.Close

End With

Application.ScreenUpdating = True

MsgBox "Imported"

h_end:

Set WS_PDF = Nothing
Set AC_PGTxt = Nothing
Set AC_PG = Nothing
Set AC_Hi = Nothing
Set AC_PD = Nothing

End Sub
Có bác nào có code sử dụng ABBYY để chuyển đổi sang chưa ạ?
 
Upvote 0
Web KT

Bài viết mới nhất

Back
Top Bottom