Difference between revisions of "Python: PDF"
Jump to navigation
Jump to search
Onnowpurbo (talk | contribs) (Created page with "==pyPDF2== #install pyPDF2 pip install PyPDF2 # importing all the required modules import PyPDF2 # creating an object file = open('example.pdf', 'rb') # creating a...") |
Onnowpurbo (talk | contribs) |
||
(3 intermediate revisions by the same user not shown) | |||
Line 3: | Line 3: | ||
#install pyPDF2 | #install pyPDF2 | ||
pip install PyPDF2 | pip install PyPDF2 | ||
− | + | ||
# importing all the required modules | # importing all the required modules | ||
import PyPDF2 | import PyPDF2 | ||
− | + | ||
# creating an object | # creating an object | ||
file = open('example.pdf', 'rb') | file = open('example.pdf', 'rb') | ||
− | + | ||
# creating a pdf reader object | # creating a pdf reader object | ||
fileReader = PyPDF2.PdfFileReader(file) | fileReader = PyPDF2.PdfFileReader(file) | ||
− | + | ||
# print the number of pages in pdf file | # print the number of pages in pdf file | ||
print(fileReader.numPages) | print(fileReader.numPages) | ||
+ | |||
+ | ==textract== | ||
+ | |||
+ | |||
+ | pip install textract | ||
+ | |||
+ | # for read pdf | ||
+ | import textract | ||
+ | text = textract.process('path/to/pdf/file', method='pdfminer') | ||
==Referensi== | ==Referensi== | ||
* http://pythonhosted.org/PyPDF2/ | * http://pythonhosted.org/PyPDF2/ | ||
+ | * http://textract.readthedocs.io/en/stable/index.html | ||
+ | * https://automatetheboringstuff.com/chapter13/ |
Latest revision as of 05:29, 25 October 2018
pyPDF2
#install pyPDF2
pip install PyPDF2

# importing all the required modules
import PyPDF2

# creating an object
file = open('example.pdf', 'rb')

# creating a pdf reader object
fileReader = PyPDF2.PdfFileReader(file)

# print the number of pages in pdf file
print(fileReader.numPages)
textract
pip install textract

# read the text of a PDF file
import textract
text = textract.process('path/to/pdf/file', method='pdfminer')