Difference between revisions of "Python: PDF"
Jump to navigation
Jump to search
Onnowpurbo (talk | contribs) |
Onnowpurbo (talk | contribs) |
||
| (One intermediate revision by the same user not shown) | |||
| Line 20: | Line 20: | ||
pip install textract | pip install textract | ||
| − | + | ||
# for read pdf | # for read pdf | ||
import textract | import textract | ||
text = textract.process('path/to/pdf/file', method='pdfminer') | text = textract.process('path/to/pdf/file', method='pdfminer') | ||
| − | |||
==Referensi== | ==Referensi== | ||
| Line 30: | Line 29: | ||
* http://pythonhosted.org/PyPDF2/ | * http://pythonhosted.org/PyPDF2/ | ||
* http://textract.readthedocs.io/en/stable/index.html | * http://textract.readthedocs.io/en/stable/index.html | ||
| + | * https://automatetheboringstuff.com/chapter13/ | ||
Latest revision as of 05:29, 25 October 2018
PyPDF2
# install PyPDF2
pip install PyPDF2
# Import the PyPDF2 module, which this snippet uses to read a PDF file.
import PyPDF2

# Open the PDF in binary mode. The `with` block closes the file handle
# automatically, even if creating the reader or reading the page count raises;
# the original snippet left the handle open.
with open('example.pdf', 'rb') as file:
    # Create a PDF reader object over the open file.
    # NOTE(review): newer PyPDF2 releases reportedly replace
    # PdfFileReader/numPages with PdfReader/len(reader.pages) — confirm
    # against the installed version.
    fileReader = PyPDF2.PdfFileReader(file)
    # Print the number of pages in the PDF file. This happens inside the
    # `with` block so the file is still open while the reader is used.
    print(fileReader.numPages)
textract
pip install textract
# Read a PDF: extract its contents with textract, using the 'pdfminer' method.
import textract
# NOTE(review): process() presumably returns the extracted text as bytes —
# confirm against the textract documentation before decoding or printing.
text = textract.process('path/to/pdf/file', method='pdfminer')