Python: Read PDF
Revision as of 16:22, 28 October 2018 by Onnowpurbo (talk | contribs)
Install
pip install PyPDF2
Script Cek Jumlah Halaman
# Import the third-party PDF library (installed with `pip install PyPDF2`).
import PyPDF2

# Open the PDF in binary mode. The `with` block guarantees the file handle
# is closed once the page count has been printed, even if reading fails.
with open('example.pdf', 'rb') as pdf_file:
    # Create a PDF reader object over the open file.
    file_reader = PyPDF2.PdfFileReader(pdf_file)

    # Print the number of pages in the PDF file.
    print(file_reader.numPages)
Script Print Satu Page
# extracting_text.py
from PyPDF2 import PdfFileReader


def text_extractor(path, page_index=1):
    """Print a single page of a PDF: the page object, its type, and its text.

    Args:
        path: Path of the PDF file to open.
        page_index: Zero-based index handed to ``getPage``. The default of 1
            keeps the script's historical behaviour, which selects the
            SECOND page of the document; pass 0 for the first page.
    """
    with open(path, 'rb') as f:
        pdf = PdfFileReader(f)

        # getPage() counts from zero, so index 1 is the second page, not
        # the first. The file must stay open while the page is read, so
        # all page access happens inside the `with` block.
        page = pdf.getPage(page_index)
        print(page)
        print('Page type: {}'.format(str(type(page))))

        text = page.extractText()
        print(text)


if __name__ == '__main__':
    path = 'reportlab-sample.pdf'
    text_extractor(path)