Difference between revisions of "Python: Read PDF"
Jump to navigation
Jump to search
Onnowpurbo (talk | contribs) |
Onnowpurbo (talk | contribs) |
||
Line 1: | Line 1: | ||
+ | Install | ||
+ | pip install PyPDF2 | ||
− | + | ==Script Cek Jumlah Halaman== | |
− | |||
− | |||
# importing all the required modules | # importing all the required modules | ||
Line 17: | Line 17: | ||
print(fileReader.numPages) | print(fileReader.numPages) | ||
+ | |||
+ | ==Script Print Satu Page== | ||
+ | |||
+ | # extracting_text.py | ||
+ | |||
+ | from PyPDF2 import PdfFileReader | ||
+ | |||
+ | |||
+ | def text_extractor(path): | ||
+ | with open(path, 'rb') as f: | ||
+ | pdf = PdfFileReader(f) | ||
+ | |||
+ | # get the first page | ||
+ | page = pdf.getPage(1) | ||
+ | print(page) | ||
+ | print('Page type: {}'.format(str(type(page)))) | ||
+ | |||
+ | text = page.extractText() | ||
+ | print(text) | ||
+ | |||
+ | if __name__ == '__main__': | ||
+ | path = 'reportlab-sample.pdf' | ||
+ | text_extractor(path) | ||
Latest revision as of 16:22, 28 October 2018
Install
pip install PyPDF2
Script Cek Jumlah Halaman
# Print the number of pages in a PDF file.
# NOTE(review): this uses the legacy PyPDF2 names (PdfFileReader, numPages);
# newer PyPDF2 releases are believed to rename them (PdfReader,
# len(reader.pages)) -- confirm against the installed version.

# importing all the required modules
import PyPDF2

# Open in binary mode; the with-block closes the handle once the page count
# has been read, instead of leaving the file open.
with open('example.pdf', 'rb') as file:
    # creating a pdf reader object
    fileReader = PyPDF2.PdfFileReader(file)

    # print the number of pages in pdf file
    print(fileReader.numPages)
Script Print Satu Page
# extracting_text.py
# NOTE(review): this uses the legacy PyPDF2 names (PdfFileReader, getPage,
# extractText); newer PyPDF2 releases are believed to rename them -- confirm
# against the installed version.

from PyPDF2 import PdfFileReader


def text_extractor(path, page_index=0):
    """Print one page of a PDF: the page object, its type, and its text.

    Args:
        path: Path of the PDF file to read.
        page_index: Zero-based index of the page to print. Defaults to 0,
            the first page.
    """
    with open(path, 'rb') as f:
        pdf = PdfFileReader(f)

        # getPage() is zero-based, so the first page is index 0 (index 1
        # would be the second page and fails on a one-page document).
        page = pdf.getPage(page_index)
        print(page)
        print('Page type: {}'.format(type(page)))

        # Extract while the file is still open; the reader reads from f.
        text = page.extractText()
        print(text)


if __name__ == '__main__':
    path = 'reportlab-sample.pdf'
    text_extractor(path)