Difference between revisions of "Python: Read PDF"

From OnnoWiki
Jump to navigation Jump to search
 
(One intermediate revision by the same user not shown)
Line 1: Line 1:
 +
Install
  
 +
pip install PyPDF2
  
 +
==Script Cek Jumlah Halaman==
  
#install PyPDF2
+
# importing all the required modules
pip install PyPDF2
+
import PyPDF2
  
# importing all the required modules
+
# creating an object
import PyPDF2
+
file = open('example.pdf', 'rb')
  
# creating an object  
+
# creating a pdf reader object
file = open('example.pdf', 'rb')
+
fileReader = PyPDF2.PdfFileReader(file)
  
# creating a pdf reader object
+
# print the number of pages in pdf file
fileReader = PyPDF2.PdfFileReader(file)
+
print(fileReader.numPages)
  
# print the number of pages in pdf file
 
print(fileReader.numPages)
 
  
 +
==Script Print Satu Page==
 +
 +
# extracting_text.py
 +
 +
from PyPDF2 import PdfFileReader
 +
 +
 +
def text_extractor(path):
 +
    with open(path, 'rb') as f:
 +
        pdf = PdfFileReader(f)
 +
 
 +
        # get the first page
 +
        page = pdf.getPage(1)
 +
        print(page)
 +
        print('Page type: {}'.format(str(type(page))))
 +
 
 +
        text = page.extractText()
 +
        print(text)
 +
 
 +
if __name__ == '__main__':
 +
    path = 'reportlab-sample.pdf'
 +
    text_extractor(path)
  
  

Latest revision as of 16:22, 28 October 2018

Install

pip install PyPDF2

Script Cek Jumlah Halaman

# Print the number of pages in 'example.pdf' using PyPDF2.
# NOTE: PdfFileReader / numPages are the PyPDF2 1.x names; newer PyPDF2
# releases renamed them (PdfReader / len(reader.pages)) - verify against the
# installed version.
import PyPDF2

# Open the PDF in binary mode; the `with` block guarantees the file handle
# is closed even if parsing fails.
with open('example.pdf', 'rb') as file:
    # The reader parses the PDF structure from the open file object.
    fileReader = PyPDF2.PdfFileReader(file)
    # Report the total page count while the file is still open.
    print(fileReader.numPages)


Script Print Satu Page

# extracting_text.py

from PyPDF2 import PdfFileReader


def text_extractor(path):
    """Print the first page of a PDF file and the text extracted from it.

    Three things are written to stdout: the page object itself, its type,
    and the text extracted from the page.

    Args:
        path: Filesystem path of the PDF file to read.

    NOTE: PdfFileReader / getPage / extractText are the PyPDF2 1.x names;
    newer PyPDF2 releases renamed them (PdfReader / pages / extract_text) -
    verify against the installed version.
    """
    with open(path, 'rb') as f:
        pdf = PdfFileReader(f)

        # Page indices are zero-based, so the first page is index 0.
        # (Index 1 would be the second page and fails on one-page PDFs.)
        page = pdf.getPage(0)
        print(page)
        print('Page type: {}'.format(str(type(page))))

        # Extraction happens inside the `with` block because the reader
        # still needs the underlying file to be open.
        text = page.extractText()
        print(text)
 
# Script entry point: run the extractor on the bundled sample PDF only when
# this file is executed directly, not when it is imported.
if __name__ == '__main__':
    text_extractor('reportlab-sample.pdf')


Pranala Menarik