Difference between revisions of "R: read PDF"

From OnnoWiki
Jump to navigation Jump to search
 
Line 1: Line 1:
 +
 +
==dengan pdftools==
  
 
  install.packages("pdftools")
 
  install.packages("pdftools")
Line 5: Line 7:
 
  text <- pdf_text("folder/tempat/data/namafile.pdf")
 
  text <- pdf_text("folder/tempat/data/namafile.pdf")
 
  text <- pdf_text("data/BUSINESSCENTER.pdf")
 
  text <- pdf_text("data/BUSINESSCENTER.pdf")
 +
 +
==dengan tm==
 +
 +
install.packages("tm")
 +
library(tm)
 +
read <- readPDF(control = list(text = "-layout"))
 +
 +
document <- Corpus(URISource("./71_PV.62.pdf"), readerControl = list(reader = read))
 +
doc <- content(document[[1]])
 +
head(doc)
  
  

Latest revision as of 11:03, 6 November 2018

dengan pdftools

# Read the text of a PDF file with the pdftools package.

# Install pdftools only when it is not already available, so re-running the
# script does not reinstall the package every time.
if (!requireNamespace("pdftools", quietly = TRUE)) {
  install.packages("pdftools")
}
library(pdftools)

# General form (replace the path with your own file):
#   text <- pdf_text("folder/tempat/data/namafile.pdf")
# Concrete example; per the pdftools documentation, pdf_text() returns a
# character vector with one string per page.
text <- pdf_text("data/BUSINESSCENTER.pdf")

dengan tm

# Read the text of a PDF file with the tm (text mining) package.

# Install tm only when it is not already available, so re-running the
# script does not reinstall the package every time.
if (!requireNamespace("tm", quietly = TRUE)) {
  install.packages("tm")
}
library(tm)

# Build a PDF reader function. The `text` control entry is forwarded to the
# text-extraction engine; "-layout" is a pdftotext option, so whether it is
# honoured depends on the engine readPDF() uses -- TODO confirm for your
# tm version.
read <- readPDF(control = list(text = "-layout"))

# Wrap the single PDF in a corpus, using the reader defined above.
document <- Corpus(
  URISource("./71_PV.62.pdf"),
  readerControl = list(reader = read)
)

# Extract the content of the first document in the corpus. The index must be
# written as document[[1]]; `document1` is not a defined object.
doc <- content(document[[1]])
head(doc)


Pranala Menarik