Difference between revisions of "R: read PDF"
Jump to navigation
Jump to search
Onnowpurbo (talk | contribs) |
Onnowpurbo (talk | contribs) |
||
| Line 1: | Line 1: | ||
| + | |||
| + | ==dengan pdftools== | ||
install.packages("pdftools") | install.packages("pdftools") | ||
| Line 5: | Line 7: | ||
text <- pdf_text("folder/tempat/data/namafile.pdf") | text <- pdf_text("folder/tempat/data/namafile.pdf") | ||
text <- pdf_text("data/BUSINESSCENTER.pdf") | text <- pdf_text("data/BUSINESSCENTER.pdf") | ||
| + | |||
| + | ==dengan tm== | ||
| + | |||
| + | install.packages("tm") | ||
| + | library(tm) | ||
| + | read <- readPDF(control = list(text = "-layout")) | ||
| + | |||
| + | document <- Corpus(URISource("./71_PV.62.pdf"), readerControl = list(reader = read)) | ||
| + | doc <- content(document[[1]]) | ||
| + | head(doc) | ||
Latest revision as of 11:03, 6 November 2018
dengan pdftools
# Install the pdftools package from the configured repository
# (only needed once per R library).
install.packages("pdftools")
# Attach pdftools so pdf_text() can be called without a namespace prefix.
library(pdftools)
# Generic form: pass the path of the PDF file to pdf_text().
# NOTE(review): this path looks like a placeholder ("folder/place/data/
# filename.pdf" in Indonesian) -- replace it with a real file before running.
text <- pdf_text("folder/tempat/data/namafile.pdf")
# Concrete example; this assignment overwrites the previous value of `text`.
text <- pdf_text("data/BUSINESSCENTER.pdf")
dengan tm
# Install the tm text-mining package (only needed once per R library),
# then attach it so readPDF(), Corpus(), URISource() and content() are
# available.
install.packages("tm")
library(tm)
# Build a PDF reader function. The `text = "-layout"` control entry is
# forwarded to the text-extraction engine.
# NOTE(review): presumably meant to preserve the page layout, as with
# pdftotext's -layout flag -- confirm against the readPDF documentation
# for the engine in use.
read <- readPDF(control = list(text = "-layout"))
# Read the PDF at the given path into a corpus using the reader above.
document <- Corpus(
  URISource("./71_PV.62.pdf"),
  readerControl = list(reader = read)
)
# Extract the content of the first document in the corpus. The `[[1]]`
# subscript is required: the previous `document1` was an undefined name
# (the double brackets had been lost), so the call failed.
doc <- content(document[[1]])
# Show the first few elements of the extracted content.
head(doc)