Difference between revisions of "R: read PDF"
Jump to navigation
Jump to search
Onnowpurbo (talk | contribs) |
Onnowpurbo (talk | contribs) |
||
Line 1: | Line 1: | ||
+ | |||
+ | ==dengan pdftools== | ||
install.packages("pdftools") | install.packages("pdftools") | ||
Line 5: | Line 7: | ||
text <- pdf_text("folder/tempat/data/namafile.pdf") | text <- pdf_text("folder/tempat/data/namafile.pdf") | ||
text <- pdf_text("data/BUSINESSCENTER.pdf") | text <- pdf_text("data/BUSINESSCENTER.pdf") | ||
+ | |||
+ | ==dengan tm== | ||
+ | |||
+ | install.packages("tm") | ||
+ | library(tm) | ||
+ | read <- readPDF(control = list(text = "-layout")) | ||
+ | |||
+ | document <- Corpus(URISource("./71_PV.62.pdf"), readerControl = list(reader = read)) | ||
+ | doc <- content(document[[1]]) | ||
+ | head(doc) | ||
Latest revision as of 11:03, 6 November 2018
dengan pdftools
install.packages("pdftools")
library(pdftools)
text <- pdf_text("folder/tempat/data/namafile.pdf")
text <- pdf_text("data/BUSINESSCENTER.pdf")
dengan tm
install.packages("tm")
library(tm)
read <- readPDF(control = list(text = "-layout"))
document <- Corpus(URISource("./71_PV.62.pdf"), readerControl = list(reader = read))
doc <- content(document[[1]])
head(doc)