R: read multi PDF ke tidytext
Revision as of 11:25, 6 November 2018 by Onnowpurbo (talk | contribs) (Created page with " library(tidyverse) library(tidytext) library(tm) directory <- "data-pdf" # create corpus from pdfs converted <- VCorpus(DirSource(directory), readerControl = list(rea...")
# Read every PDF in a directory with tm, build a document-term matrix,
# and turn it into a tidy table with purely textual terms.
library(tidyverse)
library(tidytext)
library(tm)

# Directory holding the PDF files, relative to the working directory.
directory <- "data-pdf"

# Create a corpus from the PDFs and convert it to a document-term matrix.
# NOTE(review): readPDF depends on an external PDF text-extraction engine;
# confirm one is installed on the machine running this script.
converted <- VCorpus(
  DirSource(directory),
  readerControl = list(reader = readPDF)
) %>%
  DocumentTermMatrix()

# Tidy the matrix and drop every term that contains at least one digit.
converted %>%
  tidy() %>%
  filter(!grepl("[0-9]+", term))