R: tidytext RPJP BAPPENAS

From OnnoWiki
Revision as of 10:45, 6 November 2018 by Onnowpurbo (talk | contribs) (Created page with " install.packages("pdftools") library(pdftools) rpjp2005 <- pdf_text("RPJP_2005-2025.pdf") %>% strsplit(split = "\n") original_rpjp2005 <- rpjp2005 %>% group_by(book...")
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search


# Build a line-level table of the RPJP 2005-2025 PDF: one row per text line,
# annotated with its page, its running line number, and a running chapter
# counter.

# Install pdftools only when it is missing, so re-running the script does not
# reinstall the package every time.
if (!requireNamespace("pdftools", quietly = TRUE)) {
  install.packages("pdftools")
}
library(pdftools)
library(dplyr)    # %>%, tibble(), group_by(), mutate(), row_number(), ungroup()
library(stringr)  # str_detect(), regex()

# pdf_text() yields one string per page; split every page into its lines.
# The result is a list with one character vector of lines per page.
rpjp2005 <- pdf_text("RPJP_2005-2025.pdf") %>%
  strsplit(split = "\n")

# Flatten the per-page list into a tibble with one row per line. The dplyr
# verbs below need a data frame with `book` and `text` columns; the raw list
# from strsplit() has neither.
rpjp2005_lines <- tibble(
  book = "RPJP 2005-2025",
  page = rep(seq_along(rpjp2005), lengths(rpjp2005)),
  text = as.character(unlist(rpjp2005, use.names = FALSE))
)

# linenumber: position of the line within its book.
# chapter: cumulative count of lines that look like a chapter heading, so
# every line carries the number of the chapter it belongs to (0 before the
# first heading).
# NOTE(review): the pattern matches English "chapter <number>" headings; the
# RPJP text is presumably Indonesian (e.g. "BAB I") -- confirm and adapt the
# pattern if `chapter` stays 0 for every row.
original_rpjp2005 <- rpjp2005_lines %>%
  group_by(book) %>%
  mutate(
    linenumber = row_number(),
    chapter = cumsum(str_detect(
      text,
      regex("^chapter [\\divxlc]", ignore_case = TRUE)
    ))
  ) %>%
  ungroup()

original_rpjp2005


Pranala Menarik