Difference between revisions of "R: tidy text dataset - tibble"
Jump to navigation
Jump to search
Onnowpurbo (talk | contribs) |
Onnowpurbo (talk | contribs) |
||
Line 14: | Line 14: | ||
text_df <- data_frame(line = 1:4, text = text) | text_df <- data_frame(line = 1:4, text = text) | ||
text_df | text_df | ||
+ | |||
+ | |||
+ | ==Tidy Text Novel== | ||
+ | |||
+ | library(janeaustenr) | ||
+ | library(dplyr) | ||
+ | library(stringr) | ||
+ | original_books <- austen_books() %>% | ||
+ | group_by(book) %>% | ||
+ | mutate(linenumber = row_number(), | ||
+ | chapter = cumsum(str_detect(text, regex("^chapter [\\divxlc]", | ||
+ | ignore_case = TRUE)))) %>% | ||
+ | ungroup() | ||
+ | original_books | ||
Revision as of 09:14, 31 October 2018
Text Vector
# Four lines of verse stored as a character vector, one element per line.
text <- c(
  "Because I could not stop for Death -",
  "He kindly stopped for me -",
  "The Carriage held but just Ourselves -",
  "and Immortality"
)

# Print the vector to inspect it.
text
Tidy Text Dataset
# Install dplyr only when it is missing, so re-running the script does not
# trigger a fresh download every time.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)

# Put the character vector `text` into a tidy table: one row per line of
# text, with `line` holding the position of that line in the vector.
# tibble() replaces the deprecated data_frame(); seq_along() keeps the line
# numbers in step with `text` whatever its length.
text_df <- tibble(line = seq_along(text), text = text)

# Print the tibble to inspect it.
text_df
Tidy Text Novel
library(janeaustenr)
library(dplyr)
library(stringr)

# Annotate every row returned by austen_books() with its position inside
# its book and with the chapter it belongs to.
original_books <- austen_books() %>%
  # Group by book so both counters below restart for each book.
  group_by(book) %>%
  mutate(
    # Running row index within the current book.
    linenumber = row_number(),
    # Running count of rows whose text starts with "chapter " followed by a
    # digit or one of the letters i, v, x, l, c (case-insensitive); rows
    # before the first such heading get 0.
    chapter = cumsum(str_detect(
      text,
      regex("^chapter [\\divxlc]", ignore_case = TRUE)
    ))
  ) %>%
  # Drop the grouping so later operations act on the whole table.
  ungroup()

# Print the annotated table to inspect it.
original_books