Difference between revisions of "R: tidy text dataset - tibble"
Jump to navigation
Jump to search
Onnowpurbo (talk | contribs) |
Onnowpurbo (talk | contribs) |
||
| Line 14: | Line 14: | ||
text_df <- data_frame(line = 1:4, text = text) | text_df <- data_frame(line = 1:4, text = text) | ||
text_df | text_df | ||
| + | |||
| + | |||
| + | ==Tidy Text Novel== | ||
| + | |||
| + | library(janeaustenr) | ||
| + | library(dplyr) | ||
| + | library(stringr) | ||
| + | original_books <- austen_books() %>% | ||
| + | group_by(book) %>% | ||
| + | mutate(linenumber = row_number(), | ||
| + | chapter = cumsum(str_detect(text, regex("^chapter [\\divxlc]", | ||
| + | ignore_case = TRUE)))) %>% | ||
| + | ungroup() | ||
| + | original_books | ||
Revision as of 09:14, 31 October 2018
Text Vector
# Sample input: a character vector with one element per line of text.
text <- c(
  "Because I could not stop for Death -",
  "He kindly stopped for me -",
  "The Carriage held but just Ourselves -",
  "and Immortality"
)

# Evaluate the bare name so the vector is auto-printed at top level.
text
Tidy Text Dataset
# Install dplyr only when it is missing, so re-running the script does not
# reinstall the package every time.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)

# Build a two-column tibble from the `text` character vector created earlier
# in this page: `line` numbers each element and `text` holds the strings.
# tibble() replaces the deprecated data_frame(); seq_along() keeps the line
# numbers in step with the length of `text` instead of hard-coding 1:4.
text_df <- tibble(line = seq_along(text), text = text)

# Evaluate the bare name so the tibble is auto-printed at top level.
text_df
Tidy Text Novel
library(janeaustenr)
library(dplyr)
library(stringr)
# Annotate every row of austen_books() with two per-book counters:
#   linenumber - position of the row within its book
#   chapter    - running count of rows whose `text` matches the chapter
#                pattern below, restarted for each book by the grouping
# NOTE(review): `text` inside mutate() is presumably the text column of
# austen_books(), not the character vector defined earlier - confirm.
original_books <- austen_books() %>%
  group_by(book) %>%
  mutate(linenumber = row_number()) %>%
  mutate(
    chapter = cumsum(
      # Case-insensitive match on a row that starts with "chapter " followed
      # by a digit or one of the letters i, v, x, l, c.
      str_detect(text, regex("^chapter [\\divxlc]", ignore_case = TRUE))
    )
  ) %>%
  # Drop the grouping so later steps operate on the whole table.
  ungroup()

# Evaluate the bare name so the result is auto-printed at top level.
original_books