Difference between revisions of "R: tidytext Jane Austen Book"
Jump to navigation
Jump to search
Onnowpurbo (talk | contribs) (Created page with " library(janeaustenr) library(dplyr) library(stringr) original_books <- austen_books() %>% group_by(book) %>% mutate(linenumber = row_number(), chapter =...") |
Onnowpurbo (talk | contribs) |
||
Line 11: | Line 11: | ||
ungroup() | ungroup() | ||
original_books | original_books | ||
− | |||
# konversi ke token | # konversi ke token | ||
Line 19: | Line 18: | ||
unnest_tokens(word, text) | unnest_tokens(word, text) | ||
tidy_books | tidy_books | ||
− | |||
# buang stopwords | # buang stopwords | ||
Line 26: | Line 24: | ||
tidy_books <- tidy_books %>% | tidy_books <- tidy_books %>% | ||
anti_join(stop_words) | anti_join(stop_words) | ||
− | |||
# count | # count | ||
Line 44: | Line 41: | ||
xlab(NULL) + | xlab(NULL) + | ||
coord_flip() | coord_flip() | ||
− | |||
− | |||
==Pranala Menarik== | ==Pranala Menarik== | ||
* [[R]] | * [[R]] |
Latest revision as of 09:36, 6 November 2018
# Packages: the Austen corpus, data-frame verbs, and string helpers.
library(janeaustenr)
library(dplyr)
library(stringr)

# Annotate every line of text with its position inside its own book:
#   linenumber - running row index within the book
#   chapter    - running count of lines matching a chapter heading
#                ("chapter" followed by a digit or one of the letters
#                i, v, x, l, c; matched case-insensitively)
original_books <- austen_books() %>%
  group_by(book) %>%
  mutate(
    linenumber = row_number(),
    chapter = cumsum(
      str_detect(text, regex("^chapter [\\divxlc]", ignore_case = TRUE))
    )
  ) %>%
  ungroup()

# Show the annotated table.
original_books
# Convert to tokens ----
# Split the `text` column of `original_books` into tokens stored in a new
# `word` column.
library(tidytext)

tidy_books <- unnest_tokens(original_books, word, text)

# Show the tokenized table.
tidy_books
# Remove stop words ----
# Load the `stop_words` dataset (assumes tidytext is already attached --
# confirm against the rest of the script) and drop every row of `tidy_books`
# whose `word` appears in it.
data(stop_words)

# The join key is spelled out so dplyr does not have to guess it: this
# silences the "Joining with `by = ...`" message and keeps the join on `word`
# only, even if the two tables later gain another column with a shared name.
tidy_books <- tidy_books %>%
  anti_join(stop_words, by = "word")
# Count ----
# Tally how often each word occurs in `tidy_books`, most frequent first.
count(tidy_books, word, sort = TRUE)
# Visualization ----
# Bar chart of the words that occur more than 600 times.
library(ggplot2)

tidy_books %>%
  count(word, sort = TRUE) %>%
  filter(n > 600) %>%
  # Reorder the factor levels by count so the bars appear sorted.
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x = word, y = n)) +
  geom_col() +
  # No axis title for the word axis; the words label themselves.
  labs(x = NULL) +
  # Flip the axes so the bars run horizontally.
  coord_flip()