Difference between revisions of "R: tidytext Jane Austen Book"

From OnnoWiki
Jump to navigation Jump to search
(Created page with " library(janeaustenr) library(dplyr) library(stringr) original_books <- austen_books() %>% group_by(book) %>% mutate(linenumber = row_number(), chapter =...")
 
 
Line 11: Line 11:
 
   ungroup()
 
   ungroup()
 
  original_books
 
  original_books
 
  
 
  # konversi ke token
 
  # konversi ke token
Line 19: Line 18:
 
   unnest_tokens(word, text)
 
   unnest_tokens(word, text)
 
  tidy_books
 
  tidy_books
 
  
 
  # buang stopwords
 
  # buang stopwords
Line 26: Line 24:
 
  tidy_books <- tidy_books %>%
 
  tidy_books <- tidy_books %>%
 
   anti_join(stop_words)
 
   anti_join(stop_words)
 
  
 
  # count
 
  # count
Line 44: Line 41:
 
   xlab(NULL) +
 
   xlab(NULL) +
 
   coord_flip()
 
   coord_flip()
 
 
  
 
==Pranala Menarik==
 
==Pranala Menarik==
  
 
* [[R]]
 
* [[R]]

Latest revision as of 09:36, 6 November 2018

library(janeaustenr)
library(dplyr)
library(stringr)

# Annotate each line of text with its position inside its book:
#   linenumber - 1-based row index, restarted for every book
#   chapter    - running count of chapter headings seen so far in the book;
#                a heading is a line that starts with "chapter " followed by
#                a digit or one of the letters i, v, x, l, c (any case), so
#                lines before the first heading get chapter 0.
original_books <- austen_books() %>%
  group_by(book) %>%
  mutate(
    linenumber = row_number(),
    chapter = cumsum(str_detect(
      text,
      regex("^chapter [\\divxlc]", ignore_case = TRUE)
    ))
  ) %>%
  ungroup()

# Print the annotated table for inspection.
original_books
# Convert to tokens: split the `text` column so that each row holds a single
# token in a new `word` column.
library(tidytext)
tidy_books <- unnest_tokens(original_books, word, text)

# Print the tokenized table for inspection.
tidy_books
# Remove stop words: keep only the rows of `tidy_books` whose `word` has no
# match in the `stop_words` table.
data(stop_words)
tidy_books <- tidy_books %>%
  # Name the join key explicitly: this silences the "Joining with `by = ...`"
  # message and keeps the join from silently switching keys if either table
  # ever gains another column with a shared name.
  anti_join(stop_words, by = "word")
# Count how often each word occurs, most frequent first (result is printed).
count(tidy_books, word, sort = TRUE)
# Visualization: horizontal bar chart of the words that occur more than
# 600 times.
library(ggplot2)
tidy_books %>%
  count(word, sort = TRUE) %>%
  filter(n > 600) %>%
  # Order the factor levels by frequency so the bars are sorted by count.
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x = word, y = n)) +
  geom_col() +
  # Drop the axis title for the word axis.
  labs(x = NULL) +
  # Flip the axes so the words are listed along the vertical axis.
  coord_flip()

Pranala Menarik