R: bigram

From OnnoWiki
Revision as of 12:34, 31 October 2018 by Onnowpurbo (talk | contribs) (Created page with " library(dplyr) library(tidytext) library(janeaustenr) austen_bigrams <- austen_books() %>% unnest_tokens(bigram, text, token = "ngrams", n = 2) auste...")
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search
library(dplyr)
library(tidytext)
library(janeaustenr)
austen_bigrams <- austen_books() %>%
                  unnest_tokens(bigram, text, token = "ngrams", n = 2)
austen_bigrams
austen_bigrams %>%
      count(bigram, sort = TRUE)


library(tidyr)
bigrams_separated <- austen_bigrams %>%
separate(bigram, c("word1", "word2"), sep = " ")
bigrams_filtered <- bigrams_separated %>%
       filter(!word1 %in% stop_words$word) %>%
       filter(!word2 %in% stop_words$word)
# new bigram counts:
bigram_counts <- bigrams_filtered %>%
      count(word1, word2, sort = TRUE)