Difference between revisions of "R: bigram"

From OnnoWiki
Jump to navigation Jump to search
Line 49: Line 49:
 
     arrange(desc(tf_idf))
 
     arrange(desc(tf_idf))
 
  bigram_tf_idf
 
  bigram_tf_idf
 +
 +
 +
 +
==Pranala Menarik==
 +
 +
* [[R]]

Revision as of 12:43, 31 October 2018

library(dplyr)
library(tidytext)
library(janeaustenr)
# Tokenize the text of the Jane Austen novels into bigrams (overlapping
# pairs of consecutive words), producing one row per bigram.
#
# With recent tidytext versions, an input line containing fewer than two
# words yields a single NA bigram. Those rows are not real word pairs, and
# because `NA %in% stop_words$word` is FALSE they would pass any later
# stop-word filter and show up as a spurious "NA NA" bigram, so they are
# dropped here at the source.
austen_bigrams <- austen_books() %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
  filter(!is.na(bigram))
austen_bigrams

# Most frequent bigrams over all books, most common first.
austen_bigrams %>%
  count(bigram, sort = TRUE)


library(tidyr)
# Split each bigram on the single space into its two component words.
bigrams_separated <- austen_bigrams %>%
  separate(bigram, into = c("word1", "word2"), sep = " ")

# Keep a bigram only when neither of its words is a stop word.
bigrams_filtered <- bigrams_separated %>%
  filter(
    !(word1 %in% stop_words$word | word2 %in% stop_words$word)
  )

# Bigram counts after stop-word removal, most common first.
bigram_counts <- bigrams_filtered %>%
  count(word1, word2, sort = TRUE)



# Paste the two filtered word columns back into one space-separated
# `bigram` column (the inverse of the earlier separate() step).
bigrams_united <- unite(
  bigrams_filtered,
  col = bigram,
  word1, word2,
  sep = " "
)
bigrams_united



# Most common trigrams (three consecutive words) in which none of the three
# words is a stop word.
#
# Lines with fewer than three words produce an NA trigram in recent tidytext
# versions. After separate() that would become an all-NA row that passes the
# stop-word filter (NA is not in the stop-word list) and gets counted, so NA
# trigrams are removed before splitting.
austen_books() %>%
  unnest_tokens(trigram, text, token = "ngrams", n = 3) %>%
  filter(!is.na(trigram)) %>%
  separate(trigram, into = c("word1", "word2", "word3"), sep = " ") %>%
  filter(
    !word1 %in% stop_words$word,
    !word2 %in% stop_words$word,
    !word3 %in% stop_words$word
  ) %>%
  count(word1, word2, word3, sort = TRUE)


# Score every bigram by tf-idf, treating each book as one document:
#   1. count occurrences of each bigram within each book,
#   2. attach the tf, idf and tf_idf columns,
#   3. order so the highest tf_idf values come first.
bigram_tf_idf <- bigrams_united %>%
  count(book, bigram) %>%
  bind_tf_idf(term = bigram, document = book, n = n) %>%
  arrange(desc(tf_idf))

bigram_tf_idf


Pranala Menarik