Difference between revisions of "R: trigram"

From OnnoWiki
Jump to navigation Jump to search
(Created page with " library(dplyr) library(tidytext) library(janeaustenr) library(tidyr) library(igraph) library(ggplot2) library(ggraph) text <- readtext("out.txt") text_trigrams <- t...")
 
 
(One intermediate revision by the same user not shown)
Line 7: Line 7:
 
  library(ggplot2)
 
  library(ggplot2)
 
  library(ggraph)
 
  library(ggraph)
 +
library(readtext)
  
 
  text <- readtext("out.txt")
 
  text <- readtext("out.txt")
Line 22: Line 23:
 
       count(word1, word2, word3, sort = TRUE)
 
       count(word1, word2, word3, sort = TRUE)
 
  trigram_graph <- trigram_counts %>%
 
  trigram_graph <- trigram_counts %>%
   filter(n > 10) %>%
+
   filter(n > 20) %>%
 
   graph_from_data_frame()
 
   graph_from_data_frame()
 
  trigram_graph
 
  trigram_graph

Latest revision as of 20:51, 4 November 2018

library(dplyr)
library(tidytext)
library(janeaustenr)
library(tidyr)
library(igraph)
library(ggplot2)
library(ggraph)
library(readtext)
# Build a word graph from the trigrams found in "out.txt".
#
# Steps: read the file, tokenise its `text` column into three-word sequences,
# split each sequence into one column per word, drop every trigram that
# contains an Indonesian stopword, count the remaining trigrams, and turn the
# frequent ones into an igraph object.

# Load the input document; the `text` column is what gets tokenised below.
text <- readtext("out.txt")

# One row per trigram (token = "ngrams", n = 3).
text_trigrams <- text %>%
  unnest_tokens(trigram, text, token = "ngrams", n = 3)
text_trigrams

# Split "w1 w2 w3" into three separate columns so each word can be tested.
trigrams_separated <- text_trigrams %>%
  separate(trigram, c("word1", "word2", "word3"), sep = " ")

# Indonesian stopwords. Fetch the list once and reuse it for all three
# positions instead of rebuilding it for every filter step.
stopwords_id <- stopwords::stopwords("id", source = "stopwords-iso")

# Keep only trigrams in which none of the three words is a stopword.
# Several conditions in one filter() are combined with AND, which selects the
# same rows as three chained filter() calls.
trigrams_filtered <- trigrams_separated %>%
  filter(
    !word1 %in% stopwords_id,
    !word2 %in% stopwords_id,
    !word3 %in% stopwords_id
  )

# Frequency of each distinct trigram, most frequent first (column `n`).
trigram_counts <- trigrams_filtered %>%
  count(word1, word2, word3, sort = TRUE)

# Keep only trigrams seen more than 20 times and convert them to a graph.
# NOTE(review): graph_from_data_frame() treats the first two columns
# (word1, word2) as the edge endpoints and the remaining columns (word3, n)
# as edge attributes, so word3 never becomes a node -- confirm this is the
# intended representation of a trigram.
trigram_graph <- trigram_counts %>%
  filter(n > 20) %>%
  graph_from_data_frame()
trigram_graph
# Fix the RNG seed before laying out the graph so repeated runs start from
# the same random state.
set.seed(2017)

# Render the network with the "fr" layout. Layers are added in drawing order:
# edges, then node markers, then the node names as labels anchored at their
# top-right corner (hjust = 1, vjust = 1).
trigram_graph %>%
  ggraph(layout = "fr") +
  geom_edge_link() +
  geom_node_point() +
  geom_node_text(mapping = aes(label = name), hjust = 1, vjust = 1)