Difference between revisions of "R: trigram"
Jump to navigation
Jump to search
Onnowpurbo (talk | contribs) (Created page with " library(dplyr) library(tidytext) library(janeaustenr) library(tidyr) library(igraph) library(ggplot2) library(ggraph) text <- readtext("out.txt") text_trigrams <- t...") |
Onnowpurbo (talk | contribs) |
||
(One intermediate revision by the same user not shown) | |||
Line 7: | Line 7: | ||
library(ggplot2) | library(ggplot2) | ||
library(ggraph) | library(ggraph) | ||
+ | library(readtext) | ||
text <- readtext("out.txt") | text <- readtext("out.txt") | ||
Line 22: | Line 23: | ||
count(word1, word2, word3, sort = TRUE) | count(word1, word2, word3, sort = TRUE) | ||
trigram_graph <- trigram_counts %>% | trigram_graph <- trigram_counts %>% | ||
− | filter(n > | + | filter(n > 20) %>% |
graph_from_data_frame() | graph_from_data_frame() | ||
trigram_graph | trigram_graph |
Latest revision as of 20:51, 4 November 2018
# Trigram network for an Indonesian-language text file.
#
# Reads "out.txt", splits it into word trigrams, drops every trigram that
# contains an Indonesian stop word, counts the remaining trigrams, and plots
# the ones that occur more than `min_count` times as a network graph.

library(dplyr)
library(tidytext)
library(janeaustenr)
library(tidyr)
library(igraph)
library(ggplot2)
library(ggraph)
library(readtext)

# Settings ----
# Only trigrams seen strictly more than this many times are drawn.
min_count <- 20

# Load the corpus ----
# unnest_tokens() below tokenises the `text` column of this object.
text <- readtext("out.txt")

# Tokenise into trigrams ----
text_trigrams <- text %>%
  unnest_tokens(trigram, text, token = "ngrams", n = 3) %>%
  # A document with fewer than three words can yield an NA trigram; drop it
  # so it does not turn into an all-NA row in the counts further down.
  filter(!is.na(trigram))
text_trigrams

# Split each trigram into its three words ----
trigrams_separated <- text_trigrams %>%
  separate(trigram, c("word1", "word2", "word3"), sep = " ")

# Remove Indonesian stop words ----
# Look the list up once instead of once per filtered column.
id_stopwords <- stopwords::stopwords("id", source = "stopwords-iso")

# A trigram is kept only when none of its three words is a stop word.
trigrams_filtered <- trigrams_separated %>%
  filter(
    !word1 %in% id_stopwords,
    !word2 %in% id_stopwords,
    !word3 %in% id_stopwords
  )

# Count trigrams, most frequent first ----
trigram_counts <- trigrams_filtered %>%
  count(word1, word2, word3, sort = TRUE)

# Build the graph ----
# NOTE(review): graph_from_data_frame() takes the first two columns as the
# edge list, so edges run word1 -> word2 while word3 and n are carried only
# as edge attributes; word3 never becomes a node. Confirm this is intended.
trigram_graph <- trigram_counts %>%
  filter(n > min_count) %>%
  graph_from_data_frame()
trigram_graph

# Plot ----
# Fixed seed so the "fr" layout is reproducible between runs.
set.seed(2017)

ggraph(trigram_graph, layout = "fr") +
  geom_edge_link() +
  geom_node_point() +
  geom_node_text(aes(label = name), vjust = 1, hjust = 1)