Difference between revisions of "R: sentiments analysis"
Jump to navigation
Jump to search
Onnowpurbo (talk | contribs) (Created page with " library(tidytext) sentiments ==Pranala Menarik== * R") |
Onnowpurbo (talk | contribs) |
||
(8 intermediate revisions by the same user not shown) | |||
Line 3: | Line 3: | ||
library(tidytext) | library(tidytext) | ||
sentiments | sentiments | ||
+ | |||
+ | |||
+ | get_sentiments("afinn") | ||
+ | get_sentiments("bing") | ||
+ | get_sentiments("nrc") | ||
+ | |||
+ | |||
+ | library(janeaustenr) | ||
+ | library(dplyr) | ||
+ | library(stringr) | ||
+ | tidy_books <- austen_books() %>% | ||
+ | group_by(book) %>% | ||
+ | mutate(linenumber = row_number(), | ||
+ | chapter = cumsum(str_detect(text, regex("^chapter [\\divxlc]", | ||
+ | ignore_case = TRUE)))) %>% | ||
+ | ungroup() %>% | ||
+ | unnest_tokens(word, text) | ||
+ | |||
+ | |||
+ | |||
+ | nrcjoy <- get_sentiments("nrc") %>% | ||
+ | filter(sentiment == "joy") | ||
+ | tidy_books %>% | ||
+ | filter(book == "Emma") %>% | ||
+ | inner_join(nrcjoy) %>% | ||
+ | count(word, sort = TRUE) | ||
+ | |||
+ | |||
+ | library(tidyr) | ||
+ | janeaustensentiment <- tidy_books %>% | ||
+ | inner_join(get_sentiments("bing")) %>% | ||
+ | count(book, index = linenumber %/% 80, sentiment) %>% | ||
+ | spread(sentiment, n, fill = 0) %>% | ||
+ | mutate(sentiment = positive - negative) | ||
+ | |||
+ | library(ggplot2) | ||
+ | ggplot(janeaustensentiment, aes(index, sentiment, fill = book)) + | ||
+ | geom_col(show.legend = FALSE) + | ||
+ | facet_wrap(~book, ncol = 2, scales = "free_x") | ||
+ | |||
+ | |||
+ | |||
+ | |||
+ | pride_prejudice <- tidy_books %>% | ||
+ | filter(book == "Pride & Prejudice") | ||
+ | pride_prejudice | ||
+ | |||
+ | |||
+ | |||
+ | afinn <- pride_prejudice %>% | ||
+ | inner_join(get_sentiments("afinn")) %>% | ||
+ | group_by(index = linenumber %/% 80) %>% | ||
+ | summarise(sentiment = sum(score)) %>% | ||
+ | mutate(method = "AFINN") | ||
+ | bing_and_nrc <- bind_rows( | ||
+ | pride_prejudice %>% | ||
+ | inner_join(get_sentiments("bing")) %>% | ||
+ | mutate(method = "Bing et al."), | ||
+ | pride_prejudice %>% | ||
+ | inner_join(get_sentiments("nrc") %>% | ||
+ | filter(sentiment %in% c("positive", | ||
+ | "negative"))) %>% | ||
+ | mutate(method = "NRC")) %>% | ||
+ | count(method, index = linenumber %/% 80, sentiment) %>% | ||
+ | spread(sentiment, n, fill = 0) %>% | ||
+ | mutate(sentiment = positive - negative) | ||
+ | |||
+ | |||
+ | bind_rows(afinn, | ||
+ | bing_and_nrc) %>% | ||
+ | ggplot(aes(index, sentiment, fill = method)) + | ||
+ | geom_col(show.legend = FALSE) + | ||
+ | facet_wrap(~method, ncol = 1, scales = "free_y") | ||
+ | |||
+ | |||
+ | get_sentiments("nrc") %>% | ||
+ | filter(sentiment %in% c("positive", | ||
+ | "negative")) %>% | ||
+ | count(sentiment) | ||
+ | |||
+ | get_sentiments("bing") %>% | ||
+ | count(sentiment) | ||
+ | |||
+ | bing_word_counts <- tidy_books %>% | ||
+ | inner_join(get_sentiments("bing")) %>% | ||
+ | count(word, sentiment, sort = TRUE) %>% | ||
+ | ungroup() | ||
+ | bing_word_counts | ||
+ | |||
+ | bing_word_counts %>% | ||
+ | group_by(sentiment) %>% | ||
+ | top_n(10) %>% | ||
+ | ungroup() %>% | ||
+ | mutate(word = reorder(word, n)) %>% | ||
+ | ggplot(aes(word, n, fill = sentiment)) + | ||
+ | geom_col(show.legend = FALSE) + | ||
+ | facet_wrap(~sentiment, scales = "free_y") + | ||
+ | labs(y = "Contribution to sentiment", | ||
+ | x = NULL) + | ||
+ | coord_flip() | ||
+ | |||
+ | |||
+ | custom_stop_words <- bind_rows(data_frame(word = c("miss"), | ||
+ | lexicon = c("custom")), | ||
+ | stop_words) | ||
+ | custom_stop_words | ||
+ | |||
+ | |||
+ | library(wordcloud) | ||
+ | tidy_books %>% | ||
+ | anti_join(stop_words) %>% | ||
+ | count(word) %>% | ||
+ | with(wordcloud(word, n, max.words = 100)) | ||
+ | |||
+ | |||
+ | library(reshape2) | ||
+ | tidy_books %>% | ||
+ | inner_join(get_sentiments("bing")) %>% | ||
+ | count(word, sentiment, sort = TRUE) %>% | ||
+ | acast(word ~ sentiment, value.var = "n", fill = 0) %>% | ||
+ | comparison.cloud(colors = c("gray20", "gray80"), | ||
+ | max.words = 100) | ||
Latest revision as of 17:35, 8 November 2018
library(tidytext) sentiments
get_sentiments("afinn") get_sentiments("bing") get_sentiments("nrc")
library(janeaustenr) library(dplyr) library(stringr) tidy_books <- austen_books() %>% group_by(book) %>% mutate(linenumber = row_number(), chapter = cumsum(str_detect(text, regex("^chapter [\\divxlc]", ignore_case = TRUE)))) %>% ungroup() %>% unnest_tokens(word, text)
nrcjoy <- get_sentiments("nrc") %>% filter(sentiment == "joy") tidy_books %>% filter(book == "Emma") %>% inner_join(nrcjoy) %>% count(word, sort = TRUE)
library(tidyr) janeaustensentiment <- tidy_books %>% inner_join(get_sentiments("bing")) %>% count(book, index = linenumber %/% 80, sentiment) %>% spread(sentiment, n, fill = 0) %>% mutate(sentiment = positive - negative)
library(ggplot2) ggplot(janeaustensentiment, aes(index, sentiment, fill = book)) + geom_col(show.legend = FALSE) + facet_wrap(~book, ncol = 2, scales = "free_x")
pride_prejudice <- tidy_books %>% filter(book == "Pride & Prejudice") pride_prejudice
afinn <- pride_prejudice %>% inner_join(get_sentiments("afinn")) %>% group_by(index = linenumber %/% 80) %>% summarise(sentiment = sum(score)) %>% mutate(method = "AFINN") bing_and_nrc <- bind_rows( pride_prejudice %>% inner_join(get_sentiments("bing")) %>% mutate(method = "Bing et al."), pride_prejudice %>% inner_join(get_sentiments("nrc") %>% filter(sentiment %in% c("positive", "negative"))) %>% mutate(method = "NRC")) %>% count(method, index = linenumber %/% 80, sentiment) %>% spread(sentiment, n, fill = 0) %>% mutate(sentiment = positive - negative)
bind_rows(afinn, bing_and_nrc) %>% ggplot(aes(index, sentiment, fill = method)) + geom_col(show.legend = FALSE) + facet_wrap(~method, ncol = 1, scales = "free_y")
get_sentiments("nrc") %>% filter(sentiment %in% c("positive", "negative")) %>% count(sentiment)
get_sentiments("bing") %>% count(sentiment)
bing_word_counts <- tidy_books %>% inner_join(get_sentiments("bing")) %>% count(word, sentiment, sort = TRUE) %>% ungroup() bing_word_counts
bing_word_counts %>% group_by(sentiment) %>% top_n(10) %>% ungroup() %>% mutate(word = reorder(word, n)) %>% ggplot(aes(word, n, fill = sentiment)) + geom_col(show.legend = FALSE) + facet_wrap(~sentiment, scales = "free_y") + labs(y = "Contribution to sentiment", x = NULL) + coord_flip()
custom_stop_words <- bind_rows(data_frame(word = c("miss"), lexicon = c("custom")), stop_words) custom_stop_words
library(wordcloud) tidy_books %>% anti_join(stop_words) %>% count(word) %>% with(wordcloud(word, n, max.words = 100))
library(reshape2) tidy_books %>% inner_join(get_sentiments("bing")) %>% count(word, sentiment, sort = TRUE) %>% acast(word ~ sentiment, value.var = "n", fill = 0) %>% comparison.cloud(colors = c("gray20", "gray80"), max.words = 100)