Difference between revisions of "R: sentiments analysis"
Jump to navigation
Jump to search
Onnowpurbo (talk | contribs) |
Onnowpurbo (talk | contribs) |
||
Line 76: | Line 76: | ||
geom_col(show.legend = FALSE) + | geom_col(show.legend = FALSE) + | ||
facet_wrap(~method, ncol = 1, scales = "free_y") | facet_wrap(~method, ncol = 1, scales = "free_y") | ||
+ | |||
+ | |||
+ | get_sentiments("nrc") %>% | ||
+ | filter(sentiment %in% c("positive", | ||
+ | "negative")) %>% | ||
+ | count(sentiment) | ||
+ | |||
+ | get_sentiments("bing") %>% | ||
+ | count(sentiment) | ||
+ | |||
+ | bing_word_counts <- tidy_books %>% | ||
+ | inner_join(get_sentiments("bing")) %>% | ||
+ | count(word, sentiment, sort = TRUE) %>% | ||
+ | ungroup() | ||
+ | bing_word_counts | ||
+ | |||
+ | bing_word_counts %>% | ||
+ | group_by(sentiment) %>% | ||
+ | top_n(10) %>% | ||
+ | ungroup() %>% | ||
+ | mutate(word = reorder(word, n)) %>% | ||
+ | ggplot(aes(word, n, fill = sentiment)) + | ||
+ | geom_col(show.legend = FALSE) + | ||
+ | facet_wrap(~sentiment, scales = "free_y") + | ||
+ | labs(y = "Contribution to sentiment", | ||
+ | x = NULL) + | ||
+ | coord_flip() | ||
+ | |||
+ | |||
+ | |||
+ | |||
+ | |||
Revision as of 17:25, 8 November 2018
# Attach tidytext, which supplies the tokeniser and the sentiment helpers
# used throughout this script.
library(tidytext)

# Print the `sentiments` object for a first look at its contents
# (presumably the lexicon table shipped with tidytext -- verify).
sentiments
# Print each of the three lexicons in turn so their layouts can be compared.
get_sentiments("afinn")
get_sentiments("bing")
get_sentiments("nrc")
library(janeaustenr)
library(dplyr)
library(stringr)

# Build a one-word-per-row table from austen_books().
# Within each book, every line is tagged with its running line number and a
# chapter counter; the counter increases by one on each line that matches the
# chapter-heading pattern (case-insensitive). The text is then split into
# words with unnest_tokens().
tidy_books <- austen_books() %>%
  group_by(book) %>%
  mutate(
    linenumber = row_number(),
    chapter = cumsum(
      str_detect(
        string = text,
        pattern = regex("^chapter [\\divxlc]", ignore_case = TRUE)
      )
    )
  ) %>%
  ungroup() %>%
  unnest_tokens(word, text)
# Keep only the NRC lexicon entries labelled "joy".
nrcjoy <- get_sentiments("nrc") %>%
  filter(sentiment == "joy")

# Count the joy words that occur in "Emma", most frequent first.
# The join key is given explicitly: `word` is the column shared by the token
# table and the lexicon, and naming it avoids the "Joining, by = ..." message
# and any accidental match on a future same-named column.
tidy_books %>%
  filter(book == "Emma") %>%
  inner_join(nrcjoy, by = "word") %>%
  count(word, sort = TRUE)
library(tidyr)

# Net Bing sentiment per 80-line section of every book.
# Steps: match words against the Bing lexicon, count positive and negative
# words per (book, section), put the two counts in separate columns (0 when
# a section has none of one kind), then subtract.
janeaustensentiment <- tidy_books %>%
  # Explicit key: `word` is the only column the two tables are meant to share.
  inner_join(get_sentiments("bing"), by = "word") %>%
  # Integer division maps line numbers 0-79 to section 0, 80-159 to 1, ...
  count(book, index = linenumber %/% 80, sentiment) %>%
  spread(sentiment, n, fill = 0) %>%
  mutate(sentiment = positive - negative)
library(ggplot2)

# One bar chart per book: section index on the x axis, net sentiment on the
# y axis. Each facet gets its own x scale; the fill legend is suppressed
# because the facet titles already name the book.
ggplot(
  data = janeaustensentiment,
  mapping = aes(x = index, y = sentiment, fill = book)
) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~book, ncol = 2, scales = "free_x")
# Restrict the token table to a single novel for the lexicon comparison.
pride_prejudice <- filter(tidy_books, book == "Pride & Prejudice")

# Show the result.
pride_prejudice
# Net sentiment of "Pride & Prejudice" per 80-line section, computed with
# three different lexicons so the methods can be compared.

# AFINN: section sentiment is the sum of the per-word numeric scores.
# Depending on the installed tidytext/textdata release, the score column is
# named either `score` (older) or `value` (newer). It is renamed to `value`
# when needed so the summary works with both.
afinn <- pride_prejudice %>%
  inner_join(
    get_sentiments("afinn") %>%
      setNames(sub("^score$", "value", names(.))),
    by = "word"
  ) %>%
  group_by(index = linenumber %/% 80) %>%
  summarise(sentiment = sum(value)) %>%
  mutate(method = "AFINN")

# Bing and NRC label words as positive/negative rather than scoring them, so
# section sentiment is (number of positive words) - (number of negative words).
# NRC is first restricted to its "positive"/"negative" labels.
# Join keys are given explicitly to avoid the implicit natural-join message.
bing_and_nrc <- bind_rows(
  pride_prejudice %>%
    inner_join(get_sentiments("bing"), by = "word") %>%
    mutate(method = "Bing et al."),
  pride_prejudice %>%
    inner_join(
      get_sentiments("nrc") %>%
        filter(sentiment %in% c("positive", "negative")),
      by = "word"
    ) %>%
    mutate(method = "NRC")
) %>%
  count(method, index = linenumber %/% 80, sentiment) %>%
  spread(sentiment, n, fill = 0) %>%
  mutate(sentiment = positive - negative)
# Stack the per-section results of all three methods and draw one panel per
# method, one above the other. Each panel has its own y scale.
bind_rows(afinn, bing_and_nrc) %>%
  ggplot(mapping = aes(x = index, y = sentiment, fill = method)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~method, ncol = 1, scales = "free_y")
# How many NRC entries are labelled positive and how many negative?
get_sentiments("nrc") %>%
  filter(
    sentiment %in% c("positive", "negative")
  ) %>%
  count(sentiment)
# Number of Bing lexicon entries per sentiment label.
get_sentiments("bing") %>%
  count(sentiment)
# Frequency of every Bing-lexicon word across all books, together with its
# sentiment label, sorted from most to least frequent.
bing_word_counts <- tidy_books %>%
  # Explicit key: the tables are matched on `word` only.
  inner_join(get_sentiments("bing"), by = "word") %>%
  count(word, sentiment, sort = TRUE) %>%
  ungroup()

# Show the result.
bing_word_counts
# Plot the ten most frequent words for each sentiment label (ties at the
# cut-off are all kept), one panel per label.
bing_word_counts %>%
  group_by(sentiment) %>%
  # Rank by the count column explicitly rather than relying on top_n()'s
  # "last column" default, which also emits a "Selecting by n" message.
  top_n(10, n) %>%
  ungroup() %>%
  # Order the word factor by count so bars are sorted within the plot.
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(word, n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~sentiment, scales = "free_y") +
  labs(
    y = "Contribution to sentiment",
    x = NULL
  ) +
  # Flip so the words run down the vertical axis and stay readable.
  coord_flip()