R: sentiment analysis
Revision as of 17:25, 8 November 2018 by Onnowpurbo (talk | contribs)
# Attach tidytext, which supplies the `sentiments` dataset and the
# get_sentiments() / unnest_tokens() helpers used throughout this script.
library(tidytext)

# Print the lexicon table bundled with the package.
sentiments
# Look up each of the three lexicons by name. At top level every call
# auto-prints the table it returns, so the lexicons can be compared by eye.
# NOTE(review): recent tidytext releases fetch AFINN and NRC through the
# textdata package and ask for licence confirmation on first use -- confirm
# for the installed version.
get_sentiments("afinn")
get_sentiments("bing")
get_sentiments("nrc")
library(janeaustenr)
library(dplyr)
library(stringr)

# Build a one-word-per-row table of the Austen novels.
#   linenumber : running row number of the source line within its book
#   chapter    : running count of chapter headings seen so far in the book
#                (a heading is a line starting with "chapter" followed by a
#                digit or a roman-numeral letter, case-insensitively)
# unnest_tokens() comes from tidytext, attached earlier in this script; it
# replaces the `text` column with a `word` column, one row per token.
tidy_books <- austen_books() %>%
  group_by(book) %>%
  mutate(
    linenumber = row_number(),
    chapter = cumsum(
      str_detect(text, regex("^chapter [\\divxlc]", ignore_case = TRUE))
    )
  ) %>%
  ungroup() %>%
  unnest_tokens(word, text)
# Keep only the NRC lexicon entries tagged with the "joy" sentiment.
nrcjoy <- get_sentiments("nrc") %>%
  filter(sentiment == "joy")

# Count the joy words that occur in "Emma", most frequent first.
# The join key is spelled out so the match does not depend on whichever
# columns the two tables happen to share.
tidy_books %>%
  filter(book == "Emma") %>%
  inner_join(nrcjoy, by = "word") %>%
  count(word, sort = TRUE)
library(tidyr)

# Net Bing sentiment per 80-line section of every book.
#   index     : section number, i.e. linenumber integer-divided by 80
#   positive / negative : word counts per section (0 where a section has
#                         no words of that polarity)
#   sentiment : positive - negative
janeaustensentiment <- tidy_books %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  count(book, index = linenumber %/% 80, sentiment) %>%
  spread(sentiment, n, fill = 0) %>%
  mutate(sentiment = positive - negative)
library(ggplot2)

# Bar chart of net sentiment along each novel: one facet per book in a
# two-column grid. The x axis is freed per facet because the books have
# different numbers of sections; the legend is dropped since the facet
# titles already name the books.
ggplot(janeaustensentiment, aes(index, sentiment, fill = book)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~book, ncol = 2, scales = "free_x")
# Restrict the tidy word table to a single novel for the lexicon comparison.
pride_prejudice <- tidy_books %>%
  filter(book == "Pride & Prejudice")

# Show the subset.
pride_prejudice
# --- AFINN: numeric word scores, summed per 80-line section ---------------
# tidytext releases disagree on the name of AFINN's numeric column (`score`
# in older ones, `value` in newer ones). Normalise it to `value` so the
# summary below runs against either.
afinn_lexicon <- get_sentiments("afinn")
names(afinn_lexicon)[names(afinn_lexicon) == "score"] <- "value"

afinn <- pride_prejudice %>%
  inner_join(afinn_lexicon, by = "word") %>%
  group_by(index = linenumber %/% 80) %>%
  summarise(sentiment = sum(value)) %>%
  mutate(method = "AFINN")

# --- Bing and NRC: categorical lexicons, net = positive - negative --------
# NRC carries several sentiment categories; only its positive/negative
# entries are kept so that it is comparable with Bing.
bing_and_nrc <- bind_rows(
  pride_prejudice %>%
    inner_join(get_sentiments("bing"), by = "word") %>%
    mutate(method = "Bing et al."),
  pride_prejudice %>%
    inner_join(
      get_sentiments("nrc") %>%
        filter(sentiment %in% c("positive", "negative")),
      by = "word"
    ) %>%
    mutate(method = "NRC")
) %>%
  count(method, index = linenumber %/% 80, sentiment) %>%
  spread(sentiment, n, fill = 0) %>%
  mutate(sentiment = positive - negative)
# Stack the per-section results of the three lexicons and draw one bar-chart
# panel per method, stacked vertically, each with its own y scale.
ggplot(
  data = bind_rows(afinn, bing_and_nrc),
  mapping = aes(index, sentiment, fill = method)
) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~method, ncol = 1, scales = "free_y")
# Tally how many NRC entries are tagged positive versus negative.
get_sentiments("nrc") %>%
  filter(sentiment %in% c("positive", "negative")) %>%
  count(sentiment)
# Same tally for the Bing lexicon: number of entries per sentiment label.
count(get_sentiments("bing"), sentiment)
# How often each Bing-lexicon word occurs across all books, together with
# its sentiment label, most frequent first.
bing_word_counts <- tidy_books %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  count(word, sentiment, sort = TRUE) %>%
  ungroup()

# Show the counts.
bing_word_counts
# Plot the most frequent words of each sentiment as horizontal bars, one
# facet per sentiment.
bing_word_counts %>%
  group_by(sentiment) %>%
  # Rank by the count column explicitly rather than relying on top_n()'s
  # "last column" default. top_n() keeps ties, so a group may hold more
  # than ten rows.
  top_n(10, n) %>%
  ungroup() %>%
  # Order the word factor by count so the bars come out sorted.
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(word, n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~sentiment, scales = "free_y") +
  labs(y = "Contribution to sentiment", x = NULL) +
  coord_flip()