R: tidytext: sentiment Most common positive and negative words
Jump to navigation
Jump to search
# Ref: https://github.com/dgrtwo/tidy-text-mining/blob/master/02-sentiment-analysis.Rmd
library(knitr) opts_chunk$set(message = FALSE, warning = FALSE, cache = TRUE) options(width = 100, dplyr.width = 100) library(ggplot2) theme_set(theme_light())
bing_word_counts <- tidy_books %>% inner_join(get_sentiments("bing")) %>% count(word, sentiment, sort = TRUE) %>% ungroup() bing_word_counts
# plot bing_word_counts %>% group_by(sentiment) %>% top_n(10) %>% ungroup() %>% mutate(word = reorder(word, n)) %>% ggplot(aes(word, n, fill = sentiment)) + geom_col(show.legend = FALSE) + facet_wrap(~sentiment, scales = "free_y") + labs(y = "Contribution to sentiment", x = NULL) + coord_flip()
# spot an anomaly in the sentiment analysis; # the word "miss" is coded as negative but it is used as a title for # young, unmarried women in Jane Austen's work custom_stop_words <- bind_rows(tibble(word = c("miss"), lexicon = c("custom")), stop_words) custom_stop_words