R: tidytext: sentiment analysis basic
Jump to navigation
Jump to search
# Ref: https://github.com/dgrtwo/tidy-text-mining/blob/master/02-sentiment-analysis.Rmd
install.packages("textdata")
library(knitr) opts_chunk$set(message = FALSE, warning = FALSE, cache = TRUE) options(width = 100, dplyr.width = 100) library(ggplot2) theme_set(theme_light())
library(tidytext) get_sentiments("afinn") get_sentiments("bing") get_sentiments("nrc")
## Sentiment analysis with inner join library(janeaustenr) library(dplyr) library(stringr) tidy_books <- austen_books() %>% group_by(book) %>% mutate(linenumber = row_number(), chapter = cumsum(str_detect(text, regex("^chapter [\\divxlc]", ignore_case = TRUE)))) %>% ungroup() %>% unnest_tokens(word, text)
# First, let's use the NRC lexicon and `filter()` for the joy words nrc_joy <- get_sentiments("nrc") %>% filter(sentiment == "joy") tidy_books %>% filter(book == "Emma") %>% inner_join(nrc_joy) %>% count(word, sort = TRUE)
# Next, let's `filter()` the data frame with the text from the books # for the words from *Emma* and then use `inner_join()` # to perform the sentiment analysis. nrc_joy <- nrc %>% filter(sentiment == "joy") tidy_books %>% filter(book == "Emma") %>% inner_join(nrc_joy) %>% count(word, sort = TRUE)
# calculate sentiment using spread() library(tidyr) jane_austen_sentiment <- tidy_books %>% inner_join(get_sentiments("bing")) %>% count(book, index = linenumber %/% 80, sentiment) %>% spread(sentiment, n, fill = 0) %>% mutate(sentiment = positive - negative)
# plot library(ggplot2) ggplot(jane_austen_sentiment, aes(index, sentiment, fill = book)) + geom_col(show.legend = FALSE) + facet_wrap(~book, ncol = 2, scales = "free_x")