R: tidytext: sentiment analysis basic
Jump to navigation
Jump to search
# Ref: https://github.com/dgrtwo/tidy-text-mining/blob/master/02-sentiment-analysis.Rmd
install.packages("textdata")
library(knitr) opts_chunk$set(message = FALSE, warning = FALSE, cache = TRUE) options(width = 100, dplyr.width = 100) library(ggplot2) theme_set(theme_light())
library(tidytext)
get_sentiments("afinn")
get_sentiments("bing")
get_sentiments("nrc")
## Sentiment analysis with inner join
library(janeaustenr)
library(dplyr)
library(stringr)
tidy_books <- austen_books() %>%
group_by(book) %>%
mutate(linenumber = row_number(),
chapter = cumsum(str_detect(text, regex("^chapter [\\divxlc]",
ignore_case = TRUE)))) %>%
ungroup() %>%
unnest_tokens(word, text)
# First, let's use the NRC lexicon and `filter()` for the joy words
nrc_joy <- get_sentiments("nrc") %>%
filter(sentiment == "joy")
tidy_books %>%
filter(book == "Emma") %>%
inner_join(nrc_joy) %>%
count(word, sort = TRUE)
# Next, let's `filter()` the data frame with the text from the books # for the words from *Emma* and then use `inner_join()` # to perform the sentiment analysis. nrc_joy <- nrc %>% filter(sentiment == "joy") tidy_books %>% filter(book == "Emma") %>% inner_join(nrc_joy) %>% count(word, sort = TRUE)
# calculate sentiment using spread()
library(tidyr)
jane_austen_sentiment <- tidy_books %>%
inner_join(get_sentiments("bing")) %>%
count(book, index = linenumber %/% 80, sentiment) %>%
spread(sentiment, n, fill = 0) %>%
mutate(sentiment = positive - negative)
# plot library(ggplot2) ggplot(jane_austen_sentiment, aes(index, sentiment, fill = book)) + geom_col(show.legend = FALSE) + facet_wrap(~book, ncol = 2, scales = "free_x")