R: tidytext: sentiment analysis basic

# Ref: https://github.com/dgrtwo/tidy-text-mining/blob/master/02-sentiment-analysis.Rmd

# Install the textdata package only when it is not already available,
# so that re-running the script does not trigger a reinstall every time.
if (!requireNamespace("textdata", quietly = TRUE)) {
  install.packages("textdata")
}

# Session setup: attach knitr and ggplot2, then configure chunk options,
# console width, and the default plot theme.
library(knitr)

# Suppress messages and warnings in knitted chunks and cache chunk results.
opts_chunk$set(
  message = FALSE,
  warning = FALSE,
  cache = TRUE
)

# Widen console output and dplyr's tibble printing to 100 characters.
options(
  width = 100,
  dplyr.width = 100
)

library(ggplot2)

# Make theme_light() the default theme for every subsequent plot.
theme_set(new = theme_light())

# Attach tidytext and preview the three sentiment lexicons used below.
library(tidytext)

# Each call is a top-level expression so that its result is auto-printed.
get_sentiments(lexicon = "afinn")
get_sentiments(lexicon = "bing")
get_sentiments(lexicon = "nrc")

## Sentiment analysis with inner join
library(janeaustenr)
library(dplyr)
library(stringr)

# Build a one-word-per-row table of the Austen novels. Before tokenising,
# every line is tagged with its line number within the book and with a
# running chapter counter. The helper values live inside local() so that
# only `tidy_books` is created in the calling environment.
tidy_books <- local({
  # Pattern for lines that start a chapter: "chapter " followed by a digit
  # or one of the letters i, v, x, l, c, matched case-insensitively.
  chapter_heading <- regex("^chapter [\\divxlc]", ignore_case = TRUE)

  numbered_lines <- mutate(
    group_by(austen_books(), book),
    linenumber = row_number(),
    # Cumulative count of chapter-heading lines seen so far in this book.
    chapter = cumsum(str_detect(text, chapter_heading))
  )

  # Drop the grouping, then split the `text` column into a `word` column.
  unnest_tokens(ungroup(numbered_lines), word, text)
})

# Keep only the NRC lexicon rows whose sentiment is "joy".
nrc_joy <- filter(get_sentiments("nrc"), sentiment == "joy")

# Restrict the tokens to the book "Emma", keep the words that also appear
# in the joy lexicon, and count them with the most frequent first.
count(
  inner_join(filter(tidy_books, book == "Emma"), nrc_joy),
  word,
  sort = TRUE
)

# Build the joy lexicon from NRC, then `filter()` the tokenised books down
# to *Emma* and use `inner_join()` to keep only the joy words before
# counting them.
# The lexicon is fetched with get_sentiments("nrc"): no object called `nrc`
# is defined in this script, so referring to it directly fails with
# "object 'nrc' not found".
nrc_joy <- get_sentiments("nrc") %>%
  filter(sentiment == "joy")
tidy_books %>%
  filter(book == "Emma") %>%
  inner_join(nrc_joy) %>%
  count(word, sort = TRUE)

# Calculate net sentiment per 80-line section of each book.
# pivot_wider() replaces the superseded spread(); values_fill = 0 plays the
# role of spread()'s fill = 0 for sections that lack one of the sentiments.
library(tidyr)
jane_austen_sentiment <- tidy_books %>%
  # Join on the shared `word` column explicitly rather than relying on
  # dplyr to guess the key.
  inner_join(get_sentiments("bing"), by = "word") %>%
  # Integer division groups the lines into consecutive 80-line sections.
  count(book, index = linenumber %/% 80, sentiment) %>%
  pivot_wider(names_from = sentiment, values_from = n, values_fill = 0) %>%
  # Net sentiment: positive word count minus negative word count.
  mutate(sentiment = positive - negative)

# Plot net sentiment against section index: one bar per section, one panel
# per book in a two-column layout, each panel with its own x-axis range.
library(ggplot2)
ggplot(
  data = jane_austen_sentiment,
  mapping = aes(x = index, y = sentiment, fill = book)
) +
  # The legend is hidden; each panel is already faceted by book.
  geom_col(show.legend = FALSE) +
  facet_wrap(facets = ~book, ncol = 2, scales = "free_x")

Referensi

https://github.com/dgrtwo/tidy-text-mining/blob/master/02-sentiment-analysis.Rmd

Pranala Menarik

R

R: tidytext: sentiment analysis basic

Referensi

Pranala Menarik

Navigation menu

Search