R: wordcloud dari dua sumber

From OnnoWiki
Jump to navigation Jump to search


library(NLP)
library(RColorBrewer)
library(tm)
library(wordcloud)

# Lower-case `x`, returning NA instead of failing.
#
# tolower() is attempted inside tryCatch(); if it signals an error the
# function yields a single NA, otherwise the lower-cased result is returned.
tryTolower <- function(x) {
  lowered <- tryCatch(tolower(x), error = function(err) err)
  if (inherits(lowered, "error")) {
    return(NA)
  }
  lowered
}
# Words removed from every document: tm's English stop-word list plus a few
# extra terms listed explicitly here.
custom.stopwords <- c(stopwords('english'), 'sorry', 'amp', 'delta', 'amazon')

# Clean a vector of raw text before it is turned into a corpus.
#
# Steps, in order: lower-case (via tryTolower), drop the words in
# `custom.stopwords`, strip punctuation, strip digits, then collapse
# whitespace.
#
# text.vec: vector of text to clean.
# Returns the cleaned vector.
clean.vec <- function(text.vec) {
  text.vec <- tryTolower(text.vec)
  text.vec <- removeWords(text.vec, custom.stopwords)
  text.vec <- removePunctuation(text.vec)
  text.vec <- removeNumbers(text.vec)
  # Collapse whitespace last: every removal step above can leave runs of
  # spaces behind, so doing this earlier would not give single-spaced output.
  text.vec <- stripWhitespace(text.vec)
  text.vec
}
# Read the two sources; both are accessed through their `text` column below.
# stringsAsFactors = FALSE keeps that column as character on R < 4.0 as well.
amzn <- read.csv('amzn_cs.csv', stringsAsFactors = FALSE)
delta <- read.csv('oct_delta.csv', stringsAsFactors = FALSE)

amzn.vec <- clean.vec(amzn$text)
delta.vec <- clean.vec(delta$text)

# Collapse each source into one long string so that every source becomes
# exactly one document in the corpus.
amzn.vec <- paste(amzn.vec, collapse = " ")
delta.vec <- paste(delta.vec, collapse = " ")

# Document order here must match the column names assigned to tdm.m below.
# (The original script then overwrote this with c(delta.vec, delta.vec),
# which compared the Delta text with itself and mislabelled it "Amazon".)
all <- c(amzn.vec, delta.vec)
corpus <- VCorpus(VectorSource(all))

# Term-document matrix: one row per term, one column per source.
tdm <- TermDocumentMatrix(corpus)
tdm.m <- as.matrix(tdm)
colnames(tdm.m) <- c("Amazon", "delta")

# Show every available Brewer palette, then build the colour ramp:
# take 8 shades of "Purples" and drop the first four of them.
display.brewer.all()
pal <- brewer.pal(8, "Purples")
pal <- pal[-(1:4)]

# Plot the terms that the two sources have in common.
commonality.cloud(tdm.m, max.words = 200, random.order = FALSE, colors = pal)


Pranala Menarik