Difference between revisions of "R: wordcloud"
Jump to navigation
Jump to search
Onnowpurbo (talk | contribs) (Created page with "The 5 main steps to create word clouds in R # Create a text file # Install and load the required packages # Text mining # Build a term-document matrix # Generate the Word clo...") |
Onnowpurbo (talk | contribs) |
||
| Line 52: | Line 52: | ||
# Text stemming | # Text stemming | ||
# docs <- tm_map(docs, stemDocument) | # docs <- tm_map(docs, stemDocument) | ||
| + | |||
| + | ==Term Document Matrix== | ||
| + | |||
| + | dtm <- TermDocumentMatrix(docs) | ||
| + | m <- as.matrix(dtm) | ||
| + | v <- sort(rowSums(m),decreasing=TRUE) | ||
| + | d <- data.frame(word = names(v),freq=v) | ||
| + | head(d, 10) | ||
| + | |||
| + | ==Wordcloud== | ||
| + | |||
| + | set.seed(1234) | ||
| + | wordcloud(words = d$word, freq = d$freq, min.freq = 1, | ||
| + | max.words=200, random.order=FALSE, rot.per=0.35, | ||
| + | colors=brewer.pal(8, "Dark2")) | ||
Revision as of 11:12, 1 November 2018
The 5 main steps to create word clouds in R
- Create a text file
- Install and load the required packages
- Text mining
- Build a term-document matrix
- Generate the Word cloud
Install Packages
# Install the packages used in this tutorial
install.packages(c(
  "tm",           # for text mining
  "SnowballC",    # for text stemming
  "wordcloud",    # word-cloud generator
  "RColorBrewer"  # color palettes
))
# Attach the packages for the current session
library("tm")
library("SnowballC")
library("wordcloud")
library("RColorBrewer")
Text Mining
# The three `text <- ...` assignments below are alternative ways of loading
# the input. Each one overwrites the previous value of `text`, so keep only
# the one that matches where your text lives.

# Option 1: read from a local file picked interactively
text <- readLines(file.choose())
# Option 2: read from the local file "out.txt"
text <- read.delim("out.txt")
# Option 3: read the text file from the internet
filePath <- "http://www.sthda.com/sthda/RDoc/example-files/martin-luther-king-i-have-a-dream-speech.txt"
text <- readLines(filePath)

# Load the data as a corpus
docs <- Corpus(VectorSource(text))
inspect(docs)
Clean up
# Clean the corpus step by step; every step replaces `docs` with the
# transformed corpus, so the steps run in the order written.
# Convert the text to lower case
docs <- tm_map(docs, content_transformer(tolower))
# Remove numbers
docs <- tm_map(docs, removeNumbers)
# Remove common English stop words
# NOTE(review): presumably this must run before punctuation removal so that
# stop words containing apostrophes still match -- confirm against tm docs.
docs <- tm_map(docs, removeWords, stopwords("english"))
# Remove your own stop words
# (replace the placeholder entries with your own character vector)
docs <- tm_map(docs, removeWords, c("blabla1", "blabla2"))
# Remove punctuation
docs <- tm_map(docs, removePunctuation)
# Eliminate extra white spaces
docs <- tm_map(docs, stripWhitespace)
# Text stemming (disabled; uncomment the next line to enable it)
# docs <- tm_map(docs, stemDocument)
Term Document Matrix
# Build the term-document matrix from the cleaned corpus
dtm <- TermDocumentMatrix(docs)
m <- as.matrix(dtm)
# Sum each row to get a total per word, sorted from most to least frequent
v <- sort(rowSums(m), decreasing = TRUE)
# One row per word with its total frequency
d <- data.frame(word = names(v), freq = v)
# Show the 10 most frequent words
head(d, 10)
Wordcloud
# Fix the random seed before drawing
set.seed(1234)
# Eight colors from the "Dark2" RColorBrewer palette
cloud_colors <- brewer.pal(8, "Dark2")
# Draw the word cloud from the word/frequency table `d`
wordcloud(
  words = d$word,
  freq = d$freq,
  min.freq = 1,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = cloud_colors
)