R: wordcloud
The 5 main steps to create word clouds in R
- Create a text file
- Install and load the required packages
- Text mining
- Build a term-document matrix
- Generate the Word cloud
Install Packages
# Install
install.packages("tm")           # for text mining
install.packages("SnowballC")    # for text stemming
install.packages("wordcloud")    # word-cloud generator
install.packages("RColorBrewer") # color palettes

# Load
library("tm")
library("SnowballC")
library("wordcloud")
library("RColorBrewer")
library(readtext)
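The later steps also call readtext() and stopwords::stopwords(), so those two packages need to be installed as well; a minimal sketch, not part of the original install list:

# Also used further down this page
install.packages("readtext")   # readtext(): read plain-text files into R
install.packages("stopwords")  # stopwords::stopwords(): multilingual stopword lists, incl. Indonesian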
Text Mining
# From a local file: pick it interactively ...
text <- readLines(file.choose())
# ... or read a known file; readtext() returns a data frame, so keep only the text column
text <- readtext("out.txt")$text
# Read the text file from the internet
filePath <- "http://www.sthda.com/sthda/RDoc/example-files/martin-luther-king-i-have-a-dream-speech.txt"
text <- readLines(filePath)
# Load the data as a corpus
docs <- Corpus(VectorSource(text))
inspect(docs)
Clean up
# Convert the text to lower case
docs <- tm_map(docs, content_transformer(tolower))
# Remove numbers
docs <- tm_map(docs, removeNumbers)
# Remove common English stopwords
docs <- tm_map(docs, removeWords, stopwords("english"))
# Remove common Indonesian stopwords
docs <- tm_map(docs, removeWords, stopwords::stopwords("id", source = "stopwords-iso"))
# Remove your own stop words
# (specify your stopwords as a character vector)
docs <- tm_map(docs, removeWords, c("blabla1", "blabla2"))
docs <- tm_map(docs, removeWords, c("tingkat", "bangun", "kembang", "laksana", "sasar"))
# Remove punctuation
docs <- tm_map(docs, removePunctuation)
# Eliminate extra white space
docs <- tm_map(docs, stripWhitespace)
# Text stemming (optional)
# docs <- tm_map(docs, stemDocument)
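If the input contains separators such as "/" or "@", a small custom transformer can turn them into spaces before the cloud is built. A minimal sketch; the two patterns are assumptions, adjust them to your text:

# Optional: replace assumed separator characters with spaces
toSpace <- content_transformer(function(x, pattern) gsub(pattern, " ", x))
docs <- tm_map(docs, toSpace, "/")
docs <- tm_map(docs, toSpace, "@")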
Term Document Matrix
dtm <- TermDocumentMatrix(docs)
m <- as.matrix(dtm)
v <- sort(rowSums(m), decreasing = TRUE)
d <- data.frame(word = names(v), freq = v)
head(d, 10)
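Before drawing the cloud, a quick bar chart of the most frequent terms is an easy way to check that the cleaning steps worked. A minimal sketch using base R, not part of the original page:

# Bar chart of the 10 most frequent words
barplot(d[1:10, ]$freq, names.arg = d[1:10, ]$word, las = 2,
        col = "lightblue", main = "Most frequent words",
        ylab = "Word frequency")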
Wordcloud
set.seed(1234)
wordcloud(words = d$word, freq = d$freq, min.freq = 1,
          max.words = 200, random.order = FALSE, rot.per = 0.35,
          colors = brewer.pal(8, "Dark2"))
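To keep the result as an image file, the same call can be wrapped in a graphics device; a minimal sketch where the file name and dimensions are assumptions:

# Optional: write the cloud to a PNG file ("wordcloud.png" is an assumed name)
png("wordcloud.png", width = 800, height = 800)
wordcloud(words = d$word, freq = d$freq, min.freq = 1,
          max.words = 200, random.order = FALSE, rot.per = 0.35,
          colors = brewer.pal(8, "Dark2"))
dev.off()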