Difference between revisions of "R: stopwords"
Jump to navigation
Jump to search
Onnowpurbo (talk | contribs) (Created page with " install.packages("stopwords") # atau install.packages("devtools") devtools::install_github("quanteda/stopwords") head(stopwords::stopwords("de", source = "snowball")...") |
Onnowpurbo (talk | contribs) |
||
Line 14: | Line 14: | ||
stopwords::stopwords_getlanguages("snowball") | stopwords::stopwords_getlanguages("snowball") | ||
stopwords::stopwords_getlanguages("stopwords-iso") | stopwords::stopwords_getlanguages("stopwords-iso") | ||
+ | |||
+ | |||
+ | |||
+ | |||
+ | documents = c("She had toast for breakfast", | ||
+ | "The coffee this morning was excellent", | ||
+ | "For lunch let's all have pancakes", | ||
+ | "Later in the day, there will be more talks", | ||
+ | "The talks on the first day were great", | ||
+ | "The second day should have good presentations too") | ||
+ | library(tm) | ||
+ | documents <- Corpus(VectorSource(documents)) | ||
+ | documents = tm_map(documents, content_transformer(tolower)) | ||
+ | documents = tm_map(documents, removePunctuation) | ||
+ | documents = tm_map(documents, removeWords, stopwords("english")) | ||
+ | documents | ||
+ | |||
==Pranala Menarik== | ==Pranala Menarik== | ||
* [[R]] | * [[R]] |
Revision as of 12:47, 1 November 2018
# Install the stopwords package from the configured package repository.
install.packages("stopwords")
# Or install it from the GitHub repository instead (via devtools):
#   install.packages("devtools")
#   devtools::install_github("quanteda/stopwords")
# Preview the first 20 stopwords for language code "de" from the
# "snowball" source.
head(stopwords::stopwords("de", source = "snowball"), 20)

# Preview the first 20 stopwords for language code "id" from the
# "stopwords-iso" source.
head(stopwords::stopwords("id", source = "stopwords-iso"), 20)
# List the stopword sources known to the stopwords package.
stopwords::stopwords_getsources()

# List the languages available from the "snowball" source.
stopwords::stopwords_getlanguages("snowball")

# List the languages available from the "stopwords-iso" source.
stopwords::stopwords_getlanguages("stopwords-iso")
# Example: remove English stopwords from a small corpus using the tm package.
library(tm)

# Six short example sentences, one document each.
documents <- c(
  "She had toast for breakfast",
  "The coffee this morning was excellent",
  "For lunch let's all have pancakes",
  "Later in the day, there will be more talks",
  "The talks on the first day were great",
  "The second day should have good presentations too"
)

# Wrap the character vector in a tm corpus so tm_map() can transform it.
documents <- Corpus(VectorSource(documents))

# Lower-case first: tolower is a plain function, so it is wrapped in
# content_transformer() before being passed to tm_map().
documents <- tm_map(documents, content_transformer(tolower))

# NOTE(review): punctuation is stripped before stopword removal, so a
# contraction such as "let's" is already "lets" when stopwords are matched;
# confirm this ordering is intended.
documents <- tm_map(documents, removePunctuation)

# Use tm's own English stopword list; the namespace is spelled out so the call
# cannot be confused with stopwords::stopwords() if that package is attached.
documents <- tm_map(documents, removeWords, tm::stopwords("english"))

# Print the resulting corpus.
documents