R: evaluasi twit
Jump to navigation
Jump to search
maruti_tweets = userTimeline("Maruti_suzuki", n=1000,cainfo="cacert.pem")
hyundai_tweets = userTimeline("HyundaiIndia", n=1000,cainfo="cacert.pem")
tata_tweets = userTimeline("TataMotors", n=1000,cainfo="cacert.pem")
toyota_tweets = userTimeline("Toyota_India", n=1000,cainfo="cacert.pem")
# get text
maruti_txt = sapply(maruti_tweets, function(x) x$getText())
hyundai_txt = sapply(hyundai_tweets, function(x) x$getText())
tata_txt = sapply(tata_tweets, function(x) x$getText())
toyota_txt = sapply(toyota_tweets, function(x) x$getText())
clean.text = function(x)
{
# tolower
x = tolower(x)
# remove rt
x = gsub("rt", "", x)
# remove at
x = gsub("@\\w+", "", x)
# remove punctuation
x = gsub("punct:", "", x)
# remove numbers
x = gsub("digit:", "", x)
# remove links http
x = gsub("http\\w+", "", x)
# remove tabs
x = gsub("[ |\t]{2,}", "", x)
# remove blank spaces at the beginning
x = gsub("^ ", "", x)
# remove blank spaces at the end
x = gsub(" $", "", x)
return(x)
}
# clean texts maruti_clean = clean.text(maruti_txt) hyundai_clean = clean.text(hyundai_txt) tata_clean = clean.text(tata_txt) toyota_clean = clean.text(toyota_txt) maruti = paste(maruti_clean, collapse=" ")