R: ngram dan frekuensinya
Revision as of 07:40, 5 November 2018 by Onnowpurbo (talk | contribs)
library(tau) library(data.table) # given a string vector and size of ngrams this function returns word ngrams with corresponding frequencies
#' Count word n-grams in a character vector
#'
#' Thin wrapper around `textcnt()` (package tau, attached at the top of this
#' file) that returns the counts as a `data.table`.
#'
#' @param stringVector Character vector holding the text to analyse; passed
#'   unchanged to `textcnt()`.
#' @param ngramSize Single number >= 1, forwarded as `n` to `textcnt()`
#'   (with `method = "string"` this is the number of words per n-gram).
#' @return A `data.table` with three columns: the n-gram text (named `w1`
#'   when `ngramSize == 1`, otherwise `w1w2`), `freq` (the count reported by
#'   `textcnt()`) and `length` (number of characters in the n-gram text).
createNgram <- function(stringVector, ngramSize) {
  # Fail early with a readable message instead of an opaque error from the
  # `if` below when a vector or NA is supplied.
  if (!is.numeric(ngramSize) || length(ngramSize) != 1L ||
      is.na(ngramSize) || ngramSize < 1) {
    stop("ngramSize must be a single number >= 1", call. = FALSE)
  }

  # tolower = FALSE keeps the original letter case, so "This" and "this"
  # are counted as different tokens.
  ng <- textcnt(
    stringVector,
    method = "string",
    n = ngramSize,
    tolower = FALSE
  )

  grams <- names(ng)
  if (is.null(grams)) {
    # An unnamed (empty) result would make data.table() drop the n-gram
    # column entirely; keep the column so the result shape is stable.
    grams <- character(0)
  }
  counts <- unclass(ng)
  widths <- nchar(grams)

  # Column names are part of the existing interface: `w1` for single words,
  # `w1w2` for every larger n-gram size.
  if (ngramSize == 1) {
    data.table(w1 = grams, freq = counts, length = widths)
  } else {
    data.table(w1w2 = grams, freq = counts, length = widths)
  }
}
# Demo: count the two-word n-grams of a short sample text.
# The sample consists of two sentences; paste() joins them with one space.
text <- paste(
  "This is my little R text example and I want to count the frequency of some pattern (and - is - my - of).",
  "This is my little R text example and I want to count the frequency of some patter."
)

# Bigram table for the sample text.
res <- createNgram(text, 2)

# Auto-print the result when run at top level.
res