Difference between revisions of "R: ngram dan frekuensi-nya"
Jump to navigation
Jump to search
Onnowpurbo (talk | contribs) |
Onnowpurbo (talk | contribs) |
||
Line 3: | Line 3: | ||
library(tau) | library(tau) | ||
library(data.table) | library(data.table) | ||
− | + | ||
createNgram <-function(stringVector, ngramSize){ | createNgram <-function(stringVector, ngramSize){ | ||
Line 17: | Line 17: | ||
} | } | ||
+ | # | ||
+ | text <- readtext("out.txt") | ||
text <- "This is my little R text example and I want to count the frequency of some pattern (and - is - my - of). This is my little R text example and I want to count the frequency of some patter." | text <- "This is my little R text example and I want to count the frequency of some pattern (and - is - my - of). This is my little R text example and I want to count the frequency of some patter." | ||
Line 22: | Line 24: | ||
res | res | ||
+ | # sort | ||
+ | res_sort <- res[order(-freq)] | ||
+ | res_sort | ||
Revision as of 08:09, 5 November 2018
library(tau)
library(data.table)
#' Count word n-grams in text and return them as a frequency table
#'
#' Thin wrapper around `tau::textcnt(method = "string")` that converts the
#' resulting named count vector into a `data.table`.
#'
#' @param stringVector Text to analyse; passed unchanged to `textcnt()`.
#' @param ngramSize A single number >= 1 giving the n-gram size that is
#'   passed to `textcnt()` as `n`.
#' @return A `data.table` with one row per distinct n-gram and the columns
#'   `w1` (when `ngramSize == 1`) or `w1w2` (any other size), `freq` (the
#'   count reported by `textcnt()`) and `length` (number of characters in
#'   the n-gram string).
createNgram <- function(stringVector, ngramSize) {
  # Fail early with a clear message instead of an obscure error further down.
  stopifnot(
    is.numeric(ngramSize),
    length(ngramSize) == 1L,
    !is.na(ngramSize),
    ngramSize >= 1
  )

  # tolower = FALSE keeps the original capitalisation, so "This" and "this"
  # are counted as different tokens.
  ng <- textcnt(
    stringVector,
    method = "string",
    n = ngramSize,
    tolower = FALSE
  )

  tokens <- names(ng)
  # unclass() strips the "textcnt" class so the counts are stored as a plain
  # vector column.
  counts <- unclass(ng)

  # The first column name is part of the function's contract: "w1" for
  # unigrams, "w1w2" for every other size (not only bigrams).
  if (ngramSize == 1) {
    data.table(w1 = tokens, freq = counts, length = nchar(tokens))
  } else {
    data.table(w1w2 = tokens, freq = counts, length = nchar(tokens))
  }
}
# Example: count word pairs in a sample text and list them by frequency.
#
# The revision also tried `text <- readtext("out.txt")` here. It is kept as a
# comment because readtext() is not provided by the packages loaded in this
# script (presumably it comes from the readtext package -- confirm before
# enabling) and because its result was overwritten by the literal below.
# text <- readtext("out.txt")
text <- "This is my little R text example and I want to count the frequency of some pattern (and - is - my - of). This is my little R text example and I want to count the frequency of some patter."

# Build the frequency table for n-grams of size 2 and print it.
res <- createNgram(text, 2)
res

# Sort by descending frequency (data.table evaluates `freq` inside `res`).
res_sort <- res[order(-freq)]
res_sort