Difference between revisions of "R: ngram dan frekuensi-nya"

From OnnoWiki
Jump to navigation Jump to search
(Created page with " library(tau) library(data.table) # given a string vector and size of ngrams this function returns word ngrams with corresponding frequencies createNgram <-function...")
 
 
(2 intermediate revisions by the same user not shown)
Line 3: Line 3:
 
  library(tau)  
 
  library(tau)  
 
  library(data.table)
 
  library(data.table)
# Given a string vector and an n-gram size, this function returns word n-grams with their corresponding frequencies.
+
 
  
 
  createNgram <-function(stringVector, ngramSize){
 
  createNgram <-function(stringVector, ngramSize){
Line 16: Line 16:
 
   return(ngram)
 
   return(ngram)
 
  }
 
  }
 +
  
  
 
  text <- "This is my little R text example and I want to count the frequency of some pattern (and - is - my - of). This is my little R text example and I want to count the frequency of some patter."
 
  text <- "This is my little R text example and I want to count the frequency of some pattern (and - is - my - of). This is my little R text example and I want to count the frequency of some patter."
 +
#
 +
text <- readtext("out.txt")
 
  res <- createNgram(text, 2)
 
  res <- createNgram(text, 2)
 
  res
 
  res
 +
 +
# sort
 +
res_sort <- res[order(-freq)]
 +
res_sort
 +
head(res_sort,n=50)
 +
 +
 +
==Pranala Menarik==
 +
 +
* [[R]]

Latest revision as of 08:22, 5 November 2018


library(tau) 
library(data.table)


createNgram <-function(stringVector, ngramSize){
  ngram <- data.table()
  ng <- textcnt(stringVector, method = "string", n=ngramSize, tolower = FALSE)
  if(ngramSize==1){
    ngram <- data.table(w1 = names(ng), freq = unclass(ng), length=nchar(names(ng)))  
  }
  else {
    ngram <- data.table(w1w2 = names(ng), freq = unclass(ng), length=nchar(names(ng)))
  }
  return(ngram)
}


text <- "This is my little R text example and I want to count the frequency of some pattern (and - is - my - of). This is my little R text example and I want to count the frequency of some patter."
#
text <- readtext("out.txt")
res <- createNgram(text, 2)
res
# sort
res_sort <- res[order(-freq)]
res_sort
head(res_sort,n=50)


Pranala Menarik