<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<id>https://onnocenter.or.id/wiki/index.php?action=history&amp;feed=atom&amp;title=R%3A_bigram_2</id>
	<title>R: bigram 2 - Revision history</title>
	<link rel="self" type="application/atom+xml" href="https://onnocenter.or.id/wiki/index.php?action=history&amp;feed=atom&amp;title=R%3A_bigram_2"/>
	<link rel="alternate" type="text/html" href="https://onnocenter.or.id/wiki/index.php?title=R:_bigram_2&amp;action=history"/>
	<updated>2026-04-21T13:41:53Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<generator>MediaWiki 1.35.4</generator>
	<entry>
		<id>https://onnocenter.or.id/wiki/index.php?title=R:_bigram_2&amp;diff=52428&amp;oldid=prev</id>
		<title>Onnowpurbo: Created page with &quot;Sumber: http://www.rpubs.com/rgcmme/PLN-09    library(tm)  library(ggplot2)  library(reshape2)  library(wordcloud)  library(RWeka)  # Needed for a bug when calculating n-grams...&quot;</title>
		<link rel="alternate" type="text/html" href="https://onnocenter.or.id/wiki/index.php?title=R:_bigram_2&amp;diff=52428&amp;oldid=prev"/>
		<updated>2018-11-01T07:55:27Z</updated>

		<summary type="html">&lt;p&gt;Created page with &amp;quot;Sumber: http://www.rpubs.com/rgcmme/PLN-09    library(tm)  library(ggplot2)  library(reshape2)  library(wordcloud)  library(RWeka)  # Needed for a bug when calculating n-grams...&amp;quot;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;Sumber: http://www.rpubs.com/rgcmme/PLN-09&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
 library(tm)&lt;br /&gt;
 library(ggplot2)&lt;br /&gt;
 library(reshape2)&lt;br /&gt;
 library(wordcloud)&lt;br /&gt;
 library(RWeka)&lt;br /&gt;
 # Needed for a bug when calculating n-grams with weka&lt;br /&gt;
 options(mc.cores=1)&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
 setwd(&amp;quot;~/Youtube&amp;quot;)&lt;br /&gt;
 path = &amp;quot;./review_polarity/txt_sentoken/&amp;quot;&lt;br /&gt;
&lt;br /&gt;
 dir = DirSource(paste(path,&amp;quot;pos/&amp;quot;,sep=&amp;quot;&amp;quot;), encoding = &amp;quot;UTF-8&amp;quot;)&lt;br /&gt;
 corpus = Corpus(dir)&lt;br /&gt;
&lt;br /&gt;
 # check how many documents were loaded&lt;br /&gt;
 length(corpus)&lt;br /&gt;
 ## [1] 1000&lt;br /&gt;
&lt;br /&gt;
 # access 1st entry&lt;br /&gt;
 corpus[[1]]&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
Create bigrams&lt;br /&gt;
&lt;br /&gt;
 corpus.ng = tm_map(corpus,removeWords,c(stopwords(),&amp;quot;s&amp;quot;,&amp;quot;ve&amp;quot;))&lt;br /&gt;
 corpus.ng = tm_map(corpus.ng,removePunctuation)&lt;br /&gt;
 corpus.ng = tm_map(corpus.ng,removeNumbers)&lt;br /&gt;
&lt;br /&gt;
 BigramTokenizer &amp;lt;- function(x) NGramTokenizer(x, Weka_control(min = 2, max = 2))&lt;br /&gt;
 tdm.bigram = TermDocumentMatrix(corpus.ng,&lt;br /&gt;
 control = list(tokenize = BigramTokenizer))&lt;br /&gt;
&lt;br /&gt;
 freq = sort(rowSums(as.matrix(tdm.bigram)),decreasing = TRUE)&lt;br /&gt;
 freq.df = data.frame(word=names(freq), freq=freq)&lt;br /&gt;
 head(freq.df, 20)&lt;br /&gt;
&lt;br /&gt;
 pal=brewer.pal(8,&amp;quot;Blues&amp;quot;)&lt;br /&gt;
 pal=pal[-(1:3)]&lt;br /&gt;
 wordcloud(freq.df$word,freq.df$freq,max.words=100,random.order = F, colors=pal)&lt;br /&gt;
&lt;br /&gt;
 ggplot(head(freq.df,15), aes(reorder(word,freq), freq)) +&lt;br /&gt;
   geom_bar(stat = &amp;quot;identity&amp;quot;) + coord_flip() +&lt;br /&gt;
   xlab(&amp;quot;Bigrams&amp;quot;) + ylab(&amp;quot;Frequency&amp;quot;) +&lt;br /&gt;
   ggtitle(&amp;quot;Most frequent bigrams&amp;quot;)&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
Trigram&lt;br /&gt;
&lt;br /&gt;
 TrigramTokenizer &amp;lt;- function(x) NGramTokenizer(x, Weka_control(min = 3, max = 3))&lt;br /&gt;
 tdm.trigram = TermDocumentMatrix(corpus.ng,&lt;br /&gt;
 control = list(tokenize = TrigramTokenizer))&lt;br /&gt;
&lt;br /&gt;
 freq = sort(rowSums(as.matrix(tdm.trigram)),decreasing = TRUE)&lt;br /&gt;
 freq.df = data.frame(word=names(freq), freq=freq)&lt;br /&gt;
 head(freq.df, 20)&lt;br /&gt;
&lt;br /&gt;
 wordcloud(freq.df$word,freq.df$freq,max.words=100,random.order = F, colors=pal)&lt;br /&gt;
&lt;br /&gt;
 ggplot(head(freq.df,15), aes(reorder(word,freq), freq)) +   &lt;br /&gt;
   geom_bar(stat=&amp;quot;identity&amp;quot;) + coord_flip() + &lt;br /&gt;
   xlab(&amp;quot;Trigrams&amp;quot;) + ylab(&amp;quot;Frequency&amp;quot;) +&lt;br /&gt;
   ggtitle(&amp;quot;Most frequent trigrams&amp;quot;)&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
==Referensi==&lt;br /&gt;
&lt;br /&gt;
* http://www.rpubs.com/rgcmme/PLN-09&lt;br /&gt;
&lt;br /&gt;
==Pranala Menarik==&lt;br /&gt;
&lt;br /&gt;
* [[R]]&lt;/div&gt;</summary>
		<author><name>Onnowpurbo</name></author>
	</entry>
</feed>