Python: cari-stopwords-freq.py

From OnnoWiki
Revision as of 08:08, 2 February 2017 by Onnowpurbo (talk | contribs) (Created page with " import os,nltk,os.path,re,string import argparse from nltk.stem.porter import PorterStemmer from collections import Counter import re ps=PorterStemmer() def hanya_...")
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search
import os,nltk,os.path,re,string
import argparse
from nltk.stem.porter import PorterStemmer
from collections import Counter
import re

# Module-level Porter stemmer instance; none of the code visible in this section uses it.
ps=PorterStemmer() 

def hanya_huruf( input ):
    """Return True when ``input`` consists solely of ASCII letters.

    Args:
        input: The string to test.

    Returns:
        True if ``input`` is non-empty and every character is in ``a-z``
        or ``A-Z``; False otherwise (including for the empty string).
    """
    # Anchor with \Z (absolute end of string) rather than $: the latter
    # also matches just before a trailing newline, which would wrongly
    # accept a value such as "abc" followed by a line break.
    return re.match(r'^[a-zA-Z]+\Z', input) is not None

def parse_args():
    """Parse the command-line options of this script.

    Returns:
        argparse.Namespace whose ``infile`` attribute holds the value
        passed with ``-i``/``--infile``, or an empty string when the
        option is omitted.
    """
    parser = argparse.ArgumentParser()
    # The default used to be missing entirely (``default=,``), which is a
    # syntax error; an empty string keeps the option optional.
    parser.add_argument('-i', '--infile', default='', help='input filename')
    return parser.parse_args()

# Script body: count word frequencies in the input file and print every
# word that occurs fewer than 200 times.
args = parse_args()
infile = args.infile

# Use a context manager so the file handle is closed as soon as the text
# has been read, instead of being left open for the rest of the run.
with open(infile) as source:
    # Lower-case the whole text first so counting is case-insensitive.
    words = re.findall(r'\w+', source.read().lower())

wordfreqs = Counter(words)
# NOTE(review): the script name suggests stop-word detection, yet this keeps
# the words *below* the threshold -- confirm the comparison direction.
for word, count in wordfreqs.items():
    if count < 200:
        # Parenthesised single-argument form prints the same on Python 2 and 3.
        print(word)