Python: cari-stopwords.py
Revision as of 05:05, 2 February 2017 by Onnowpurbo (talk | contribs) (Created page with "Install dulu pip install nltk Source code import os,nltk,os.path,re,string import argparse from nltk.stem.porter import PorterStemmer ps=PorterStemmer() def hany...")
Install dulu
pip install nltk
python -m nltk.downloader stopwords
Source code
import os,nltk,os.path,re,string
import argparse
from nltk.stem.porter import PorterStemmer
# Shared Porter stemmer instance; the main loop calls ps.stem() on every
# candidate word before comparing it against the stopword lists.
ps=PorterStemmer()
def hanya_huruf(input):
    """Return True when ``input`` consists solely of ASCII letters.

    "hanya huruf" is Indonesian for "letters only".

    Args:
        input: The string to check. The parameter name is kept for
            compatibility with keyword callers even though it shadows
            the builtin of the same name.

    Returns:
        bool: True if ``input`` is non-empty and every character is in
        ``a-z`` or ``A-Z``; False otherwise (including the empty string).
    """
    # fullmatch() instead of match() with '^...$': the '$' anchor also
    # matches just before a trailing newline, so a word followed by a
    # line break used to be accepted as "letters only".
    return re.fullmatch(r'[a-zA-Z]+', input) is not None
def parse_args(argv=None):
    """Parse the command-line options of the script.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default) argparse falls back to ``sys.argv[1:]``, which is the
            behaviour existing callers rely on.

    Returns:
        argparse.Namespace: Namespace whose ``infile`` attribute holds the
        value of ``-i`` / ``--infile`` (an empty string when not given).
    """
    parser = argparse.ArgumentParser()
    # The published listing had nothing after ``default=`` (a syntax
    # error); presumably the empty-string literal was lost when the page
    # was rendered, so it is restored here.
    parser.add_argument('-i', '--infile', default='', help='input filename')
    return parser.parse_args(argv)
# --- Script body: print the unique, stemmed, non-stopword words of a file ---
args = parse_args()
infile = args.infile

# Read the whole input in one go; the context manager guarantees the file
# is closed even if read() raises.
with open(infile, 'r') as filename:
    fcontent = filename.read()

# Whitespace-separated tokens of the input text.
fs = fcontent.split()

# Load both stopword lists once, up front. Calling
# nltk.corpus.stopwords.words() inside the loop re-reads the corpus for
# every single token.
# NOTE(review): the stock NLTK stopwords corpus names its Indonesian list
# 'indonesian'; 'indonesia' is kept as published -- confirm that a corpus
# file with this name exists in the local nltk_data.
stop_words = set(nltk.corpus.stopwords.words('english'))
stop_words.update(nltk.corpus.stopwords.words('indonesia'))

wordlist = []      # unique stems, in order of first appearance
seen_stems = set()  # mirrors wordlist for O(1) duplicate checks

for word in fs:
    # Keep only purely alphabetic tokens of 2..14 characters, and skip the
    # literal token 'Iing'.
    if not (hanya_huruf(word) and 1 < len(word) < 15 and word != 'Iing'):
        continue

    # Echo every accepted raw token before it is normalised.
    print(word)

    stem = ps.stem(word.strip(string.punctuation).lower())
    if stem in stop_words or stem in seen_stems:
        continue

    seen_stems.add(stem)
    wordlist.append(stem)
    # Echo each stem the first time it is added to wordlist.
    print( stem )