Python: cari-stopwords.py
Jump to navigation
Jump to search
Install dulu (NLTK beserta korpus stopwords-nya)
pip install nltk
python -m nltk.downloader stopwords
Source code
"""Print the unique, stemmed, non-stopword words found in a text file.

Usage:
    python cari-stopwords.py -i INPUT_FILE

Every accepted token is echoed as it is read; each stem that has not been
seen before (and is not a stopword) is echoed a second time in stemmed form.
"""
import argparse
import os
import os.path
import re
import string

# A token qualifies only if it consists solely of ASCII letters.
_LETTERS_ONLY = re.compile(r'[a-zA-Z]+')

# Accepted word length is MIN_LENGTH < len(word) < MAX_LENGTH (both exclusive).
MIN_LENGTH = 1
MAX_LENGTH = 15

# Words that are always skipped, compared case-sensitively.
EXCLUDED_WORDS = frozenset({'Iing'})


def hanya_huruf(input):
    """Return True if *input* is a non-empty string of ASCII letters only.

    The parameter keeps its original name (which shadows the builtin) so
    that keyword callers keep working.
    """
    # fullmatch, unlike match() with '$', does not accept a trailing newline.
    return _LETTERS_ONLY.fullmatch(input) is not None


def parse_args(argv=None):
    """Parse the command line.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        The parsed namespace; ``infile`` holds the input filename.
    """
    parser = argparse.ArgumentParser()
    # The original default was empty (a syntax error); with no sensible
    # default for an input path the option is required instead.
    parser.add_argument('-i', '--infile', required=True,
                        help='input filename')
    return parser.parse_args(argv)


def is_candidate(word):
    """Return True if *word* passes the letters-only, length and exclusion filters."""
    return (
        hanya_huruf(word)
        and MIN_LENGTH < len(word) < MAX_LENGTH
        and word not in EXCLUDED_WORDS
    )


def collect_words(tokens, stem, stopwords, echo=print):
    """Collect the unique stems of all non-stopword tokens, in first-seen order.

    Args:
        tokens: Iterable of raw whitespace-separated tokens.
        stem: Callable mapping a lowercased word to its stem.
        stopwords: Container of stopwords supporting ``in``.
        echo: Callable used for progress output; receives every accepted
            word, and every newly collected stem.

    Returns:
        List of unique stems in the order they were first encountered.
    """
    wordlist = []
    seen = set()  # O(1) duplicate check; wordlist keeps the order
    for token in tokens:
        # Strip punctuation BEFORE validating, otherwise tokens such as
        # "word," are rejected by the letters-only check and the strip is
        # a no-op.
        word = token.strip(string.punctuation)
        if not is_candidate(word):
            continue
        echo(word)
        lowered = word.lower()
        stemmed = stem(lowered)
        # Stopword lists hold unstemmed forms, so test the word itself as
        # well as its stem (e.g. "this" stems to "thi").
        if lowered in stopwords or stemmed in stopwords:
            continue
        if stemmed in seen:
            continue
        seen.add(stemmed)
        wordlist.append(stemmed)
        echo(stemmed)
    return wordlist


def load_stopwords():
    """Return the English and Indonesian NLTK stopwords as one set.

    Requires the NLTK ``stopwords`` corpus to be installed; NLTK raises
    LookupError when it is missing.
    """
    # Imported lazily so the pure helpers above work without NLTK.
    import nltk

    corpus = nltk.corpus.stopwords
    words = set(corpus.words('english'))
    try:
        # File id used by the original script (possibly a custom list).
        words.update(corpus.words('indonesia'))
    except OSError:
        # The stock NLTK corpus names this list 'indonesian'.
        words.update(corpus.words('indonesian'))
    return words


def main():
    """Read the input file and print its unique non-stopword stems."""
    from nltk.stem.porter import PorterStemmer

    args = parse_args()
    with open(args.infile, 'r') as handle:
        tokens = handle.read().split()
    collect_words(tokens, PorterStemmer().stem, load_stopwords())


if __name__ == '__main__':
    main()