Python: Siapkan stopwords Indonesia
Revision as of 10:59, 30 October 2018 by Onnowpurbo (talk | contribs) (Created page with "==Download ID-Stopwords== sudo apt install git git clone https://github.com/masdevid/ID-Stopwords.git File ada di ID-Stopwords/id.stopwords.02.01.2016.txt ==Mencari S...")
Download ID-Stopwords
sudo apt install git
git clone https://github.com/masdevid/ID-Stopwords.git
File ada di
ID-Stopwords/id.stopwords.02.01.2016.txt
Mencari Stopwords Sendiri
import os,nltk,os.path,re,string
import argparse, re
from collections import Counter
def hanya_huruf( input ):
    """Return True when *input* consists solely of ASCII letters.

    Args:
        input: the string to test.

    Returns:
        True if the string is non-empty and every character is in
        a-z or A-Z; False otherwise (including for the empty string).
    """
    # \Z anchors at the real end of the string. The '$' anchor would also
    # match just before a trailing newline, letting "abc\n" slip through.
    return re.match(r'[a-zA-Z]+\Z', input) is not None
def parse_args():
    """Parse the command-line options of this script.

    Returns:
        argparse.Namespace with a single attribute ``infile``: the path
        passed via ``-i``/``--infile``, or an empty string when the option
        is omitted.
    """
    parser = argparse.ArgumentParser()
    # The published listing read "default=," which is a syntax error.
    # NOTE(review): presumably the '' literal was swallowed by wiki markup;
    # an empty-string default is restored here -- confirm with the author.
    parser.add_argument('-i', '--infile', default='', help='input filename')
    return parser.parse_args()
# Read the file named on the command line, count every lowercased word in
# it, and print the rare words that are not already known stopwords.
args = parse_args()
infile = args.infile

# Use a context manager so the file handle is closed as soon as it is read.
with open(infile) as corpus:
    words = re.findall(r'\w+', corpus.read().lower())
wordfreqs = Counter(words)

# Load both stopword lists once, up front, into a set: the original reloaded
# them for every candidate word and searched them as lists.
# NOTE(review): 'indonesia' is presumably a custom list installed into the
# NLTK stopwords corpus directory (e.g. from ID-Stopwords) -- confirm the
# file id; a missing list now fails here rather than inside the loop.
known_stopwords = set(nltk.corpus.stopwords.words('english'))
known_stopwords.update(nltk.corpus.stopwords.words('indonesia'))

for word, count in wordfreqs.items():
    # Keep letters-only words longer than one character that occur fewer
    # than three times.
    # NOTE(review): every word was lowercased above, so word != 'Iing' is
    # always true; if a specific token was meant to be excluded it should be
    # compared in lowercase -- confirm the intent.
    if hanya_huruf(word) and len(word)>1 and word!='Iing' and count<3 :
        # No extra strip()/lower() needed: the word is already lowercase
        # and made of ASCII letters only.
        if word not in known_stopwords:
            # The parenthesised form prints identically on Python 2 and 3.
            print(word)