Python: File Stemming dengan Sastrawi
Jump to navigation
Jump to search
import sys, getopt
import argparse
import os,nltk,os.path,re,string
import argparse
import Sastrawi
from nltk.stem.porter import PorterStemmer
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
def parse_args():
    """Parse the command-line options of the stemming script.

    Options:
        -i / --infile   name of the text file to read.
        -o / --outfile  name of the file the stemmed words are written to.

    Returns:
        argparse.Namespace: has the attributes ``infile`` and ``outfile``;
        each is an empty string when the option is not given.
    """
    parser = argparse.ArgumentParser()
    # An empty string stands for "option not supplied".
    parser.add_argument('-i', '--infile', default='', help='input filename')
    parser.add_argument('-o', '--outfile', default='', help='output filename')
    return parser.parse_args()
def hanya_huruf(input):
    """Return True when *input* consists solely of ASCII letters.

    "hanya huruf" is Indonesian for "letters only".

    Args:
        input: the string to test. The name shadows the ``input`` builtin
            but is kept so existing keyword callers keep working.

    Returns:
        bool: True if the string is one or more characters from a-z / A-Z
        and nothing else; False otherwise (including the empty string).
    """
    # ``\Z`` anchors at the very end of the string. ``$`` would also match
    # just before a trailing newline and wrongly accept "abc\n".
    return re.match(r'[a-zA-Z]+\Z', input) is not None
def main():
    """Stem the words of the input file and write the kept stems out.

    Steps:
        1. Read the file named by ``--infile`` and split it on whitespace.
        2. Keep only tokens that are purely ASCII letters, 2 to 19
           characters long, and not the literal ``'Iing'``.
        3. Lower-case each kept token and stem it with the Sastrawi stemmer.
        4. Drop stems that appear in NLTK's English or Indonesian stopword
           lists.
        5. Write every remaining stem, followed by a single space, to the
           file named by ``--outfile``.
    """
    args = parse_args()

    # The input is read completely and closed before the output is opened.
    with open(args.infile, "r") as source:
        tokens = source.read().split()

    stemmer = StemmerFactory().create_stemmer()

    # Load both stopword lists once instead of once per word; a set gives
    # constant-time membership tests. Doing this before the output file is
    # opened also means a failure to load the lists does not truncate it.
    stopwords = set(nltk.corpus.stopwords.words('english'))
    stopwords.update(nltk.corpus.stopwords.words('indonesian'))

    with open(args.outfile, "w") as target:
        for word in tokens:
            if not (hanya_huruf(word) and 1 < len(word) < 20
                    and word != 'Iing'):
                continue
            # NOTE(review): the token is already letters-only here, so the
            # punctuation strip cannot remove anything; presumably it was
            # meant to run before the filter -- confirm the intent.
            word = word.strip(string.punctuation).lower()
            word = stemmer.stem(word)
            if word not in stopwords:
                target.write(word + " ")
# Run the stemming pipeline only when executed as a script, so that the
# module can be imported without side effects.
if __name__ == "__main__":
    main()