Python: File Stemming dengan Sastrawi
Jump to navigation
Jump to search
# Stem the words of a text file with the Sastrawi stemmer.
#
# Usage: <script> -i INPUT_FILE -o OUTPUT_FILE
#
# The input file is split on whitespace. Tokens that consist only of ASCII
# letters, are 2-19 characters long and pass the explicit exclusion below are
# lower-cased and stemmed; stems that are not English or Indonesian NLTK
# stopwords are written to the output file, each followed by a single space.

import argparse
import getopt
import os
import os.path
import re
import string
import sys

import nltk
import Sastrawi
from nltk.stem.porter import PorterStemmer
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory

# Compiled once; hanya_huruf() is called for every token of the input file.
_LETTERS_ONLY = re.compile('^[a-zA-Z]+$')


def parse_args():
    """Parse the command line and return the argparse namespace.

    Returns:
        Namespace with ``infile`` and ``outfile`` attributes; both default
        to the empty string when the option is not given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--infile', default='', help='input filename')
    parser.add_argument('-o', '--outfile', default='', help='output filename')
    return parser.parse_args()


def hanya_huruf(input):
    """Return True if *input* matches the ASCII-letters-only pattern.

    ("hanya huruf" is Indonesian for "letters only".) The parameter name
    shadows the ``input`` builtin; it is kept so keyword callers keep working.
    """
    return _LETTERS_ONLY.match(input) is not None


def main():
    """Read the input file, stem its words and write them to the output file.

    Exits with a usage message when either filename is missing instead of
    failing later inside ``open('')``.
    """
    args = parse_args()
    if not args.infile or not args.outfile:
        sys.exit('error: both -i/--infile and -o/--outfile are required')

    with open(args.infile, "r") as source:
        words = source.read().split()

    stemmer = StemmerFactory().create_stemmer()

    # Load both stopword lists once into a set: the corpus reader returns a
    # fresh list on every call, so looking it up per word is needlessly slow.
    stopwords = set(nltk.corpus.stopwords.words('english'))
    stopwords.update(nltk.corpus.stopwords.words('indonesian'))

    with open(args.outfile, "w") as target:
        for word in words:
            # 'Iing' is excluded explicitly by the original script; the
            # reason is not visible in this file.
            if not (hanya_huruf(word) and 1 < len(word) < 20
                    and word != 'Iing'):
                continue
            word = word.strip(string.punctuation).lower()
            word = stemmer.stem(word)
            if word not in stopwords:
                target.write(word)
                target.write(" ")


if __name__ == "__main__":
    main()