Python: File Stemming dengan Sastrawi
Revision as of 11:17, 30 October 2018 by Onnowpurbo (talk | contribs) (Created page with " import sys, getopt import argparse import os,nltk,os.path,re,string import argparse import Sastrawi from nltk.stem.porter import PorterStemmer from Sastrawi.Stemmer.S...")
"""Stem the words of a text file with Sastrawi and drop stopwords.

Usage:
    python script.py -i INPUT.txt -o OUTPUT.txt

The input is split on whitespace. Tokens made only of ASCII letters are
lower-cased, stemmed with the Sastrawi stemmer, filtered against the NLTK
English and Indonesian stopword lists, and written to the output file
separated by single spaces.
"""
import argparse
import getopt
import os
import os.path
import re
import string
import sys

import nltk
import Sastrawi
from nltk.stem.porter import PorterStemmer
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory

# Compiled once; the pattern text is the one used by the original script.
_LETTERS_ONLY = re.compile('^[a-zA-Z]+$')


def parse_args():
    """Parse the command-line options.

    Returns:
        argparse.Namespace with ``infile`` and ``outfile`` string attributes;
        each defaults to the empty string when the option is omitted.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--infile', default='', help='input filename')
    parser.add_argument('-o', '--outfile', default='', help='output filename')
    return parser.parse_args()


def hanya_huruf(input):
    """Return True when *input* consists solely of ASCII letters (a-z, A-Z).

    ("hanya huruf" is Indonesian for "letters only".)
    """
    return _LETTERS_ONLY.match(input) is not None


def main():
    """Read the input file, stem and filter its words, write the result.

    Exits with an error message when either filename option is missing.
    Errors from opening the files or from loading the NLTK stopword corpus
    propagate to the caller.
    """
    args = parse_args()
    if not args.infile or not args.outfile:
        # Empty defaults would otherwise surface as an opaque open('') error.
        sys.exit("both --infile and --outfile are required")

    with open(args.infile, "r") as src:
        words = src.read().split()

    stemmer = StemmerFactory().create_stemmer()

    # Load both stopword lists once; the per-word lookups in the loop below
    # then hit a set instead of re-reading the corpus for every word.
    stopwords = set(nltk.corpus.stopwords.words('english'))
    stopwords.update(nltk.corpus.stopwords.words('indonesian'))

    with open(args.outfile, "w") as dst:
        for word in words:
            # Keep only purely alphabetic tokens of 2..19 characters.
            # 'Iing' is a hard-coded exclusion carried over from the original
            # script; the reason is not visible here -- TODO confirm.
            if not (hanya_huruf(word) and 1 < len(word) < 20 and word != 'Iing'):
                continue
            # NOTE(review): strip() cannot remove anything here because the
            # token already passed the letters-only check; kept as in the
            # original in case the filter order was meant to be different.
            word = stemmer.stem(word.strip(string.punctuation).lower())
            if word not in stopwords:
                dst.write(word)
                dst.write(" ")


if __name__ == "__main__":
    main()