Difference between revisions of "NLTK: Sentiment Strength Detection in Bahasa Indonesia"
Jump to navigation
Jump to search
Onnowpurbo (talk | contribs) |
Onnowpurbo (talk | contribs) |
||
(5 intermediate revisions by the same user not shown) | |||
Line 25: | Line 25: | ||
This is work in progress. Experimental for my Master Thesis | This is work in progress. Experimental for my Master Thesis | ||
+ | |||
+ | |||
+ | ==Source Code Asli== | ||
+ | |||
+ | import pymongo, requests, json, urllib, re, collections | ||
+ | from pymongo import MongoClient | ||
+ | |||
+ | client = MongoClient('mongodb://localhost:27017/') | ||
+ | #mongodb database and collection | ||
+ | db = client.dataset_agnezmo | ||
+ | dbPre = db.preprocessing | ||
+ | |||
+ | def main(): | ||
+ | ss = sentiStrength() | ||
+ | sc = spellCheck() | ||
+ | tweets = dbPre.find().skip(0).limit(100) | ||
+ | for t in tweets: | ||
+ | print ss.main(t['text']) | ||
+ | print "=====================" | ||
+ | print ss.getSentimenScore() | ||
+ | |||
+ | main() | ||
+ | |||
+ | ==Test Kalimat== | ||
+ | |||
+ | import requests, json, urllib, re, collections | ||
+ | |||
+ | def main(): | ||
+ | ss = sentiStrength() | ||
+ | sc = spellCheck() | ||
+ | t = 'film godzilla amat sangat menarik sekali' | ||
+ | print ss.main(t) | ||
+ | print "=====================" | ||
+ | print ss.getSentimenScore() | ||
+ | |||
+ | main() | ||
+ | |||
+ | Hasilnya: | ||
+ | |||
+ | film godzilla amat sangat menarik [3] sekali [score:-1,3][result: +positive] | ||
+ | ===================== | ||
+ | [Positive:1] [Negative:0] [Neutral:0] | ||
+ | |||
+ | ==Ubah Source Code== | ||
+ | |||
+ | import argparse | ||
+ | |||
+ | def parse_args(): | ||
+ | parser = argparse.ArgumentParser() | ||
+ | parser.add_argument('-i', '--infile', default='', help='input filename') | ||
+ | return parser.parse_args() | ||
+ | |||
+ | def main(): | ||
+ | args = parse_args() | ||
+ | infile = args.infile | ||
+ | |||
+ | filename = open(infile,'r') | ||
+ | fcontent=filename.readlines() | ||
+ | filename.close() | ||
+ | |||
+ | ss = sentiStrength() | ||
+ | sc = spellCheck() | ||
+ | for t in fcontent: | ||
+ | print ss.main(t) | ||
+ | print "=====================" | ||
+ | print ss.getSentimenScore() | ||
+ | |||
+ | main() | ||
==Referensi== | ==Referensi== | ||
* https://github.com/masdevid/SentiStrengthID | * https://github.com/masdevid/SentiStrengthID |
Latest revision as of 10:45, 26 February 2017
SentiStrengthID
Sentiment Strength Detection in Bahasa Indonesia. This is an unsupervised version of SentiStrength (http://sentistrength.wlv.ac.uk/) for Bahasa Indonesia. Core features:
- Sentiment Lookup
- Negation Word Lookup
- Booster Word Lookup
- Emoticon Lookup
- Idiom Lookup
- Question Word Lookup
- Slang Word Lookup
- Spelling Correction (optional) using Peter Norvig's spelling corrector (http://norvig.com/spell-correct.html)
- Negative emotion ignored in question
- Exclamation marks count as +2
- Repeated Punctuation boosts sentiment
Ignored Rule:
- Repeated letters (more than 2) boost the sentiment score. This rule is not applied because my own pre-processing rule removes a word's extra characters.
- Score +2, -2 for the word "miss". This does not apply in Bahasa Indonesia.
Warning!
This is a work in progress. It is experimental code for my Master's thesis.
Source Code Asli
import pymongo
import requests
import json
import urllib
import re
import collections
from pymongo import MongoClient

# MongoDB connection, database and collection
client = MongoClient('mongodb://localhost:27017/')
db = client.dataset_agnezmo
dbPre = db.preprocessing


def main():
    """Print the sentiment result for up to 100 stored documents, then the score summary.

    Relies on the sentiStrength and spellCheck classes, which are defined
    outside this snippet.
    """
    ss = sentiStrength()
    # Not used below; kept because constructing it may have side effects
    # that cannot be seen from this snippet.
    sc = spellCheck()
    # NOTE(review): the page lost the original indentation. The separator and
    # summary are presumed to follow the loop -- confirm against the upstream
    # repository.
    for tweet in dbPre.find().skip(0).limit(100):
        # A parenthesised single-argument print behaves the same on Python 2 and 3.
        print(ss.main(tweet['text']))
    print("=====================")
    print(ss.getSentimenScore())


main()
Test Kalimat
import requests, json, urllib, re, collections


def main():
    """Score one hard-coded Indonesian sentence and print the result and summary.

    Relies on the sentiStrength and spellCheck classes, which are defined
    outside this snippet.
    """
    ss = sentiStrength()
    # Not used below; kept because constructing it may have side effects
    # that cannot be seen from this snippet.
    sc = spellCheck()
    sentence = 'film godzilla amat sangat menarik sekali'
    # A parenthesised single-argument print behaves the same on Python 2 and 3.
    print(ss.main(sentence))
    print("=====================")
    print(ss.getSentimenScore())


main()
Hasilnya:
film godzilla amat sangat menarik [3] sekali [score:-1,3][result: +positive]
=====================
[Positive:1] [Negative:0] [Neutral:0]
Ubah Source Code
import argparse


def parse_args():
    """Parse the command line.

    Returns:
        The argparse namespace; ``infile`` holds the input filename
        (empty string when -i/--infile is not given).
    """
    parser = argparse.ArgumentParser()
    # The rendered page showed "default=," because the wiki markup swallowed
    # the two single quotes; the intended default is the empty string.
    parser.add_argument('-i', '--infile', default='', help='input filename')
    return parser.parse_args()


def main():
    """Run the analyser on every line of the input file, then print the summary.

    Relies on the sentiStrength and spellCheck classes, which are defined
    outside this snippet.
    """
    args = parse_args()
    infile = args.infile

    # The context manager closes the file even if reading fails.
    with open(infile, 'r') as source:
        fcontent = source.readlines()

    ss = sentiStrength()
    # Not used below; kept because constructing it may have side effects
    # that cannot be seen from this snippet.
    sc = spellCheck()
    # NOTE(review): the page lost the original indentation. The separator and
    # summary are presumed to follow the loop -- confirm against the upstream
    # repository.
    for t in fcontent:
        # Each line is passed as read, including its trailing newline.
        # A parenthesised single-argument print behaves the same on Python 2 and 3.
        print(ss.main(t))
    print("=====================")
    print(ss.getSentimenScore())


main()