Python: browse-url.py
Revision as of 05:25, 6 February 2017 by Onnowpurbo (talk | contribs) (Created page with " from selenium import webdriver import sys, getopt import argparse firefox_profile = webdriver.FirefoxProfile() firefox_profile.set_preference('permissions.default.image...")
from selenium import webdriver
import sys, getopt
import argparse
# Firefox profile tuned for text-only scraping.
firefox_profile = webdriver.FirefoxProfile()
# 2 is Firefox's "block all images" value for this preference.
firefox_profile.set_preference('permissions.default.image', 2)
# This preference is a boolean; passing the string 'false' stores a string
# preference instead, which does not switch the Flash plugin off.
firefox_profile.set_preference('dom.ipc.plugins.enabled.libflashplayer.so', False)
# Browser instance shared by the rest of the script; note that it is launched
# as soon as this module is loaded.
driver = webdriver.Firefox(firefox_profile=firefox_profile)
def parse_args(argv=None):
    """Parse the command-line options of the script.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``, which is what a
            plain ``parse_args()`` call has always done.

    Returns:
        An ``argparse.Namespace`` with the string attributes ``infile`` and
        ``outfile``; each is the empty string when its option is omitted.
    """
    parser = argparse.ArgumentParser()
    # The defaults are empty strings; a bare ``default=,`` is a syntax error.
    parser.add_argument('-i', '--infile', default='', help='input filename')
    parser.add_argument('-o', '--outfile', default='', help='output filename')
    return parser.parse_args(argv)
def _read_urls(path):
    """Return the non-blank lines of the file at *path*, whitespace-stripped."""
    with open(path) as url_file:
        stripped = (line.strip() for line in url_file)
        return [url for url in stripped if url]


def _wordy_lines(text, min_words=6):
    """Return the lines of *text* with at least *min_words* words, each ending in a newline."""
    return [
        line + "\n"
        for line in text.splitlines()
        if len(line.split()) >= min_words
    ]


def main():
    """Fetch every URL listed in the input file and save the wordy body text.

    The input file (``--infile``) holds one URL per line; blank lines are
    skipped. Each page is loaded in the module-level ``driver``, the text of
    its ``<body>`` element is reduced to ASCII (other characters are
    dropped), and only lines with more than five whitespace-separated words
    are written to the output file (``--outfile``).

    Every kept line is newline-terminated, so text from consecutive pages
    never runs together on one line. The browser window is closed when the
    function finishes, including when a page load or file operation fails.

    Raises:
        OSError: If the input file cannot be read or the output file cannot
            be written.
    """
    args = parse_args()
    try:
        # Read the URL list before opening the output file so a missing
        # input file does not truncate an existing output file.
        urls = _read_urls(args.infile)
        with open(args.outfile, "w") as out:
            for url in urls:
                driver.get(url)
                # 'tag name' is the locator string behind By.TAG_NAME; the
                # generic find_element() is available in both old and new
                # Selenium releases, unlike find_element_by_tag_name().
                body_text = driver.find_element('tag name', 'body').text
                ascii_text = body_text.encode('ascii', 'ignore').decode('ascii')
                # Filter in memory instead of writing, re-reading and
                # rewriting the output file.
                out.writelines(_wordy_lines(ascii_text))
    finally:
        driver.close()
# Run the scraper only when this file is executed as a script, so that
# importing it does not start fetching pages.
if __name__ == "__main__":
    main()