# Python: browse-url.py
"""Dump the readable text of a list of web pages into one text file.

Usage::

    browse-url.py -i URLS.txt -o OUTPUT.txt

The input file holds one URL per line.  Every page is loaded in Firefox
through Selenium, the text of its ``<body>`` element is reduced to ASCII,
and only lines containing more than five whitespace-separated words are
written to the output file.
"""
import argparse
import getopt
import sys

# A line is kept only when it has MORE words than this.
MAX_SHORT_LINE_WORDS = 5


def parse_args(argv=None):
    """Parse the command line.

    Args:
        argv: Argument list to parse.  ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what the script itself uses.

    Returns:
        An ``argparse.Namespace`` with the string attributes ``infile`` and
        ``outfile``; each is ``''`` when the option was not supplied.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--infile', default='', help='input filename')
    parser.add_argument('-o', '--outfile', default='', help='output filename')
    return parser.parse_args(argv)


def create_driver():
    """Start Firefox with the profile tweaks this script relies on.

    Returns:
        A Selenium Firefox WebDriver.  The caller is responsible for calling
        ``quit()`` on it.
    """
    # Imported here rather than at module level so that argument parsing and
    # the text helpers can be used (and tested) without Selenium installed.
    from selenium import webdriver

    profile = webdriver.FirefoxProfile()
    # Value 2 is intended to stop Firefox from loading images.
    profile.set_preference('permissions.default.image', 2)
    # This is a boolean preference, so it must receive a real bool: the
    # string 'false' would be stored as a string-typed preference instead.
    profile.set_preference('dom.ipc.plugins.enabled.libflashplayer.so', False)
    # NOTE(review): the ``firefox_profile=`` keyword is the Selenium 3 API;
    # confirm the pinned Selenium version before upgrading.
    return webdriver.Firefox(firefox_profile=profile)


def read_urls(path):
    """Return the non-blank lines of *path*, stripped of whitespace."""
    with open(path) as handle:
        stripped = (line.strip() for line in handle)
        # Blank lines are skipped so they are never handed to the browser.
        return [url for url in stripped if url]


def to_ascii(text):
    """Return *text* with every non-ASCII character removed."""
    return text.encode('ascii', 'ignore').decode('ascii')


def keep_long_lines(text, max_short_words=MAX_SHORT_LINE_WORDS):
    """Return the lines of *text* that have more than *max_short_words* words.

    Args:
        text: Text to filter; it is split into lines with ``str.splitlines``.
        max_short_words: Lines with this many words or fewer are dropped.

    Returns:
        A list of the retained lines, without their line terminators.
    """
    return [
        line for line in text.splitlines()
        if len(line.split()) > max_short_words
    ]


def fetch_body_text(driver, url):
    """Load *url* in *driver* and return the text of its ``<body>`` element."""
    driver.get(url)
    # NOTE(review): ``find_element_by_tag_name`` is the Selenium 3 spelling;
    # confirm the pinned Selenium version before upgrading.
    return driver.find_element_by_tag_name('body').text


def main():
    """Fetch every URL from the input file and write the filtered page text.

    Exits with an error message when either filename option is missing.
    """
    args = parse_args()
    if not args.infile or not args.outfile:
        sys.exit('error: both --infile and --outfile are required')

    # Read the URL list before starting the browser so that a bad input path
    # fails fast without launching Firefox.
    urls = read_urls(args.infile)

    driver = create_driver()
    try:
        with open(args.outfile, 'w') as out:
            for url in urls:
                page_text = to_ascii(fetch_body_text(driver, url))
                # Filtering per page, one line per write, keeps the last line
                # of one page from fusing with the first line of the next.
                out.writelines(
                    line + '\n' for line in keep_long_lines(page_text)
                )
    finally:
        # quit() ends the whole browser session, even when a page load raised.
        driver.quit()


if __name__ == '__main__':
    main()