Python: browse-url.py

From OnnoWiki
Jump to navigation Jump to search
from selenium import webdriver
import sys, getopt
import argparse

# Firefox profile tuned for text scraping: only the page text is needed,
# so image loading and the Flash plugin preference are switched off.
firefox_profile = webdriver.FirefoxProfile()
# In Firefox, 2 is the "block" value of permissions.default.image.
firefox_profile.set_preference('permissions.default.image', 2)
# This is a boolean preference: pass a real bool. The string 'false' would
# be serialised as a string-typed pref rather than the intended boolean.
firefox_profile.set_preference('dom.ipc.plugins.enabled.libflashplayer.so', False)
# NOTE: the browser is launched as soon as this module is loaded; main()
# drives this shared instance and is responsible for shutting it down.
driver = webdriver.Firefox(firefox_profile=firefox_profile)

def parse_args(argv=None):
    """Parse the command-line options of the script.

    Args:
        argv: Optional list of argument strings to parse. The default
            ``None`` makes argparse read ``sys.argv[1:]``, which is what
            a call without arguments has always done.

    Returns:
        argparse.Namespace: has the string attributes ``infile`` and
        ``outfile``; each is the empty string when the option is omitted.
    """
    parser = argparse.ArgumentParser()
    # The published listing read ``default=,`` (a syntax error): the empty
    # string literals had been lost. They are restored here.
    parser.add_argument('-i', '--infile', default='', help='input filename')
    parser.add_argument('-o', '--outfile', default='', help='output filename')
    return parser.parse_args(argv)

def _content_lines(text, min_words=6):
    """Return the lines of *text* that contain at least *min_words* words."""
    return [line for line in text.splitlines() if len(line.split()) >= min_words]


def main():
    """Visit every URL listed in the input file and save the page text.

    Reads one URL per line from ``--infile``, loads each page with the
    module-level ``driver``, takes the text of the ``<body>`` element,
    drops every non-ASCII character and writes to ``--outfile`` only the
    lines that have more than five whitespace-separated words.

    The browser session is always shut down, even when reading the input,
    loading a page or writing the output fails.
    """
    try:
        args = parse_args()

        with open(args.infile) as url_file:
            # Blank lines are skipped: an empty URL makes driver.get() fail.
            urls = [line.strip() for line in url_file if line.strip()]

        with open(args.outfile, "w") as out:
            for url in urls:
                driver.get(url)
                body_text = driver.find_element_by_tag_name('body').text
                ascii_text = body_text.encode('ascii', 'ignore').decode('ascii')
                # Filter page by page so the last line of one page is never
                # glued to the first line of the next before counting words.
                out.writelines(line + "\n" for line in _content_lines(ascii_text))
    finally:
        # quit() ends the whole WebDriver session; close() only closes the
        # current window and can leave the driver process running.
        driver.quit()

# Run the scrape only when executed as a script, not when imported.
if __name__ == "__main__":
    main()