Python: search-google.py
Revision as of 06:24, 30 January 2017 by Onnowpurbo (talk | contribs) (Created page with " #!/usr/bin/env python2 # -*- coding: utf8 -*- import sys import time import random import argparse from selenium import webdriver from selenium.webdriver.support.u...")
#!/usr/bin/env python2
# -*- coding: utf8 -*-

import sys
import time
import random
import argparse

from selenium import webdriver
from selenium.webdriver.support.ui import Select, WebDriverWait
from selenium.common.exceptions import NoSuchFrameException
from selenium.webdriver.common.keys import Keys

# If this script no longer fetches any results check the XPath

# Browser identification strings that get_ua() picks from at random.
# Kept at module level so the table is built once rather than on every call.
_USER_AGENTS = (
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/537.75.14',
    'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20100101 Firefox/29.0',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:28.0) Gecko/20100101 Firefox/28.0',
)


def parse_args():
    """Parse the command-line options.

    Returns:
        An argparse.Namespace with two attributes:
          search -- the search term given with -s/--search, or None when
                    the option is omitted.
          pages  -- the value given with -p/--pages. No type conversion is
                    applied, so this is a string (default '1'); convert it
                    with int() before doing arithmetic on it.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--search', help='Enter the search term')
    parser.add_argument('-p', '--pages', default='1',
                        help='Enter how many pages to scrape (1 page = 100 results)')
    return parser.parse_args()


def start_browser():
    """Start a Firefox WebDriver session and return the driver.

    The driver is configured with a 10 second implicit wait, so element
    lookups keep polling for up to that long before giving up.
    """
    br = webdriver.Firefox()
    br.implicitly_wait(10)
    return br


def get_ua():
    """Return one user-agent string chosen at random from _USER_AGENTS."""
    return random.choice(_USER_AGENTS)


# NOTE(review): scrape_results() opens at this point of the original text, but
# its body lies beyond the excerpt that was reformatted here. The opening is
# kept verbatim below and still has to be rejoined with the lines that follow.
# def scrape_results(br): # Xpath will find a subnode of h3, a[@href] specifies that we only want <a> nodes with
# any href attribute that are subnodes of