Python: search-google.py
Revision as of 06:24, 30 January 2017 by Onnowpurbo (talk | contribs) (Created page with " #!/usr/bin/env python2 # -*- coding: utf8 -*- import sys import time import random import argparse from selenium import webdriver from selenium.webdriver.support.u...")
#!/usr/bin/env python2
# -*- coding: utf8 -*-
import sys
import time
import random
import argparse
from selenium import webdriver
from selenium.webdriver.support.ui import Select, WebDriverWait
from selenium.common.exceptions import NoSuchFrameException
from selenium.webdriver.common.keys import Keys
# If this script no longer fetches any results, check the XPath used in scrape_results().
def parse_args():
    """Parse the command-line options of the script.

    Returns:
        argparse.Namespace: has a ``search`` attribute (the search term, or
        ``None`` when the option is omitted) and a ``pages`` attribute (how
        many result pages to scrape, as a string; defaults to ``'1'``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--search', help='Enter the search term')
    # No ``type=`` is given, so the value (like its default) is a string.
    parser.add_argument(
        '-p', '--pages', default='1',
        help='Enter how many pages to scrape (1 page = 100 results)')
    return parser.parse_args()
def start_browser():
    """Start a Firefox WebDriver session and return it.

    Returns:
        The ``webdriver.Firefox`` instance, with its implicit wait set to
        10 seconds.
    """
    br = webdriver.Firefox()
    # Implicit wait: element lookups keep retrying for up to 10 seconds
    # before giving up, which gives slow pages time to render.
    br.implicitly_wait(10)
    return br
def get_ua():
    """Return a User-Agent string picked at random from a fixed list.

    Returns:
        str: one of six hard-coded desktop browser User-Agent strings
        (Chrome, Safari and Firefox on Windows or Mac OS X).
    """
    # Tuple rather than list: the candidates are fixed and never mutated.
    ua_list = (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/537.75.14',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20100101 Firefox/29.0',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:28.0) Gecko/20100101 Firefox/28.0',
    )
    return random.choice(ua_list)
def scrape_results(br):
# Xpath will find a subnode of h3, a[@href] specifies that we only want <a> nodes with
# any href attribute that are subnodes of