Python: Selenium Login to Twitter
Jump to navigation
Jump to search
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""Log in to Twitter with Selenium + Firefox, structured as a unittest case.

Improvements over the original one-liner:
* ``tearDown`` actually quits the driver (the original commented out the
  close call and only printed "close?", leaking a Firefox process per run),
* the ``WebDriverWait`` is used to wait for the submit button instead of
  being created and ignored,
* ``print`` is called as a function so the script runs on Python 2 and 3,
* imports that this script never references (os, sys, codecs, pprint, Keys)
  are dropped.
"""
import logging
import unittest

from selenium import webdriver
import selenium.webdriver.support.ui as ui

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)


class PythonOrgSearch(unittest.TestCase):
    """Single test that fills in the Twitter login form and submits it."""

    def setUp(self):
        # A fresh browser instance per test.
        self.driver = webdriver.Firefox()

    def test_search_in_python_org(self):
        driver = self.driver
        driver.get("https://twitter.com/")
        driver.maximize_window()
        # The login form fields are located by Twitter's JS hook class names.
        # NOTE(review): these selectors match Twitter's markup at the time of
        # writing — verify against the live page before relying on them.
        username = driver.find_element_by_class_name("js-username-field")
        password = driver.find_element_by_class_name("js-password-field")
        username.send_keys("myusername")
        password.send_keys("mypassword")
        # Wait (up to 5 s) for the submit button to be findable, then click.
        wait = ui.WebDriverWait(driver, 5)
        wait.until(
            lambda d: d.find_element_by_css_selector("button.submit.btn.primary-btn")
        ).click()

    def tearDown(self):
        # quit() (not close()) shuts down the whole browser process so no
        # Firefox instance is leaked between test runs.
        self.driver.quit()


if __name__ == "__main__":
    unittest.main()
Recipe 2
Log on to Twitter with Firefox (headlessly) and scrape followers from a user profile:
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""Log on to Twitter with (optionally headless) Firefox and scrape the
follower handles from a user's followers page into ``followers.dat``.

Fixes over the original one-liner:
* the scrape loop terminates once scrolling stops revealing new handles
  (the original ``while True:`` never ended, so ``browser.quit()`` was
  unreachable and the output file was rewritten forever),
* the output file is written via a ``with`` block so it is closed even on
  error,
* the virtual display is stopped when the crawl finishes,
* ``print`` is called as a function so the script runs on Python 2 and 3.
"""
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from pyvirtualdisplay import Display


def correct_url(url):
    """Return *url* with an ``http://`` scheme prepended if none is present."""
    if not url.startswith(("http://", "https://")):
        url = "http://" + url
    return url


def scrollDown(browser, numberOfScrollDowns):
    """Send PAGE_DOWN to the page body ``numberOfScrollDowns + 1`` times.

    Scrolling triggers Twitter's infinite-scroll loading of more followers.
    Progress is reported every 10 scrolls.  Returns the (same) browser.
    """
    body = browser.find_element_by_tag_name("body")
    while numberOfScrollDowns >= 0:
        body.send_keys(Keys.PAGE_DOWN)
        numberOfScrollDowns -= 1
        if numberOfScrollDowns % 10 == 0:
            print('remaining scroll downs ... {}'.format(numberOfScrollDowns))
    return browser


def crawl_url(url, run_headless=True):
    """Log in to Twitter, scroll *url* until no new follower handles appear,
    and write every handle seen to ``followers.dat`` (one per line).

    :param url: followers-page URL; a missing scheme is fixed up.
    :param run_headless: run Firefox inside a virtual X display.
    """
    display = None
    if run_headless:
        display = Display(visible=0, size=(1024, 768))
        display.start()
    url = correct_url(url)
    browser = webdriver.Firefox()
    browser.get(url)
    # Log in first; the followers list is only visible when authenticated.
    # NOTE(review): selectors match Twitter's markup at time of writing.
    username = browser.find_element_by_class_name("js-username-field")
    password = browser.find_element_by_class_name("js-password-field")
    username.send_keys("username")
    password.send_keys("password")
    browser.find_element_by_css_selector("button.submit.btn.primary-btn").click()
    target_set = set()
    while True:
        browser = scrollDown(browser, 500)
        all_targets = browser.find_elements_by_class_name("u-linkComplex-target")
        new_targets = {a_target.text for a_target in all_targets}
        # Stop once a full scroll pass reveals nothing we have not seen:
        # the original looped forever and never reached browser.quit().
        if new_targets <= target_set:
            break
        target_set |= new_targets
        with open('followers.dat', 'w') as fo:
            for target in target_set:
                fo.write(target + '\n')
        print('wrote {} to file'.format(len(target_set)))
    browser.quit()
    if display is not None:
        display.stop()


if __name__ == '__main__':
    url = "https://twitter.com/username/followers/"
    crawl_url(url)
References
[TidbitsOfProgramming] Crawling Websites that Loads Contents