Python: Selenium Login ke Twitter
Jump to navigation
Jump to search
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import os
import sys
import codecs
import pprint
import unittest
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import selenium.webdriver.support.ui as ui
import logging
# Configure the root logger so records of level DEBUG and above are emitted.
logging.basicConfig(level=logging.DEBUG)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
class PythonOrgSearch(unittest.TestCase):
    """Drive Firefox through the Twitter login form.

    The test performs no assertions; it fails only if one of the Selenium
    calls raises (page not reachable, login fields or button not found).
    """

    def setUp(self):
        """Start a fresh Firefox session for the test."""
        self.driver = webdriver.Firefox()

    def test_search_in_python_org(self):
        """Open twitter.com, type the credentials and submit the form."""
        driver = self.driver
        driver.get("https://twitter.com/")
        driver.maximize_window()

        # Poll for up to 5 seconds instead of looking the fields up once:
        # WebDriverWait.until() retries while the lookup raises
        # NoSuchElementException, so a slowly rendering page does not fail
        # the test immediately.
        wait = ui.WebDriverWait(driver, 5)
        username = wait.until(
            lambda d: d.find_element_by_class_name("js-username-field"))
        password = wait.until(
            lambda d: d.find_element_by_class_name("js-password-field"))

        # Placeholder credentials; replace with a real account before use.
        username.send_keys("myusername")
        password.send_keys("mypassword")
        driver.find_element_by_css_selector(
            "button.submit.btn.primary-btn").click()

    def tearDown(self):
        """End the browser session so no Firefox process is left behind."""
        print("close?")
        # quit() closes every window and shuts the driver down.
        self.driver.quit()
# Run the test case only when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
Recipe 2
Log on to Twitter with Firefox (headlessly) and scrape the followers from a user profile:
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from pyvirtualdisplay import Display
def correct_url(url):
    """Return *url* with an ``http://`` scheme prepended when it has none.

    A URL that already starts with ``http://`` or ``https://`` is returned
    unchanged. The scheme is matched case-insensitively, so ``HTTPS://host``
    is recognised as already having a scheme.

    Args:
        url: The address to normalise, as a string.

    Returns:
        The original string, or ``"http://" + url`` if no scheme was found.
    """
    # Compare on a lower-cased copy only; the returned URL keeps its case.
    if not url.lower().startswith(("http://", "https://")):
        url = "http://" + url
    return url
def scrollDown(browser, numberOfScrollDowns):
    """Press PAGE_DOWN on the page body *numberOfScrollDowns* times.

    Args:
        browser: A Selenium WebDriver with a loaded page.
        numberOfScrollDowns: How many PAGE_DOWN key presses to send. Zero or
            a negative value sends none.

    Returns:
        The same *browser* object, so calls can be chained.
    """
    body = browser.find_element_by_tag_name("body")
    # Count down from N-1 to 0 so exactly N key presses are sent.
    for remaining in range(numberOfScrollDowns - 1, -1, -1):
        body.send_keys(Keys.PAGE_DOWN)
        # Report progress every tenth press.
        if remaining % 10 == 0:
            print('remaining scroll downs ... {}'.format(remaining))
    return browser
def crawl_url(url, run_headless=True):
    """Log in to Twitter and dump the link targets found on *url* to a file.

    The page is scrolled in passes of 500 PAGE_DOWN presses. After every
    pass, the text of each element with class ``u-linkComplex-target``
    (presumably the follower handles -- verify against the page markup) is
    collected, de-duplicated and written to ``followers.dat``, one per line.
    Crawling stops once a full pass yields no additional entries.

    Args:
        url: Address of the page to crawl; a missing scheme is added by
            ``correct_url``.
        run_headless: When true, Firefox runs inside a virtual display
            created with pyvirtualdisplay instead of a visible window.

    Returns:
        None. The result is the ``followers.dat`` file.
    """
    import io  # local import: gives an explicit encoding on Python 2 and 3

    display = None
    if run_headless:
        display = Display(visible=0, size=(1024, 768))
        display.start()

    browser = None
    try:
        browser = webdriver.Firefox()
        browser.get(correct_url(url))

        # Placeholder credentials; replace with a real account before use.
        username = browser.find_element_by_class_name("js-username-field")
        password = browser.find_element_by_class_name("js-password-field")
        username.send_keys("username")
        password.send_keys("password")
        browser.find_element_by_css_selector(
            "button.submit.btn.primary-btn").click()

        previous_count = None
        while True:
            browser = scrollDown(browser, 500)
            all_targets = browser.find_elements_by_class_name(
                "u-linkComplex-target")
            target_set = set(a_target.text for a_target in all_targets)

            # Rewrite the file after every pass so an interrupted crawl
            # still leaves the most recent snapshot on disk.
            with io.open('followers.dat', 'w', encoding='utf-8') as fo:
                for target in target_set:
                    fo.write(target + u'\n')
            print('wrote {} to file'.format(len(target_set)))

            # Heuristic end-of-list check: a whole pass of scrolling that
            # reveals nothing new means the page has stopped loading items.
            if len(target_set) == previous_count:
                break
            previous_count = len(target_set)
    finally:
        # Release the browser and the virtual display even when a Selenium
        # call fails part-way through.
        if browser is not None:
            browser.quit()
        if display is not None:
            display.stop()
# Script entry point: crawl a followers page when run directly.
if __name__ == '__main__':
    # Replace "username" in the path with the account whose followers to list.
    url = "https://twitter.com/username/followers/"
    crawl_url(url)
References
[TidbitsOfProgramming] Crawling Websites that Loads Contents