Python: Selenium Login to Twitter



#!/usr/bin/env python
# -*- coding: UTF-8 -*-

import os
import sys
import codecs
import pprint

import unittest
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import selenium.webdriver.support.ui as ui

import logging
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

class TwitterLogin(unittest.TestCase):

 def setUp(self):
  self.driver = webdriver.Firefox()

 def test_login_to_twitter(self):
  driver = self.driver
  driver.get("https://twitter.com/")

  driver.maximize_window()

  # These selectors use the Selenium 3 find_element_by_* API and the class
  # names of the old Twitter login form; both may have changed since.
  username = driver.find_element_by_class_name("js-username-field")
  password = driver.find_element_by_class_name("js-password-field")

  username.send_keys("myusername")
  password.send_keys("mypassword")

  # Wait up to 5 seconds for the login button, then click it.
  wait = ui.WebDriverWait(driver, 5)
  wait.until(lambda d: d.find_element_by_css_selector("button.submit.btn.primary-btn")).click()
 

 def tearDown(self):
  #self.driver.close()
  print("close?")

if __name__ == "__main__":
 unittest.main()
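
The find_element_by_* helpers used above were removed in Selenium 4. Below is a minimal sketch of the same login steps written with Selenium 4 By locators; it reuses the class names and button selector from the recipe above, which may no longer match the current Twitter login page:

#!/usr/bin/env python
# -*- coding: UTF-8 -*-

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait

driver = webdriver.Firefox()
driver.get("https://twitter.com/")
driver.maximize_window()

# Same (possibly outdated) selectors as above, written with By locators.
username = driver.find_element(By.CLASS_NAME, "js-username-field")
password = driver.find_element(By.CLASS_NAME, "js-password-field")

username.send_keys("myusername")
password.send_keys("mypassword")

# Wait up to 5 seconds for the login button to appear, then click it.
WebDriverWait(driver, 5).until(
    lambda d: d.find_element(By.CSS_SELECTOR, "button.submit.btn.primary-btn")
).click()

driver.quit()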



Recipe 2

Log on to Twitter with Firefox (headlessly) and scrape followers from a user profile:

#!/usr/bin/env python
# -*- coding: UTF-8 -*-

import time

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from pyvirtualdisplay import Display

def correct_url(url): 
 if not url.startswith("http://") and not url.startswith("https://"):
  url = "http://" + url
 return url

def scrollDown(browser, numberOfScrollDowns):
 body = browser.find_element_by_tag_name("body")
 while numberOfScrollDowns >= 0:
  body.send_keys(Keys.PAGE_DOWN)
  # Give the page a moment to load more results before the next scroll.
  time.sleep(0.2)
  numberOfScrollDowns -= 1
  if numberOfScrollDowns % 10 == 0:
   print('remaining scroll downs ... {}'.format(numberOfScrollDowns))
 return browser

def crawl_url(url, run_headless=True):
 if run_headless:
  display = Display(visible=0, size=(1024, 768))
  display.start()

 url = correct_url(url)
 browser = webdriver.Firefox()
 browser.get(url)

 # Log in first; the followers page is only shown to authenticated users.
 username = browser.find_element_by_class_name("js-username-field")
 password = browser.find_element_by_class_name("js-password-field")

 username.send_keys("username")
 password.send_keys("password")

 browser.find_element_by_css_selector("button.submit.btn.primary-btn").click()

 target_set = set()
 while True:
  # Scroll to trigger the infinite-scroll loader, then harvest the handles.
  browser = scrollDown(browser, 500)
  previous_count = len(target_set)

  all_targets = browser.find_elements_by_class_name("u-linkComplex-target")
  for a_target in all_targets:
   target_set.add(a_target.text)

  fo = open('followers.dat', 'w')
  for target in target_set:
   fo.write(target + '\n')
  fo.close()

  print('wrote {} to file'.format(len(target_set)))

  # Stop once scrolling no longer reveals new followers.
  if len(target_set) == previous_count:
   break

 browser.quit()
 if run_headless:
  display.stop()

if __name__=='__main__':
 url = "https://twitter.com/username/followers/"
 crawl_url(url)
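
pyvirtualdisplay needs an X virtual framebuffer (Xvfb) installed on the machine. Newer Firefox and geckodriver builds also support a native headless mode, so the virtual display can be skipped entirely. A minimal sketch, assuming Selenium's FirefoxOptions; the crawl itself would stay the same:

from selenium import webdriver
from selenium.webdriver.firefox.options import Options

# Run Firefox in its native headless mode instead of a virtual X display.
options = Options()
options.add_argument("-headless")

browser = webdriver.Firefox(options=options)
browser.get("https://twitter.com/username/followers/")
print(browser.title)
browser.quit()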



References

   [TidbitsOfProgramming] Crawling Websites that Loads Contents


