selenium应用指南

1.安装selenium,以管理员身份运行cmd,输入以下命令

pip install selenium


2.下载chromedriver(版本需与本机Chrome浏览器版本对应),放到python.exe同级目录中

selenium设置请求头

selenium设置phantomjs请求头:

from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

# Copy the default PhantomJS capabilities and override the User-Agent header
# that PhantomJS sends with its page requests.
# NOTE(review): PhantomJS support was deprecated and later removed from
# Selenium -- confirm the installed Selenium version still provides it.
dcap = dict(DesiredCapabilities.PHANTOMJS)
dcap["phantomjs.page.settings.userAgent"] = (
    "Mozilla/5.0 (Linux; Android 5.1.1; Nexus 6 Build/LYZ28E) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.23 Mobile Safari/537.36"
)

driver = webdriver.PhantomJS(desired_capabilities=dcap)
try:
    # Load a page that reports the received request, then save a screenshot
    # so the effective User-Agent can be checked by eye.
    driver.get("https://httpbin.org/get?show_env=1")
    driver.get_screenshot_as_file('01.png')
finally:
    # Always shut the browser down, even if loading or the screenshot fails;
    # otherwise the PhantomJS process is left running.
    driver.quit()


phantomJS详细配置问题

隐式等待不一定靠谱,所以尽量使用python自身的函数接口

import random

from selenium import webdriver
# Capability presets used to configure the browser
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

# Placeholder pool of User-Agent strings -- replace or extend with the
# headers you want to rotate through.
USER_AGENTS = [
    "Mozilla/5.0 (Linux; Android 5.1.1; Nexus 6 Build/LYZ28E) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.23 Mobile Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
]

# Placeholder: path to the phantomjs executable. A bare name presumably
# relies on the executable being found on PATH -- set the real path here.
phantomjs_driver_path = "phantomjs"

dcap = dict(DesiredCapabilities.PHANTOMJS)

# Pick a random User-Agent from USER_AGENTS to disguise the browser
dcap["phantomjs.page.settings.userAgent"] = random.choice(USER_AGENTS)

# Do not load images; crawling pages is much faster without them
dcap["phantomjs.page.settings.loadImages"] = False

# Route traffic through a proxy
service_args = ['--proxy=127.0.0.1:9999', '--proxy-type=socks5']

# Start PhantomJS with the capabilities and proxy arguments configured above
driver = webdriver.PhantomJS(
    phantomjs_driver_path,
    desired_capabilities=dcap,
    service_args=service_args,
)

# Give up on a page load after 10 seconds. This plays the role of the
# timeout option of requests.get(); driver.get() has no timeout argument.
driver.set_page_load_timeout(10)

# Limit script execution to 10 seconds as well
driver.set_script_timeout(10)


携带cookie

from selenium import webdriver

browser = webdriver.Chrome()
try:
    url = "https://www.baidu.com/"

    # Open the site first, then replace its cookies with a custom one.
    browser.get(url)
    browser.delete_all_cookies()
    browser.add_cookie({'name': 'ABC', 'value': 'DEF'})

    # Block until Enter is pressed so the result can be inspected in the
    # open browser window.
    input("查看效果")
finally:
    # Always close the browser, even if one of the calls above fails;
    # otherwise the Chrome/chromedriver processes are left running.
    browser.quit()


超时设置

from selenium import webdriver

# One limit, in seconds, shared by both timeout settings below.
timeout_seconds = 10

d = webdriver.PhantomJS()

# Per the original note: the timeout only takes effect when BOTH the
# page-load timeout and the script timeout are set.
d.set_page_load_timeout(timeout_seconds)
d.set_script_timeout(timeout_seconds)