安装selenium

selenium可以直接用pip安装。

python -m pip install selenium

安装chromedriver

首先查看Chrome版本:在浏览器地址栏中输入chrome://version/ 即可查看。下载的chromedriver版本一定要与Chrome的版本一致。有两个下载地址:

1、http://chromedriver.storage.googleapis.com/index.html
2、https://npm.taobao.org/mirrors/chromedriver/

配置

解压压缩包,找到chromedriver.exe复制到chrome的安装目录(其实也可以随便放一个文件夹)。

  1. 复制chromedriver.exe文件的路径并加入到电脑的环境变量中去,右击我的电脑,属性,高级设置,环境变量,
  2. 进入环境变量编辑界面,添加到用户变量即可,双击PATH, 将文件位置C:\Program Files (x86)\Google\Chrome\Application\添加到后面,与前面的用分号分隔。

不配置环境变量也可以,需要在脚本中指定

chrome_driver = r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe'
browser = webdriver.Chrome(executable_path = chrome_driver)

验证是否安装成功

完成后在cmd下输入chromedriver验证是否安装成功.

测试

from selenium import webdriver
import time

def main():
    """Open Baidu in Chrome, keep the page up for five seconds, then quit.

    Relies on chromedriver being discoverable by ``webdriver.Chrome()``
    without an explicit path.
    """
    # To point at a specific chromedriver instead, use a raw string so the
    # backslashes in the Windows path are not treated as escapes:
    #   chrome_driver = r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe'
    #   browser = webdriver.Chrome(executable_path=chrome_driver)
    browser = webdriver.Chrome()
    try:
        browser.get('https://www.baidu.com')
        time.sleep(5)
    finally:
        # Quit even when navigation raises, so the driver session is not leaked.
        browser.quit()
# NOTE: the bare string below is an expression statement, not a docstring.
# It records how to obtain a locator (CSS selector / XPath) from Chrome.
'''
open url with chrome and right-click an element then select "Inspect"
go to "Elements" window and right-click the selected source code
then select "Copy", "Copy selector" or "Copy XPath" or "Copy full XPath"
'''
# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    main()

selenium元素定位技巧

chrome打开页面右击要定位的元素,选择Inspect(检视),会自动打开并定位到Elements(元素)页面源代码, 右击元素源代码可以选择复制,复制css选择器或XPath或full XPath等;

另外,如有页面跳转,请注意增加一定的延时或智能等待;

selenium元素定位方法分析

以某度为例 输入框

<input id="kw" name="wd" class="s_ipt" value="" maxlength="255" autocomplete="off">

搜索按钮

<span class="bg s_btn_wr"><input type="submit" id="su" value="百度一下" class="bg s_btn btn_h btnhover"></span>

find_element_by_xpath("xpath")

准确定位的首选方式

可以直接在chrome中复制XPath,如 //*[@id="kw"]

find_element_by_xpath(r'//*[@id="kw"]')

一般不需要使用full XPath如 /html/body/div[1]/div[1]/div[5]/div/div/form/span[1]/input

find_element_by_css_selector

单属性查找

1.用 标签名 定位查找

driver.find_element_by_css_selector("input")

2.用 id 属性定位查找

driver.find_element_by_css_selector("#kw")

3.用 class 属性定位查找

driver.find_element_by_css_selector(".s_ipt")

4.其他属性定位

driver.find_element_by_css_selector('[name="wd"]')

组合属性查找

1. 标签名及id属性值组合定位

driver.find_element_by_css_selector("input#kw")

2. 标签名及class属性值组合定位

driver.find_element_by_css_selector("input.s_ipt")

3. 标签名及属性(含属性值)组合定位

driver.find_element_by_css_selector('input[name="wd"]')

4. 标签及属性名组合定位

driver.find_element_by_css_selector("input[name]")

5. 多个属性组合定位

driver.find_element_by_css_selector('[class="s_ipt"][name="wd"]')

模糊匹配

1. class拥有多个属性值,只匹配其中一个时

driver.find_element_by_css_selector('input[class ~= "bg"]')

2. 匹配以字符串开头的属性值

driver.find_element_by_css_selector('input[class ^= "bg"]')

3. 匹配以字符串结尾的属性值

driver.find_element_by_css_selector('input[class $= "btnhover"]')

4. 匹配等于指定值、或以“指定值-”(连字符)开头的属性值

driver.find_element_by_css_selector('input[class |= "s"]')

层级匹配

1.直接子元素层级关系,input为span的直接子元素(用 > 表示)

driver.find_element_by_css_selector(".bg.s_btn_wr > input") #class为bg和s_btn_wr 的span标签的子元素input

2.只要元素包含在父元素里面,不一定是直接子元素,用空格隔开,如图一所示,form 下面的 span 里面的input

driver.find_element_by_css_selector("#form input") #id是form的form标签里面的input标签

3.多级关系

driver.find_element_by_css_selector("#form > span > input") #id是form的form标签下面的span标签的下面的input标签

其他

  • p:nth-child(1) # 选择第一个p标签,还可写为
  • p:first-child
  • p:nth-last-child(1) # 选择倒数第一个p标签(要保证最后一个标签是p)
  • p:only-child # 唯一的p标签

find_element_by_id("id_value")

直接填入元素的id即可 driver.find_element_by_id("kw")

find_element_by_name("name_value")

填入元素的name值 driver.find_element_by_name("wd")

find_element_by_link_text("text_value")

只对a标签(超链接)有效,按链接的可见文本精确匹配

<a class="c-color-gray2" href="http://ir.baidu.com" target="_blank">About Baidu</a>

find_element_by_link_text("About Baidu")

 

打开并登录某搜索引擎示例

#! /usr/bin/env python
# coding=utf-8

from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.action_chains import ActionChains

import random
import time

# Python 2 compatibility: bind ``input`` to ``raw_input`` where it exists.
# On Python 3 ``raw_input`` is undefined and the built-in ``input`` is kept.
try:
    input = raw_input
except NameError:
    pass

class MyBrowser(object):
    """Small convenience wrapper around a Selenium Chrome ``webdriver``.

    The constructor starts Chrome and loads ``url``. The other methods wrap
    common interactions: waiting, clicking, alerts, drag gestures, hovering
    and element lookup. ``wait_until_with_timeout`` and ``find_element``
    print a message and return ``None`` on failure instead of raising.
    """

    # Short names accepted by find_element() mapped to Selenium locator
    # strategy strings (the values of selenium.webdriver.common.by.By).
    _LOCATOR_STRATEGIES = {
        "id": "id",
        "name": "name",
        "xpath": "xpath",
        "classname": "class name",
        "css": "css selector",
        "link_text": "link text",
    }

    def __init__(self, url, exe_driver=''):
        """Start Chrome and navigate to ``url``.

        Args:
            url: Address to load once the browser has started.
            exe_driver: Path to the chromedriver executable. When empty,
                ``webdriver.Chrome()`` is called without a path.
        """
        print("url: %s" % url)
        print("driver: %s" % exe_driver)
        if exe_driver:
            # NOTE(review): newer Selenium 4 releases replace the
            # ``executable_path`` keyword with a Service object -- confirm
            # against the Selenium version this script targets.
            self.driver = webdriver.Chrome(executable_path=exe_driver)
        else:
            self.driver = webdriver.Chrome()
        # Per the original author's note, both timeouts must be set for the
        # limit to take effect.
        self.driver.set_page_load_timeout(5)
        self.driver.set_script_timeout(5)
        try:
            self.driver.get(url)
        except Exception as reason:
            # Any failure (presumably most often the page-load timeout) is
            # reported, then the pending load is stopped so the session can
            # still be used.
            print("error: %s" % reason)
            self.driver.execute_script('window.stop()')

    def quit(self):
        """Quit the underlying driver."""
        self.driver.quit()

    def wait_until_with_timeout(self, condition, timeout=5):
        """Wait until ``condition`` holds or ``timeout`` seconds elapse.

        Args:
            condition: Callable accepted by ``WebDriverWait.until``.
            timeout: Maximum number of seconds to wait.

        Returns:
            Whatever ``WebDriverWait.until`` returns on success, or ``None``
            if the wait raised (the error is printed, not propagated).
        """
        try:
            return WebDriverWait(self.driver, timeout).until(condition)
        except Exception as reason:
            print("WebDriverWait error: %s" % reason)
            return None

    def random_sleep(self):
        """Sleep for a random 0.5 to 1.0 seconds, in 0.1 second steps."""
        time.sleep(random.randint(5, 10)/10.0)

    def switch_to_active_window(self):
        """Return ``driver.switch_to.active_element``.

        The value used to be discarded, which made the call useless to
        callers; it is now returned.
        """
        return self.driver.switch_to.active_element

    def click(self, element):
        """Click ``element``; do nothing when it is falsy (e.g. ``None``)."""
        if element:
            element.click()

    def alert_click(self, accept=True):
        """Wait for a JavaScript alert and accept or dismiss it.

        Args:
            accept: ``True`` to accept (OK), ``False`` to dismiss (Cancel).

        Returns nothing. If no alert shows up within the default wait the
        method returns without touching ``switch_to.alert``.
        """
        if not self.wait_until_with_timeout(expected_conditions.alert_is_present()):
            # The wait already printed the reason; there is nothing to click.
            return
        alert = self.driver.switch_to.alert  # switch to the pop-up dialog
        print("alert: %s, accept:%s" % (alert.text, accept))
        if accept:
            alert.accept()
        else:
            alert.dismiss()

    def _actions(self):
        """Return a fresh ``ActionChains`` bound to this browser's driver."""
        return ActionChains(self.driver)

    def swipe_by_offset(self, start, dx, dy):
        """Press on ``start``, drag the pointer by ``(dx, dy)`` and release.

        Args:
            start: Element on which the drag begins; falsy means no-op.
            dx: Horizontal offset relative to the pointer's position.
            dy: Vertical offset relative to the pointer's position.
        """
        if not start:
            return
        print("swipe_screen %s to: %d,%d" % (start.text, dx, dy))
        # Press and hold on the starting element.
        self._actions().move_to_element(start).click_and_hold().perform()
        time.sleep(0.2)
        # Drag by the requested offset, then let go of the mouse button.
        self._actions().move_by_offset(dx, dy).perform()
        self._actions().release().perform()

    def swipe_screen(self, start, end):
        """Drag from element ``start`` to element ``end``.

        The drag distance is the difference between the two elements'
        ``location`` values, since ``move_by_offset`` is relative to the
        current pointer position. Does nothing when either element is falsy.

        Args:
            start: Element on which the drag begins.
            end: Element whose location marks where the drag ends.
        """
        if not start or not end:
            return
        dx = end.location['x'] - start.location['x']
        dy = end.location['y'] - start.location['y']
        self.swipe_by_offset(start, dx, dy)

    def hover(self, by, value):
        """Find an element and move the mouse pointer onto it.

        Args:
            by: Locator kind, as accepted by :meth:`find_element`.
            value: Locator value.

        Returns:
            The element that was found, or ``None`` if the lookup failed.
        """
        element = self.find_element(by, value)
        if element:
            self._actions().move_to_element(element).perform()
        return element

    def find_element(self, by, value):
        """Look up a single element.

        To obtain a locator: open the page in Chrome, right-click the
        element and choose "Inspect", then right-click the highlighted
        source in the "Elements" panel and use "Copy" -> "Copy selector",
        "Copy XPath" or "Copy full XPath".

        Args:
            by: One of ``"id"``, ``"name"``, ``"xpath"``, ``"classname"``,
                ``"css"`` or ``"link_text"`` (link text only matches
                ``<a>`` elements).
            value: Locator value for the chosen kind.

        Returns:
            The element found, or ``None`` when ``by`` is unknown or the
            driver's lookup raised (the reason is printed).
        """
        print("find element by:%s, value:%s" % (by, value))
        strategy = self._LOCATOR_STRATEGIES.get(by)
        if strategy is None:
            print("unknown method:%s, please check" % by)
            return None
        try:
            # The generic find_element(by, value) call replaces the
            # per-strategy find_element_by_* helpers.
            return self.driver.find_element(strategy, value)
        except Exception as reason:
            print("find element error: %s" % reason)
            return None

def open(chrome_driver=r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe'):
    """Start a ``MyBrowser`` on the Baidu home page and wait for its title.

    NOTE: this function shadows the built-in ``open`` inside this module.
    The name is kept because the script entry point calls it.

    Args:
        chrome_driver: Path to the chromedriver executable. The default is
            a raw string so the backslashes in the Windows path are not
            parsed as (invalid) escape sequences.

    Returns:
        The ``MyBrowser`` instance, whether or not the title wait succeeded.
    """
    url = 'https://www.baidu.com'

    # Create the browser, then give the page up to the default timeout to
    # show the expected title.
    browser = MyBrowser(url, chrome_driver)
    condition = expected_conditions.title_is("百度一下,你就知道")
    browser.wait_until_with_timeout(condition)

    return browser

def alert_test(chrome_driver=r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe',
               url=r'D:\selenium\alert.txt.html'):
    """Exercise ``MyBrowser.alert_click`` against a local test page.

    Clicks the three buttons of the page in turn: the alert is accepted,
    the confirm dialog is accepted and the prompt is dismissed. The browser
    is quit at the end, even if a step raises.

    Args:
        chrome_driver: Path to the chromedriver executable.
        url: Location of the test page. The page is expected to contain:

            <html>
            <head>
                <title>alert test</title>
                <script>
                    function show(cmd, value) {
                        var id = "show_result";
                        var inpObj = document.getElementById(id);
                        document.getElementById(id).innerHTML = cmd + ": " + value;
                    }
                    function test(cmd, str) {
                        if (cmd == "alert") {
                            ack = alert(str);
                            return;
                        } else if (cmd == "confirm"){
                            ack = confirm(str);
                        } else {
                            ack = prompt(str);
                        }
                        show(cmd, ack);
                    }
                </script>
            </head>
            <body>
                <h1>alert test</h1>
                <button id="btn1" onclick="test('alert', 'hello alert')">alert</button>
                <button id="btn2" onclick="test('confirm', 'hello confirm')">confirm</button>
                <button id="btn3" onclick="test('prompt', 'hello prompt')">prompt</button>
                <p id="show_result"></p>
            </body>
            </html>
    """
    browser = MyBrowser(url, chrome_driver)
    try:
        condition = expected_conditions.title_is("alert test")
        browser.wait_until_with_timeout(condition)

        # (button xpath, accept?) for the alert, confirm and prompt dialogs.
        steps = (
            ('//*[@id="btn1"]', True),
            ('//*[@id="btn2"]', True),
            ('//*[@id="btn3"]', False),
        )
        last = len(steps) - 1
        for index, (xpath, accept) in enumerate(steps):
            browser.click(browser.find_element("xpath", xpath))
            browser.alert_click(accept)
            # Short pause between dialogs; longer one at the end so the
            # final result stays visible for a moment.
            time.sleep(5 if index == last else 1)
    finally:
        browser.quit()

def login(browser, username='test', password='123456'):
    """Drive the login dialog of the page currently open in ``browser``.

    Steps: open the login dialog, switch to username/password login, fill
    in both fields and submit. A step whose element cannot be found prints
    a message and stops the flow, where the old code dereferenced ``None``.

    Args:
        browser: Object offering ``find_element``, ``click`` and
            ``wait_until_with_timeout`` (a ``MyBrowser``).
        username: Account name typed into the user-name field.
        password: Password typed into the password field.

    Returns:
        ``True`` when every step ran, ``False`` when an element was missing.
    """
    def locate(xpath, what, wait=False):
        # Find one element by XPath, optionally waiting for it to become
        # visible first; report (but do not raise) when it is missing.
        locator = ('xpath', xpath)
        if wait:
            browser.wait_until_with_timeout(
                expected_conditions.visibility_of_element_located(locator))
        element = browser.find_element(*locator)
        if not element:
            print("login: %s not found: %s" % (what, xpath))
        return element

    # Entry link that opens the login dialog (selector: #u1 > a).
    btn_login = browser.find_element('css', '#u1 > a')
    browser.click(btn_login)
    time.sleep(1)

    # Switch the dialog to username/password login.
    btn_login_by_username = locate(
        r'//*[@id="TANGRAM__PSP_11__footerULoginBtn"]',
        'username-login button', wait=True)
    if not btn_login_by_username:
        return False
    print(btn_login_by_username.text)
    btn_login_by_username.click()

    # User-name field (shown only after the switch, hence the wait).
    input_username = locate(
        r'//*[@id="TANGRAM__PSP_11__userName"]', 'username input', wait=True)
    if not input_username:
        return False
    input_username.send_keys(username)

    # Password field.
    input_password = locate(
        r'//*[@id="TANGRAM__PSP_11__password"]', 'password input')
    if not input_password:
        return False
    input_password.send_keys(password)

    # Submit button.
    btn_submit = locate(
        r'//*[@id="TANGRAM__PSP_11__submit"]', 'submit button')
    if not btn_submit:
        return False
    print(btn_submit.text)
    btn_submit.click()
    return True

# Script entry point: open the search page, attempt the login flow, keep the
# result on screen for 20 seconds, then quit the browser.
if __name__ == '__main__':
    browser = open()
    try:
        login(browser)
        # alert_test() can be called here instead to exercise the alert helpers.
        time.sleep(20)
    finally:
        # Quit the driver so the session is not left running after the script.
        browser.quit()