首先得安装浏览器:
yum install https://dl.google.com/linux/direct/google-chrome-stable_current_x86_64.rpm
查看已安装的 Chrome 版本:
yum list | grep chrome
然后到 https://npm.taobao.org/mirrors/chromedriver 下载与已安装 Chrome 版本相匹配的 chromedriver 驱动
下载完成后记得解压(下文 Java 代码假定解压后的驱动位于 /home/software/chromedriver)
环境准备好之后,下面给出一个完整示例:
Maven 依赖:
<!-- Selenium umbrella artifact; the only Selenium entry with an explicit version. -->
<dependency>
    <groupId>org.seleniumhq.selenium</groupId>
    <artifactId>selenium-java</artifactId>
    <version>3.141.59</version>
</dependency>

<!--
  The four Selenium modules below declare no <version>.
  NOTE(review): presumably resolved through a parent POM / dependencyManagement
  section (e.g. a Spring Boot parent) - confirm they resolve to the same
  release as selenium-java above.
-->
<dependency>
    <groupId>org.seleniumhq.selenium</groupId>
    <artifactId>selenium-chrome-driver</artifactId>
</dependency>
<dependency>
    <groupId>org.seleniumhq.selenium</groupId>
    <artifactId>selenium-remote-driver</artifactId>
</dependency>
<dependency>
    <groupId>org.seleniumhq.selenium</groupId>
    <artifactId>selenium-api</artifactId>
</dependency>
<dependency>
    <groupId>org.seleniumhq.selenium</groupId>
    <artifactId>selenium-support</artifactId>
</dependency>

<!-- HtmlCleaner: used by the Java example to clean the page source and run XPath. -->
<dependency>
    <groupId>net.sourceforge.htmlcleaner</groupId>
    <artifactId>htmlcleaner</artifactId>
    <version>2.9</version>
</dependency>
Java 代码:
/**
 * Starts the Spring application, then loads a Youku ranking page in headless Chrome and
 * prints every node matched by an XPath query against the rendered page source.
 *
 * <p>The browser session is always terminated with {@code driver.quit()}, including when
 * navigation or the wait fails, so that no Chrome / chromedriver process is left running.
 * The HTML is parsed only after the browser has been released.
 *
 * @param args command-line arguments, passed through to {@code SpringApplication.run}
 * @throws Exception if the browser cannot be started, the page cannot be loaded, the wait is
 *     interrupted, or the XPath expression cannot be evaluated
 */
public static void main(String[] args) throws Exception {
    SpringApplication.run(JavapcApplication.class, args);

    // Path where the downloaded chromedriver archive was unpacked.
    System.setProperty(
            "webdriver.chrome.driver",
            "/home/software/chromedriver");

    ChromeOptions chromeOptions = new ChromeOptions();
    // Run without a visible browser window.
    chromeOptions.addArguments("--headless");
    // Works around the "DevToolsActivePort file doesn't exist" startup error.
    chromeOptions.addArguments("--no-sandbox");
    // Browser viewport resolution.
    chromeOptions.addArguments("window-size=1920x3000");
    // Per the original author's note, Google's documentation recommends this flag to avoid a bug.
    chromeOptions.addArguments("--disable-gpu");
    // Skip image loading to speed up page loads.
    chromeOptions.addArguments("blink-settings=imagesEnabled=false");
    // Explicit location of the Chrome binary to launch.
    chromeOptions.setBinary("/usr/bin/google-chrome");

    String pageSource;
    ChromeDriver driver = new ChromeDriver(chromeOptions);
    try {
        driver.get("http://top.youku.com/rank/detail/?m=97&type=1&");
        // Sleep for 1s to give the page's JavaScript time to finish.
        Thread.sleep(1000L);
        // Rendered page source.
        pageSource = driver.getPageSource();
    } finally {
        // Without this the headless Chrome and chromedriver processes outlive the scrape.
        driver.quit();
    }

    // Parse the source and extract the matches with XPath.
    HtmlCleaner hc = new HtmlCleaner();
    TagNode tn = hc.clean(pageSource);
    Object[] objects = tn.evaluateXPath("//div[@class='exp-left']/dl/dd[1]/a/text()");
    for (Object object : objects) {
        System.out.println(object);
    }
}