一、首先,引入依赖jar包


<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.11.3</version>
</dependency>

二、编写方法
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/*

  • 爬虫
    /
    public class MainActivity {
    /

    * Jsoup爬取糗事百科数据
    */
    public static List<Map<String,String>> spider(String url,int pages){

              List<Map<String,String>> list = new ArrayList<Map<String,String>>();
      try {
          Document mozilla = Jsoup.connect(url+"/page/"+pages)
                  .userAgent("Mozilla")
                  .timeout(3000)
                  .get();
          Elements select1 = mozilla.select("div.article.block");
          for (Element element : select1) {
              Document parse = Jsoup.parse(element.toString());
              Elements select = parse.select("a h2");//作者
              Elements select2 = parse.select("a img");//作者头像
              Elements select3 = parse.select("a.contentHerf");//内容
              Elements select4 = parse.select("div.thumb img");//内容图片
              Elements select5 = parse.select("span.stats-vote i");//赞数量
              Map<String,String> map = new HashMap<String, String>();
                          map.put("author", select.text());//作者
                          map.put("author_img", select2.size()>0?"http:"+select2.attr("src"):"");//作者头像
                          map.put("content", select3.text());//内容
                          map.put("content_img",  select4.size()>0?"http:"+select4.attr("src"):"");//内容图片
                          map.put("number",  select5.text());//赞数量
                          list.add(map);
          }
          
      } catch (IOException e) {
              list = null;
      }
      return list;
      }
    

}

三、测试
String url = "https://www.qiushibaike.com/8hr";
List<Map<String, String>> list = MainActivity.spider(url, 1);