nodejs ---- 起步

原创

ChaimChen 2021-11-12 15:29:03 ©著作权

文章标签 nodejs ide sed html ooc 文章分类 代码人生

©著作权归作者所有：来自51CTO博客作者ChaimChen的原创作品，请联系作者获取转载授权，否则将追究法律责任

跟着慕课网的教程敲了一遍：使用 Node.js 爬取慕课网的课程页面。

时间太晚了，废话不多说，直接上代码。安装教程以及这篇文章的注释明天再补上。
爬取这个页面：https://www.imooc.com/learn/348
创建一个文件：crawler.js

// NOTE: despite its name, `http` is bound to Node's "https" module, because
// the target URL below uses the https:// scheme.
var http = require("https")
// cheerio supplies the `load` call used by filterChapters to query the HTML.
var cheerio = require("cheerio")
// Course page that the script downloads via http.get.
var url = "https://www.imooc.com/learn/348"

/**
 * Extract the chapter/video outline from a course page.
 *
 * Every ".chapter" element becomes one entry; its title is the text of the
 * chapter's <h3>, and its videos are taken from the <li> children of the
 * chapter's ".video" list.
 *
 * @param {string} html - Markup of the course page; passed to cheerio.load.
 * @returns {Array<{chapterTitle: string, videos: Array<{title: string, id: (string|undefined)}>}>}
 *   Chapters in document order. `id` is the part of the link's href that
 *   follows "video/" (undefined when the href does not contain "video/").
 */
function filterChapters(html){
    var $ = cheerio.load(html)
    var courseData = []

    $(".chapter").each(function () {
        var chapter = $(this)
        var chapterData = {
            chapterTitle : chapter.find("h3").text(),
            videos : []
        }

        chapter.find(".video").children("li").each(function () {
            var video = $(this).find(".J-media-item")
            var href = video.attr("href")
            // attr() gives undefined when the <li> holds no media link (or the
            // link carries no href). Skip that item rather than letting
            // `.split` throw and abort the whole scrape.
            if (typeof href !== "string") {
                return
            }
            chapterData.videos.push({
                title: video.text(),
                id: href.split("video/")[1]
            })
        })

        courseData.push(chapterData)
    })

    return courseData
}

/**
 * Write the course outline to the console.
 *
 * For each chapter, logs the chapter title on its own line, followed by one
 * line per video in the form "【<id>】<title>".
 *
 * @param {Array<{chapterTitle: string, videos: Array<{id: *, title: *}>}>} courseData
 *   Chapters to print, in order.
 */
function printCourseInfo(courseData){
    // One line per video: bracketed id immediately followed by the title.
    function logVideo(entry) {
        console.log("【" + entry.id + "】" + entry.title)
    }

    // Chapter heading first, then all of its videos.
    function logChapter(section) {
        console.log(section.chapterTitle)
        section.videos.forEach(logVideo)
    }

    courseData.forEach(logChapter)
}

// Download the course page, then parse it and print the chapter outline.
http.get(url, function (res) {
    // Only a 200 response carries the course page; redirects and error pages
    // would otherwise be parsed as if they were the course markup.
    if (res.statusCode !== 200) {
        console.log("获取失败！", res.statusCode)
        // Consume the remaining response data so the connection is released.
        res.resume()
        return
    }

    // Decode at the stream level: without this each chunk is a Buffer that
    // `+=` stringifies on its own, which corrupts any multi-byte UTF-8
    // character that happens to be split across two chunks.
    res.setEncoding("utf8")

    var html = ""
    res.on("data", function (data) {
        html += data
    })

    res.on("end", function () {
        var courseData = filterChapters(html)
        printCourseInfo(courseData)
    })
}).on("error", function (err) {
    // Include the underlying reason instead of discarding the error object.
    console.log("获取失败！", err && err.message)
})

运行结果：
（运行结果截图：nodejs ---- 起步_ide）