前言

我是歌谣，欢迎在微信公众号关注“前端小歌谣”，一起学习前端知识。今天继续给大家讲解 Node 中的爬虫。

安装

npm init -y
npm i cheerio

案例

var http=require("http")
var url=require("url")
var https=require("https")
var cheerio=require("cheerio")
// Minimal HTTP API on port 3000.
//   /api/aaa  -> fetches the remote page via httpget() and replies with the
//                JSON string produced by spider().
//   otherwise -> replies with the body "404" and a real 404 status code.
http.createServer((req,res)=>{
    const urlobj=url.parse(req.url)
    // Headers shared by every response; the CORS header lets a page served
    // from any origin call this API.
    const headers={
        "content-Type":"application/json;charset=utf-8",
        "access-control-allow-origin":"*"
    }
    switch(urlobj.pathname){
        case "/api/aaa":
            res.writeHead(200,headers)
            httpget((data)=>{
                res.end(spider(data))
            })
            break
        default:
            // The status used to be written as 200 before routing, so unknown
            // paths were reported as successful. Send the correct status here.
            res.writeHead(404,headers)
            res.end("404")
    }
}).listen(3000)

/**
 * Downloads https://i.maoyan.com/ and passes the response body to `cb`.
 *
 * @param {(data: string) => void} cb - Invoked exactly once: with the body
 *   decoded as UTF-8 on success, or with an empty string if the request or
 *   the response stream reports an error.
 */
function httpget(cb){
    let data=""
    let finished=false
    // Guard so the callback fires once even if "error" and "end" both occur.
    const finish=(result)=>{
        if(finished){
            return
        }
        finished=true
        cb(result)
    }
    // Without an "error" listener a network failure would be thrown as an
    // unhandled event, crashing the process and leaving the caller waiting.
    const fail=(err)=>{
        console.error("httpget failed:",err.message)
        finish("")
    }
    https.get(`https://i.maoyan.com/`,(res)=>{
        // Let the stream decode UTF-8 so a multi-byte character that is split
        // across two chunks is not corrupted by per-chunk string conversion.
        res.setEncoding("utf8")
        res.on("data",(chunk)=>{
            data+=chunk
        })
        res.on("end",()=>{
            finish(data)
        })
        res.on("error",fail)
    }).on("error",fail)
}


/**
 * Collects the text of the ".title" descendants of every element matched by
 * `selector` in the given HTML and returns them as a JSON array string.
 *
 * @param {string} data - HTML source to parse.
 * @param {string} [selector=""] - Selector for the elements that wrap each
 *   movie entry.
 * @returns {string} JSON-encoded array of title strings.
 */
function spider(data,selector=""){
    const $=cheerio.load(data)
    // NOTE(review): the default selector is empty, exactly as in the original
    // code, so with no argument nothing is matched and the result is "[]".
    // Pass the selector of the page's movie-item container to get titles.
    const $movielist=$(selector)
    const movie=$movielist.toArray().map((value)=>$(value).find(".title").text())
    return JSON.stringify(movie)
}

运行结果

前端歌谣-第四拾柒课-node之http模块之爬虫_http