const puppeteer = require('puppeteer-core');
const fs = require('fs')
const cheerio = require('cheerio')
// 查找 谷歌浏览器位置
const findChrome = require('carlo/lib/find_chrome');
// Base URL of the site; getList() navigates here before searching.
let host = 'https://www.wukong.com';
// Search phrase that getList() types into the site's search box.
let keyword = '买新房';
/**
 * Search the site at `host` for `keyword` and collect the question links
 * shown on the result page.
 *
 * Launches a visible Chrome window, submits the search form, scrolls to the
 * bottom of the result page (presumably so that additional results get
 * rendered), then parses the final HTML with cheerio.
 *
 * The browser is always closed before the function settles, whether it
 * succeeds or throws.
 *
 * @returns {Promise<string[]>} Absolute URLs of the question pages found.
 */
async function getList(){
    const linkSelector = '.question-v3 .question-title h2 a';
    const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

    // Locate the local Chrome executable.
    const chromePath = await findChrome({});
    const browser = await puppeteer.launch({
        executablePath: chromePath.executablePath,
        headless: false, // show the window (the default would be headless)
        // Size of the page viewport.
        defaultViewport: {
            width: 1280,
            height: 1200
        }
    });

    try {
        const page = await browser.newPage();
        await page.goto(host);
        // Type the keyword with a delay between keystrokes.
        await page.type('.input-group input', keyword, { delay: 150 });
        await page.click('.btn-submit');
        // Continue only once at least one result link is present.
        await page.waitForSelector(linkSelector);
        await pause(3000);

        // Scroll down in 200px steps every 200ms until the scrolled distance
        // reaches the (possibly growing) document height.
        await page.evaluate(async () => {
            await new Promise((resolve) => {
                const distance = 200;
                let totalHeight = 0;
                const timer = setInterval(() => {
                    const scrollHeight = document.body.scrollHeight;
                    window.scrollBy(0, distance);
                    totalHeight += distance;
                    if (totalHeight >= scrollHeight) {
                        clearInterval(timer);
                        resolve();
                    }
                }, 200);
            });
        });
        await pause(1000);

        // Full page HTML as a string, parsed outside the browser.
        const $ = cheerio.load(await page.content());
        const links = [];
        $(linkSelector).each((i, item) => {
            const href = $(item).attr('href');
            // Skip anchors without an href instead of emitting "...undefined".
            if (href) {
                // Resolve against the host so relative and absolute hrefs both work.
                links.push(new URL(href, host).href);
            }
        });
        console.log(links);
        console.log(links.length);
        return links;
    } finally {
        // Always release the Chrome process, even when a step above throws.
        await browser.close();
    }
}
// Export getList so other scripts can load it with require().
module.exports = {
getList
}
const puppeteer = require('puppeteer');
const fs = require('fs');
const { getList } = require('./wukong')
/**
 * Scrape the question title and the answer bodies from one detail page.
 *
 * A fresh browser is launched per call and is always closed before the
 * returned promise settles.
 *
 * @param {string} url - Address of the question page to open.
 * @returns {Promise<{title: string, data: string}>} `title` is the innerHTML
 *   of the question heading; `data` is the innerHTML of every
 *   `.answer-text-full` element joined with commas.
 * @throws {Error} When navigation fails or the title element is missing.
 */
async function getInfo(url){
    const browser = await puppeteer.launch();
    try {
        const page = await browser.newPage();
        // goto() waits for the page's load event by default, so the DOM can be
        // read right after it resolves; no separate 'load' listener is needed,
        // and the browser cannot be closed while the extraction is running.
        await page.goto(url);
        return await page.evaluate(() => {
            const titleNode = document.querySelector('div.question.question-single > div > h1 > span');
            if (!titleNode) {
                throw new Error('question title element not found');
            }
            const answers = Array.from(
                document.querySelectorAll('.answer-text-full'),
                (node) => node.innerHTML
            );
            return {
                title: titleNode.innerHTML,
                data: answers.join(',')
            };
        });
    } finally {
        await browser.close();
    }
}
/**
 * Crawl every link returned by getList() and save each article as a JSON
 * file under ./article/.
 *
 * Pages are processed strictly one after another. A failure on one page is
 * logged and the crawl continues with the next link.
 *
 * @returns {Promise<void>} Resolves once every link has been processed.
 */
async function getlink(){
    const urls = await getList();
    const outputDir = './article/';
    // Make sure the target directory exists before the first write.
    fs.mkdirSync(outputDir, { recursive: true });

    let saved = 0;
    for (const url of urls) {
        try {
            const article = await getInfo(url);
            // The title is scraped HTML: replace path separators, characters
            // that are illegal in file names, and control characters.
            const fileName = String(article.title)
                .replace(/[\\/:*?"<>|\u0000-\u001f]/g, '_')
                .trim() || 'untitled';
            fs.writeFileSync(outputDir + fileName + '.json', JSON.stringify(article));
            saved += 1;
            console.log('完成第' + saved + '个文章采集');
        } catch (err) {
            console.error(`Failed to collect ${url}`, err);
        }
    }
}
// Script entry point: starts the crawl when this file is executed.
// The returned promise is not awaited or given a rejection handler here.
getlink()
-----------------------
taobao
const puppeteer = require('puppeteer-core');
const fs = require('fs')
const cheerio = require('cheerio')
// 查找 谷歌浏览器位置
const findChrome = require('carlo/lib/find_chrome');
/**
 * Read the price shown on the mobile Tmall detail page of one item.
 *
 * Any error (Chrome not found, navigation failure, missing element, ...) is
 * logged and the fallback value is returned instead of throwing. The browser
 * is always closed before the function returns.
 *
 * @param {string|number} pid - Item id placed in the `id` query parameter.
 * @returns {Promise<string|number>} innerHTML of the price element, or `0`
 *   when the price could not be read.
 */
async function fetchPrice(pid){
    let price = 0;
    let browser;
    try {
        // Locate the local Chrome executable.
        const chromePath = await findChrome({});
        browser = await puppeteer.launch({
            executablePath: chromePath.executablePath,
            headless: true, // headless (this is also the default)
            timeout: 12000
        });
        const page = await browser.newPage();
        // Encode the id so it cannot alter the rest of the query string.
        await page.goto(`https://detail.m.tmall.com/item.htm?id=${encodeURIComponent(pid)}`);
        // Wait until the container holding the price has been rendered.
        await page.waitForSelector('#J_mod4 > div > div');
        price = await page.evaluate(() => {
            return document.querySelector('#J_mod4 > div > div > span > span').innerHTML;
        });
    } catch (err) {
        console.log(err);
    } finally {
        // Release the Chrome process on success and on failure alike.
        if (browser) {
            try {
                await browser.close();
            } catch (closeErr) {
                console.log(closeErr);
            }
        }
    }
    return price;
}
/**
 * Demo driver: fetches the price of one hard-coded item and prints it.
 *
 * @returns {Promise<void>}
 */
async function main(){
    // The item id below was taken from this sample product URL:
    // https://detail.tmall.com/item.htm?id=642151127964&ali_refid=a3_430673_1006:1104553693:N:emtiAWsF8%20zhhxaiwzc0Aw==:4ea4a8f474df079843afa39157963fb3&ali_trackid=1_4ea4a8f474df079843afa39157963fb3&spm=a2e0b.20350158.31919782.1
    const sampleItemId = '642151127964';
    const result = await fetchPrice(sampleItemId);
    console.log(`price is ${result}`);
}
// Script entry point: runs the price lookup when this file is executed.
main()