const puppeteer = require('puppeteer');
const cheerio = require('cheerio');

/** Base URL of the scraped site; each result's href is appended to it. */
const mainurl = "http://www.pufei8.com";

/** Pause between two looks at the result tab, in milliseconds. */
const POLL_INTERVAL_MS = 500;

/** Failed polls tolerated before giving up; the poll after that is the last one. */
const MAX_FAILED_POLLS = 10;

// The result HTML is read from the third entry of browser.pages().
// NOTE(review): presumably the search form opens its results in a new tab,
// which lands at this index — confirm against the live site.
const RESULT_PAGE_INDEX = 2;

// Text fragments whose presence means the result page has rendered:
// the "nothing was found" notice, or "your current location"
// (presumably a breadcrumb label — verify).
const RESULT_MARKERS = ["没有搜索到相关的内容", "您当前的位置"];

/**
 * Resolve after the given number of milliseconds.
 *
 * @param {number} ms - Time to wait.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Read the HTML of the result tab if it exists and has finished rendering.
 *
 * @param {object} browser - Puppeteer browser instance.
 * @returns {Promise<string|null>} The page HTML once it contains one of
 *   RESULT_MARKERS, otherwise null (tab missing, still loading, or unreadable).
 */
async function readResultHtml(browser) {
  try {
    const pages = await browser.pages();
    const resultPage = pages[RESULT_PAGE_INDEX];
    if (!resultPage) {
      return null;
    }
    const html = await resultPage.content();
    return RESULT_MARKERS.some((marker) => html.includes(marker)) ? html : null;
  } catch (error) {
    // Reading a tab can fail while it is still being set up; the caller
    // treats that the same as "not ready yet" and polls again.
    return null;
  }
}

/**
 * Run a search for `name` on the site in a headless browser and return the
 * HTML of the result page.
 *
 * @param {string} name - Search keyword typed into the site's search box.
 * @returns {Promise<string|Array>} The result page HTML, or an empty array
 *   when navigation, form interaction or polling fails (the failure value the
 *   module has always used). Rejects only if the browser cannot be launched.
 */
async function gethtml(name) {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox', '--blink-settings=imagesEnabled=false'],
  });

  try {
    const page = await browser.newPage();
    // Report navigator.webdriver as false on every new document.
    await page.evaluateOnNewDocument(() => {
      Object.defineProperty(navigator, 'webdriver', { get: () => false });
    });

    await page.goto(mainurl);
    await page.click(".searchtext1");
    await page.type(".searchtext1", name);
    await page.click("#btnSend1");

    // Poll sequentially so that two checks can never overlap and the browser
    // is closed exactly once.
    for (let failures = 0; failures <= MAX_FAILED_POLLS; failures++) {
      await sleep(POLL_INTERVAL_MS);
      const html = await readResultHtml(browser);
      if (html !== null) {
        return html;
      }
    }
    return [];
  } catch (error) {
    // Deliberate best-effort: any navigation or interaction failure is
    // reported as "no HTML" rather than as a rejection.
    return [];
  } finally {
    // Always release the browser process. A failure to close must not
    // replace the value being returned.
    await browser.close().catch(() => {});
  }
}

/**
 * Normalise the inner HTML of one result field.
 *
 * NOTE(review): the previous code chained several `.replace("", "")` calls
 * here. An empty search pattern makes those calls no-ops, so they are
 * dropped; they look like tag-stripping patterns whose literals were lost.
 * If the real patterns can be recovered from version control, restore them
 * in this function. The one non-empty pattern still visible was a quoted
 * literal split across blank lines, kept here as "\n\n" — confirm.
 *
 * @param {string|null} html - Inner HTML of the field, or null if the element is absent.
 * @returns {string} The cleaned text; an empty string when the field is absent.
 */
function cleanField(html) {
  return (html || '').replace('\n\n', "");
}

/**
 * Extract the search results from the result page HTML.
 *
 * @param {string} html - Full HTML of the result page.
 * @returns {Array<{date: string, name: (string|null), url: string, update: string}>}
 *   One entry per `#dmList ul li` element, in document order.
 */
function parseResults(html) {
  const $ = cheerio.load(html);
  const list = [];

  $("#dmList ul li").each((index, ele) => {
    // Re-parse the item without entity decoding so the extracted inner HTML
    // keeps its characters as written in the page.
    const item = cheerio.load($(ele).html(), { decodeEntities: false });
    const link = item("dl dt a");
    const fields = item("dl dd p");

    list.push({
      date: cleanField(fields.eq(0).html()),
      name: link.html(),
      url: mainurl + link.attr("href"),
      update: cleanField(fields.eq(1).html()),
    });
  });

  return list;
}

/**
 * Search the site for `name` and return the parsed results.
 *
 * @param {string} name - Search keyword.
 * @returns {Promise<{name: string, list: Array<object>}>} The source label and
 *   the result entries; `list` is empty when nothing was found or the page
 *   could not be fetched. Rejects with an empty array on unexpected errors.
 */
const get = async (name) => {
  try {
    const html = await gethtml(name);
    // gethtml yields a non-string (empty array) when it has no page to offer.
    const list = typeof html === 'string' ? parseResults(html) : [];
    return { name: "扑飞漫画", list };
  } catch (error) {
    // Callers have always received `[]` as the rejection value; keep it.
    return Promise.reject([]);
  }
};

module.exports = get;