core/bin/reptile/taduo_net/find.js

126 lines
4.3 KiB
JavaScript

const puppeteer = require('puppeteer');
const cheerio = require('cheerio')
// Base URL of the taduo.net site: the search starts from this page, and the
// links found in the search results are appended to it.
const mainurl = "http://www.taduo.net/";
/**
 * Runs a search on the site's home page in headless Chromium and returns the
 * HTML of the result page.
 *
 * The keyword is typed into `.searchtext1` and `#btnSend1` is clicked. The
 * third page known to the browser is then polled until its content contains
 * one of two marker strings ("no matching content found" / "your current
 * location"), which is taken to mean the result page has finished loading.
 *
 * @param {string} name - Keyword typed into the search box.
 * @param {{pollIntervalMs?: number, maxFailedPolls?: number}} [options]
 *   `pollIntervalMs` is the delay before each poll (default 500).
 *   `maxFailedPolls` is how many failed polls are tolerated before giving up
 *   (default 10, i.e. at most 11 polls).
 * @returns {Promise<string|Array>} The result page HTML, or an empty array
 *   when the browser could not be driven or the result page never showed up.
 *   The promise never rejects.
 */
let gethtml = async (name, { pollIntervalMs = 500, maxFailedPolls = 10 } = {}) => {
    // NOTE(review): index 2 presumably is the tab the search form opens next
    // to the initial blank tab and the page created below - confirm.
    const RESULT_PAGE_INDEX = 2;

    const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

    // The page counts as loaded once it shows either the "nothing found"
    // notice or the breadcrumb text.
    const isResultPage = (html) =>
        html.indexOf("没有搜索到相关的内容") !== -1 || html.indexOf("您当前的位置") !== -1;

    let browser;
    try {
        browser = await puppeteer.launch({
            headless: true,
            // Sandbox off and image loading disabled via Blink settings.
            args: ['--no-sandbox', '--disable-setuid-sandbox', '--blink-settings=imagesEnabled=false']
        });
        const page = await browser.newPage();
        // Make navigator.webdriver report false on every new document
        // (presumably to avoid automation detection by the site).
        await page.evaluateOnNewDocument(() => {
            Object.defineProperty(navigator, 'webdriver', { get: () => false });
        });

        await page.goto(mainurl);
        await page.type(".searchtext1", name);
        await page.click("#btnSend1");

        // Polls run strictly one after another, so none can overlap or run
        // after the browser has been closed.
        for (let failedPolls = 0; failedPolls <= maxFailedPolls; failedPolls++) {
            await sleep(pollIntervalMs);
            try {
                const openPages = await browser.pages();
                const html = await openPages[RESULT_PAGE_INDEX].content();
                if (isResultPage(html)) {
                    return html;
                }
            } catch (pollError) {
                // Deliberately ignored: the result tab may not exist yet or
                // may be mid-navigation; this simply counts as a failed poll.
            }
        }
        return [];
    } catch (error) {
        // Launching, navigating or filling the form failed: report it and
        // fall back to the empty result instead of continuing.
        console.log("err", error);
        return [];
    } finally {
        if (browser) {
            try {
                await browser.close();
            } catch (closeError) {
                // Deliberately ignored: a failing close must not turn the
                // already determined result into a rejection.
            }
        }
    }
};
/**
 * Searches the site for `name` and returns the parsed list of results.
 *
 * The HTML comes from `gethtml`; every `#dmList ul li` element that has a
 * title link (`dl dt a`) becomes one entry.
 *
 * @param {string} name - Search keyword, passed through to `gethtml`.
 * @returns {Promise<{name: string, list: Array<{date: string, name: string, url: string, update: string}>}>}
 *   Resolves with the source label and the entries. In each entry `date` is
 *   the text of the first `dl dd p`, `update` the text of the second one,
 *   `name` the inner HTML of the title link and `url` the link target
 *   appended to `mainurl`. Rejects with an empty array when fetching or
 *   parsing throws.
 */
let get = async (name) => {
    // Removes the first occurrence of each wrapper tag the site puts around
    // the text of a result line; a missing line yields an empty string
    // instead of throwing and discarding the whole result set.
    const stripWrapperTags = (html) => {
        if (typeof html !== "string") {
            return "";
        }
        return html
            .replace("<em>", "")
            .replace('</em><span class="red">', "")
            .replace('</span></p>', "")
            .replace("</span>", "");
    };

    try {
        const html = await gethtml(name);
        const list = [];

        // gethtml reports failure with an array instead of an HTML string;
        // there is nothing to parse in that case.
        if (!Array.isArray(html)) {
            const $ = cheerio.load(html);
            $("#dmList ul li").each((index, ele) => {
                // Re-parsed with decodeEntities disabled, as before, so the
                // extracted HTML strings keep the same character form.
                const item = cheerio.load($(ele).html(), { decodeEntities: false });
                const link = item("dl dt a");
                const href = link.attr("href");
                if (href === undefined) {
                    // No title link: not a usable result entry.
                    return;
                }
                const lines = item("dl dd p");
                list.push({
                    date: stripWrapperTags(lines.eq(0).html()),
                    name: link.html(),
                    // NOTE(review): plain concatenation - if hrefs start with
                    // "/" the URL contains "//"; confirm against live markup.
                    url: mainurl + href,
                    update: stripWrapperTags(lines.eq(1).html())
                });
            });
        }

        return { name: "塔多漫画", list };
    } catch (error) {
        console.error(error);
        // Callers receive an empty array as the rejection value.
        return Promise.reject([]);
    }
};
// CommonJS export: requiring this module yields the `get` search function.
module.exports = get