core/bin/reptile/taduo_net/find.js

112 lines
3.6 KiB
JavaScript
Raw Normal View History

2020-01-03 14:29:01 +08:00
const puppeteer = require('puppeteer');
const cheerio = require('cheerio')
// Base URL of the scraped site: opened as the search start page and used as
// the prefix for every result link collected below.
let mainurl = "http://www.taduo.net/"
/**
 * Searches the site for `name` in headless Chromium and returns the HTML of
 * the page that shows the search results.
 *
 * The home page is opened, the keyword is typed into `.searchtext1` and
 * `#btnSend1` is clicked. The browser's third page (index 2) is then polled
 * every 500 ms until its HTML contains one of the two marker strings that
 * identify a finished results page.
 *
 * @param {string} name - Keyword to type into the site's search box.
 * @returns {Promise<string|Array>} The results page HTML, or an empty array
 *   when no results page was recognised after 11 polls.
 *   Rejects if the browser cannot be launched or the search cannot be
 *   submitted (previously such failures left the promise pending forever).
 */
const gethtml = async (name) => {
    const POLL_INTERVAL_MS = 500;
    // The original gave up once its failure counter exceeded 10, i.e. on the 11th miss.
    const MAX_FAILED_POLLS = 11;

    const browser = await puppeteer.launch({
        headless: true,
        args: ['--no-sandbox', '--disable-setuid-sandbox']
    });

    try {
        const page = await browser.newPage();
        await page.goto(mainurl);
        await page.type(".searchtext1", name);
        await page.click("#btnSend1");

        for (let failed = 1; failed <= MAX_FAILED_POLLS; failed++) {
            await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));

            try {
                const openpages = await browser.pages();
                // Index 2 is presumably the tab opened by submitting the search
                // (after the initial blank page and the page created above) — TODO confirm.
                const resultPage = openpages[2];
                if (resultPage) {
                    const html = await resultPage.content();
                    if (html.includes("没有搜索到相关的内容") || html.includes("您当前的位置")) {
                        return html;
                    }
                }
            } catch (error) {
                // Reading a page while it is still loading can fail; count it
                // as a missed poll and try again on the next round.
            }

            console.log(failed);
        }

        return [];
    } finally {
        // Always release the browser, whether we found the page, timed out or failed.
        await browser.close();
    }
};
2020-01-03 14:29:01 +08:00
2020-01-03 23:57:08 +08:00
/**
 * Searches the site for `name` and parses the results page into a list of
 * comic entries.
 *
 * Each `#dmList ul li` element of the results page yields one entry built
 * from its `dl dt a` link and its first two `dl dd p` paragraphs.
 *
 * @param {string} name - Keyword to search for.
 * @returns {Promise<{name: string, list: Array<{date: string, name: (string|null), url: string, update: string}>}>}
 *   The source label together with the parsed entries; `list` is empty when
 *   no results page was obtained or it contains no entries.
 *   Rejects if fetching or parsing fails (previously the promise never settled).
 */
const get = async (name) => {
    const html = await gethtml(name);
    const list = [];

    // gethtml resolves with [] instead of a string when the results page never appeared.
    if (typeof html === "string") {
        // Removes the wrapper tags around a field's text; a missing field becomes "".
        const stripMarkup = (fragment) => (fragment || "")
            .replace("<em>", "")
            .replace('</em><span class="red">', "")
            .replace('</span></p>', "")
            .replace("</span>", "");

        const $ = cheerio.load(html);
        $("#dmList ul li").each((index, ele) => {
            // Re-parse the item with entity decoding disabled, as the original did.
            const item = cheerio.load($(ele).html(), { decodeEntities: false });
            const link = item("dl dt a");
            const details = item("dl dd p");

            list.push({
                date: stripMarkup(details.eq(0).html()),
                name: link.html(),
                url: mainurl + link.attr("href"),
                update: stripMarkup(details.eq(1).html())
            });
        });
    }

    return { name: "塔多漫画", list };
};
2020-01-03 14:29:01 +08:00
// The module's export is the `get` search function itself.
module.exports = get