core/bin/reptile/taduo_net/find.js

71 lines
2.2 KiB
JavaScript
Raw Normal View History

2020-01-03 14:29:01 +08:00
const puppeteer = require('puppeteer');
const cheerio = require('cheerio')
// Base URL of the site: the page the scraper opens to run a search, and the
// prefix prepended to each result's href when building its url.
let mainurl = "http://www.taduo.net/"
/**
 * Runs a search on the site's home page in a headless browser and resolves
 * with the HTML of the page that shows the results.
 *
 * The search term is typed into `.searchtext1` and submitted via `#btnSend1`.
 * After a fixed delay the third open tab is read (index 2 of
 * `browser.pages()`); if fewer than three tabs are open, the most recently
 * listed tab is used instead.
 *
 * @param {string} name - Search term typed into the search box.
 * @param {number} [waitMs=1500] - Milliseconds to wait after submitting the
 *   search before reading the result page.
 * @returns {Promise<string>} HTML content of the results page.
 * @throws Rejects with the underlying error if launching, navigating or
 *   reading the page fails; the browser is closed in every case.
 */
let gethtml = async (name, waitMs = 1500) => {
    const browser = await puppeteer.launch({
        headless: true,
        args: ['--no-sandbox', '--disable-setuid-sandbox']
    });
    try {
        const page = await browser.newPage();
        await page.goto(mainurl);
        await page.type(".searchtext1", name);
        await page.click("#btnSend1");

        // Give the results tab time to open and load before reading it.
        await new Promise((resolve) => setTimeout(resolve, waitMs));

        const openPages = await browser.pages();
        // Index 2 is the tab the original code relied on; fall back to the
        // last tab so a missing tab no longer leaves the caller hanging.
        const resultPage = openPages.length > 2
            ? openPages[2]
            : openPages[openPages.length - 1];
        return await resultPage.content();
    } finally {
        // Always release the browser process, even when a step above throws.
        await browser.close();
    }
}
2020-01-03 14:29:01 +08:00
/**
 * Strips the first `<em>`, `</em><span class="red">`, `</span></p>` and
 * `</span>` occurrences from an HTML fragment, leaving the text around them.
 *
 * @param {?string} fragment - Inner HTML of a detail paragraph, or null when
 *   the paragraph is missing.
 * @returns {string} The fragment without that markup; "" for a missing one.
 */
const stripMarkup = (fragment) => {
    if (fragment === null || fragment === undefined) {
        return "";
    }
    return fragment
        .replace("<em>", "")
        .replace('</em><span class="red">', "")
        .replace('</span></p>', "")
        .replace("</span>", "");
};

/**
 * Searches the site for `name` and returns the entries found in the
 * `#dmList ul li` result list.
 *
 * @param {string} name - Search term.
 * @returns {Promise<Array<{date: string, name: ?string, url: string, update: string}>>}
 *   One object per list item: `name` is the inner HTML of the title link,
 *   `url` is `mainurl` followed by the link's href, and `date` / `update`
 *   are the first and second detail paragraphs with their markup stripped.
 */
let get = async (name) => {
    const html = await gethtml(name);
    const $ = cheerio.load(html);
    const list = [];
    $("#dmList ul li").each((index, ele) => {
        // Re-parse each item with decodeEntities disabled, as the original did,
        // so the extracted HTML keeps the same entity handling.
        const item = cheerio.load($(ele).html(), { decodeEntities: false });
        const link = item("dl dt a");
        const details = item("dl dd p");
        list.push({
            date: stripMarkup(details.eq(0).html()),
            name: link.html(),
            url: mainurl + link.attr("href"),
            // A missing second paragraph yields "" instead of throwing.
            update: stripMarkup(details.eq(1).html())
        });
    });
    return list;
}
2020-01-03 14:29:01 +08:00
// The module's export is the search function itself: require(...)(name)
// returns a promise of the result list.
module.exports = get