另一个爬虫
This commit is contained in:
40
bin/reptile/taduo_net/picture.js
Normal file
40
bin/reptile/taduo_net/picture.js
Normal file
@@ -0,0 +1,40 @@
|
||||
const cheerio = require('cheerio');
|
||||
const axios = require('axios')
|
||||
/**
 * Download a chapter page and extract the chapter variables defined by its
 * inline script.
 *
 * The page is fetched from "https://www.gufengmh8.com" + url. Every <script>
 * element whose source mentions `chapterImages` is evaluated, and the
 * variables it defines are collected into the result. If several scripts
 * match, the last one wins.
 *
 * Result fields:
 *   imghost          host name taken from pageImage (null if pageImage is not
 *                    an absolute http(s) URL)
 *   chapterPath      base path of the image links
 *   chapterImages    array of image addresses
 *   pageTitle        title
 *   pageUrl          base URL of the page
 *   prevChapterData  information about the previous chapter
 *   nextChapterData  information about the next chapter
 *   pageImage        as defined by the page script
 *   down / upurl     site-relative address of the next / previous chapter,
 *                    built as base URL path + chapter id + ".html"
 *
 * @param {string} url - Path appended to the site origin.
 * @returns {Promise<object|undefined>} The collected chapter data, or
 *   undefined when no matching script is present in the page.
 * @throws Propagates axios request errors and any error raised while the
 *   page script is evaluated or its variables are read.
 */
const getscript = async (url) => {
    const siteOrigin = "https://www.gufengmh8.com";
    const hostPattern = /^http(s)?:\/\/(.*?)\//;

    const res = await axios.get(siteOrigin + url);
    const $ = cheerio.load(res.data);

    let list;
    $('script').each((index, ele) => {
        const scriptSource = $(ele).html();
        if (!scriptSource || !scriptSource.includes('chapterImages')) {
            return;
        }

        // SECURITY: this executes script text downloaded from a remote site
        // with full access to this process. Only use against a trusted source;
        // consider parsing the variables instead of evaluating them.
        // NOTE: the variables read below exist only because a direct eval in
        // sloppy mode hoists the script's `var` declarations into this
        // callback. Under strict mode (or as an ES module) they would not be
        // visible here.
        eval(scriptSource);

        // Keep the host as a local; it was previously assigned without a
        // declaration and leaked as an implicit global.
        const hostMatch = hostPattern.exec(pageImage);
        const imageHost = hostMatch ? hostMatch[2] : null;

        const down = pageUrl.replace(siteOrigin, "") + nextChapterData.id + ".html";
        const upurl = pageUrl.replace(siteOrigin, "") + prevChapterData.id + ".html";

        list = {
            imghost: imageHost,
            chapterPath,
            chapterImages,
            pageTitle,
            pageUrl,
            prevChapterData,
            nextChapterData,
            pageImage,
            down,
            upurl
        };
    });

    return list;
}
|
||||
|
||||
// getscript('')
|
||||
// CommonJS export: the module's value is the getscript function itself.
module.exports = getscript
|
||||
Reference in New Issue
Block a user