另一个爬虫

2020-01-02 21:58:17 +08:00
parent 85691155d2
commit 0ee00e6bec
7 changed files with 243 additions and 5 deletions
--- a/bin/reptile/taduo_net/picture.js
+++ b/bin/reptile/taduo_net/picture.js
@@ -0,0 +1,40 @@
+const cheerio = require('cheerio');
+const axios = require('axios')
+let getscript = async (url) => {
+    let text;
+    url = "https://www.gufengmh8.com" + url
+    await axios.get(url).then((res)=>{
+        // text = res.text
+        // console.log(res.data)
+        text = res.data
+    })
+    let $ = cheerio.load(text);
+    let list;
+    $('script').each((index, ele) => {
+        // console.log(ele)
+        let text = $(ele).html()
+        if (text.search('chapterImages') != -1) {
+            eval(text)
+            var reg = /^http(s)?:\/\/(.*?)\//
+            imghost = reg.exec(pageImage)[2]
+            // imghost 图片域名
+            // chapterPath 图片基本链接path
+            // chapterImages 图片地址数组
+            // pageTitle 标题
+            // pageUrl 页面基础url
+            // prevChapterData 上一页信息 
+            // nextChapterData 下一页信息
+            // 页面地址为 基础url + 页信息.id
+            // console.log({ imghost, chapterPath, chapterImages, pageTitle, pageUrl, prevChapterData, nextChapterData, pageImage })
+            let down = pageUrl.replace("https://www.gufengmh8.com","") + nextChapterData.id + ".html"
+            let upurl = pageUrl.replace("https://www.gufengmh8.com","") + prevChapterData.id + ".html"
+            list = { imghost, chapterPath, chapterImages, pageTitle, pageUrl, prevChapterData, nextChapterData, pageImage,down,upurl }
+            // console.log(list)
+        }
+    })
+    return list;
+
+}
+
+// The module's single export is the getscript(url) scraper defined above.
+module.exports = getscript