另一个爬虫

This commit is contained in:
2020-01-02 21:58:17 +08:00
parent 85691155d2
commit 0ee00e6bec
7 changed files with 243 additions and 5 deletions

View File

@@ -0,0 +1,4 @@
古风漫画网
www.gufengmh8.com
find.js 查找功能
section.js 获取章节列表

View File

@@ -0,0 +1,17 @@
// import find from "./find"
// import imglist from "./picture"
// import section from "./section"
find = require("./find")
imglist = require("./picture")
section = require("./section")
let name = "塔多漫画"
let version = "20.01.02"
let type = 1
module.exports = {
name,
version,
type,
find,
imglist,
section
}

View File

@@ -0,0 +1,43 @@
let axios = require('axios')
const cheerio = require('cheerio');
const qs = require('querystring');
// Parse a search-results page into a list of comics.
// @param text raw HTML of the search page
// @returns [{ update, name, date, url }, ...] — empty when no results
let getcontlist = async (text) => {
  let $ = cheerio.load(text)
  let list = []
  // Search results live in the #contList <ul>. When the page has no
  // results .html() returns null, which would crash cheerio.load —
  // guard and return an empty list instead.
  const contHtml = $("#contList").eq(0).html()
  if (contHtml == null) return list
  $ = cheerio.load(contHtml)
  $("li").each((index, ele) => {
    // Re-parse each <li> without entity decoding so the Chinese
    // titles keep their original characters.
    let j = cheerio.load($(ele).html(), { decodeEntities: false })
    let obj = {}
    obj.update = j('.tt').eq(0).html() // "updated to chapter X" label
    obj.name = j(".ell a").eq(0).html() // comic title
    // Strip whitespace and the site's stray "<em>1.0</em>" marker.
    obj.date = j(".updateon").eq(0).html().replace(/\s+/g, "").replace('<em>1.0</em>', '')
    obj.url = j(".cover").eq(0).attr('href') // detail-page URL
    list.push(obj)
  })
  return list
}
// Fetch one page of search results for `name` and parse it.
// @param name search keyword (usually Chinese; percent-encoded here)
// @param page 1-based results page number
// @returns parsed result list (see getcontlist)
let gethtml = async (name, page) => {
  const keyword = qs.escape(name) // don't mutate the caller's argument
  const url = `https://www.gufengmh8.com/search/?keywords=${keyword}&page=${page}`
  // Plain await instead of the original await + .then mix.
  const res = await axios.get(url)
  return getcontlist(res.data)
}
// Public entry point: search the site for `name` and return the first
// page of matching comics.
let getlist = async (name) => {
  const firstPage = await gethtml(name, 1)
  return firstPage
}
// getlist("偷星九月天")
module.exports = getlist

View File

@@ -0,0 +1,40 @@
const cheerio = require('cheerio');
const axios = require('axios')
// Fetch one chapter page and extract the image metadata that the site
// embeds as plain `var` declarations inside an inline <script>.
// @param url site-relative chapter path (e.g. "/manhua/xxx/123.html")
// @returns {object|undefined} { imghost, chapterPath, chapterImages,
//   pageTitle, pageUrl, prevChapterData, nextChapterData, pageImage,
//   down, upurl }, or undefined when no matching script is found.
let getscript = async (url) => {
  const fullUrl = "https://www.gufengmh8.com" + url // don't mutate the parameter
  // Plain await instead of the original await + .then mix.
  const res = await axios.get(fullUrl)
  const $ = cheerio.load(res.data)
  let list
  $('script').each((index, ele) => {
    const text = $(ele).html()
    if (text.search('chapterImages') != -1) {
      // SECURITY: executes JavaScript downloaded from the site. It is
      // kept because the page declares chapterImages / chapterPath /
      // pageImage / pageTitle / pageUrl / prevChapterData /
      // nextChapterData with `var`, and sloppy-mode eval leaks those
      // into this function's scope. A regex/JSON extraction would be
      // safer — consider replacing.
      eval(text)
      const reg = /^http(s)?:\/\/(.*?)\//
      // Image CDN hostname, taken from the first sample image URL.
      // Declared locally (the original leaked `imghost` as a global).
      const imghost = reg.exec(pageImage)[2]
      // Site-relative URLs for the next/previous chapter pages:
      // page base url + chapter id + ".html".
      const down = pageUrl.replace("https://www.gufengmh8.com", "") + nextChapterData.id + ".html"
      const upurl = pageUrl.replace("https://www.gufengmh8.com", "") + prevChapterData.id + ".html"
      list = { imghost, chapterPath, chapterImages, pageTitle, pageUrl, prevChapterData, nextChapterData, pageImage, down, upurl }
    }
  })
  return list
}
// getscript('')
module.exports = getscript

View File

@@ -0,0 +1,39 @@
let axios = require('axios')
const cheerio = require('cheerio');
// Parse a comic detail page into chapter groups.
// @param text raw HTML of the comic page
// @returns [{ title, list: [{ url, title }, ...] }, ...]
let getsection = async (text) => {
  const $ = cheerio.load(text)
  const groups = []
  $(".comic-chapters").each((idx, chapterBlock) => {
    // Re-parse the block without entity decoding so titles keep their
    // original characters.
    const j = cheerio.load($(chapterBlock).html(), { decodeEntities: false })
    const group = {}
    group.title = j(".pull-left").eq(0).html().replace('<span>', '').replace('</span>', '')
    group.list = []
    j('li').each((i, li) => {
      const anchor = j(li).find("a").eq(0)
      const entry = {
        url: anchor.attr('href'),
        title: anchor.html().replace('<span>', '').replace('</span>', '').replace(/\s+/g, "").replace('\\n', '')
      }
      group.list.push(entry)
    })
    groups.push(group)
  })
  return groups
}
// Fetch a comic detail page and return its parsed chapter groups.
// @param url absolute comic page URL
// @returns chapter groups (see getsection)
let gethtml = async (url) => {
  // Plain await instead of the original await + .then mix.
  const res = await axios.get(url)
  return getsection(res.data)
}
// gethtml('https://www.gufengmh8.com/manhua/touxingjiuyuetian/')
module.exports = gethtml