Another crawler
4
bin/reptile/taduo_net/README.md
Normal file
@@ -0,0 +1,4 @@
古风漫画网 (Gufeng Manhua)
www.gufengmh8.com
find.js: search
section.js: fetch the chapter list
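A hypothetical usage sketch, not part of this commit: it assumes the scripts are run from inside bin/reptile/taduo_net, and the comic title passed to find() is only an example.

// Hypothetical usage of the module assembled in config.js (sketch only, not part of this commit)
const taduo = require("./config")

async function demo() {
    // find(name) searches www.gufengmh8.com and resolves to [{ name, update, date, url }, ...]
    let results = await taduo.find("偷星九月天")
    console.log(results)
}

demo()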
17
bin/reptile/taduo_net/config.js
Normal file
@@ -0,0 +1,17 @@
// Crawler config for the 塔多漫画 source (site: www.gufengmh8.com)
// import find from "./find"
// import imglist from "./picture"
// import section from "./section"
const find = require("./find")
const imglist = require("./picture")
const section = require("./section")

let name = "塔多漫画"
let version = "20.01.02"
let type = 1

module.exports = {
    name,
    version,
    type,
    find,
    imglist,
    section
}
43
bin/reptile/taduo_net/find.js
Normal file
@@ -0,0 +1,43 @@
const axios = require('axios')
const cheerio = require('cheerio');
const qs = require('querystring');

// Parse a search-result page and extract the list of matching comics
let getcontlist = async (text) => {
    let $ = cheerio.load(text)
    let list = []
    // #contList holds the result items; reload just that fragment
    $ = cheerio.load($("#contList").eq(0).html())
    $("li").each((index, ele) => {
        let j = cheerio.load($(ele).html(), { decodeEntities: false })
        let obj = {};
        obj.update = j('.tt').eq(0).html()        // latest chapter
        obj.name = j(".ell a").eq(0).html()       // comic title
        obj.date = j(".updateon").eq(0).html().replace(/\s+/g, "").replace('<em>1.0</em>', '')  // last update date
        obj.url = j(".cover").eq(0).attr('href')  // link to the comic's detail page
        list.push(obj)
    })
    return list
}

// Fetch one page of search results for the given keyword
let gethtml = async (name, page) => {
    name = qs.escape(name)
    let url = `https://www.gufengmh8.com/search/?keywords=${name}&page=${page}`
    let text = ""
    await axios.get(url).then((a) => {
        text = a.data
    })
    return await getcontlist(text)
}

// Search by comic name (first result page only)
let getlist = async (name) => {
    return await gethtml(name, 1)
}

// getlist("偷星九月天")
module.exports = getlist
40
bin/reptile/taduo_net/picture.js
Normal file
@@ -0,0 +1,40 @@
const cheerio = require('cheerio');
const axios = require('axios')

// Fetch a chapter page and pull the image/chapter metadata out of its inline script
let getscript = async (url) => {
    let text;
    url = "https://www.gufengmh8.com" + url
    await axios.get(url).then((res) => {
        text = res.data
    })
    let $ = cheerio.load(text);
    let list;
    $('script').each((index, ele) => {
        let text = $(ele).html()
        if (text.search('chapterImages') != -1) {
            // The page script declares the variables read below (chapterImages, chapterPath, ...)
            eval(text)
            var reg = /^http(s)?:\/\/(.*?)\//
            let imghost = reg.exec(pageImage)[2]
            // imghost          image host domain
            // chapterPath      base path of the image URLs
            // chapterImages    array of image file names
            // pageTitle        chapter title
            // pageUrl          base URL of the page
            // prevChapterData  previous chapter info
            // nextChapterData  next chapter info
            // a chapter page URL is the base URL + <chapter info>.id + ".html"
            let down = pageUrl.replace("https://www.gufengmh8.com", "") + nextChapterData.id + ".html"
            let upurl = pageUrl.replace("https://www.gufengmh8.com", "") + prevChapterData.id + ".html"
            list = { imghost, chapterPath, chapterImages, pageTitle, pageUrl, prevChapterData, nextChapterData, pageImage, down, upurl }
        }
    })
    return list;
}

// getscript('')
module.exports = getscript
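To make the comment block in picture.js concrete, here is a small illustration with invented sample values. How the parts combine into full image URLs is an assumption of this note; getscript() itself only returns the pieces.

// Hypothetical sample of what getscript() returns (values invented for illustration)
let chapter = {
    imghost: "res.example-cdn.com",                  // assumed host, not a real value
    chapterPath: "images/comic/1/100/",
    chapterImages: ["0001.jpg", "0002.jpg"],
    pageUrl: "https://www.gufengmh8.com/manhua/touxingjiuyuetian/",
    down: "/manhua/touxingjiuyuetian/123457.html",   // next chapter link, already built by getscript()
    upurl: "/manhua/touxingjiuyuetian/123455.html"   // previous chapter link
}
// One plausible way to build full image URLs from the parts (an assumption, not confirmed by this commit)
let images = chapter.chapterImages.map(f => "https://" + chapter.imghost + "/" + chapter.chapterPath + f)
console.log(images)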
39
bin/reptile/taduo_net/section.js
Normal file
@@ -0,0 +1,39 @@
const axios = require('axios')
const cheerio = require('cheerio');

// Parse a comic detail page and extract its chapter groups
let getsection = async (text) => {
    let $ = cheerio.load(text)
    let list = [];
    // each .comic-chapters block is one group of chapters
    $(".comic-chapters").each((index, ele) => {
        let obj = {}
        let j = cheerio.load($(ele).html(), { decodeEntities: false })
        obj.title = j(".pull-left").eq(0).html().replace('<span>', '').replace('</span>', '')  // group title
        obj.list = []
        j('li').each(function (index, ele) {
            let con = {}
            con.url = j(this).find("a").eq(0).attr('href')   // chapter page URL
            con.title = j(this).find("a").eq(0).html().replace('<span>', '').replace('</span>', '').replace(/\s+/g, "").replace('\\n', '')  // chapter title
            obj.list.push(con)
        })
        list.push(obj)
    })
    return list
}

// Fetch a comic detail page and return its chapter list
let gethtml = async (url) => {
    let text;
    await axios.get(url).then((res) => {
        text = res.data
    })
    return await getsection(text)
}

// gethtml('https://www.gufengmh8.com/manhua/touxingjiuyuetian/')
module.exports = gethtml
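A hypothetical end-to-end sketch, not part of this commit: it chains section.js and picture.js, and assumes the chapter hrefs returned by section.js are site-relative paths, since picture.js prepends https://www.gufengmh8.com itself.

// Hypothetical sketch: chapter list of a comic, then image data for its first chapter
const section = require("./section")
const imglist = require("./picture")

async function firstChapterImages(comicUrl) {
    let groups = await section(comicUrl)          // [{ title, list: [{ title, url }] }]
    if (!groups.length || !groups[0].list.length) return null
    let first = groups[0].list[0]
    return await imglist(first.url)               // assumes first.url is a path like /manhua/.../123456.html
}

firstChapterImages("https://www.gufengmh8.com/manhua/touxingjiuyuetian/").then(console.log)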