另一个爬虫

This commit is contained in:
2020-01-02 21:58:17 +08:00
parent 85691155d2
commit 0ee00e6bec
7 changed files with 243 additions and 5 deletions

View File

@@ -0,0 +1,4 @@
古风漫画网
www.gufengmh8.com
find.js 查找功能
section.js 获取章节列表

View File

@@ -0,0 +1,17 @@
// import find from "./find"
// import imglist from "./picture"
// import section from "./section"
find = require("./find")
imglist = require("./picture")
section = require("./section")
let name = "塔多漫画"
let version = "20.01.02"
let type = 1
module.exports = {
name,
version,
type,
find,
imglist,
section
}

View File

@@ -0,0 +1,43 @@
let axios = require('axios')
const cheerio = require('cheerio');
const qs = require('querystring');
// Parse a search-results page into a list of comics.
// @param text raw HTML of the search page
// @returns [{ update, name, date, url }, ...] — empty when no results
let getcontlist = async (text) => {
  let $ = cheerio.load(text)
  let list = []
  // Search results live in the #contList <ul>. When the page has no
  // results .html() returns null, which would crash cheerio.load —
  // guard and return an empty list instead.
  const contHtml = $("#contList").eq(0).html()
  if (contHtml == null) return list
  $ = cheerio.load(contHtml)
  $("li").each((index, ele) => {
    // Re-parse each <li> without entity decoding so the Chinese
    // titles keep their original characters.
    let j = cheerio.load($(ele).html(), { decodeEntities: false })
    let obj = {}
    obj.update = j('.tt').eq(0).html() // "updated to chapter X" label
    obj.name = j(".ell a").eq(0).html() // comic title
    // Strip whitespace and the site's stray "<em>1.0</em>" marker.
    obj.date = j(".updateon").eq(0).html().replace(/\s+/g, "").replace('<em>1.0</em>', '')
    obj.url = j(".cover").eq(0).attr('href') // detail-page URL
    list.push(obj)
  })
  return list
}
// Fetch one page of search results for `name` and parse it.
// @param name search keyword (usually Chinese; percent-encoded here)
// @param page 1-based results page number
// @returns parsed result list (see getcontlist)
let gethtml = async (name, page) => {
  const keyword = qs.escape(name) // don't mutate the caller's argument
  const url = `https://www.gufengmh8.com/search/?keywords=${keyword}&page=${page}`
  // Plain await instead of the original await + .then mix.
  const res = await axios.get(url)
  return getcontlist(res.data)
}
// Public entry point: search the site for `name` and return the first
// page of matching comics.
let getlist = async (name) => {
  const firstPage = await gethtml(name, 1)
  return firstPage
}
// getlist("偷星九月天")
module.exports = getlist

View File

@@ -0,0 +1,40 @@
const cheerio = require('cheerio');
const axios = require('axios')
// Fetch one chapter page and extract the image metadata that the site
// embeds as plain `var` declarations inside an inline <script>.
// @param url site-relative chapter path (e.g. "/manhua/xxx/123.html")
// @returns {object|undefined} { imghost, chapterPath, chapterImages,
//   pageTitle, pageUrl, prevChapterData, nextChapterData, pageImage,
//   down, upurl }, or undefined when no matching script is found.
let getscript = async (url) => {
  const fullUrl = "https://www.gufengmh8.com" + url // don't mutate the parameter
  // Plain await instead of the original await + .then mix.
  const res = await axios.get(fullUrl)
  const $ = cheerio.load(res.data)
  let list
  $('script').each((index, ele) => {
    const text = $(ele).html()
    if (text.search('chapterImages') != -1) {
      // SECURITY: executes JavaScript downloaded from the site. It is
      // kept because the page declares chapterImages / chapterPath /
      // pageImage / pageTitle / pageUrl / prevChapterData /
      // nextChapterData with `var`, and sloppy-mode eval leaks those
      // into this function's scope. A regex/JSON extraction would be
      // safer — consider replacing.
      eval(text)
      const reg = /^http(s)?:\/\/(.*?)\//
      // Image CDN hostname, taken from the first sample image URL.
      // Declared locally (the original leaked `imghost` as a global).
      const imghost = reg.exec(pageImage)[2]
      // Site-relative URLs for the next/previous chapter pages:
      // page base url + chapter id + ".html".
      const down = pageUrl.replace("https://www.gufengmh8.com", "") + nextChapterData.id + ".html"
      const upurl = pageUrl.replace("https://www.gufengmh8.com", "") + prevChapterData.id + ".html"
      list = { imghost, chapterPath, chapterImages, pageTitle, pageUrl, prevChapterData, nextChapterData, pageImage, down, upurl }
    }
  })
  return list
}
// getscript('')
module.exports = getscript

View File

@@ -0,0 +1,39 @@
let axios = require('axios')
const cheerio = require('cheerio');
// Parse a comic detail page into chapter groups.
// @param text raw HTML of the comic page
// @returns [{ title, list: [{ url, title }, ...] }, ...]
let getsection = async (text) => {
  const $ = cheerio.load(text)
  const groups = []
  $(".comic-chapters").each((idx, chapterBlock) => {
    // Re-parse the block without entity decoding so titles keep their
    // original characters.
    const j = cheerio.load($(chapterBlock).html(), { decodeEntities: false })
    const group = {}
    group.title = j(".pull-left").eq(0).html().replace('<span>', '').replace('</span>', '')
    group.list = []
    j('li').each((i, li) => {
      const anchor = j(li).find("a").eq(0)
      const entry = {
        url: anchor.attr('href'),
        title: anchor.html().replace('<span>', '').replace('</span>', '').replace(/\s+/g, "").replace('\\n', '')
      }
      group.list.push(entry)
    })
    groups.push(group)
  })
  return groups
}
// Fetch a comic detail page and return its parsed chapter groups.
// @param url absolute comic page URL
// @returns chapter groups (see getsection)
let gethtml = async (url) => {
  // Plain await instead of the original await + .then mix.
  const res = await axios.get(url)
  return getsection(res.data)
}
// gethtml('https://www.gufengmh8.com/manhua/touxingjiuyuetian/')
module.exports = gethtml