另一个爬虫

This commit is contained in:
pplokijuhyg 2020-01-02 21:58:17 +08:00
parent 85691155d2
commit 0ee00e6bec
7 changed files with 243 additions and 5 deletions

View File

@ -0,0 +1,4 @@
古风漫画网
www.gufengmh8.com
find.js 查找功能
section.js 获取章节列表

View File

@ -0,0 +1,17 @@
// import find from "./find"
// import imglist from "./picture"
// import section from "./section"
find = require("./find")
imglist = require("./picture")
section = require("./section")
let name = "塔多漫画"
let version = "20.01.02"
let type = 1
module.exports = {
name,
version,
type,
find,
imglist,
section
}

View File

@ -0,0 +1,43 @@
let axios = require('axios')
const cheerio = require('cheerio');
const qs = require('querystring');
/**
 * Parse a search-result page into a list of comic entries.
 *
 * @param {string} text - HTML of the search-result page.
 * @returns {Promise<Array<{update: ?string, name: ?string, date: string, url: (string|undefined)}>>}
 *   One entry per `<li>` found inside the first `#contList` element, or an
 *   empty list when the page has no `#contList` content.
 */
let getcontlist = async (text) => {
  const page = cheerio.load(text);
  const listHtml = page("#contList").eq(0).html();
  // .html() is null when the container is absent; report "no results" rather
  // than failing while re-parsing a null fragment.
  if (!listHtml) {
    return [];
  }

  const $ = cheerio.load(listHtml);
  const list = [];
  $("li").each((index, ele) => {
    // Each item is re-parsed with entity decoding switched off, exactly as
    // before; presumably this keeps non-ASCII titles readable in .html() output.
    const j = cheerio.load($(ele).html(), { decodeEntities: false });
    const obj = {};
    obj.update = j(".tt").eq(0).html();
    obj.name = j(".ell a").eq(0).html();
    // An item without an `.updateon` node used to throw on `.replace`; treat it
    // as an empty date instead so one odd item does not lose the whole list.
    const dateHtml = j(".updateon").eq(0).html() || "";
    obj.date = dateHtml.replace(/\s+/g, "").replace('<em>1.0</em>', '');
    obj.url = j(".cover").eq(0).attr("href");
    list.push(obj);
  });
  return list;
};
/**
 * Download one page of search results and parse it.
 *
 * @param {string} name - Search keywords; escaped before being put in the URL.
 * @param {number|string} page - Page number placed in the `page` query parameter.
 * @returns {Promise<Array>} Whatever `getcontlist` produces for the response body.
 */
let gethtml = async (name, page) => {
  const keywords = qs.escape(name);
  const searchUrl = `https://www.gufengmh8.com/search/?keywords=${keywords}&page=${page}`;
  const response = await axios.get(searchUrl);
  return await getcontlist(response.data);
};
/**
 * Search the site for a comic by name.
 * Only the first page of results is requested.
 *
 * @param {string} name - Search keywords.
 * @returns {Promise<Array>} The parsed entries of result page 1.
 */
let getlist = async (name) => await gethtml(name, 1);
// getlist("偷星九月天")
module.exports = getlist

View File

@ -0,0 +1,40 @@
const cheerio = require('cheerio');
const axios = require('axios')
/**
 * Fetch a chapter page and extract the reader variables defined by its inline
 * script.
 *
 * @param {string} url - Site-relative chapter path; it is appended to
 *   "https://www.gufengmh8.com".
 * @returns {Promise<Object|undefined>} An object with the fields `imghost`,
 *   `chapterPath`, `chapterImages`, `pageTitle`, `pageUrl`, `prevChapterData`,
 *   `nextChapterData`, `pageImage`, `down` and `upurl`, or `undefined` when no
 *   inline script mentions `chapterImages`.
 */
let getscript = async (url) => {
  const res = await axios.get("https://www.gufengmh8.com" + url);
  const $ = cheerio.load(res.data);
  let list;
  $('script').each((index, ele) => {
    let text = $(ele).html();
    if (text.includes('chapterImages')) {
      // SECURITY: this executes script text downloaded from the remote site
      // with full access to this process. NOTE(review): consider extracting
      // the variable assignments by parsing instead of evaluating them.
      // The names read below (pageImage, chapterPath, ...) are expected to be
      // declared with `var` by the evaluated script; that only works in
      // non-strict code, where such declarations land in this callback's scope.
      eval(text);
      var reg = /^http(s)?:\/\/(.*?)\//;
      // Declared here (it used to leak as an implicit global). `var` rather
      // than let/const so it cannot clash with a `var` of the same name coming
      // from the evaluated script.
      var imghost = reg.exec(pageImage)[2];
      // imghost          host name of the image server
      // chapterPath      base path of the chapter's images
      // chapterImages    array of image addresses
      // pageTitle        title
      // pageUrl          base url of the page
      // prevChapterData  info about the previous chapter page
      // nextChapterData  info about the next chapter page
      // A page address is the base url + the page info's id
      let down = pageUrl.replace("https://www.gufengmh8.com", "") + nextChapterData.id + ".html";
      let upurl = pageUrl.replace("https://www.gufengmh8.com", "") + prevChapterData.id + ".html";
      list = { imghost, chapterPath, chapterImages, pageTitle, pageUrl, prevChapterData, nextChapterData, pageImage, down, upurl };
    }
  });
  return list;
};
// getscript('')
module.exports = getscript

View File

@ -0,0 +1,39 @@
let axios = require('axios')
const cheerio = require('cheerio');
/**
 * Extract the chapter groups from a comic detail page.
 *
 * @param {string} text - HTML of the comic detail page.
 * @returns {Promise<Array<{title: string, list: Array<{url: (string|undefined), title: string}>}>>}
 *   One group per `.comic-chapters` element; each group lists the link target
 *   and cleaned-up label of every `<li>` inside it.
 */
let getsection = async (text) => {
  const page = cheerio.load(text);
  const groups = [];
  // Drops the first opening and first closing <span> tag from a fragment.
  const stripSpan = (html) => html.replace('<span>', '').replace('</span>', '');

  page(".comic-chapters").each((groupIndex, groupEle) => {
    // Re-parse the group on its own with entity decoding switched off.
    const j = cheerio.load(page(groupEle).html(), { decodeEntities: false });
    const group = {
      title: stripSpan(j(".pull-left").eq(0).html()),
      list: [],
    };

    j('li').each((itemIndex, itemEle) => {
      const link = j(itemEle).find("a").eq(0);
      group.list.push({
        url: link.attr('href'),
        // Label without the span wrapper and without any whitespace.
        title: stripSpan(link.html()).replace(/\s+/g, "").replace('\\n', ''),
      });
    });

    groups.push(group);
  });

  return groups;
};
/**
 * Download a comic detail page and return its chapter groups.
 *
 * @param {string} url - Address of the comic detail page, passed to axios as is.
 * @returns {Promise<Array>} Whatever `getsection` produces for the response body.
 */
let gethtml = async (url) => {
  const { data } = await axios.get(url);
  return await getsection(data);
};
// gethtml('https://www.gufengmh8.com/manhua/touxingjiuyuetian/')
module.exports = gethtml

View File

@ -17,6 +17,7 @@
"koa-router": "^7.4.0",
"mongodb": "^3.3.5",
"path": "^0.12.7",
"puppeteer": "^2.0.0",
"querystring": "^0.2.0",
"request": "^2.88.0",
"superagent": "^5.1.2",

104
yarn.lock
View File

@ -15,6 +15,13 @@ accepts@^1.3.5:
mime-types "~2.1.24"
negotiator "0.6.2"
agent-base@^4.3.0:
version "4.3.0"
resolved "https://registry.yarnpkg.com/agent-base/-/agent-base-4.3.0.tgz#8165f01c436009bccad0b1d122f05ed770efc6ee"
integrity sha512-salcGninV0nPrwpGNn4VTXBb1SOuXQBiqbrNXoeizJsHrsL6ERFM2Ne3JUSBWRE6aeNJI2ROP/WEEIDUiDe3cg==
dependencies:
es6-promisify "^5.0.0"
ajv@^6.5.5:
version "6.10.2"
resolved "https://registry.npm.taobao.org/ajv/download/ajv-6.10.2.tgz#d3cea04d6b017b2894ad69040fec8b623eb4bd52"
@ -108,6 +115,11 @@ async-each-series@^1.1.0:
resolved "https://registry.npm.taobao.org/async-each-series/download/async-each-series-1.1.0.tgz#f42fd8155d38f21a5b8ea07c28e063ed1700b138"
integrity sha1-9C/YFV048hpbjqB8KOBj7RcAsTg=
async-limiter@~1.0.0:
version "1.0.1"
resolved "https://registry.yarnpkg.com/async-limiter/-/async-limiter-1.0.1.tgz#dd379e94f0db8310b08291f9d64c3209766617fd"
integrity sha512-csOlWGAcRFJaI6m+F2WKdnMKr4HhdhFVBk0H/QbJFMCr+uO2kwohwXQPxw/9OCxp05r5ghVBFSyioixx3gfkNQ==
asynckit@^0.4.0:
version "0.4.0"
resolved "https://registry.npm.taobao.org/asynckit/download/asynckit-0.4.0.tgz#c79ed97f7f34cb8f2ba1bc9790bcc366474b4b79"
@ -403,7 +415,7 @@ concat-map@0.0.1:
resolved "https://registry.npm.taobao.org/concat-map/download/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b"
integrity sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=
concat-stream@^1.4.6, concat-stream@^1.4.7:
concat-stream@1.6.2, concat-stream@^1.4.6, concat-stream@^1.4.7:
version "1.6.2"
resolved "https://registry.npm.taobao.org/concat-stream/download/concat-stream-1.6.2.tgz#904bdf194cd3122fc675c77fc4ac3d4ff0fd1a34"
integrity sha1-kEvfGUzTEi/Gdcd/xKw9T/D9GjQ=
@ -510,6 +522,13 @@ dateformat@^2.0.0:
resolved "https://registry.npm.taobao.org/dateformat/download/dateformat-2.2.0.tgz#4065e2013cf9fb916ddfd82efb506ad4c6769062"
integrity sha1-QGXiATz5+5Ft39gu+1Bq1MZ2kGI=
debug@2.6.9:
version "2.6.9"
resolved "https://registry.yarnpkg.com/debug/-/debug-2.6.9.tgz#5d128515df134ff327e90a4c93f4e077a536341f"
integrity sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==
dependencies:
ms "2.0.0"
debug@=3.1.0, debug@~3.1.0:
version "3.1.0"
resolved "https://registry.npm.taobao.org/debug/download/debug-3.1.0.tgz#5bb5a0672628b64149566ba16819e61518c67261"
@ -524,7 +543,7 @@ debug@^3.1.0:
dependencies:
ms "^2.1.1"
debug@^4.1.1:
debug@^4.1.0, debug@^4.1.1:
version "4.1.1"
resolved "https://registry.npm.taobao.org/debug/download/debug-4.1.1.tgz#3b72260255109c6b589cee050f1d516139664791"
integrity sha1-O3ImAlUQnGtYnO4FDx1RYTlmR5E=
@ -785,6 +804,18 @@ error-inject@^1.0.0:
resolved "https://registry.npm.taobao.org/error-inject/download/error-inject-1.0.0.tgz#e2b3d91b54aed672f309d950d154850fa11d4f37"
integrity sha1-4rPZG1Su1nLzCdlQ0VSFD6EdTzc=
es6-promise@^4.0.3:
version "4.2.8"
resolved "https://registry.yarnpkg.com/es6-promise/-/es6-promise-4.2.8.tgz#4eb21594c972bc40553d276e510539143db53e0a"
integrity sha512-HJDGx5daxeIvxdBxvG2cb9g4tEvwIk3i8+nhX0yGrYmZUzbkdg8QbDevheDB8gd0//uPj4c1EQua8Q+MViT0/w==
es6-promisify@^5.0.0:
version "5.0.0"
resolved "https://registry.yarnpkg.com/es6-promisify/-/es6-promisify-5.0.0.tgz#5109d62f3e56ea967c4b63505aef08291c8a5203"
integrity sha1-UQnWLz5W6pZ8S2NQWu8IKRyKUgM=
dependencies:
es6-promise "^4.0.3"
escape-html@^1.0.3:
version "1.0.3"
resolved "https://registry.npm.taobao.org/escape-html/download/escape-html-1.0.3.tgz#0258eae4d3d0c0974de1c169188ef0051d1d1988"
@ -843,6 +874,16 @@ extglob@^0.3.1:
dependencies:
is-extglob "^1.0.0"
extract-zip@^1.6.6:
version "1.6.7"
resolved "https://registry.yarnpkg.com/extract-zip/-/extract-zip-1.6.7.tgz#a840b4b8af6403264c8db57f4f1a74333ef81fe9"
integrity sha1-qEC0uK9kAyZMjbV/Txp0Mz74H+k=
dependencies:
concat-stream "1.6.2"
debug "2.6.9"
mkdirp "0.5.1"
yauzl "2.4.1"
extsprintf@1.3.0:
version "1.3.0"
resolved "https://registry.npm.taobao.org/extsprintf/download/extsprintf-1.3.0.tgz#96918440e3041a7a414f8c52e3c574eb3c3e1e05"
@ -878,6 +919,13 @@ fast-safe-stringify@^2.0.7:
resolved "https://registry.npm.taobao.org/fast-safe-stringify/download/fast-safe-stringify-2.0.7.tgz#124aa885899261f68aedb42a7c080de9da608743"
integrity sha1-EkqohYmSYfaK7bQqfAgN6dpgh0M=
fd-slicer@~1.0.1:
version "1.0.1"
resolved "https://registry.yarnpkg.com/fd-slicer/-/fd-slicer-1.0.1.tgz#8b5bcbd9ec327c5041bf9ab023fd6750f1177e65"
integrity sha1-i1vL2ewyfFBBv5qwI/1nUPEXfmU=
dependencies:
pend "~1.2.0"
fd-slicer@~1.1.0:
version "1.1.0"
resolved "https://registry.npm.taobao.org/fd-slicer/download/fd-slicer-1.1.0.tgz#25c7c89cb1f9077f8891bbe61d8f390eae256f1e"
@ -1264,6 +1312,14 @@ http-signature@~1.2.0:
jsprim "^1.2.2"
sshpk "^1.7.0"
https-proxy-agent@^3.0.0:
version "3.0.1"
resolved "https://registry.yarnpkg.com/https-proxy-agent/-/https-proxy-agent-3.0.1.tgz#b8c286433e87602311b01c8ea34413d856a4af81"
integrity sha512-+ML2Rbh6DAuee7d07tYGEKOEi2voWPUGan+ExdPbPW6Z3svq+JCqr0v8WmKPOkz1vOVykPCBSuobe7G8GJUtVg==
dependencies:
agent-base "^4.3.0"
debug "^3.1.0"
iconv-lite@0.4.24:
version "0.4.24"
resolved "https://registry.npm.taobao.org/iconv-lite/download/iconv-lite-0.4.24.tgz#2022b4b25fbddc21d2f524974a474aafe733908b"
@ -1871,7 +1927,7 @@ mime-types@^2.1.12, mime-types@^2.1.18, mime-types@~2.1.19, mime-types@~2.1.24:
dependencies:
mime-db "1.42.0"
mime@^2.4.4:
mime@^2.0.3, mime@^2.4.4:
version "2.4.4"
resolved "https://registry.npm.taobao.org/mime/download/mime-2.4.4.tgz#bd7b91135fc6b01cde3e9bae33d659b63d8857e5"
integrity sha1-vXuRE1/GsBzePpuuM9ZZtj2IV+U=
@ -1893,7 +1949,7 @@ minimist@^1.1.0, minimist@^1.1.3, minimist@^1.2.0:
resolved "https://registry.npm.taobao.org/minimist/download/minimist-1.2.0.tgz#a35008b20f41383eec1fb914f4cd5df79a264284"
integrity sha1-o1AIsg9BOD7sH7kU9M1d95omQoQ=
mkdirp@^0.5.0:
mkdirp@0.5.1, mkdirp@^0.5.0:
version "0.5.1"
resolved "https://registry.npm.taobao.org/mkdirp/download/mkdirp-0.5.1.tgz?cache=0&other_urls=https%3A%2F%2Fregistry.npm.taobao.org%2Fmkdirp%2Fdownload%2Fmkdirp-0.5.1.tgz#30057438eac6cf7f8c4767f38648d6697d75c903"
integrity sha1-MAV0OOrGz3+MR2fzhkjWaX11yQM=
@ -2164,6 +2220,16 @@ process@^0.11.1:
resolved "https://registry.npm.taobao.org/process/download/process-0.11.10.tgz#7332300e840161bda3e69a1d1d91a7d4bc16f182"
integrity sha1-czIwDoQBYb2j5podHZGn1LwW8YI=
progress@^2.0.1:
version "2.0.3"
resolved "https://registry.yarnpkg.com/progress/-/progress-2.0.3.tgz#7e8cf8d8f5b8f239c1bc68beb4eb78567d572ef8"
integrity sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==
proxy-from-env@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/proxy-from-env/-/proxy-from-env-1.0.0.tgz#33c50398f70ea7eb96d21f7b817630a55791c7ee"
integrity sha1-M8UDmPcOp+uW0h97gXYwpVeRx+4=
psl@^1.1.24:
version "1.6.0"
resolved "https://registry.npm.taobao.org/psl/download/psl-1.6.0.tgz#60557582ee23b6c43719d9890fb4170ecd91e110"
@ -2179,6 +2245,20 @@ punycode@^2.1.0:
resolved "https://registry.npm.taobao.org/punycode/download/punycode-2.1.1.tgz#b58b010ac40c22c5657616c8d2c2c02c7bf479ec"
integrity sha1-tYsBCsQMIsVldhbI0sLALHv0eew=
puppeteer@^2.0.0:
version "2.0.0"
resolved "https://registry.yarnpkg.com/puppeteer/-/puppeteer-2.0.0.tgz#0612992e29ec418e0a62c8bebe61af1a64d7ec01"
integrity sha512-t3MmTWzQxPRP71teU6l0jX47PHXlc4Z52sQv4LJQSZLq1ttkKS2yGM3gaI57uQwZkNaoGd0+HPPMELZkcyhlqA==
dependencies:
debug "^4.1.0"
extract-zip "^1.6.6"
https-proxy-agent "^3.0.0"
mime "^2.0.3"
progress "^2.0.1"
proxy-from-env "^1.0.0"
rimraf "^2.6.1"
ws "^6.1.0"
qs@^6.5.2, qs@^6.9.1:
version "6.9.1"
resolved "https://registry.npm.taobao.org/qs/download/qs-6.9.1.tgz#20082c65cb78223635ab1a9eaca8875a29bf8ec9"
@ -2378,7 +2458,7 @@ resolve@^1.10.0:
dependencies:
path-parse "^1.0.6"
rimraf@^2.2.6:
rimraf@^2.2.6, rimraf@^2.6.1:
version "2.7.1"
resolved "https://registry.npm.taobao.org/rimraf/download/rimraf-2.7.1.tgz#35797f13a7fdadc566142c29d4f07ccad483e3ec"
integrity sha1-NXl/E6f9rcVmFCwp1PB8ytSD4+w=
@ -2954,11 +3034,25 @@ wrappy@1:
resolved "https://registry.npm.taobao.org/wrappy/download/wrappy-1.0.2.tgz#b5243d8f3ec1aa35f1364605bc0d1036e30ab69f"
integrity sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=
ws@^6.1.0:
version "6.2.1"
resolved "https://registry.yarnpkg.com/ws/-/ws-6.2.1.tgz#442fdf0a47ed64f59b6a5d8ff130f4748ed524fb"
integrity sha512-GIyAXC2cB7LjvpgMt9EKS2ldqr0MTrORaleiOno6TweZ6r3TKtoFQWay/2PceJ3RuBasOHzXNn5Lrw1X0bEjqA==
dependencies:
async-limiter "~1.0.0"
"xtend@>=4.0.0 <4.1.0-0", xtend@^4.0.0, xtend@~4.0.0, xtend@~4.0.1:
version "4.0.2"
resolved "https://registry.npm.taobao.org/xtend/download/xtend-4.0.2.tgz#bb72779f5fa465186b1f438f674fa347fdb5db54"
integrity sha1-u3J3n1+kZRhrH0OPZ0+jR/2121Q=
yauzl@2.4.1:
version "2.4.1"
resolved "https://registry.yarnpkg.com/yauzl/-/yauzl-2.4.1.tgz#9528f442dab1b2284e58b4379bb194e22e0c4005"
integrity sha1-lSj0QtqxsihOWLQ3m7GU4i4MQAU=
dependencies:
fd-slicer "~1.0.1"
yauzl@^2.2.1:
version "2.10.0"
resolved "https://registry.npm.taobao.org/yauzl/download/yauzl-2.10.0.tgz#c7eb17c93e112cb1086fa6d8e51fb0667b79a5f9"