ptSend/util/ptgen/index.js

217 lines
9.9 KiB
JavaScript
Raw Normal View History

2023-06-20 22:34:27 +08:00
const axios = require("axios")
2023-07-05 20:36:42 +08:00
const cheerio = require("cheerio"); // HTML页面解析
async function search(name) {
2023-07-05 20:36:42 +08:00
const list = await axios.get(`https://movie.douban.com/j/subject_suggest?q=${name}`, {
2023-06-20 22:34:27 +08:00
proxy: false,
})
2023-07-05 20:36:42 +08:00
return {
data: list.data.map(d => {
2023-07-12 14:51:00 +08:00
d.img = "http://127.0.0.1:3050/doubanImg?url=" + d.img
2023-07-05 20:36:42 +08:00
return {
...d,
link: `https://movie.douban.com/subject/${d.id}/`
}
})
}
2023-06-20 22:34:27 +08:00
}
2023-07-05 20:36:42 +08:00
// 解析HTML页面
function page_parser(responseText) {
return cheerio.load(responseText, {
decodeEntities: false
});
}
// 解析JSONP返回
function jsonp_parser(responseText) {
try {
responseText = responseText.replace(/\n/ig, '').match(/[^(]+\((.+)\)/)[1];
return JSON.parse(responseText);
} catch (e) {
return {}
}
}
2023-06-20 22:34:27 +08:00
2023-07-05 20:36:42 +08:00
async function gen_douban(sid) {
let data = {
site: "douban",
sid: sid
};
// 下面开始正常的豆瓣处理流程
let douban_link = `https://movie.douban.com/subject/${sid}/`; // 构造链接
let db_page_resp = await fetch(douban_link); // 请求豆瓣对应项目主页面
let douban_page_raw = await db_page_resp.text();
// 对异常进行处理
if (douban_page_raw.match(/你想访问的页面不存在/)) {
return Object.assign(data, {
error: NONE_EXIST_ERROR
});
} else if (douban_page_raw.match(/检测到有异常请求/)) { // 真的会有这种可能吗?
return Object.assign(data, {
error: "GenHelp was temporary banned by Douban, Please wait...."
});
} else {
let awards_page_req = fetch(`${douban_link}awards`) // 马上请求豆瓣获奖界面
// 解析主页面
let $ = page_parser(douban_page_raw);
let title = $("title").text().replace("(豆瓣)", "").trim();
// 从ld+json中获取原来API返回的部分信息
let ld_json = JSON.parse($('head > script[type="application/ld+json"]').html().replace(/(\r\n|\n|\r|\t)/gm, ''));
// 元素获取方法
let fetch_anchor = function (anchor) {
return anchor[0].nextSibling.nodeValue.trim();
};
// 所有需要的元素
let poster;
let this_title, trans_title, aka;
let year, region, genre, language, playdate;
let imdb_link, imdb_id, imdb_average_rating, imdb_votes, imdb_rating;
let douban_average_rating, douban_votes, douban_rating;
let episodes, duration;
let director, writer, cast;
let tags, introduction, awards;
// 提前imdb相关请求
let imdb_api_req;
let imdb_anchor = $('#info span.pl:contains("IMDb")');
if (imdb_anchor.length > 0) {
data["imdb_id"] = imdb_id = fetch_anchor(imdb_anchor);
data["imdb_link"] = imdb_link = `https://www.imdb.com/title/${imdb_id}/`
imdb_api_req = fetch(`https://p.media-imdb.com/static-content/documents/v1/title/${imdb_id}/ratings%3Fjsonp=imdb.rating.run:imdb.api.title.ratings/data.json`);
}
let chinese_title = data["chinese_title"] = title;
let foreign_title = data["foreign_title"] = $('span[property="v:itemreviewed"]').text().replace(data["chinese_title"], "").trim();
let aka_anchor = $('#info span.pl:contains("又名")');
if (aka_anchor.length > 0) {
aka = fetch_anchor(aka_anchor).split(" / ").sort(function (a, b) { //首字(母)排序
return a.localeCompare(b);
}).join("/");
data["aka"] = aka.split("/");
}
if (foreign_title) {
trans_title = chinese_title + (aka ? ("/" + aka) : "");
this_title = foreign_title;
} else {
trans_title = aka ? aka : "";
this_title = chinese_title;
}
data["trans_title"] = trans_title.split("/");
data["this_title"] = this_title.split("/");
let regions_anchor = $('#info span.pl:contains("制片国家/地区")'); //产地
let language_anchor = $('#info span.pl:contains("语言")'); //语言
let episodes_anchor = $('#info span.pl:contains("集数")'); //集数
let duration_anchor = $('#info span.pl:contains("单集片长")'); //片长
data["year"] = year = " " + $("#content > h1 > span.year").text().substr(1, 4);
data["region"] = region = regions_anchor[0] ? fetch_anchor(regions_anchor).split(" / ") : "";
data["genre"] = genre = $("#info span[property=\"v:genre\"]").map(function () { //类别
return $(this).text().trim();
}).toArray();
data["language"] = language = language_anchor[0] ? fetch_anchor(language_anchor).split(" / ") : "";
data["playdate"] = playdate = $("#info span[property=\"v:initialReleaseDate\"]").map(function () { //上映日期
return $(this).text().trim();
}).toArray().sort(function (a, b) { //按上映日期升序排列
return new Date(a) - new Date(b);
});
data["episodes"] = episodes = episodes_anchor[0] ? fetch_anchor(episodes_anchor) : "";
data["duration"] = duration = duration_anchor[0] ? fetch_anchor(duration_anchor) : $("#info span[property=\"v:runtime\"]").text().trim();
// 简介 首先检查是不是有隐藏的如果有则直接使用隐藏span的内容作为简介不然则用 span[property="v:summary"] 的内容
// 20221201 issue#34 豆瓣将上一层div的id从 link-report 变为 link-report-intra
let introduction_another = $('#link-report-intra > span.all.hidden, #link-report-intra > [property="v:summary"], #link-report > span.all.hidden, #link-report > [property="v:summary"]')
data["introduction"] = introduction = (
introduction_another.length > 0 ? introduction_another.text() : '暂无相关剧情介绍'
).split('\n').map(a => a.trim()).filter(a => a.length > 0).join('\n'); // 处理简介缩进
// 从ld_json中获取信息
data["douban_rating_average"] = douban_average_rating = ld_json['aggregateRating'] ? ld_json['aggregateRating']['ratingValue'] : 0;
data["douban_votes"] = douban_votes = ld_json['aggregateRating'] ? ld_json['aggregateRating']['ratingCount'] : 0;
data["douban_rating"] = douban_rating = `${douban_average_rating}/10 from ${douban_votes} users`;
data["poster"] = poster = ld_json['image']
.replace(/s(_ratio_poster|pic)/g, "l$1")
.replace("img3", "img1");
data["director"] = director = ld_json['director'] ? ld_json['director'] : [];
data["writer"] = writer = ld_json['author'] ? ld_json['author'] : [];
data["cast"] = cast = ld_json['actor'] ? ld_json['actor'] : [];
let tag_another = $('div.tags-body > a[href^="/tag"]');
if (tag_another.length > 0) {
data["tags"] = tags = tag_another.map(function () {
return $(this).text()
}).get();
}
let awards_page_resp = await awards_page_req;
let awards_page_raw = await awards_page_resp.text();
let awards_page = page_parser(awards_page_raw);
data["awards"] = awards = awards_page("#content > div > div.article").html()
.replace(/[ \n]/g, "")
.replace(/<\/li><li>/g, "</li> <li>")
.replace(/<\/a><span/g, "</a> <span")
.replace(/<(div|ul)[^>]*>/g, "\n")
.replace(/<[^>]+>/g, "")
.replace(/&nbsp;/g, " ")
.replace(/ +\n/g, "\n")
.trim();
if (imdb_api_req) {
let imdb_api_resp = await imdb_api_req;
let imdb_api_raw = await imdb_api_resp.text();
let imdb_json = jsonp_parser(imdb_api_raw);
if (imdb_json["resource"]) {
data["imdb_rating_average"] = imdb_average_rating = imdb_json["resource"]["rating"] || 0;
data["imdb_votes"] = imdb_votes = imdb_json["resource"]["ratingCount"] || 0;
data["imdb_rating"] = imdb_rating = `${imdb_average_rating}/10 from ${imdb_votes} users`;
}
}
// 生成format
let descr = poster ? `[img]${poster}[/img]\n\n` : "";
descr += trans_title ? `◎译  名 ${trans_title}\n` : "";
descr += this_title ? `◎片  名 ${this_title}\n` : "";
descr += year ? `◎年  代 ${year.trim()}\n` : "";
descr += region ? `◎产  地 ${region}\n` : "";
descr += genre ? `◎类  别 ${genre.join(" / ")}\n` : "";
descr += language ? `◎语  言 ${language}\n` : "";
descr += playdate ? `◎上映日期 ${playdate.join(" / ")}\n` : "";
descr += imdb_rating ? `◎IMDb评分 ${imdb_rating}\n` : "";
descr += imdb_link ? `◎IMDb链接 ${imdb_link}\n` : "";
descr += douban_rating ? `◎豆瓣评分 ${douban_rating}\n` : "";
descr += douban_link ? `◎豆瓣链接 ${douban_link}\n` : "";
descr += episodes ? `◎集  数 ${episodes}\n` : "";
descr += duration ? `◎片  长 ${duration}\n` : "";
descr += director && director.length > 0 ? `◎导  演 ${director.map(x => x['name']).join(" / ")}\n` : "";
descr += writer && writer.length > 0 ? `◎编  剧 ${writer.map(x => x['name']).join(" / ")}\n` : "";
descr += cast && cast.length > 0 ? `◎主  演 ${cast.map(x => x['name']).join("\n" + " ".repeat(4) + "  ").trim()}\n` : "";
descr += tags && tags.length > 0 ? `\n◎标  签 ${tags.join(" | ")}\n` : "";
descr += introduction ? `\n◎简  介\n\n  ${introduction.replace(/\n/g, "\n" + " ".repeat(2))}\n` : "";
descr += awards ? `\n◎获奖情况\n\n  ${awards.replace(/\n/g, "\n" + " ".repeat(2))}\n` : "";
data["format"] = descr.trim();
data["success"] = true; // 更新状态为成功
return data;
}
2023-06-20 22:34:27 +08:00
}
module.exports = {
serach: search,
2023-07-05 20:36:42 +08:00
gen_douban
2023-06-20 22:34:27 +08:00
}