const axios = require("axios") const puppeteer = require("puppeteer") const cheerio = require("cheerio"); // HTML页面解析 async function search(name) { const list = await axios.get(`https://movie.douban.com/j/subject_suggest?q=${name}`, { proxy: false, }) return { data: list.data.map(d => { return { ...d, link: `https://movie.douban.com/subject/${d.id}/` } }) } } // 解析HTML页面 function page_parser(responseText) { return cheerio.load(responseText, { decodeEntities: false }); } // 解析JSONP返回 function jsonp_parser(responseText) { try { responseText = responseText.replace(/\n/ig, '').match(/[^(]+\((.+)\)/)[1]; return JSON.parse(responseText); } catch (e) { return {} } } async function gen_douban(sid) { let data = { site: "douban", sid: sid }; // 下面开始正常的豆瓣处理流程 let douban_link = `https://movie.douban.com/subject/${sid}/`; // 构造链接 let db_page_resp = await fetch(douban_link); // 请求豆瓣对应项目主页面 let douban_page_raw = await db_page_resp.text(); // 对异常进行处理 if (douban_page_raw.match(/你想访问的页面不存在/)) { return Object.assign(data, { error: NONE_EXIST_ERROR }); } else if (douban_page_raw.match(/检测到有异常请求/)) { // 真的会有这种可能吗? return Object.assign(data, { error: "GenHelp was temporary banned by Douban, Please wait...." }); } else { let awards_page_req = fetch(`${douban_link}awards`) // 马上请求豆瓣获奖界面 // 解析主页面 let $ = page_parser(douban_page_raw); let title = $("title").text().replace("(豆瓣)", "").trim(); // 从ld+json中获取原来API返回的部分信息 let ld_json = JSON.parse($('head > script[type="application/ld+json"]').html().replace(/(\r\n|\n|\r|\t)/gm, '')); // 元素获取方法 let fetch_anchor = function (anchor) { return anchor[0].nextSibling.nodeValue.trim(); }; // 所有需要的元素 let poster; let this_title, trans_title, aka; let year, region, genre, language, playdate; let imdb_link, imdb_id, imdb_average_rating, imdb_votes, imdb_rating; let douban_average_rating, douban_votes, douban_rating; let episodes, duration; let director, writer, cast; let tags, introduction, awards; // 提前imdb相关请求 let imdb_api_req; let imdb_anchor = $('#info span.pl:contains("IMDb")'); if (imdb_anchor.length > 0) { data["imdb_id"] = imdb_id = fetch_anchor(imdb_anchor); data["imdb_link"] = imdb_link = `https://www.imdb.com/title/${imdb_id}/` imdb_api_req = fetch(`https://p.media-imdb.com/static-content/documents/v1/title/${imdb_id}/ratings%3Fjsonp=imdb.rating.run:imdb.api.title.ratings/data.json`); } let chinese_title = data["chinese_title"] = title; let foreign_title = data["foreign_title"] = $('span[property="v:itemreviewed"]').text().replace(data["chinese_title"], "").trim(); let aka_anchor = $('#info span.pl:contains("又名")'); if (aka_anchor.length > 0) { aka = fetch_anchor(aka_anchor).split(" / ").sort(function (a, b) { //首字(母)排序 return a.localeCompare(b); }).join("/"); data["aka"] = aka.split("/"); } if (foreign_title) { trans_title = chinese_title + (aka ? ("/" + aka) : ""); this_title = foreign_title; } else { trans_title = aka ? aka : ""; this_title = chinese_title; } data["trans_title"] = trans_title.split("/"); data["this_title"] = this_title.split("/"); let regions_anchor = $('#info span.pl:contains("制片国家/地区")'); //产地 let language_anchor = $('#info span.pl:contains("语言")'); //语言 let episodes_anchor = $('#info span.pl:contains("集数")'); //集数 let duration_anchor = $('#info span.pl:contains("单集片长")'); //片长 data["year"] = year = " " + $("#content > h1 > span.year").text().substr(1, 4); data["region"] = region = regions_anchor[0] ? fetch_anchor(regions_anchor).split(" / ") : ""; data["genre"] = genre = $("#info span[property=\"v:genre\"]").map(function () { //类别 return $(this).text().trim(); }).toArray(); data["language"] = language = language_anchor[0] ? fetch_anchor(language_anchor).split(" / ") : ""; data["playdate"] = playdate = $("#info span[property=\"v:initialReleaseDate\"]").map(function () { //上映日期 return $(this).text().trim(); }).toArray().sort(function (a, b) { //按上映日期升序排列 return new Date(a) - new Date(b); }); data["episodes"] = episodes = episodes_anchor[0] ? fetch_anchor(episodes_anchor) : ""; data["duration"] = duration = duration_anchor[0] ? fetch_anchor(duration_anchor) : $("#info span[property=\"v:runtime\"]").text().trim(); // 简介 首先检查是不是有隐藏的,如果有,则直接使用隐藏span的内容作为简介,不然则用 span[property="v:summary"] 的内容 // 20221201 issue#34 豆瓣将上一层div的id从 link-report 变为 link-report-intra let introduction_another = $('#link-report-intra > span.all.hidden, #link-report-intra > [property="v:summary"], #link-report > span.all.hidden, #link-report > [property="v:summary"]') data["introduction"] = introduction = ( introduction_another.length > 0 ? introduction_another.text() : '暂无相关剧情介绍' ).split('\n').map(a => a.trim()).filter(a => a.length > 0).join('\n'); // 处理简介缩进 // 从ld_json中获取信息 data["douban_rating_average"] = douban_average_rating = ld_json['aggregateRating'] ? ld_json['aggregateRating']['ratingValue'] : 0; data["douban_votes"] = douban_votes = ld_json['aggregateRating'] ? ld_json['aggregateRating']['ratingCount'] : 0; data["douban_rating"] = douban_rating = `${douban_average_rating}/10 from ${douban_votes} users`; data["poster"] = poster = ld_json['image'] .replace(/s(_ratio_poster|pic)/g, "l$1") .replace("img3", "img1"); data["director"] = director = ld_json['director'] ? ld_json['director'] : []; data["writer"] = writer = ld_json['author'] ? ld_json['author'] : []; data["cast"] = cast = ld_json['actor'] ? ld_json['actor'] : []; let tag_another = $('div.tags-body > a[href^="/tag"]'); if (tag_another.length > 0) { data["tags"] = tags = tag_another.map(function () { return $(this).text() }).get(); } let awards_page_resp = await awards_page_req; let awards_page_raw = await awards_page_resp.text(); let awards_page = page_parser(awards_page_raw); data["awards"] = awards = awards_page("#content > div > div.article").html() .replace(/[ \n]/g, "") .replace(/<\/li>