Refactor ProxyScraper to limit proxy attempts to 3 and improve error handling. Add a flag to switch to a direct connection if a proxy fails. Enhance logging for proxy testing outcomes.
This commit is contained in:
parent
a4127509af
commit
4cefbbbcd6
@ -91,19 +91,25 @@ class ProxyScraper {
|
|||||||
return null; // 无本地代理,使用直连
|
return null; // 无本地代理,使用直连
|
||||||
}
|
}
|
||||||
|
|
||||||
// 尝试几个代理,找到可用的
|
// 尝试几个代理,找到可用的(最多尝试3个,避免耗时过长)
|
||||||
for (let i = 0; i < Math.min(5, this.localProxies.length); i++) {
|
const maxAttempts = Math.min(3, this.localProxies.length);
|
||||||
|
for (let i = 0; i < maxAttempts; i++) {
|
||||||
const proxyConfig = this.getNextLocalProxy();
|
const proxyConfig = this.getNextLocalProxy();
|
||||||
|
|
||||||
if (await this.testProxyForScraping(proxyConfig)) {
|
try {
|
||||||
console.log(`✓ 代理 ${proxyConfig.host}:${proxyConfig.port} 可用`);
|
if (await this.testProxyForScraping(proxyConfig)) {
|
||||||
return proxyConfig;
|
console.log(`✓ 代理 ${proxyConfig.host}:${proxyConfig.port} 可用`);
|
||||||
} else {
|
return proxyConfig;
|
||||||
console.log(`✗ 代理 ${proxyConfig.host}:${proxyConfig.port} 不可用,尝试下一个`);
|
} else {
|
||||||
|
console.log(`✗ 代理 ${proxyConfig.host}:${proxyConfig.port} 不可用,尝试下一个`);
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
console.log(`✗ 代理 ${proxyConfig.host}:${proxyConfig.port} 测试出错: ${error.message}`);
|
||||||
|
// 继续尝试下一个代理
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log('测试的本地代理都不可用,使用直连');
|
console.log(`测试了 ${maxAttempts} 个本地代理都不可用,将使用直连`);
|
||||||
return null; // 所有测试的代理都不可用,使用直连
|
return null; // 所有测试的代理都不可用,使用直连
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -181,6 +187,8 @@ class ProxyScraper {
|
|||||||
|
|
||||||
console.log(`正在抓取第 ${pageNum} 页: ${url}`);
|
console.log(`正在抓取第 ${pageNum} 页: ${url}`);
|
||||||
|
|
||||||
|
let useDirectConnection = false; // 标志:是否应该直接使用直连
|
||||||
|
|
||||||
for (let attempt = 1; attempt <= retryCount; attempt++) {
|
for (let attempt = 1; attempt <= retryCount; attempt++) {
|
||||||
let proxyConfig = null;
|
let proxyConfig = null;
|
||||||
let proxyUsed = '';
|
let proxyUsed = '';
|
||||||
@ -188,8 +196,14 @@ class ProxyScraper {
|
|||||||
try {
|
try {
|
||||||
const userAgent = this.getRandomUserAgent();
|
const userAgent = this.getRandomUserAgent();
|
||||||
|
|
||||||
// 获取可用代理配置(每次请求都尝试不同的代理)
|
// 如果之前使用代理失败过,或者标记为使用直连,则跳过代理
|
||||||
proxyConfig = await this.getWorkingProxy();
|
if (!useDirectConnection && this.localProxies.length > 0) {
|
||||||
|
// 尝试获取可用代理配置
|
||||||
|
proxyConfig = await this.getWorkingProxy();
|
||||||
|
} else {
|
||||||
|
proxyConfig = null;
|
||||||
|
console.log('跳过代理,直接使用直连');
|
||||||
|
}
|
||||||
|
|
||||||
const requestConfig = {
|
const requestConfig = {
|
||||||
headers: {
|
headers: {
|
||||||
@ -242,6 +256,12 @@ class ProxyScraper {
|
|||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error(`第 ${attempt} 次尝试抓取第 ${pageNum} 页失败 (${proxyUsed}):`, error.message);
|
console.error(`第 ${attempt} 次尝试抓取第 ${pageNum} 页失败 (${proxyUsed}):`, error.message);
|
||||||
|
|
||||||
|
// 如果使用代理失败,下次重试时使用直连
|
||||||
|
if (proxyConfig) {
|
||||||
|
console.log(`代理 ${proxyConfig.host}:${proxyConfig.port} 抓取失败,下次重试将使用直连`);
|
||||||
|
useDirectConnection = true; // 标记为使用直连
|
||||||
|
}
|
||||||
|
|
||||||
if (attempt === retryCount) {
|
if (attempt === retryCount) {
|
||||||
throw new Error(`抓取第 ${pageNum} 页失败,已重试 ${retryCount} 次: ${error.message}`);
|
throw new Error(`抓取第 ${pageNum} 页失败,已重试 ${retryCount} 次: ${error.message}`);
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user