This commit is contained in:
2025-10-30 23:05:24 +08:00
commit 480f224ab6
30 changed files with 11271 additions and 0 deletions

329
src/app.js Normal file
View File

@@ -0,0 +1,329 @@
const express = require('express');
const path = require('path');
const Database = require('./database/db');
const ProxyModel = require('./database/models/proxy');
const HistoryModel = require('./database/models/history');
const LogsModel = require('./database/models/logs');
const ProxyScheduler = require('./services/scheduler');
const proxyRoutes = require('./routes/proxies');
const dashboardRoutes = require('./routes/dashboard');
const historyRoutes = require('./routes/history');
class ProxyApp {
constructor() {
this.app = express();
this.port = process.env.PORT || 3000;
this.scheduler = new ProxyScheduler();
this.isShuttingDown = false;
}
setupMiddleware() {
// 静态文件服务
this.app.use(express.static(path.join(__dirname, '../public')));
// JSON 解析中间件
this.app.use(express.json({ limit: '10mb' }));
this.app.use(express.urlencoded({ extended: true }));
// CORS 支持
this.app.use((req, res, next) => {
res.header('Access-Control-Allow-Origin', '*');
res.header('Access-Control-Allow-Methods', 'GET, POST, PUT, DELETE, OPTIONS');
res.header('Access-Control-Allow-Headers', 'Origin, X-Requested-With, Content-Type, Accept, Authorization');
if (req.method === 'OPTIONS') {
res.sendStatus(200);
} else {
next();
}
});
// 请求日志中间件
this.app.use((req, res, next) => {
const timestamp = new Date().toISOString().slice(0, 19).replace('T', ' ');
console.log(`[${timestamp}] ${req.method} ${req.url}`);
next();
});
}
setupRoutes() {
// 健康检查端点
this.app.get('/api/health', async (req, res) => {
try {
const stats = await this.scheduler.getSystemStats();
res.json({
status: 'healthy',
timestamp: new Date().toISOString(),
system: stats
});
} catch (error) {
res.status(500).json({
status: 'unhealthy',
error: error.message,
timestamp: new Date().toISOString()
});
}
});
// 根路径 - 重定向到仪表板
this.app.get('/', (req, res) => {
res.redirect('/index.html');
});
// API信息端点
this.app.get('/api/info', (req, res) => {
res.json({
name: 'Proxy IP Service',
version: '1.0.0',
description: '代理IP抓取、验证和管理服务',
web_interface: {
dashboard: '/index.html',
proxies: '/proxies.html',
history: '/history.html',
monitoring: '/monitoring.html'
},
endpoints: {
'GET /api/health': '健康检查',
'GET /api/info': 'API信息',
'GET /api/dashboard/stats': '仪表板统计数据',
'GET /api/dashboard/status': '实时系统状态',
'POST /api/dashboard/actions/scrape': '手动抓取',
'POST /api/dashboard/actions/validate': '手动验证',
'GET /api/proxies': '获取代理列表',
'GET /api/proxies/random': '获取随机代理',
'GET /api/proxies/stats': '获取统计信息',
'POST /api/proxies/verify': '验证单个代理',
'POST /api/proxies/verify-batch': '批量验证代理',
'POST /api/proxies/scrape': '手动触发抓取',
'POST /api/proxies/validate-all': '手动触发验证',
'GET /api/proxies/search': '搜索代理',
'DELETE /api/proxies/cleanup': '清理无效代理',
'GET /api/history': '执行历史',
'GET /api/history/logs/system': '系统日志',
'GET /api/history/stats': '历史统计'
},
scheduler: {
scrape: '每小时整点执行',
validation: '每10分钟执行',
healthCheck: '每小时30分执行'
}
});
});
// API 路由
this.app.use('/api/proxies', proxyRoutes);
this.app.use('/api/dashboard', dashboardRoutes);
this.app.use('/api/history', historyRoutes);
// 404 处理 - API端点
this.app.use('/api', (req, res, next) => {
// 检查是否已处理的路由
if (!req.route) {
res.status(404).json({
error: 'API接口不存在',
message: `路径 ${req.originalUrl} 未找到`,
available_endpoints: [
'GET /api/health',
'GET /api/info',
'GET /api/dashboard/stats',
'GET /api/dashboard/status',
'POST /api/dashboard/actions/scrape',
'POST /api/dashboard/actions/validate',
'GET /api/proxies',
'GET /api/proxies/random',
'GET /api/proxies/stats',
'POST /api/proxies/verify',
'POST /api/proxies/verify-batch',
'GET /api/history',
'GET /api/history/logs/system',
'GET /api/history/stats'
]
});
} else {
next();
}
});
// 404 处理 - Web页面
this.app.use((req, res) => {
res.status(404).send(`
<!DOCTYPE html>
<html>
<head>
<title>页面未找到 - 代理IP管理系统</title>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" rel="stylesheet">
</head>
<body class="bg-light">
<div class="container mt-5">
<div class="row justify-content-center">
<div class="col-md-6">
<div class="card">
<div class="card-body text-center">
<h1 class="text-danger mb-4">404</h1>
<h3 class="mb-3">页面未找到</h3>
<p class="text-muted mb-4">您访问的页面 ${req.originalUrl} 不存在</p>
<a href="/index.html" class="btn btn-primary">返回首页</a>
</div>
</div>
</div>
</div>
</div>
</body>
</html>
`);
});
// 错误处理中间件
this.app.use((err, req, res, next) => {
console.error('未处理的错误:', err);
res.status(500).json({
error: '服务器内部错误',
message: err.message,
timestamp: new Date().toISOString()
});
});
}
async initializeDatabase() {
try {
console.log('初始化数据库连接...');
await Database.connect();
console.log('数据库连接成功');
console.log('创建数据表...');
await ProxyModel.initTable();
await HistoryModel.initTable();
await LogsModel.initTable();
console.log('数据表初始化完成');
// 获取初始统计信息
const totalCount = await ProxyModel.count();
const validCount = await ProxyModel.count(true);
console.log(`数据库初始化完成,共 ${totalCount} 个代理,其中 ${validCount} 个可用`);
// 记录系统启动日志
await LogsModel.logInfo('系统启动成功', 'system', {
total_proxies: totalCount,
valid_proxies: validCount,
node_version: process.version,
platform: process.platform
});
} catch (error) {
console.error('数据库初始化失败:', error);
throw error;
}
}
setupGracefulShutdown() {
const gracefulShutdown = async (signal) => {
if (this.isShuttingDown) {
console.log('正在关闭中,忽略信号:', signal);
return;
}
this.isShuttingDown = true;
console.log(`收到 ${signal} 信号,开始优雅关闭...`);
// 停止定时任务
console.log('停止定时任务...');
this.scheduler.stop();
// 记录系统关闭日志
try {
await LogsModel.logInfo('系统正常关闭', 'system', {
signal: signal,
uptime: process.uptime()
});
} catch (error) {
console.error('记录关闭日志失败:', error);
}
// 关闭数据库连接
console.log('关闭数据库连接...');
try {
await Database.close();
} catch (error) {
console.error('关闭数据库连接失败:', error);
}
// 关闭HTTP服务器
console.log('关闭HTTP服务器...');
process.exit(0);
};
// 监听关闭信号
process.on('SIGTERM', () => gracefulShutdown('SIGTERM'));
process.on('SIGINT', () => gracefulShutdown('SIGINT'));
// 监听未捕获的异常
process.on('uncaughtException', (error) => {
console.error('未捕获的异常:', error);
gracefulShutdown('uncaughtException');
});
process.on('unhandledRejection', (reason, promise) => {
console.error('未处理的Promise拒绝:', reason);
console.error('Promise:', promise);
});
}
async start() {
try {
console.log('=== 代理IP服务启动中 ===');
console.log('服务端口:', this.port);
console.log('启动时间:', new Date().toISOString());
// 初始化数据库
await this.initializeDatabase();
// 设置中间件和路由
this.setupMiddleware();
this.setupRoutes();
// 设置优雅关闭
this.setupGracefulShutdown();
// 启动定时任务
console.log('启动定时任务调度器...');
this.scheduler.start();
// 启动HTTP服务器
this.server = this.app.listen(this.port, () => {
console.log('=== 代理IP服务启动成功 ===');
console.log('服务地址: http://localhost:' + this.port);
console.log('Web管理界面: http://localhost:' + this.port);
console.log(' - 仪表板: http://localhost:' + this.port + '/index.html');
console.log(' - 代理管理: http://localhost:' + this.port + '/proxies.html');
console.log(' - 执行历史: http://localhost:' + this.port + '/history.html');
console.log(' - 系统监控: http://localhost:' + this.port + '/monitoring.html');
console.log('API文档: http://localhost:' + this.port + '/api/info');
console.log('健康检查: http://localhost:' + this.port + '/api/health');
console.log('定时任务已自动启动');
});
// 设置服务器超时
this.server.timeout = 30000; // 30秒超时
} catch (error) {
console.error('服务启动失败:', error);
process.exit(1);
}
}
async stop() {
if (this.server) {
this.server.close();
}
}
}
// 创建并启动应用
const app = new ProxyApp();
app.start().catch(error => {
console.error('应用启动失败:', error);
process.exit(1);
});
module.exports = ProxyApp;

82
src/database/db.js Normal file
View File

@@ -0,0 +1,82 @@
const sqlite3 = require('sqlite3').verbose();
const path = require('path');
const config = require('../../config/database');
class Database {
constructor() {
this.db = null;
}
connect() {
return new Promise((resolve, reject) => {
this.db = new sqlite3.Database(
config.development.storage,
sqlite3.OPEN_READWRITE | sqlite3.OPEN_CREATE,
(err) => {
if (err) {
console.error('数据库连接失败:', err.message);
reject(err);
} else {
console.log('已连接到 SQLite 数据库');
resolve();
}
}
);
});
}
close() {
return new Promise((resolve, reject) => {
if (this.db) {
this.db.close((err) => {
if (err) {
reject(err);
} else {
console.log('数据库连接已关闭');
resolve();
}
});
} else {
resolve();
}
});
}
run(sql, params = []) {
return new Promise((resolve, reject) => {
this.db.run(sql, params, function(err) {
if (err) {
reject(err);
} else {
resolve({ id: this.lastID, changes: this.changes });
}
});
});
}
get(sql, params = []) {
return new Promise((resolve, reject) => {
this.db.get(sql, params, (err, row) => {
if (err) {
reject(err);
} else {
resolve(row);
}
});
});
}
all(sql, params = []) {
return new Promise((resolve, reject) => {
this.db.all(sql, params, (err, rows) => {
if (err) {
reject(err);
} else {
resolve(rows);
}
});
});
}
}
module.exports = new Database();

View File

@@ -0,0 +1,260 @@
const db = require('../db');
class HistoryModel {
static async initTable() {
const sql = `
CREATE TABLE IF NOT EXISTS execution_history (
id INTEGER PRIMARY KEY AUTOINCREMENT,
task_type TEXT NOT NULL,
task_name TEXT NOT NULL,
status TEXT NOT NULL DEFAULT 'pending',
start_time DATETIME NOT NULL,
end_time DATETIME,
duration INTEGER,
result_summary TEXT,
error_message TEXT,
details TEXT,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
)
`;
try {
await db.run(sql);
console.log('执行历史表创建成功或已存在');
} catch (error) {
console.error('创建执行历史表失败:', error);
throw error;
}
}
static async create(historyData) {
const sql = `
INSERT INTO execution_history
(task_type, task_name, status, start_time, end_time, duration, result_summary, error_message, details)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
`;
const params = [
historyData.task_type,
historyData.task_name,
historyData.status || 'pending',
historyData.start_time,
historyData.end_time || null,
historyData.duration || null,
historyData.result_summary || null,
historyData.error_message || null,
historyData.details ? JSON.stringify(historyData.details) : null
];
try {
const result = await db.run(sql, params);
return result;
} catch (error) {
console.error('创建执行历史记录失败:', error);
throw error;
}
}
static async update(id, updateData) {
const fields = [];
const params = [];
Object.keys(updateData).forEach(key => {
if (updateData[key] !== undefined) {
fields.push(`${key} = ?`);
if (key === 'details') {
params.push(JSON.stringify(updateData[key]));
} else {
params.push(updateData[key]);
}
}
});
if (fields.length === 0) {
throw new Error('没有要更新的字段');
}
const sql = `
UPDATE execution_history
SET ${fields.join(', ')}
WHERE id = ?
`;
params.push(id);
try {
const result = await db.run(sql, params);
return result;
} catch (error) {
console.error('更新执行历史记录失败:', error);
throw error;
}
}
static async findAll(taskType = null, status = null, limit = 50, offset = 0) {
let sql = 'SELECT * FROM execution_history WHERE 1=1';
const params = [];
if (taskType) {
sql += ' AND task_type = ?';
params.push(taskType);
}
if (status) {
sql += ' AND status = ?';
params.push(status);
}
sql += ' ORDER BY start_time DESC LIMIT ? OFFSET ?';
params.push(limit, offset);
try {
const histories = await db.all(sql, params);
// 解析details字段
return histories.map(history => ({
...history,
details: history.details ? JSON.parse(history.details) : null
}));
} catch (error) {
console.error('查询执行历史失败:', error);
throw error;
}
}
static async findById(id) {
const sql = 'SELECT * FROM execution_history WHERE id = ?';
try {
const history = await db.get(sql, [id]);
if (history && history.details) {
history.details = JSON.parse(history.details);
}
return history;
} catch (error) {
console.error('根据ID查询执行历史失败:', error);
throw error;
}
}
static async getLatest(taskType, limit = 5) {
const sql = 'SELECT * FROM execution_history WHERE task_type = ? ORDER BY start_time DESC LIMIT ?';
try {
const histories = await db.all(sql, [taskType, limit]);
// 解析details字段
return histories.map(history => ({
...history,
details: history.details ? JSON.parse(history.details) : null
}));
} catch (error) {
console.error('查询最新执行历史失败:', error);
throw error;
}
}
static async getStatistics(taskType = null, days = 7) {
let sql = `
SELECT
COUNT(*) as total,
COUNT(CASE WHEN status = 'success' THEN 1 END) as success,
COUNT(CASE WHEN status = 'failed' THEN 1 END) as failed,
COUNT(CASE WHEN status = 'running' THEN 1 END) as running,
AVG(duration) as avg_duration
FROM execution_history
WHERE start_time >= datetime('now', '-${days} days')
`;
const params = [];
if (taskType) {
sql += ' AND task_type = ?';
params.push(taskType);
}
try {
const stats = await db.get(sql, params);
return {
total: stats.total || 0,
success: stats.success || 0,
failed: stats.failed || 0,
running: stats.running || 0,
success_rate: stats.total > 0 ? ((stats.success / stats.total) * 100).toFixed(2) : '0',
avg_duration: stats.avg_duration ? Math.round(stats.avg_duration) : 0
};
} catch (error) {
console.error('查询执行历史统计失败:', error);
throw error;
}
}
static async getDailyStats(taskType = null, days = 7) {
let sql = `
SELECT
DATE(start_time) as date,
COUNT(*) as total,
COUNT(CASE WHEN status = 'success' THEN 1 END) as success,
COUNT(CASE WHEN status = 'failed' THEN 1 END) as failed,
AVG(duration) as avg_duration
FROM execution_history
WHERE start_time >= datetime('now', '-${days} days')
`;
const params = [];
if (taskType) {
sql += ' AND task_type = ?';
params.push(taskType);
}
sql += ' GROUP BY DATE(start_time) ORDER BY date DESC';
try {
const dailyStats = await db.all(sql, params);
return dailyStats;
} catch (error) {
console.error('查询每日统计失败:', error);
throw error;
}
}
static async deleteOldRecords(days = 30) {
const sql = 'DELETE FROM execution_history WHERE start_time < datetime("now", "-' + days + ' days")';
try {
const result = await db.run(sql);
return result;
} catch (error) {
console.error('删除旧执行历史记录失败:', error);
throw error;
}
}
static async count(taskType = null, status = null) {
let sql = 'SELECT COUNT(*) as count FROM execution_history WHERE 1=1';
const params = [];
if (taskType) {
sql += ' AND task_type = ?';
params.push(taskType);
}
if (status) {
sql += ' AND status = ?';
params.push(status);
}
try {
const result = await db.get(sql, params);
return result.count;
} catch (error) {
console.error('统计执行历史记录数量失败:', error);
throw error;
}
}
}
module.exports = HistoryModel;

287
src/database/models/logs.js Normal file
View File

@@ -0,0 +1,287 @@
const db = require('../db');
class LogsModel {
static async initTable() {
const sql = `
CREATE TABLE IF NOT EXISTS system_logs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
level TEXT NOT NULL,
message TEXT NOT NULL,
category TEXT,
details TEXT,
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
source TEXT,
user_agent TEXT,
ip_address TEXT
)
`;
try {
await db.run(sql);
console.log('系统日志表创建成功或已存在');
} catch (error) {
console.error('创建系统日志表失败:', error);
throw error;
}
}
static async create(logData) {
const sql = `
INSERT INTO system_logs
(level, message, category, details, timestamp, source, user_agent, ip_address)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
`;
const params = [
logData.level,
logData.message,
logData.category || null,
logData.details ? JSON.stringify(logData.details) : null,
logData.timestamp || new Date().toISOString(),
logData.source || 'system',
logData.user_agent || null,
logData.ip_address || null
];
try {
const result = await db.run(sql, params);
return result;
} catch (error) {
console.error('创建系统日志记录失败:', error);
throw error;
}
}
static async log(level, message, category = null, details = null, source = 'system') {
return await this.create({
level,
message,
category,
details,
source
});
}
static async logInfo(message, category = null, details = null, source = 'system') {
return await this.log('info', message, category, details, source);
}
static async logWarning(message, category = null, details = null, source = 'system') {
return await this.log('warning', message, category, details, source);
}
static async logError(message, category = null, details = null, source = 'system') {
return await this.log('error', message, category, details, source);
}
static async logDebug(message, category = null, details = null, source = 'system') {
return await this.log('debug', message, category, details, source);
}
static async findAll(level = null, category = null, limit = 100, offset = 0) {
let sql = 'SELECT * FROM system_logs WHERE 1=1';
const params = [];
if (level) {
sql += ' AND level = ?';
params.push(level);
}
if (category) {
sql += ' AND category = ?';
params.push(category);
}
sql += ' ORDER BY timestamp DESC LIMIT ? OFFSET ?';
params.push(limit, offset);
try {
const logs = await db.all(sql, params);
// 解析details字段
return logs.map(log => ({
...log,
details: log.details ? JSON.parse(log.details) : null
}));
} catch (error) {
console.error('查询系统日志失败:', error);
throw error;
}
}
static async findById(id) {
const sql = 'SELECT * FROM system_logs WHERE id = ?';
try {
const log = await db.get(sql, [id]);
if (log && log.details) {
log.details = JSON.parse(log.details);
}
return log;
} catch (error) {
console.error('根据ID查询系统日志失败:', error);
throw error;
}
}
static async getLatest(level = null, category = null, limit = 20) {
let sql = 'SELECT * FROM system_logs WHERE 1=1';
const params = [];
if (level) {
sql += ' AND level = ?';
params.push(level);
}
if (category) {
sql += ' AND category = ?';
params.push(category);
}
sql += ' ORDER BY timestamp DESC LIMIT ?';
params.push(limit);
try {
const logs = await db.all(sql, params);
// 解析details字段
return logs.map(log => ({
...log,
details: log.details ? JSON.parse(log.details) : null
}));
} catch (error) {
console.error('查询最新系统日志失败:', error);
throw error;
}
}
static async getStatistics(days = 7) {
const sql = `
SELECT
COUNT(*) as total,
COUNT(CASE WHEN level = 'error' THEN 1 END) as error,
COUNT(CASE WHEN level = 'warning' THEN 1 END) as warning,
COUNT(CASE WHEN level = 'info' THEN 1 END) as info,
COUNT(CASE WHEN level = 'debug' THEN 1 END) as debug
FROM system_logs
WHERE timestamp >= datetime('now', '-${days} days')
`;
try {
const stats = await db.get(sql);
return {
total: stats.total || 0,
error: stats.error || 0,
warning: stats.warning || 0,
info: stats.info || 0,
debug: stats.debug || 0
};
} catch (error) {
console.error('查询系统日志统计失败:', error);
throw error;
}
}
static async getHourlyStats(days = 1) {
const sql = `
SELECT
strftime('%Y-%m-%d %H:00:00', timestamp) as hour,
level,
COUNT(*) as count
FROM system_logs
WHERE timestamp >= datetime('now', '-${days} days')
GROUP BY hour, level
ORDER BY hour DESC
`;
try {
const hourlyStats = await db.all(sql);
return hourlyStats;
} catch (error) {
console.error('查询每小时统计失败:', error);
throw error;
}
}
static async search(keyword, level = null, category = null, limit = 50) {
let sql = 'SELECT * FROM system_logs WHERE message LIKE ?';
const params = [`%${keyword}%`];
if (level) {
sql += ' AND level = ?';
params.push(level);
}
if (category) {
sql += ' AND category = ?';
params.push(category);
}
sql += ' ORDER BY timestamp DESC LIMIT ?';
params.push(limit);
try {
const logs = await db.all(sql, params);
// 解析details字段
return logs.map(log => ({
...log,
details: log.details ? JSON.parse(log.details) : null
}));
} catch (error) {
console.error('搜索系统日志失败:', error);
throw error;
}
}
static async deleteOldRecords(days = 30) {
const sql = 'DELETE FROM system_logs WHERE timestamp < datetime("now", "-' + days + ' days")';
try {
const result = await db.run(sql);
return result;
} catch (error) {
console.error('删除旧系统日志记录失败:', error);
throw error;
}
}
static async count(level = null, category = null) {
let sql = 'SELECT COUNT(*) as count FROM system_logs WHERE 1=1';
const params = [];
if (level) {
sql += ' AND level = ?';
params.push(level);
}
if (category) {
sql += ' AND category = ?';
params.push(category);
}
try {
const result = await db.get(sql, params);
return result.count;
} catch (error) {
console.error('统计系统日志记录数量失败:', error);
throw error;
}
}
static async getCategories() {
const sql = 'SELECT DISTINCT category FROM system_logs WHERE category IS NOT NULL ORDER BY category';
try {
const categories = await db.all(sql);
return categories.map(row => row.category);
} catch (error) {
console.error('获取日志分类失败:', error);
throw error;
}
}
}
module.exports = LogsModel;

View File

@@ -0,0 +1,178 @@
const db = require('../db');
class ProxyModel {
static async initTable() {
const sql = `
CREATE TABLE IF NOT EXISTS proxies (
id INTEGER PRIMARY KEY AUTOINCREMENT,
ip TEXT NOT NULL,
port INTEGER NOT NULL,
location TEXT,
speed INTEGER,
last_check_time TEXT,
is_valid INTEGER DEFAULT 1,
response_time INTEGER,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
UNIQUE(ip, port)
)
`;
try {
await db.run(sql);
console.log('代理表创建成功或已存在');
} catch (error) {
console.error('创建代理表失败:', error);
throw error;
}
}
static async create(proxyData) {
const sql = `
INSERT OR REPLACE INTO proxies
(ip, port, location, speed, last_check_time, is_valid, created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))
`;
const params = [
proxyData.ip,
proxyData.port,
proxyData.location || null,
proxyData.speed || null,
proxyData.last_check_time || null,
proxyData.is_valid !== undefined ? proxyData.is_valid : 1
];
try {
const result = await db.run(sql, params);
return result;
} catch (error) {
console.error('插入代理数据失败:', error);
throw error;
}
}
static async findAll(validOnly = true) {
let sql = 'SELECT * FROM proxies';
const params = [];
if (validOnly) {
sql += ' WHERE is_valid = 1';
}
sql += ' ORDER BY response_time ASC, created_at DESC';
try {
const proxies = await db.all(sql, params);
return proxies;
} catch (error) {
console.error('查询代理数据失败:', error);
throw error;
}
}
static async findRandom(validOnly = true, limit = 1) {
let sql = 'SELECT * FROM proxies';
const params = [];
if (validOnly) {
sql += ' WHERE is_valid = 1';
}
sql += ' ORDER BY RANDOM() LIMIT ?';
params.push(limit);
try {
const proxies = await db.all(sql, params);
return limit === 1 ? proxies[0] : proxies;
} catch (error) {
console.error('查询随机代理失败:', error);
throw error;
}
}
static async findByIpAndPort(ip, port) {
const sql = 'SELECT * FROM proxies WHERE ip = ? AND port = ?';
try {
const proxy = await db.get(sql, [ip, port]);
return proxy;
} catch (error) {
console.error('根据IP和端口查询代理失败:', error);
throw error;
}
}
static async updateValidity(ip, port, isValid, responseTime = null) {
const sql = `
UPDATE proxies
SET is_valid = ?, response_time = ?, updated_at = datetime('now')
WHERE ip = ? AND port = ?
`;
try {
const result = await db.run(sql, [isValid, responseTime, ip, port]);
return result;
} catch (error) {
console.error('更新代理有效性失败:', error);
throw error;
}
}
static async deleteInvalid() {
const sql = 'DELETE FROM proxies WHERE is_valid = 0';
try {
const result = await db.run(sql);
return result;
} catch (error) {
console.error('删除无效代理失败:', error);
throw error;
}
}
static async count(validOnly = true) {
let sql = 'SELECT COUNT(*) as count FROM proxies';
if (validOnly) {
sql += ' WHERE is_valid = 1';
}
try {
const result = await db.get(sql);
return result.count;
} catch (error) {
console.error('统计代理数量失败:', error);
throw error;
}
}
static async all(sql, params = []) {
try {
const proxies = await db.all(sql, params);
return proxies;
} catch (error) {
console.error('执行SQL查询失败:', error);
throw error;
}
}
static async findAllForValidation(limit = 50) {
const sql = `
SELECT * FROM proxies
WHERE is_valid = 1
ORDER BY updated_at ASC
LIMIT ?
`;
try {
const proxies = await db.all(sql, [limit]);
return proxies;
} catch (error) {
console.error('查询待验证代理失败:', error);
throw error;
}
}
}
module.exports = ProxyModel;

411
src/routes/dashboard.js Normal file
View File

@@ -0,0 +1,411 @@
const express = require('express');
const router = express.Router();
const ProxyModel = require('../database/models/proxy');
const HistoryModel = require('../database/models/history');
const LogsModel = require('../database/models/logs');
const ProxyValidator = require('../services/validator');
const ProxyScraper = require('../services/scraper');
const ProxyScheduler = require('../services/scheduler');
const validator = new ProxyValidator();
const scraper = new ProxyScraper();
// 获取仪表板统计数据
router.get('/stats', async (req, res) => {
try {
// 代理统计
const proxyStats = await validator.getProxyStatistics();
// 执行历史统计
const scrapeStats = await HistoryModel.getStatistics('scrape', 7);
const validationStats = await HistoryModel.getStatistics('validation', 7);
// 系统日志统计
const logStats = await LogsModel.getStatistics(7);
// 最近执行历史
const latestScrape = await HistoryModel.getLatest('scrape', 3);
const latestValidation = await HistoryModel.getLatest('validation', 3);
// 每日统计数据
const dailyProxyStats = await getDailyProxyStats(7);
const dailyTaskStats = await getDailyTaskStats(7);
res.json({
success: true,
data: {
proxies: proxyStats,
tasks: {
scrape: scrapeStats,
validation: validationStats
},
logs: logStats,
latest: {
scrape: latestScrape,
validation: latestValidation
},
charts: {
daily_proxies: dailyProxyStats,
daily_tasks: dailyTaskStats
},
timestamp: new Date().toISOString()
}
});
} catch (error) {
console.error('获取仪表板统计数据失败:', error);
res.status(500).json({
success: false,
error: '获取统计数据失败',
message: error.message
});
}
});
// 获取实时系统状态
router.get('/status', async (req, res) => {
try {
// 代理统计
const proxyStats = await validator.getProxyStatistics();
// 当前时间
const now = new Date();
// 今天的执行历史
const todayScrape = await HistoryModel.getStatistics('scrape', 1);
const todayValidation = await HistoryModel.getStatistics('validation', 1);
// 最近的日志
const recentLogs = await LogsModel.getLatest(null, null, 5);
// 系统运行时间
const uptime = process.uptime();
// 内存使用情况
const memUsage = process.memoryUsage();
res.json({
success: true,
data: {
timestamp: now.toISOString(),
uptime: uptime,
memory: {
rss: Math.round(memUsage.rss / 1024 / 1024), // MB
heapUsed: Math.round(memUsage.heapUsed / 1024 / 1024), // MB
heapTotal: Math.round(memUsage.heapTotal / 1024 / 1024) // MB
},
proxies: proxyStats,
today_tasks: {
scrape: todayScrape,
validation: todayValidation
},
recent_logs: recentLogs,
next_runs: getNextRunTimes()
}
});
} catch (error) {
console.error('获取实时状态失败:', error);
res.status(500).json({
success: false,
error: '获取状态失败',
message: error.message
});
}
});
// 获取代理统计图表数据
router.get('/charts/proxies', async (req, res) => {
try {
const days = parseInt(req.query.days) || 7;
const chartData = await getDailyProxyStats(days);
res.json({
success: true,
data: chartData
});
} catch (error) {
console.error('获取代理图表数据失败:', error);
res.status(500).json({
success: false,
error: '获取图表数据失败',
message: error.message
});
}
});
// 获取任务执行图表数据
router.get('/charts/tasks', async (req, res) => {
try {
const days = parseInt(req.query.days) || 7;
const chartData = await getDailyTaskStats(days);
res.json({
success: true,
data: chartData
});
} catch (error) {
console.error('获取任务图表数据失败:', error);
res.status(500).json({
success: false,
error: '获取图表数据失败',
message: error.message
});
}
});
// 获取日志统计图表数据
router.get('/charts/logs', async (req, res) => {
try {
const days = parseInt(req.query.days) || 7;
const chartData = await LogsModel.getHourlyStats(days);
res.json({
success: true,
data: chartData
});
} catch (error) {
console.error('获取日志图表数据失败:', error);
res.status(500).json({
success: false,
error: '获取图表数据失败',
message: error.message
});
}
});
// 快速操作接口
router.post('/actions/scrape', async (req, res) => {
try {
// 创建调度器实例
const scheduler = require('../services/scheduler');
const ProxyScheduler = require('../services/scheduler');
const schedulerInstance = new ProxyScheduler();
// 检查是否有抓取任务正在进行
const ProxyScraper = require('../services/scraper');
if (ProxyScraper.isScrapingInProgress()) {
return res.status(409).json({
success: false,
error: '有抓取任务正在进行',
message: '请等待当前抓取任务完成后再试'
});
}
// 创建执行历史记录
const historyId = await createTaskHistory('scrape', '手动触发抓取任务');
// 异步执行定时抓取任务(与定时任务相同的逻辑)
schedulerInstance.runScrapeTask().then(async (result) => {
if (result.skipped) {
await updateTaskHistory(historyId, 'skipped', result, result.message);
await LogsModel.logInfo(`手动抓取任务跳过: ${result.message}`, 'manual_action', result);
} else {
await updateTaskHistory(historyId, 'success', {
scraped: result.scraped,
total: result.total,
valid: result.valid
});
await LogsModel.logInfo(`手动抓取任务完成: 抓取${result.scraped}个代理`, 'manual_action', result);
}
}).catch(async (error) => {
await updateTaskHistory(historyId, 'failed', null, error.message);
await LogsModel.logError(`手动抓取任务失败: ${error.message}`, 'manual_action', { error: error.message });
});
res.json({
success: true,
message: '已触发定时抓取任务',
history_id: historyId
});
} catch (error) {
console.error('启动抓取任务失败:', error);
res.status(500).json({
success: false,
error: '启动抓取任务失败',
message: error.message
});
}
});
router.post('/actions/validate', async (req, res) => {
try {
const { limit = 50 } = req.body;
// 创建执行历史记录
const historyId = await createTaskHistory('validation', '手动验证任务');
// 异步执行验证任务
validator.validateDatabaseProxies(limit).then(async (result) => {
await updateTaskHistory(historyId, 'success', {
validated: result.validated,
valid: result.valid,
invalid: result.invalid
});
await LogsModel.logInfo(`手动验证任务完成: 验证${result.validated}个代理`, 'manual_action', result);
}).catch(async (error) => {
await updateTaskHistory(historyId, 'failed', null, error.message);
await LogsModel.logError(`手动验证任务失败: ${error.message}`, 'manual_action', { error: error.message });
});
res.json({
success: true,
message: `验证任务已启动,将验证 ${limit} 个代理`,
history_id: historyId
});
} catch (error) {
console.error('启动验证任务失败:', error);
res.status(500).json({
success: false,
error: '启动验证任务失败',
message: error.message
});
}
});
// 辅助函数
// 获取每日代理统计
async function getDailyProxyStats(days) {
const db = require('../database/db');
const sql = `
SELECT
DATE(created_at) as date,
COUNT(*) as total_added,
COUNT(CASE WHEN is_valid = 1 THEN 1 END) as valid_added
FROM proxies
WHERE created_at >= datetime('now', '-${days} days')
GROUP BY DATE(created_at)
ORDER BY date
`;
try {
const dailyStats = await db.all(sql);
return dailyStats;
} catch (error) {
console.error('获取每日代理统计失败:', error);
return [];
}
}
// 获取每日任务统计
async function getDailyTaskStats(days) {
try {
const scrapeStats = await HistoryModel.getDailyStats('scrape', days);
const validationStats = await HistoryModel.getDailyStats('validation', days);
// 合并数据
const mergedStats = {};
const dates = new Set();
scrapeStats.forEach(stat => {
mergedStats[stat.date] = {
date: stat.date,
scrape_total: stat.total,
scrape_success: stat.success,
scrape_failed: stat.failed,
validation_total: 0,
validation_success: 0,
validation_failed: 0
};
dates.add(stat.date);
});
validationStats.forEach(stat => {
if (mergedStats[stat.date]) {
mergedStats[stat.date].validation_total = stat.total;
mergedStats[stat.date].validation_success = stat.success;
mergedStats[stat.date].validation_failed = stat.failed;
} else {
mergedStats[stat.date] = {
date: stat.date,
scrape_total: 0,
scrape_success: 0,
scrape_failed: 0,
validation_total: stat.total,
validation_success: stat.success,
validation_failed: stat.failed
};
dates.add(stat.date);
}
});
return Object.values(mergedStats).sort((a, b) => new Date(a.date) - new Date(b.date));
} catch (error) {
console.error('获取每日任务统计失败:', error);
return [];
}
}
// 获取下次执行时间
function getNextRunTimes() {
const now = new Date();
const nextScrape = new Date(now);
const nextValidation = new Date(now);
const nextHealthCheck = new Date(now);
// 下次抓取时间(每小时整点)
nextScrape.setHours(nextScrape.getHours() + 1, 0, 0, 0);
// 下次验证时间每10分钟
const minutes = now.getMinutes();
const nextMinute = Math.ceil((minutes + 1) / 10) * 10;
nextValidation.setMinutes(nextMinute, 0, 0);
// 下次健康检查时间每小时30分
if (now.getMinutes() < 30) {
nextHealthCheck.setHours(now.getHours(), 30, 0, 0);
} else {
nextHealthCheck.setHours(now.getHours() + 1, 30, 0, 0);
}
return {
scrape: nextScrape.toISOString(),
validation: nextValidation.toISOString(),
healthCheck: nextHealthCheck.toISOString()
};
}
// 创建任务历史记录
async function createTaskHistory(taskType, taskName) {
try {
const result = await HistoryModel.create({
task_type: taskType,
task_name: taskName,
status: 'running',
start_time: new Date().toISOString()
});
return result.id;
} catch (error) {
console.error('创建任务历史记录失败:', error);
throw error;
}
}
// 更新任务历史记录
async function updateTaskHistory(id, status, details = null, errorMessage = null) {
try {
const updateData = {
status: status,
end_time: new Date().toISOString(),
duration: null,
details: details,
error_message: errorMessage
};
// 计算执行时长
const history = await HistoryModel.findById(id);
if (history && history.start_time) {
const startTime = new Date(history.start_time);
const endTime = new Date();
updateData.duration = endTime.getTime() - startTime.getTime();
}
await HistoryModel.update(id, updateData);
} catch (error) {
console.error('更新任务历史记录失败:', error);
throw error;
}
}
module.exports = router;

337
src/routes/history.js Normal file
View File

@@ -0,0 +1,337 @@
const express = require('express');
const router = express.Router();
const HistoryModel = require('../database/models/history');
const LogsModel = require('../database/models/logs');
// 获取执行历史列表
router.get('/', async (req, res) => {
try {
const taskType = req.query.taskType || null;
const status = req.query.status || null;
const limit = Math.min(parseInt(req.query.limit) || 50, 200);
const offset = parseInt(req.query.offset) || 0;
const histories = await HistoryModel.findAll(taskType, status, limit, offset);
const totalCount = await HistoryModel.count(taskType, status);
res.json({
success: true,
data: histories,
pagination: {
total: totalCount,
limit: limit,
offset: offset,
hasMore: offset + limit < totalCount
}
});
} catch (error) {
console.error('获取执行历史失败:', error);
res.status(500).json({
success: false,
error: '获取执行历史失败',
message: error.message
});
}
});
// 获取执行历史详情
router.get('/:id', async (req, res) => {
try {
const id = parseInt(req.params.id);
if (isNaN(id)) {
return res.status(400).json({
success: false,
error: '无效的历史记录ID'
});
}
const history = await HistoryModel.findById(id);
if (!history) {
return res.status(404).json({
success: false,
error: '未找到指定的历史记录'
});
}
res.json({
success: true,
data: history
});
} catch (error) {
console.error('获取执行历史详情失败:', error);
res.status(500).json({
success: false,
error: '获取执行历史详情失败',
message: error.message
});
}
});
// 获取执行历史统计
router.get('/stats/summary', async (req, res) => {
try {
const taskType = req.query.taskType || null;
const days = Math.min(parseInt(req.query.days) || 7, 30);
const stats = await HistoryModel.getStatistics(taskType, days);
const dailyStats = await HistoryModel.getDailyStats(taskType, days);
res.json({
success: true,
data: {
summary: stats,
daily: dailyStats
}
});
} catch (error) {
console.error('获取执行历史统计失败:', error);
res.status(500).json({
success: false,
error: '获取执行历史统计失败',
message: error.message
});
}
});
// 获取系统日志列表
router.get('/logs/system', async (req, res) => {
try {
const level = req.query.level || null;
const category = req.query.category || null;
const limit = Math.min(parseInt(req.query.limit) || 100, 500);
const offset = parseInt(req.query.offset) || 0;
const logs = await LogsModel.findAll(level, category, limit, offset);
const totalCount = await LogsModel.count(level, category);
res.json({
success: true,
data: logs,
pagination: {
total: totalCount,
limit: limit,
offset: offset,
hasMore: offset + limit < totalCount
}
});
} catch (error) {
console.error('获取系统日志失败:', error);
res.status(500).json({
success: false,
error: '获取系统日志失败',
message: error.message
});
}
});
// 搜索系统日志
router.get('/logs/search', async (req, res) => {
try {
const keyword = req.query.keyword;
const level = req.query.level || null;
const category = req.query.category || null;
const limit = Math.min(parseInt(req.query.limit) || 50, 200);
if (!keyword || keyword.trim().length === 0) {
return res.status(400).json({
success: false,
error: '搜索关键词不能为空'
});
}
const logs = await LogsModel.search(keyword.trim(), level, category, limit);
res.json({
success: true,
data: logs,
count: logs.length
});
} catch (error) {
console.error('搜索系统日志失败:', error);
res.status(500).json({
success: false,
error: '搜索系统日志失败',
message: error.message
});
}
});
// 获取日志统计
router.get('/logs/stats', async (req, res) => {
try {
const days = Math.min(parseInt(req.query.days) || 7, 30);
const stats = await LogsModel.getStatistics(days);
const hourlyStats = await LogsModel.getHourlyStats(days);
const categories = await LogsModel.getCategories();
res.json({
success: true,
data: {
summary: stats,
hourly: hourlyStats,
categories: categories
}
});
} catch (error) {
console.error('获取日志统计失败:', error);
res.status(500).json({
success: false,
error: '获取日志统计失败',
message: error.message
});
}
});
// 获取日志分类
router.get('/logs/categories', async (req, res) => {
try {
const categories = await LogsModel.getCategories();
res.json({
success: true,
data: categories
});
} catch (error) {
console.error('获取日志分类失败:', error);
res.status(500).json({
success: false,
error: '获取日志分类失败',
message: error.message
});
}
});
// 清理旧记录
router.delete('/cleanup', async (req, res) => {
try {
const { days = 30, type = 'all' } = req.body;
if (days < 1 || days > 365) {
return res.status(400).json({
success: false,
error: '保留天数必须在1-365之间'
});
}
let deletedHistory = 0;
let deletedLogs = 0;
if (type === 'all' || type === 'history') {
const historyResult = await HistoryModel.deleteOldRecords(days);
deletedHistory = historyResult.changes || 0;
}
if (type === 'all' || type === 'logs') {
const logsResult = await LogsModel.deleteOldRecords(days);
deletedLogs = logsResult.changes || 0;
}
res.json({
success: true,
message: `清理完成,删除了 ${deletedHistory} 条历史记录和 ${deletedLogs} 条日志记录`,
data: {
deleted_history: deletedHistory,
deleted_logs: deletedLogs,
days: days
}
});
// 记录清理操作日志
await LogsModel.logInfo(
`手动清理旧记录:删除${deletedHistory}条历史记录和${deletedLogs}条日志记录`,
'cleanup',
{ deleted_history: deletedHistory, deleted_logs: deletedLogs, days: days },
'manual_action'
);
} catch (error) {
console.error('清理旧记录失败:', error);
res.status(500).json({
success: false,
error: '清理旧记录失败',
message: error.message
});
}
});
// 获取任务类型统计
router.get('/task-types', async (req, res) => {
try {
const sql = `
SELECT DISTINCT task_type
FROM execution_history
ORDER BY task_type
`;
const taskTypes = await HistoryModel.all(sql);
res.json({
success: true,
data: taskTypes.map(row => row.task_type)
});
} catch (error) {
console.error('获取任务类型失败:', error);
res.status(500).json({
success: false,
error: '获取任务类型失败',
message: error.message
});
}
});
// 导出历史记录
router.get('/export', async (req, res) => {
try {
const taskType = req.query.taskType || null;
const status = req.query.status || null;
const format = req.query.format || 'json';
const histories = await HistoryModel.findAll(taskType, status, 1000, 0);
if (format === 'csv') {
// 生成CSV格式
const csvHeader = 'ID,任务类型,任务名称,状态,开始时间,结束时间,执行时长(ms),结果摘要,错误信息\n';
const csvData = histories.map(history => {
return [
history.id,
history.task_type,
history.task_name,
history.status,
history.start_time,
history.end_time || '',
history.duration || '',
`"${(history.result_summary || '').replace(/"/g, '""')}"`,
`"${(history.error_message || '').replace(/"/g, '""')}"`
].join(',');
}).join('\n');
const csv = csvHeader + csvData;
res.setHeader('Content-Type', 'text/csv');
res.setHeader('Content-Disposition', `attachment; filename=execution_history_${new Date().toISOString().slice(0, 10)}.csv`);
res.send(csv);
} else {
// JSON格式
res.setHeader('Content-Type', 'application/json');
res.setHeader('Content-Disposition', `attachment; filename=execution_history_${new Date().toISOString().slice(0, 10)}.json`);
res.json({
success: true,
export_time: new Date().toISOString(),
filters: { taskType, status },
count: histories.length,
data: histories
});
}
} catch (error) {
console.error('导出历史记录失败:', error);
res.status(500).json({
success: false,
error: '导出历史记录失败',
message: error.message
});
}
});
module.exports = router;

485
src/routes/proxies.js Normal file
View File

@@ -0,0 +1,485 @@
const express = require('express');
const router = express.Router();
const ProxyModel = require('../database/models/proxy');
const ProxyValidator = require('../services/validator');
const ProxyScraper = require('../services/scraper');
const validator = new ProxyValidator();
const scraper = new ProxyScraper();
// 获取所有可用代理
router.get('/', async (req, res) => {
try {
const limit = parseInt(req.query.limit) || 100;
const offset = parseInt(req.query.offset) || 0;
const sortBy = req.query.sortBy || 'response_time';
const order = req.query.order || 'ASC';
let sql = 'SELECT * FROM proxies WHERE is_valid = 1';
const params = [];
// 添加排序
const allowedSortFields = ['response_time', 'created_at', 'updated_at', 'ip', 'port'];
if (allowedSortFields.includes(sortBy)) {
sql += ` ORDER BY ${sortBy} ${order.toUpperCase() === 'DESC' ? 'DESC' : 'ASC'}`;
} else {
sql += ' ORDER BY response_time ASC';
}
// 添加分页
sql += ' LIMIT ? OFFSET ?';
params.push(limit, offset);
const proxies = await ProxyModel.all(sql, params);
const totalCount = await ProxyModel.count(true);
res.json({
success: true,
data: proxies,
pagination: {
total: totalCount,
limit: limit,
offset: offset,
hasMore: offset + limit < totalCount
}
});
} catch (error) {
console.error('获取代理列表失败:', error);
res.status(500).json({
success: false,
error: '获取代理列表失败',
message: error.message
});
}
});
// 获取随机可用代理
router.get('/random', async (req, res) => {
try {
const count = Math.min(parseInt(req.query.count) || 1, 10); // 最多返回10个
if (count === 1) {
const proxy = await ProxyModel.findRandom(true, 1);
if (!proxy) {
return res.status(404).json({
success: false,
error: '没有可用的代理'
});
}
res.json({
success: true,
data: proxy
});
} else {
const proxies = await ProxyModel.findRandom(true, count);
res.json({
success: true,
data: proxies,
count: proxies.length
});
}
} catch (error) {
console.error('获取随机代理失败:', error);
res.status(500).json({
success: false,
error: '获取随机代理失败',
message: error.message
});
}
});
// 获取代理统计信息
router.get('/stats', async (req, res) => {
try {
const stats = await validator.getProxyStatistics();
res.json({
success: true,
data: stats
});
} catch (error) {
console.error('获取统计信息失败:', error);
res.status(500).json({
success: false,
error: '获取统计信息失败',
message: error.message
});
}
});
// 手动验证代理
router.post('/verify', async (req, res) => {
try {
const { ip, port } = req.body;
if (!ip || !port) {
return res.status(400).json({
success: false,
error: 'IP和端口不能为空'
});
}
const result = await validator.validateSingleProxy(ip, port);
res.json({
success: true,
data: result
});
} catch (error) {
console.error('验证代理失败:', error);
res.status(500).json({
success: false,
error: '验证代理失败',
message: error.message
});
}
});
// 批量验证代理
router.post('/verify-batch', async (req, res) => {
try {
const { proxies } = req.body;
if (!Array.isArray(proxies) || proxies.length === 0) {
return res.status(400).json({
success: false,
error: '代理列表不能为空'
});
}
// 验证格式
const validProxies = proxies.filter(p => p.ip && p.port);
if (validProxies.length === 0) {
return res.status(400).json({
success: false,
error: '没有有效的代理格式'
});
}
const results = await validator.validateMultipleProxies(validProxies, 3);
res.json({
success: true,
data: {
validated: results.length,
valid: results.filter(r => r.isValid).length,
invalid: results.filter(r => !r.isValid).length,
results: results
}
});
} catch (error) {
console.error('批量验证代理失败:', error);
res.status(500).json({
success: false,
error: '批量验证代理失败',
message: error.message
});
}
});
// 手动触发抓取任务
router.post('/scrape', async (req, res) => {
try {
const { pages = 40 } = req.body;
// if (pages < 1 || pages > 10) {
// return res.status(400).json({
// success: false,
// error: '页数必须在1-10之间'
// });
// }
// 检查是否有抓取任务正在进行
const ProxyScraper = require('../services/scraper');
if (ProxyScraper.isScrapingInProgress()) {
return res.status(409).json({
success: false,
error: '有抓取任务正在进行',
message: '请等待当前抓取任务完成后再试'
});
}
console.log(`手动触发抓取任务,页数: ${pages}`);
// 异步执行抓取任务
scraper.scrapeMultiplePages(pages).then(result => {
console.log('手动抓取任务完成:', result);
}).catch(error => {
console.error('手动抓取任务失败:', error);
});
res.json({
success: true,
message: `抓取任务已启动,将抓取前 ${pages}`
});
} catch (error) {
console.error('触发抓取任务失败:', error);
res.status(500).json({
success: false,
error: '触发抓取任务失败',
message: error.message
});
}
});
// 手动触发验证任务
router.post('/validate-all', async (req, res) => {
try {
const { limit = 50 } = req.body;
if (limit < 1 || limit > 200) {
return res.status(400).json({
success: false,
error: '验证数量必须在1-200之间'
});
}
console.log(`手动触发验证任务,数量: ${limit}`);
// 异步执行验证任务
validator.validateDatabaseProxies(limit).then(result => {
console.log('手动验证任务完成:', result);
}).catch(error => {
console.error('手动验证任务失败:', error);
});
res.json({
success: true,
message: `验证任务已启动,将验证 ${limit} 个代理`
});
} catch (error) {
console.error('触发验证任务失败:', error);
res.status(500).json({
success: false,
error: '触发验证任务失败',
message: error.message
});
}
});
// 根据IP和端口查找代理
router.get('/search', async (req, res) => {
try {
const { ip, port } = req.query;
if (!ip || !port) {
return res.status(400).json({
success: false,
error: 'IP和端口参数不能为空'
});
}
const proxy = await ProxyModel.findByIpAndPort(ip, port);
if (!proxy) {
return res.status(404).json({
success: false,
error: '未找到指定的代理'
});
}
res.json({
success: true,
data: proxy
});
} catch (error) {
console.error('查找代理失败:', error);
res.status(500).json({
success: false,
error: '查找代理失败',
message: error.message
});
}
});
// 删除无效代理
router.delete('/cleanup', async (req, res) => {
try {
const result = await ProxyModel.deleteInvalid();
res.json({
success: true,
message: `已删除 ${result.changes} 个无效代理`
});
} catch (error) {
console.error('清理无效代理失败:', error);
res.status(500).json({
success: false,
error: '清理无效代理失败',
message: error.message
});
}
});
// 检查抓取任务状态
router.get('/scraping-status', (req, res) => {
try {
const ProxyScraper = require('../services/scraper');
const isInProgress = ProxyScraper.isScrapingInProgress();
res.json({
success: true,
data: {
isScrapingInProgress: isInProgress,
message: isInProgress ? '有抓取任务正在进行' : '当前没有抓取任务'
}
});
} catch (error) {
console.error('获取抓取状态失败:', error);
res.status(500).json({
success: false,
error: '获取抓取状态失败',
message: error.message
});
}
});
// 强制清除抓取标志(用于异常情况)
router.post('/clear-scraping-flag', (req, res) => {
try {
const ProxyScraper = require('../services/scraper');
ProxyScraper.clearScrapingFlag();
res.json({
success: true,
message: '已强制清除抓取进行中标志'
});
} catch (error) {
console.error('清除抓取标志失败:', error);
res.status(500).json({
success: false,
error: '清除抓取标志失败',
message: error.message
});
}
});
// 导入代理JSON
router.post('/import', async (req, res) => {
try {
const { proxies } = req.body;
if (!Array.isArray(proxies) || proxies.length === 0) {
return res.status(400).json({
success: false,
error: '代理列表不能为空'
});
}
console.log(`开始导入 ${proxies.length} 个代理`);
// 验证格式并转换为标准格式
const validProxies = [];
const invalidProxies = [];
for (const proxy of proxies) {
let ip, port;
// 支持多种字段名
if (proxy.ip_address && proxy.port) {
ip = proxy.ip_address;
port = proxy.port;
} else if (proxy.ip && proxy.port) {
ip = proxy.ip;
port = proxy.port;
} else if (proxy.host && proxy.port) {
ip = proxy.host;
port = proxy.port;
} else {
invalidProxies.push(proxy);
continue;
}
// 验证IP和端口格式
if (/^\d+\.\d+\.\d+\.\d+$/.test(ip) && /^\d+$/.test(port)) {
const portNum = parseInt(port);
if (portNum > 0 && portNum <= 65535) {
validProxies.push({
ip: ip,
port: portNum,
location: proxy.location || proxy.area || null,
speed: proxy.speed || null,
last_check_time: new Date().toISOString().slice(0, 19).replace('T', ' ')
});
} else {
invalidProxies.push(proxy);
}
} else {
invalidProxies.push(proxy);
}
}
if (validProxies.length === 0) {
return res.status(400).json({
success: false,
error: '没有有效的代理格式',
details: {
total: proxies.length,
valid: 0,
invalid: invalidProxies.length
}
});
}
console.log(`验证完成,有效代理: ${validProxies.length},无效代理: ${invalidProxies.length}`);
// 立即验证代理
const ProxyValidator = require('../services/validator');
const validator = new ProxyValidator();
const validationResult = await validator.validateMultipleProxies(validProxies, 10);
// 保存验证通过的代理到数据库
let savedCount = 0;
for (const result of validationResult) {
if (result.isValid) {
try {
await ProxyModel.create({
ip: result.ip,
port: result.port,
location: null,
speed: null,
last_check_time: new Date().toISOString().slice(0, 19).replace('T', ' '),
is_valid: 1,
response_time: result.responseTime
});
savedCount++;
console.log(`✓ 保存代理: ${result.ip}:${result.port} - ${result.responseTime}ms`);
} catch (error) {
if (!error.message.includes('UNIQUE constraint failed')) {
console.warn(`保存代理 ${result.ip}:${result.port} 失败:`, error.message);
}
}
}
}
const validCount = validationResult.filter(r => r.isValid).length;
console.log(`导入完成: 总数 ${proxies.length},格式有效 ${validProxies.length},验证通过 ${validCount},保存成功 ${savedCount}`);
res.json({
success: true,
data: {
total: proxies.length,
format_valid: validProxies.length,
format_invalid: invalidProxies.length,
validated: validationResult.length,
valid: validCount,
invalid: validationResult.length - validCount,
saved: savedCount,
results: validationResult
},
message: `导入完成:保存了 ${savedCount} 个可用代理`
});
} catch (error) {
console.error('导入代理失败:', error);
res.status(500).json({
success: false,
error: '导入代理失败',
message: error.message
});
}
});
module.exports = router;

398
src/services/scheduler.js Normal file
View File

@@ -0,0 +1,398 @@
const cron = require('node-cron');
const ProxyScraper = require('./scraper');
const ProxyValidator = require('./validator');
const ProxyModel = require('../database/models/proxy');
const HistoryModel = require('../database/models/history');
const LogsModel = require('../database/models/logs');
class ProxyScheduler {
constructor() {
this.scraper = new ProxyScraper();
this.validator = new ProxyValidator();
this.tasks = new Map();
}
async log(message) {
const timestamp = new Date().toISOString().slice(0, 19).replace('T', ' ');
console.log(`[${timestamp}] ${message}`);
}
// 抓取任务 - 每小时执行一次
async runScrapeTask() {
let historyId = null;
try {
await this.log('开始执行定时抓取任务');
// 检查是否有抓取任务正在进行
if (this.scraper.constructor.isScrapingInProgress()) {
await this.log('⚠️ 有抓取任务正在进行,跳过本次定时抓取任务');
return {
scraped: 0,
total: 0,
valid: 0,
skipped: true,
message: '有抓取任务正在进行,跳过本次执行'
};
}
// 创建执行历史记录
historyId = await HistoryModel.create({
task_type: 'scrape',
task_name: '定时抓取任务',
status: 'running',
start_time: new Date().toISOString()
});
// 抓取代理
const scrapeResult = await this.scraper.scrape();
// 更新历史记录
const duration = Date.now() - new Date(historyId ? await this.getHistoryStartTime(historyId) : Date.now());
await HistoryModel.update(historyId, {
status: 'success',
end_time: new Date().toISOString(),
duration: duration,
result_summary: `抓取 ${scrapeResult.scraped} 个代理,可用 ${scrapeResult.valid}`,
details: scrapeResult
});
if (scrapeResult.skipped) {
await this.log(`定时抓取任务跳过: ${scrapeResult.message}`);
// 更新历史记录
if (historyId) {
await HistoryModel.update(historyId, {
status: 'skipped',
end_time: new Date().toISOString(),
result_summary: scrapeResult.message,
details: scrapeResult
});
}
} else {
await this.log(`定时抓取任务完成: 抓取 ${scrapeResult.scraped} 个,总数 ${scrapeResult.total},可用 ${scrapeResult.valid}`);
// 获取当前统计信息
const stats = await this.validator.getProxyStatistics();
await this.log(`当前代理状态: 总数 ${stats.total},可用 ${stats.valid},无效 ${stats.invalid},可用率 ${stats.validRate}`);
// 记录系统日志
await LogsModel.logInfo(
`定时抓取任务完成: 抓取${scrapeResult.scraped}个代理,可用${scrapeResult.valid}`,
'scheduler',
scrapeResult,
'automated_task'
);
}
return scrapeResult;
} catch (error) {
const errorMessage = `定时抓取任务失败: ${error.message}`;
await this.log(errorMessage);
// 更新历史记录
if (historyId) {
const duration = Date.now() - new Date(await this.getHistoryStartTime(historyId));
await HistoryModel.update(historyId, {
status: 'failed',
end_time: new Date().toISOString(),
duration: duration,
error_message: error.message
});
}
// 记录错误日志
await LogsModel.logError(
errorMessage,
'scheduler',
{ error: error.message, stack: error.stack },
'automated_task'
);
throw error;
}
}
// 验证任务 - 每10分钟执行一次
async runValidationTask() {
let historyId = null;
try {
await this.log('开始执行定时验证任务');
// 创建执行历史记录
historyId = await HistoryModel.create({
task_type: 'validation',
task_name: '定时验证任务',
status: 'running',
start_time: new Date().toISOString()
});
// 验证数据库中的代理每次验证50个
const validationResult = await this.validator.validateDatabaseProxies(50);
// 更新历史记录
const duration = Date.now() - new Date(historyId ? await this.getHistoryStartTime(historyId) : Date.now());
await HistoryModel.update(historyId, {
status: 'success',
end_time: new Date().toISOString(),
duration: duration,
result_summary: `验证 ${validationResult.validated} 个代理,有效 ${validationResult.valid}`,
details: validationResult
});
await this.log(`定时验证任务完成: 验证 ${validationResult.validated} 个,有效 ${validationResult.valid},无效 ${validationResult.invalid}`);
// 清理无效代理
if (validationResult.invalid > 0) {
await this.log(`已清理 ${validationResult.invalid} 个无效代理`);
}
// 获取当前统计信息
const stats = await this.validator.getProxyStatistics();
await this.log(`当前代理状态: 总数 ${stats.total},可用 ${stats.valid},无效 ${stats.invalid},可用率 ${stats.validRate}`);
// 记录系统日志
await LogsModel.logInfo(
`定时验证任务完成: 验证${validationResult.validated}个代理,有效${validationResult.valid}`,
'scheduler',
validationResult,
'automated_task'
);
return validationResult;
} catch (error) {
const errorMessage = `定时验证任务失败: ${error.message}`;
await this.log(errorMessage);
// 更新历史记录
if (historyId) {
const duration = Date.now() - new Date(await this.getHistoryStartTime(historyId));
await HistoryModel.update(historyId, {
status: 'failed',
end_time: new Date().toISOString(),
duration: duration,
error_message: error.message
});
}
// 记录错误日志
await LogsModel.logError(
errorMessage,
'scheduler',
{ error: error.message, stack: error.stack },
'automated_task'
);
throw error;
}
}
// 健康检查任务 - 每小时执行一次
async runHealthCheckTask() {
try {
await this.log('开始执行健康检查任务');
const stats = await this.validator.getProxyStatistics();
await this.log(`代理池健康状态: 总数 ${stats.total},可用 ${stats.valid},无效 ${stats.invalid},可用率 ${stats.validRate}`);
// 如果可用代理太少,触发紧急抓取
if (stats.valid < 10) {
await this.log('可用代理数量过少,触发紧急抓取任务');
await this.runScrapeTask();
}
return stats;
} catch (error) {
await this.log(`健康检查任务失败: ${error.message}`);
throw error;
}
}
// 启动所有定时任务
start() {
this.log('启动代理服务定时任务调度器');
// 每小时的第0分钟执行抓取任务异步执行不阻塞其他任务
const scrapeTask = cron.schedule('0 * * * *', async () => {
try {
// 使用 setImmediate 确保任务异步执行,不阻塞事件循环
setImmediate(async () => {
try {
await this.runScrapeTask();
} catch (error) {
this.log(`定时抓取任务异常: ${error.message}`);
}
});
} catch (error) {
this.log(`定时抓取任务启动异常: ${error.message}`);
}
}, {
scheduled: false,
timezone: 'Asia/Shanghai'
});
// 每10分钟执行验证任务异步执行不阻塞其他任务
const validationTask = cron.schedule('*/10 * * * *', async () => {
try {
// 使用 setImmediate 确保任务异步执行,不阻塞事件循环
setImmediate(async () => {
try {
await this.runValidationTask();
} catch (error) {
this.log(`定时验证任务异常: ${error.message}`);
}
});
} catch (error) {
this.log(`定时验证任务启动异常: ${error.message}`);
}
}, {
scheduled: false,
timezone: 'Asia/Shanghai'
});
// 每小时的第30分钟执行健康检查异步执行不阻塞其他任务
const healthCheckTask = cron.schedule('30 * * * *', async () => {
try {
// 使用 setImmediate 确保任务异步执行,不阻塞事件循环
setImmediate(async () => {
try {
await this.runHealthCheckTask();
} catch (error) {
this.log(`健康检查任务异常: ${error.message}`);
}
});
} catch (error) {
this.log(`健康检查任务启动异常: ${error.message}`);
}
}, {
scheduled: false,
timezone: 'Asia/Shanghai'
});
// 保存任务引用
this.tasks.set('scrape', scrapeTask);
this.tasks.set('validation', validationTask);
this.tasks.set('healthCheck', healthCheckTask);
// 启动所有任务
this.tasks.forEach((task, name) => {
task.start();
this.log(`定时任务已启动: ${name}`);
});
this.log('所有定时任务已启动');
this.printNextRunTimes();
}
// 停止所有定时任务
stop() {
this.log('停止所有定时任务');
this.tasks.forEach((task, name) => {
task.stop();
this.log(`定时任务已停止: ${name}`);
});
this.tasks.clear();
this.log('定时任务调度器已停止');
}
// 手动执行任务
async runTask(taskName) {
this.log(`手动执行任务: ${taskName}`);
switch (taskName) {
case 'scrape':
return await this.runScrapeTask();
case 'validation':
return await this.runValidationTask();
case 'healthCheck':
return await this.runHealthCheckTask();
default:
throw new Error(`未知的任务名称: ${taskName}`);
}
}
// 获取任务状态
getStatus() {
const status = {};
this.tasks.forEach((task, name) => {
status[name] = {
running: task.running || false,
scheduled: task.scheduled || false
};
});
return {
tasks: status,
taskCount: this.tasks.size
};
}
// 打印下次执行时间
printNextRunTimes() {
const now = new Date();
this.log('定时任务执行计划:');
this.log(`- 抓取任务: 每小时整点执行 (下次: ${this.getNextHourTime(now)})`);
this.log(`- 验证任务: 每10分钟执行 (下次: ${this.getNext10MinuteTime(now)})`);
this.log(`- 健康检查: 每小时30分执行 (下次: ${this.getNextHour30Time(now)})`);
}
getNextHourTime(now) {
const next = new Date(now);
next.setHours(next.getHours() + 1, 0, 0, 0);
return next.toTimeString().slice(0, 5);
}
getNext10MinuteTime(now) {
const next = new Date(now);
const minutes = next.getMinutes();
const nextMinute = Math.ceil((minutes + 1) / 10) * 10;
next.setMinutes(nextMinute, 0, 0);
return next.toTimeString().slice(0, 5);
}
getNextHour30Time(now) {
const next = new Date(now);
if (now.getMinutes() < 30) {
next.setHours(now.getHours(), 30, 0, 0);
} else {
next.setHours(now.getHours() + 1, 30, 0, 0);
}
return next.toTimeString().slice(0, 5);
}
// 获取历史记录开始时间
async getHistoryStartTime(historyId) {
try {
const history = await HistoryModel.findById(historyId);
return history ? new Date(history.start_time).getTime() : Date.now();
} catch (error) {
return Date.now();
}
}
// 获取系统统计信息
async getSystemStats() {
try {
const proxyStats = await this.validator.getProxyStatistics();
const taskStatus = this.getStatus();
return {
timestamp: new Date().toISOString(),
proxies: proxyStats,
scheduler: taskStatus,
uptime: process.uptime()
};
} catch (error) {
this.log(`获取系统统计信息失败: ${error.message}`);
throw error;
}
}
}
module.exports = ProxyScheduler;

596
src/services/scraper.js Normal file
View File

@@ -0,0 +1,596 @@
const axios = require('axios');
const cheerio = require('cheerio');
const ProxyModel = require('../database/models/proxy');
// 全局变量:标记是否有抓取任务正在进行
let isScrapingInProgress = false;
class ProxyScraper {
constructor() {
this.baseUrl = 'https://www.kuaidaili.com/free/inha';
this.userAgents = [
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Edge/91.0.864.59'
];
this.currentProxyIndex = 0;
this.localProxies = [];
}
getRandomUserAgent() {
return this.userAgents[Math.floor(Math.random() * this.userAgents.length)];
}
async sleep(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
}
// 获取本地数据库中的有效代理
async loadLocalProxies() {
try {
console.log('正在加载本地代理数据库...');
this.localProxies = await ProxyModel.findAll(true);
this.currentProxyIndex = 0;
console.log(`成功加载 ${this.localProxies.length} 个本地代理`);
if (this.localProxies.length > 0) {
console.log('将使用本地代理进行抓取');
} else {
console.log('本地无可用代理,将使用直连');
}
} catch (error) {
console.error('加载本地代理失败:', error);
this.localProxies = [];
}
}
// 获取下一个本地代理(循环使用)
getNextLocalProxy() {
if (this.localProxies.length === 0) {
return null; // 没有本地代理返回null使用直连
}
const proxy = this.localProxies[this.currentProxyIndex];
this.currentProxyIndex = (this.currentProxyIndex + 1) % this.localProxies.length;
console.log(`使用本地代理: ${proxy.ip}:${proxy.port}`);
return {
host: proxy.ip,
port: proxy.port,
protocol: 'http'
};
}
// 测试代理是否可用(用于抓取)
async testProxyForScraping(proxyConfig) {
try {
const response = await axios.get('https://www.baidu.com', {
proxy: proxyConfig,
timeout: 10000,
headers: {
'User-Agent': this.getRandomUserAgent()
},
validateStatus: (status) => status === 200
});
return response.status === 200 && response.data.includes('百度');
} catch (error) {
return false;
}
}
// 获取可用的代理配置
async getWorkingProxy() {
if (this.localProxies.length === 0) {
return null; // 无本地代理,使用直连
}
// 尝试几个代理,找到可用的
for (let i = 0; i < Math.min(5, this.localProxies.length); i++) {
const proxyConfig = this.getNextLocalProxy();
if (await this.testProxyForScraping(proxyConfig)) {
console.log(`✓ 代理 ${proxyConfig.host}:${proxyConfig.port} 可用`);
return proxyConfig;
} else {
console.log(`✗ 代理 ${proxyConfig.host}:${proxyConfig.port} 不可用,尝试下一个`);
}
}
console.log('测试的本地代理都不可用,使用直连');
return null; // 所有测试的代理都不可用,使用直连
}
// 验证抓取到的代理
async validateScrapedProxies(proxies, concurrency = 20) {
if (!proxies || proxies.length === 0) {
return [];
}
console.log(`开始验证 ${proxies.length} 个抓取到的代理,并发数: ${concurrency}`);
const validProxies = [];
// 分批处理,避免过多并发连接
for (let i = 0; i < proxies.length; i += concurrency) {
const batch = proxies.slice(i, i + concurrency);
const batchPromises = batch.map(proxy => this.validateScrapedProxy(proxy));
try {
const batchResults = await Promise.all(batchPromises);
// 收集验证通过的代理
batchResults.forEach(result => {
if (result.isValid) {
validProxies.push({
...result.proxy,
is_valid: 1,
response_time: result.responseTime
});
console.log(`✓ 代理验证通过: ${result.proxy.ip}:${result.proxy.port} - ${result.responseTime}ms`);
} else {
console.log(`✗ 代理验证失败: ${result.proxy.ip}:${result.proxy.port} - ${result.error || '连接失败'}`);
}
});
// 批次间延迟,避免请求过于频繁
if (i + concurrency < proxies.length) {
await this.sleep(1000);
}
} catch (error) {
console.error(`批量验证第 ${Math.floor(i / concurrency) + 1} 批时发生错误:`, error);
}
}
console.log(`代理验证完成,${validProxies.length}/${proxies.length} 个代理验证通过`);
return validProxies;
}
// 验证单个抓取到的代理
async validateScrapedProxy(proxy, retryCount = 2) {
const testUrls = [
'https://www.baidu.com',
];
for (let attempt = 1; attempt <= retryCount; attempt++) {
for (const testUrl of testUrls) {
const startTime = Date.now();
try {
const proxyConfig = {
host: proxy.ip,
port: proxy.port,
protocol: 'http'
};
const response = await axios.get(testUrl, {
proxy: proxyConfig,
timeout: 10000, // 3秒超时
headers: {
'User-Agent': this.getRandomUserAgent(),
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
'Connection': 'keep-alive'
},
validateStatus: (status) => status >= 200 && status < 300 // 接受2xx状态码
});
const responseTime = Date.now() - startTime;
let isValid = false;
// 检查响应内容
if (response.status === 200) {
if (testUrl.includes('baidu.com')) {
isValid = response.data.includes('百度');
} else if (testUrl.includes('httpbin.org')) {
isValid = response.data.includes('origin');
} else if (testUrl.includes('google.com')) {
isValid = response.data.includes('google');
} else {
isValid = true; // 对于其他URL只要能连接就认为有效
}
}
if (isValid) {
console.log(`✓ 代理验证通过: ${proxy.ip}:${proxy.port} - ${testUrl} - ${responseTime}ms`);
return {
proxy: proxy,
isValid: true,
responseTime: responseTime,
error: null,
testUrl: testUrl
};
}
} catch (error) {
const responseTime = Date.now() - startTime;
// 如果是最后一次尝试,返回失败
if (attempt === retryCount && testUrl === testUrls[testUrls.length - 1]) {
return {
proxy: proxy,
isValid: false,
responseTime: responseTime,
error: error.message,
testUrl: testUrl
};
}
// 否则继续尝试下一个URL或重试
await this.sleep(500);
}
}
}
// 所有尝试都失败
return {
proxy: proxy,
isValid: false,
responseTime: 0,
error: 'All validation attempts failed',
testUrl: null
};
}
async fetchPage(pageNum, retryCount = 3) {
const url = pageNum === 1 ? `${this.baseUrl}/` : `${this.baseUrl}/${pageNum}/`;
console.log(`正在抓取第 ${pageNum} 页: ${url}`);
for (let attempt = 1; attempt <= retryCount; attempt++) {
let proxyConfig = null;
let proxyUsed = '';
try {
const userAgent = this.getRandomUserAgent();
// 获取可用代理配置(每次请求都尝试不同的代理)
proxyConfig = await this.getWorkingProxy();
const requestConfig = {
headers: {
'User-Agent': userAgent,
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
'Accept-Encoding': 'gzip, deflate, br',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1',
'Sec-Fetch-Dest': 'document',
'Sec-Fetch-Mode': 'navigate',
'Sec-Fetch-Site': 'none',
'Sec-Fetch-User': '?1',
'Cache-Control': 'max-age=0',
'sec-ch-ua': '"Google Chrome";v="119", "Chromium";v="119", "Not?A_Brand";v="24"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
'Referer': 'https://www.kuaidaili.com/free/',
'Origin': 'https://www.kuaidaili.com'
},
timeout: 15000,
validateStatus: (status) => status === 200,
maxRedirects: 3
};
// 如果有可用代理,添加到请求配置中
if (proxyConfig) {
requestConfig.proxy = proxyConfig;
proxyUsed = `代理 ${proxyConfig.host}:${proxyConfig.port}`;
console.log(`使用代理 ${proxyConfig.host}:${proxyConfig.port} 抓取第 ${pageNum}`);
} else {
proxyUsed = '直连';
console.log(`使用直连抓取第 ${pageNum}`);
}
const response = await axios.get(url, requestConfig);
// 检查响应内容
const content = response.data;
if (content.includes('访问过于频繁') || content.includes('请稍后再试')) {
console.log(`${pageNum} 页被限制访问,延长等待时间`);
if (attempt < retryCount) {
await this.sleep(5000 * attempt);
continue;
}
}
console.log(`${pageNum} 页获取成功,内容长度: ${content.length}`);
return content;
} catch (error) {
console.error(`${attempt} 次尝试抓取第 ${pageNum} 页失败 (${proxyUsed}):`, error.message);
if (attempt === retryCount) {
throw new Error(`抓取第 ${pageNum} 页失败,已重试 ${retryCount} 次: ${error.message}`);
}
// 递增等待时间
await this.sleep(3000 * attempt);
}
}
}
extractProxiesFromHtml(html) {
try {
const $ = cheerio.load(html);
const proxies = [];
console.log('开始解析HTML页面标题:', $('title').text());
// 方法1: 尝试从JavaScript变量中提取
const scripts = $('script').map((i, elem) => $(elem).html()).get();
console.log(`找到 ${scripts.length} 个script标签`);
for (let i = 0; i < scripts.length; i++) {
const script = scripts[i];
// 尝试多种匹配模式 - 优先使用 const 模式
const patterns = [
/const fpsList = (\[[\s\S]*?\]);/,
/var fpsList = (\[[\s\S]*?\]);/,
/let fpsList = (\[[\s\S]*?\]);/,
/fpsList = (\[[\s\S]*?\]);/,
/var\s+fpsList\s*=\s*(\[[\s\S]*?\]);/,
/const\s+fpsList\s*=\s*(\[[\s\S]*?\]);/,
/let\s+fpsList\s*=\s*(\[[\s\S]*?\]);/
];
for (const pattern of patterns) {
const match = script.match(pattern);
if (match) {
try {
console.log('找到JavaScript变量尝试解析...');
const fpsList = eval(match[1]);
console.log(`从JavaScript变量中提取到 ${fpsList.length} 个代理`);
if (Array.isArray(fpsList) && fpsList.length > 0) {
return fpsList.map(item => ({
ip: item.ip,
port: parseInt(item.port), // 确保端口是数字
location: item.location || null,
speed: item.speed ? parseInt(item.speed) : null,
last_check_time: item.last_check_time || new Date().toISOString().slice(0, 19).replace('T', ' ')
}));
}
} catch (e) {
console.warn('解析JavaScript变量失败:', e.message);
}
}
}
}
// 方法2: 尝试从表格中提取 - 使用多种选择器
const tableSelectors = [
'table tbody tr',
'.table tbody tr',
'#list table tbody tr',
'.list table tbody tr',
'table tr',
'.proxy-table tbody tr'
];
for (const selector of tableSelectors) {
const rows = $(selector);
console.log(`尝试选择器 "${selector}": 找到 ${rows.length}`);
if (rows.length > 0) {
rows.each((i, row) => {
try {
const cols = $(row).find('td');
// 尝试多种列结构
let ip, port, location, speed;
if (cols.length >= 2) {
// 标准格式IP、端口、位置、匿名度、类型、位置、速度、最后验证时间
ip = $(cols[0]).text().trim();
port = $(cols[1]).text().trim();
if (cols.length >= 5) {
location = $(cols[4]).text().trim();
}
if (cols.length >= 6) {
speed = $(cols[5]).text().trim();
}
// 清理和验证数据
ip = ip.replace(/[^\d\.]/g, '');
port = port.replace(/[^\d]/g, '');
if (ip && port && /^\d+\.\d+\.\d+\.\d+$/.test(ip) && /^\d+$/.test(port)) {
const portNum = parseInt(port);
if (portNum > 0 && portNum <= 65535) {
proxies.push({
ip: ip,
port: portNum,
location: location || null,
speed: speed ? parseInt(speed.replace(/[^\d]/g, '')) : null,
last_check_time: new Date().toISOString().slice(0, 19).replace('T', ' ')
});
}
}
}
} catch (e) {
console.warn(`解析第 ${i} 行失败:`, e.message);
}
});
if (proxies.length > 0) {
console.log(`从表格中提取到 ${proxies.length} 个代理`);
break;
}
}
}
// 方法3: 尝试从JSON数据中提取
const jsonPatterns = [
/"data":\s*(\[[\s\S]*?\])/,
/"proxies":\s*(\[[\s\S]*?\])/,
/window\.__INITIAL_STATE__\s*=\s*({[\s\S]*?});/
];
const pageContent = $.html();
for (const pattern of jsonPatterns) {
const match = pageContent.match(pattern);
if (match) {
try {
console.log('找到JSON数据尝试解析...');
const data = JSON.parse(match[1]);
// 处理JSON数据格式
if (Array.isArray(data)) {
const jsonProxies = data.map(item => ({
ip: item.ip || item.IP,
port: item.port || item.PORT,
location: item.location || item.area || null,
speed: item.speed || null,
last_check_time: new Date().toISOString().slice(0, 19).replace('T', ' ')
})).filter(p => p.ip && p.port);
if (jsonProxies.length > 0) {
console.log(`从JSON数据中提取到 ${jsonProxies.length} 个代理`);
return jsonProxies;
}
}
} catch (e) {
console.warn('解析JSON数据失败:', e.message);
}
}
}
console.log(`所有提取方法都失败,最终提取到 ${proxies.length} 个代理`);
// 如果没有找到代理,输出页面内容用于调试
if (proxies.length === 0) {
console.log('页面内容片段:', pageContent.substring(0, 500) + '...');
}
return proxies;
} catch (error) {
console.error('提取代理数据失败:', error);
return [];
}
}
async scrapePage(pageNum) {
try {
const html = await this.fetchPage(pageNum);
const proxies = this.extractProxiesFromHtml(html);
if (proxies.length === 0) {
console.warn(`${pageNum} 页没有提取到代理数据`);
return [];
}
console.log(`${pageNum} 页处理完成,获取到 ${proxies.length} 个代理,开始验证...`);
// 立即验证当前页面的所有代理
const validatedProxies = await this.validateScrapedProxies(proxies);
let savedCount = 0;
for (const proxy of validatedProxies) {
try {
await ProxyModel.create(proxy);
savedCount++;
console.log(`✓ 保存验证通过的代理: ${proxy.ip}:${proxy.port}`);
} catch (error) {
// 忽略重复插入错误
if (!error.message.includes('UNIQUE constraint failed')) {
console.warn(`保存代理 ${proxy.ip}:${proxy.port} 失败:`, error.message);
}
}
}
console.log(`${pageNum} 页验证完成,验证通过 ${validatedProxies.length}/${proxies.length} 个代理,保存了 ${savedCount} 个新代理`);
return validatedProxies;
} catch (error) {
console.error(`处理第 ${pageNum} 页失败:`, error);
return [];
}
}
async scrapeMultiplePages(maxPages = 40) {
console.log(`开始抓取前 ${maxPages} 页代理IP`);
// 首先加载本地代理
await this.loadLocalProxies();
const allValidatedProxies = [];
for (let page = 1; page <= maxPages; page++) {
try {
const validatedProxies = await this.scrapePage(page);
allValidatedProxies.push(...validatedProxies);
// 页面间延迟,避免请求过于频繁
if (page < maxPages) {
await this.sleep(3000 + Math.random() * 2000);
}
} catch (error) {
console.error(`抓取第 ${page} 页失败,跳过此页:`, error);
}
}
const totalValidated = allValidatedProxies.length;
console.log(`抓取完成,共获取到 ${totalValidated} 个验证通过的代理IP`);
return totalValidated;
}
async scrape() {
console.log('开始执行代理IP抓取任务');
// 检查是否有抓取任务正在进行
if (isScrapingInProgress) {
console.log('⚠️ 有抓取任务正在进行,跳过本次抓取');
const totalCount = await ProxyModel.count();
const validCount = await ProxyModel.count(true);
return {
scraped: 0,
total: totalCount,
valid: validCount,
skipped: true
};
}
// 设置抓取进行中标志
isScrapingInProgress = true;
console.log('🚀 开始抓取,设置抓取进行中标志');
try {
const validatedCount = await this.scrapeMultiplePages(40);
// 获取数据库中的总代理数量
const totalCount = await ProxyModel.count();
const validCount = await ProxyModel.count(true);
console.log(`抓取任务完成。数据库中共有 ${totalCount} 个代理,其中 ${validCount} 个可用`);
return {
scraped: validatedCount,
total: totalCount,
valid: validCount,
skipped: false
};
} catch (error) {
console.error('代理抓取任务失败:', error);
throw error;
} finally {
// 无论成功还是失败,都要清除抓取进行中标志
isScrapingInProgress = false;
console.log('✅ 清除抓取进行中标志');
}
}
// 静态方法:检查是否有抓取任务正在进行
static isScrapingInProgress() {
return isScrapingInProgress;
}
// 静态方法:强制清除抓取标志(用于异常情况)
static clearScrapingFlag() {
isScrapingInProgress = false;
console.log('🔧 强制清除抓取进行中标志');
}
}
module.exports = ProxyScraper;

202
src/services/validator.js Normal file
View File

@@ -0,0 +1,202 @@
const axios = require('axios');
const ProxyModel = require('../database/models/proxy');
class ProxyValidator {
constructor() {
this.testUrl = 'https://www.baidu.com';
this.timeout = 10000; // 3秒超时
this.userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36';
}
async validateProxy(ip, port) {
const startTime = Date.now();
const proxy = {
host: ip,
port: port,
protocol: 'http'
};
console.log(`正在验证代理 ${ip}:${port}`);
try {
const response = await axios.get(this.testUrl, {
proxy: proxy,
timeout: this.timeout,
headers: {
'User-Agent': this.userAgent,
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
'Accept-Encoding': 'gzip, deflate',
'Connection': 'keep-alive'
},
validateStatus: (status) => status === 200
});
const responseTime = Date.now() - startTime;
const isValid = response.status === 200 && response.data.includes('百度');
if (isValid) {
console.log(`✓ 代理 ${ip}:${port} 验证成功,响应时间: ${responseTime}ms`);
} else {
console.log(`✗ 代理 ${ip}:${port} 验证失败,响应不正确`);
}
// 更新数据库中的验证结果
await ProxyModel.updateValidity(ip, port, isValid ? 1 : 0, responseTime);
return {
ip: ip,
port: port,
isValid: isValid,
responseTime: responseTime,
error: null
};
} catch (error) {
const responseTime = Date.now() - startTime;
console.log(`✗ 代理 ${ip}:${port} 验证失败:`, error.message);
// 更新数据库中的验证结果
await ProxyModel.updateValidity(ip, port, 0, responseTime);
return {
ip: ip,
port: port,
isValid: false,
responseTime: responseTime,
error: error.message
};
}
}
async validateSingleProxy(ip, port) {
try {
const result = await this.validateProxy(ip, port);
return result;
} catch (error) {
console.error(`验证代理 ${ip}:${port} 时发生错误:`, error);
return {
ip: ip,
port: port,
isValid: false,
responseTime: null,
error: error.message
};
}
}
async validateMultipleProxies(proxies, concurrency = 20) {
console.log(`开始批量验证 ${proxies.length} 个代理,并发数: ${concurrency}`);
const results = [];
// 分批处理,避免过多并发连接
for (let i = 0; i < proxies.length; i += concurrency) {
const batch = proxies.slice(i, i + concurrency);
const batchPromises = batch.map(proxy =>
this.validateProxy(proxy.ip, proxy.port)
);
try {
const batchResults = await Promise.all(batchPromises);
results.push(...batchResults);
} catch (error) {
console.error(`批量验证第 ${Math.floor(i / concurrency) + 1} 批时发生错误:`, error);
}
// 批次间延迟
if (i + concurrency < proxies.length) {
await this.sleep(1000);
}
}
const validCount = results.filter(r => r.isValid).length;
console.log(`批量验证完成,${validCount}/${results.length} 个代理可用`);
return results;
}
async validateDatabaseProxies(limit = 50) {
console.log(`开始验证数据库中的代理IP限制数量: ${limit}`);
try {
// 获取待验证的代理(优先验证最久未验证的)
const proxies = await ProxyModel.findAllForValidation(limit);
if (proxies.length === 0) {
console.log('没有需要验证的代理');
return { validated: 0, valid: 0, invalid: 0 };
}
console.log(`找到 ${proxies.length} 个待验证的代理`);
const results = await this.validateMultipleProxies(proxies, 10);
// 清理无效代理
const invalidCount = results.filter(r => !r.isValid).length;
if (invalidCount > 0) {
await ProxyModel.deleteInvalid();
console.log(`已删除 ${invalidCount} 个无效代理`);
}
const validCount = results.filter(r => r.isValid).length;
return {
validated: results.length,
valid: validCount,
invalid: invalidCount,
results: results
};
} catch (error) {
console.error('验证数据库代理失败:', error);
throw error;
}
}
async getProxyStatistics() {
try {
const totalCount = await ProxyModel.count(false);
const validCount = await ProxyModel.count(true);
const invalidCount = totalCount - validCount;
return {
total: totalCount,
valid: validCount,
invalid: invalidCount,
validRate: totalCount > 0 ? ((validCount / totalCount) * 100).toFixed(2) + '%' : '0%'
};
} catch (error) {
console.error('获取代理统计信息失败:', error);
return {
total: 0,
valid: 0,
invalid: 0,
validRate: '0%'
};
}
}
async sleep(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
}
// 验证新抓取的代理
async validateNewProxies(proxies) {
if (!proxies || proxies.length === 0) {
return { validated: 0, valid: 0, invalid: 0 };
}
console.log(`开始验证新抓取的 ${proxies.length} 个代理`);
const results = await this.validateMultipleProxies(proxies, 2);
const validCount = results.filter(r => r.isValid).length;
console.log(`新代理验证完成,${validCount}/${results.length} 个代理可用`);
return {
validated: results.length,
valid: validCount,
invalid: results.length - validCount,
results: results
};
}
}
module.exports = ProxyValidator;