init
This commit is contained in:
329
src/app.js
Normal file
329
src/app.js
Normal file
@@ -0,0 +1,329 @@
|
||||
const express = require('express');
|
||||
const path = require('path');
|
||||
const Database = require('./database/db');
|
||||
const ProxyModel = require('./database/models/proxy');
|
||||
const HistoryModel = require('./database/models/history');
|
||||
const LogsModel = require('./database/models/logs');
|
||||
const ProxyScheduler = require('./services/scheduler');
|
||||
const proxyRoutes = require('./routes/proxies');
|
||||
const dashboardRoutes = require('./routes/dashboard');
|
||||
const historyRoutes = require('./routes/history');
|
||||
|
||||
class ProxyApp {
|
||||
constructor() {
|
||||
this.app = express();
|
||||
this.port = process.env.PORT || 3000;
|
||||
this.scheduler = new ProxyScheduler();
|
||||
this.isShuttingDown = false;
|
||||
}
|
||||
|
||||
setupMiddleware() {
|
||||
// 静态文件服务
|
||||
this.app.use(express.static(path.join(__dirname, '../public')));
|
||||
|
||||
// JSON 解析中间件
|
||||
this.app.use(express.json({ limit: '10mb' }));
|
||||
this.app.use(express.urlencoded({ extended: true }));
|
||||
|
||||
// CORS 支持
|
||||
this.app.use((req, res, next) => {
|
||||
res.header('Access-Control-Allow-Origin', '*');
|
||||
res.header('Access-Control-Allow-Methods', 'GET, POST, PUT, DELETE, OPTIONS');
|
||||
res.header('Access-Control-Allow-Headers', 'Origin, X-Requested-With, Content-Type, Accept, Authorization');
|
||||
|
||||
if (req.method === 'OPTIONS') {
|
||||
res.sendStatus(200);
|
||||
} else {
|
||||
next();
|
||||
}
|
||||
});
|
||||
|
||||
// 请求日志中间件
|
||||
this.app.use((req, res, next) => {
|
||||
const timestamp = new Date().toISOString().slice(0, 19).replace('T', ' ');
|
||||
console.log(`[${timestamp}] ${req.method} ${req.url}`);
|
||||
next();
|
||||
});
|
||||
}
|
||||
|
||||
setupRoutes() {
|
||||
// 健康检查端点
|
||||
this.app.get('/api/health', async (req, res) => {
|
||||
try {
|
||||
const stats = await this.scheduler.getSystemStats();
|
||||
res.json({
|
||||
status: 'healthy',
|
||||
timestamp: new Date().toISOString(),
|
||||
system: stats
|
||||
});
|
||||
} catch (error) {
|
||||
res.status(500).json({
|
||||
status: 'unhealthy',
|
||||
error: error.message,
|
||||
timestamp: new Date().toISOString()
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// 根路径 - 重定向到仪表板
|
||||
this.app.get('/', (req, res) => {
|
||||
res.redirect('/index.html');
|
||||
});
|
||||
|
||||
// API信息端点
|
||||
this.app.get('/api/info', (req, res) => {
|
||||
res.json({
|
||||
name: 'Proxy IP Service',
|
||||
version: '1.0.0',
|
||||
description: '代理IP抓取、验证和管理服务',
|
||||
web_interface: {
|
||||
dashboard: '/index.html',
|
||||
proxies: '/proxies.html',
|
||||
history: '/history.html',
|
||||
monitoring: '/monitoring.html'
|
||||
},
|
||||
endpoints: {
|
||||
'GET /api/health': '健康检查',
|
||||
'GET /api/info': 'API信息',
|
||||
'GET /api/dashboard/stats': '仪表板统计数据',
|
||||
'GET /api/dashboard/status': '实时系统状态',
|
||||
'POST /api/dashboard/actions/scrape': '手动抓取',
|
||||
'POST /api/dashboard/actions/validate': '手动验证',
|
||||
'GET /api/proxies': '获取代理列表',
|
||||
'GET /api/proxies/random': '获取随机代理',
|
||||
'GET /api/proxies/stats': '获取统计信息',
|
||||
'POST /api/proxies/verify': '验证单个代理',
|
||||
'POST /api/proxies/verify-batch': '批量验证代理',
|
||||
'POST /api/proxies/scrape': '手动触发抓取',
|
||||
'POST /api/proxies/validate-all': '手动触发验证',
|
||||
'GET /api/proxies/search': '搜索代理',
|
||||
'DELETE /api/proxies/cleanup': '清理无效代理',
|
||||
'GET /api/history': '执行历史',
|
||||
'GET /api/history/logs/system': '系统日志',
|
||||
'GET /api/history/stats': '历史统计'
|
||||
},
|
||||
scheduler: {
|
||||
scrape: '每小时整点执行',
|
||||
validation: '每10分钟执行',
|
||||
healthCheck: '每小时30分执行'
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// API 路由
|
||||
this.app.use('/api/proxies', proxyRoutes);
|
||||
this.app.use('/api/dashboard', dashboardRoutes);
|
||||
this.app.use('/api/history', historyRoutes);
|
||||
|
||||
// 404 处理 - API端点
|
||||
this.app.use('/api', (req, res, next) => {
|
||||
// 检查是否已处理的路由
|
||||
if (!req.route) {
|
||||
res.status(404).json({
|
||||
error: 'API接口不存在',
|
||||
message: `路径 ${req.originalUrl} 未找到`,
|
||||
available_endpoints: [
|
||||
'GET /api/health',
|
||||
'GET /api/info',
|
||||
'GET /api/dashboard/stats',
|
||||
'GET /api/dashboard/status',
|
||||
'POST /api/dashboard/actions/scrape',
|
||||
'POST /api/dashboard/actions/validate',
|
||||
'GET /api/proxies',
|
||||
'GET /api/proxies/random',
|
||||
'GET /api/proxies/stats',
|
||||
'POST /api/proxies/verify',
|
||||
'POST /api/proxies/verify-batch',
|
||||
'GET /api/history',
|
||||
'GET /api/history/logs/system',
|
||||
'GET /api/history/stats'
|
||||
]
|
||||
});
|
||||
} else {
|
||||
next();
|
||||
}
|
||||
});
|
||||
|
||||
// 404 处理 - Web页面
|
||||
this.app.use((req, res) => {
|
||||
res.status(404).send(`
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>页面未找到 - 代理IP管理系统</title>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" rel="stylesheet">
|
||||
</head>
|
||||
<body class="bg-light">
|
||||
<div class="container mt-5">
|
||||
<div class="row justify-content-center">
|
||||
<div class="col-md-6">
|
||||
<div class="card">
|
||||
<div class="card-body text-center">
|
||||
<h1 class="text-danger mb-4">404</h1>
|
||||
<h3 class="mb-3">页面未找到</h3>
|
||||
<p class="text-muted mb-4">您访问的页面 ${req.originalUrl} 不存在</p>
|
||||
<a href="/index.html" class="btn btn-primary">返回首页</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
`);
|
||||
});
|
||||
|
||||
// 错误处理中间件
|
||||
this.app.use((err, req, res, next) => {
|
||||
console.error('未处理的错误:', err);
|
||||
res.status(500).json({
|
||||
error: '服务器内部错误',
|
||||
message: err.message,
|
||||
timestamp: new Date().toISOString()
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
async initializeDatabase() {
|
||||
try {
|
||||
console.log('初始化数据库连接...');
|
||||
await Database.connect();
|
||||
console.log('数据库连接成功');
|
||||
|
||||
console.log('创建数据表...');
|
||||
await ProxyModel.initTable();
|
||||
await HistoryModel.initTable();
|
||||
await LogsModel.initTable();
|
||||
console.log('数据表初始化完成');
|
||||
|
||||
// 获取初始统计信息
|
||||
const totalCount = await ProxyModel.count();
|
||||
const validCount = await ProxyModel.count(true);
|
||||
console.log(`数据库初始化完成,共 ${totalCount} 个代理,其中 ${validCount} 个可用`);
|
||||
|
||||
// 记录系统启动日志
|
||||
await LogsModel.logInfo('系统启动成功', 'system', {
|
||||
total_proxies: totalCount,
|
||||
valid_proxies: validCount,
|
||||
node_version: process.version,
|
||||
platform: process.platform
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('数据库初始化失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
setupGracefulShutdown() {
|
||||
const gracefulShutdown = async (signal) => {
|
||||
if (this.isShuttingDown) {
|
||||
console.log('正在关闭中,忽略信号:', signal);
|
||||
return;
|
||||
}
|
||||
|
||||
this.isShuttingDown = true;
|
||||
console.log(`收到 ${signal} 信号,开始优雅关闭...`);
|
||||
|
||||
// 停止定时任务
|
||||
console.log('停止定时任务...');
|
||||
this.scheduler.stop();
|
||||
|
||||
// 记录系统关闭日志
|
||||
try {
|
||||
await LogsModel.logInfo('系统正常关闭', 'system', {
|
||||
signal: signal,
|
||||
uptime: process.uptime()
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('记录关闭日志失败:', error);
|
||||
}
|
||||
|
||||
// 关闭数据库连接
|
||||
console.log('关闭数据库连接...');
|
||||
try {
|
||||
await Database.close();
|
||||
} catch (error) {
|
||||
console.error('关闭数据库连接失败:', error);
|
||||
}
|
||||
|
||||
// 关闭HTTP服务器
|
||||
console.log('关闭HTTP服务器...');
|
||||
process.exit(0);
|
||||
};
|
||||
|
||||
// 监听关闭信号
|
||||
process.on('SIGTERM', () => gracefulShutdown('SIGTERM'));
|
||||
process.on('SIGINT', () => gracefulShutdown('SIGINT'));
|
||||
|
||||
// 监听未捕获的异常
|
||||
process.on('uncaughtException', (error) => {
|
||||
console.error('未捕获的异常:', error);
|
||||
gracefulShutdown('uncaughtException');
|
||||
});
|
||||
|
||||
process.on('unhandledRejection', (reason, promise) => {
|
||||
console.error('未处理的Promise拒绝:', reason);
|
||||
console.error('Promise:', promise);
|
||||
});
|
||||
}
|
||||
|
||||
async start() {
|
||||
try {
|
||||
console.log('=== 代理IP服务启动中 ===');
|
||||
console.log('服务端口:', this.port);
|
||||
console.log('启动时间:', new Date().toISOString());
|
||||
|
||||
// 初始化数据库
|
||||
await this.initializeDatabase();
|
||||
|
||||
// 设置中间件和路由
|
||||
this.setupMiddleware();
|
||||
this.setupRoutes();
|
||||
|
||||
// 设置优雅关闭
|
||||
this.setupGracefulShutdown();
|
||||
|
||||
// 启动定时任务
|
||||
console.log('启动定时任务调度器...');
|
||||
this.scheduler.start();
|
||||
|
||||
// 启动HTTP服务器
|
||||
this.server = this.app.listen(this.port, () => {
|
||||
console.log('=== 代理IP服务启动成功 ===');
|
||||
console.log('服务地址: http://localhost:' + this.port);
|
||||
console.log('Web管理界面: http://localhost:' + this.port);
|
||||
console.log(' - 仪表板: http://localhost:' + this.port + '/index.html');
|
||||
console.log(' - 代理管理: http://localhost:' + this.port + '/proxies.html');
|
||||
console.log(' - 执行历史: http://localhost:' + this.port + '/history.html');
|
||||
console.log(' - 系统监控: http://localhost:' + this.port + '/monitoring.html');
|
||||
console.log('API文档: http://localhost:' + this.port + '/api/info');
|
||||
console.log('健康检查: http://localhost:' + this.port + '/api/health');
|
||||
console.log('定时任务已自动启动');
|
||||
});
|
||||
|
||||
// 设置服务器超时
|
||||
this.server.timeout = 30000; // 30秒超时
|
||||
|
||||
} catch (error) {
|
||||
console.error('服务启动失败:', error);
|
||||
process.exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
async stop() {
|
||||
if (this.server) {
|
||||
this.server.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 创建并启动应用
|
||||
const app = new ProxyApp();
|
||||
app.start().catch(error => {
|
||||
console.error('应用启动失败:', error);
|
||||
process.exit(1);
|
||||
});
|
||||
|
||||
module.exports = ProxyApp;
|
||||
82
src/database/db.js
Normal file
82
src/database/db.js
Normal file
@@ -0,0 +1,82 @@
|
||||
const sqlite3 = require('sqlite3').verbose();
|
||||
const path = require('path');
|
||||
const config = require('../../config/database');
|
||||
|
||||
class Database {
|
||||
constructor() {
|
||||
this.db = null;
|
||||
}
|
||||
|
||||
connect() {
|
||||
return new Promise((resolve, reject) => {
|
||||
this.db = new sqlite3.Database(
|
||||
config.development.storage,
|
||||
sqlite3.OPEN_READWRITE | sqlite3.OPEN_CREATE,
|
||||
(err) => {
|
||||
if (err) {
|
||||
console.error('数据库连接失败:', err.message);
|
||||
reject(err);
|
||||
} else {
|
||||
console.log('已连接到 SQLite 数据库');
|
||||
resolve();
|
||||
}
|
||||
}
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
close() {
|
||||
return new Promise((resolve, reject) => {
|
||||
if (this.db) {
|
||||
this.db.close((err) => {
|
||||
if (err) {
|
||||
reject(err);
|
||||
} else {
|
||||
console.log('数据库连接已关闭');
|
||||
resolve();
|
||||
}
|
||||
});
|
||||
} else {
|
||||
resolve();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
run(sql, params = []) {
|
||||
return new Promise((resolve, reject) => {
|
||||
this.db.run(sql, params, function(err) {
|
||||
if (err) {
|
||||
reject(err);
|
||||
} else {
|
||||
resolve({ id: this.lastID, changes: this.changes });
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
get(sql, params = []) {
|
||||
return new Promise((resolve, reject) => {
|
||||
this.db.get(sql, params, (err, row) => {
|
||||
if (err) {
|
||||
reject(err);
|
||||
} else {
|
||||
resolve(row);
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
all(sql, params = []) {
|
||||
return new Promise((resolve, reject) => {
|
||||
this.db.all(sql, params, (err, rows) => {
|
||||
if (err) {
|
||||
reject(err);
|
||||
} else {
|
||||
resolve(rows);
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = new Database();
|
||||
260
src/database/models/history.js
Normal file
260
src/database/models/history.js
Normal file
@@ -0,0 +1,260 @@
|
||||
const db = require('../db');
|
||||
|
||||
class HistoryModel {
|
||||
static async initTable() {
|
||||
const sql = `
|
||||
CREATE TABLE IF NOT EXISTS execution_history (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
task_type TEXT NOT NULL,
|
||||
task_name TEXT NOT NULL,
|
||||
status TEXT NOT NULL DEFAULT 'pending',
|
||||
start_time DATETIME NOT NULL,
|
||||
end_time DATETIME,
|
||||
duration INTEGER,
|
||||
result_summary TEXT,
|
||||
error_message TEXT,
|
||||
details TEXT,
|
||||
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
||||
)
|
||||
`;
|
||||
|
||||
try {
|
||||
await db.run(sql);
|
||||
console.log('执行历史表创建成功或已存在');
|
||||
} catch (error) {
|
||||
console.error('创建执行历史表失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
static async create(historyData) {
|
||||
const sql = `
|
||||
INSERT INTO execution_history
|
||||
(task_type, task_name, status, start_time, end_time, duration, result_summary, error_message, details)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
`;
|
||||
|
||||
const params = [
|
||||
historyData.task_type,
|
||||
historyData.task_name,
|
||||
historyData.status || 'pending',
|
||||
historyData.start_time,
|
||||
historyData.end_time || null,
|
||||
historyData.duration || null,
|
||||
historyData.result_summary || null,
|
||||
historyData.error_message || null,
|
||||
historyData.details ? JSON.stringify(historyData.details) : null
|
||||
];
|
||||
|
||||
try {
|
||||
const result = await db.run(sql, params);
|
||||
return result;
|
||||
} catch (error) {
|
||||
console.error('创建执行历史记录失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
static async update(id, updateData) {
|
||||
const fields = [];
|
||||
const params = [];
|
||||
|
||||
Object.keys(updateData).forEach(key => {
|
||||
if (updateData[key] !== undefined) {
|
||||
fields.push(`${key} = ?`);
|
||||
if (key === 'details') {
|
||||
params.push(JSON.stringify(updateData[key]));
|
||||
} else {
|
||||
params.push(updateData[key]);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
if (fields.length === 0) {
|
||||
throw new Error('没有要更新的字段');
|
||||
}
|
||||
|
||||
const sql = `
|
||||
UPDATE execution_history
|
||||
SET ${fields.join(', ')}
|
||||
WHERE id = ?
|
||||
`;
|
||||
|
||||
params.push(id);
|
||||
|
||||
try {
|
||||
const result = await db.run(sql, params);
|
||||
return result;
|
||||
} catch (error) {
|
||||
console.error('更新执行历史记录失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
static async findAll(taskType = null, status = null, limit = 50, offset = 0) {
|
||||
let sql = 'SELECT * FROM execution_history WHERE 1=1';
|
||||
const params = [];
|
||||
|
||||
if (taskType) {
|
||||
sql += ' AND task_type = ?';
|
||||
params.push(taskType);
|
||||
}
|
||||
|
||||
if (status) {
|
||||
sql += ' AND status = ?';
|
||||
params.push(status);
|
||||
}
|
||||
|
||||
sql += ' ORDER BY start_time DESC LIMIT ? OFFSET ?';
|
||||
params.push(limit, offset);
|
||||
|
||||
try {
|
||||
const histories = await db.all(sql, params);
|
||||
|
||||
// 解析details字段
|
||||
return histories.map(history => ({
|
||||
...history,
|
||||
details: history.details ? JSON.parse(history.details) : null
|
||||
}));
|
||||
} catch (error) {
|
||||
console.error('查询执行历史失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
static async findById(id) {
|
||||
const sql = 'SELECT * FROM execution_history WHERE id = ?';
|
||||
|
||||
try {
|
||||
const history = await db.get(sql, [id]);
|
||||
|
||||
if (history && history.details) {
|
||||
history.details = JSON.parse(history.details);
|
||||
}
|
||||
|
||||
return history;
|
||||
} catch (error) {
|
||||
console.error('根据ID查询执行历史失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
static async getLatest(taskType, limit = 5) {
|
||||
const sql = 'SELECT * FROM execution_history WHERE task_type = ? ORDER BY start_time DESC LIMIT ?';
|
||||
|
||||
try {
|
||||
const histories = await db.all(sql, [taskType, limit]);
|
||||
|
||||
// 解析details字段
|
||||
return histories.map(history => ({
|
||||
...history,
|
||||
details: history.details ? JSON.parse(history.details) : null
|
||||
}));
|
||||
} catch (error) {
|
||||
console.error('查询最新执行历史失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
static async getStatistics(taskType = null, days = 7) {
|
||||
let sql = `
|
||||
SELECT
|
||||
COUNT(*) as total,
|
||||
COUNT(CASE WHEN status = 'success' THEN 1 END) as success,
|
||||
COUNT(CASE WHEN status = 'failed' THEN 1 END) as failed,
|
||||
COUNT(CASE WHEN status = 'running' THEN 1 END) as running,
|
||||
AVG(duration) as avg_duration
|
||||
FROM execution_history
|
||||
WHERE start_time >= datetime('now', '-${days} days')
|
||||
`;
|
||||
|
||||
const params = [];
|
||||
|
||||
if (taskType) {
|
||||
sql += ' AND task_type = ?';
|
||||
params.push(taskType);
|
||||
}
|
||||
|
||||
try {
|
||||
const stats = await db.get(sql, params);
|
||||
return {
|
||||
total: stats.total || 0,
|
||||
success: stats.success || 0,
|
||||
failed: stats.failed || 0,
|
||||
running: stats.running || 0,
|
||||
success_rate: stats.total > 0 ? ((stats.success / stats.total) * 100).toFixed(2) : '0',
|
||||
avg_duration: stats.avg_duration ? Math.round(stats.avg_duration) : 0
|
||||
};
|
||||
} catch (error) {
|
||||
console.error('查询执行历史统计失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
static async getDailyStats(taskType = null, days = 7) {
|
||||
let sql = `
|
||||
SELECT
|
||||
DATE(start_time) as date,
|
||||
COUNT(*) as total,
|
||||
COUNT(CASE WHEN status = 'success' THEN 1 END) as success,
|
||||
COUNT(CASE WHEN status = 'failed' THEN 1 END) as failed,
|
||||
AVG(duration) as avg_duration
|
||||
FROM execution_history
|
||||
WHERE start_time >= datetime('now', '-${days} days')
|
||||
`;
|
||||
|
||||
const params = [];
|
||||
|
||||
if (taskType) {
|
||||
sql += ' AND task_type = ?';
|
||||
params.push(taskType);
|
||||
}
|
||||
|
||||
sql += ' GROUP BY DATE(start_time) ORDER BY date DESC';
|
||||
|
||||
try {
|
||||
const dailyStats = await db.all(sql, params);
|
||||
return dailyStats;
|
||||
} catch (error) {
|
||||
console.error('查询每日统计失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
static async deleteOldRecords(days = 30) {
|
||||
const sql = 'DELETE FROM execution_history WHERE start_time < datetime("now", "-' + days + ' days")';
|
||||
|
||||
try {
|
||||
const result = await db.run(sql);
|
||||
return result;
|
||||
} catch (error) {
|
||||
console.error('删除旧执行历史记录失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
static async count(taskType = null, status = null) {
|
||||
let sql = 'SELECT COUNT(*) as count FROM execution_history WHERE 1=1';
|
||||
const params = [];
|
||||
|
||||
if (taskType) {
|
||||
sql += ' AND task_type = ?';
|
||||
params.push(taskType);
|
||||
}
|
||||
|
||||
if (status) {
|
||||
sql += ' AND status = ?';
|
||||
params.push(status);
|
||||
}
|
||||
|
||||
try {
|
||||
const result = await db.get(sql, params);
|
||||
return result.count;
|
||||
} catch (error) {
|
||||
console.error('统计执行历史记录数量失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = HistoryModel;
|
||||
287
src/database/models/logs.js
Normal file
287
src/database/models/logs.js
Normal file
@@ -0,0 +1,287 @@
|
||||
const db = require('../db');
|
||||
|
||||
class LogsModel {
|
||||
static async initTable() {
|
||||
const sql = `
|
||||
CREATE TABLE IF NOT EXISTS system_logs (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
level TEXT NOT NULL,
|
||||
message TEXT NOT NULL,
|
||||
category TEXT,
|
||||
details TEXT,
|
||||
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
|
||||
source TEXT,
|
||||
user_agent TEXT,
|
||||
ip_address TEXT
|
||||
)
|
||||
`;
|
||||
|
||||
try {
|
||||
await db.run(sql);
|
||||
console.log('系统日志表创建成功或已存在');
|
||||
} catch (error) {
|
||||
console.error('创建系统日志表失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
static async create(logData) {
|
||||
const sql = `
|
||||
INSERT INTO system_logs
|
||||
(level, message, category, details, timestamp, source, user_agent, ip_address)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
||||
`;
|
||||
|
||||
const params = [
|
||||
logData.level,
|
||||
logData.message,
|
||||
logData.category || null,
|
||||
logData.details ? JSON.stringify(logData.details) : null,
|
||||
logData.timestamp || new Date().toISOString(),
|
||||
logData.source || 'system',
|
||||
logData.user_agent || null,
|
||||
logData.ip_address || null
|
||||
];
|
||||
|
||||
try {
|
||||
const result = await db.run(sql, params);
|
||||
return result;
|
||||
} catch (error) {
|
||||
console.error('创建系统日志记录失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
static async log(level, message, category = null, details = null, source = 'system') {
|
||||
return await this.create({
|
||||
level,
|
||||
message,
|
||||
category,
|
||||
details,
|
||||
source
|
||||
});
|
||||
}
|
||||
|
||||
static async logInfo(message, category = null, details = null, source = 'system') {
|
||||
return await this.log('info', message, category, details, source);
|
||||
}
|
||||
|
||||
static async logWarning(message, category = null, details = null, source = 'system') {
|
||||
return await this.log('warning', message, category, details, source);
|
||||
}
|
||||
|
||||
static async logError(message, category = null, details = null, source = 'system') {
|
||||
return await this.log('error', message, category, details, source);
|
||||
}
|
||||
|
||||
static async logDebug(message, category = null, details = null, source = 'system') {
|
||||
return await this.log('debug', message, category, details, source);
|
||||
}
|
||||
|
||||
static async findAll(level = null, category = null, limit = 100, offset = 0) {
|
||||
let sql = 'SELECT * FROM system_logs WHERE 1=1';
|
||||
const params = [];
|
||||
|
||||
if (level) {
|
||||
sql += ' AND level = ?';
|
||||
params.push(level);
|
||||
}
|
||||
|
||||
if (category) {
|
||||
sql += ' AND category = ?';
|
||||
params.push(category);
|
||||
}
|
||||
|
||||
sql += ' ORDER BY timestamp DESC LIMIT ? OFFSET ?';
|
||||
params.push(limit, offset);
|
||||
|
||||
try {
|
||||
const logs = await db.all(sql, params);
|
||||
|
||||
// 解析details字段
|
||||
return logs.map(log => ({
|
||||
...log,
|
||||
details: log.details ? JSON.parse(log.details) : null
|
||||
}));
|
||||
} catch (error) {
|
||||
console.error('查询系统日志失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
static async findById(id) {
|
||||
const sql = 'SELECT * FROM system_logs WHERE id = ?';
|
||||
|
||||
try {
|
||||
const log = await db.get(sql, [id]);
|
||||
|
||||
if (log && log.details) {
|
||||
log.details = JSON.parse(log.details);
|
||||
}
|
||||
|
||||
return log;
|
||||
} catch (error) {
|
||||
console.error('根据ID查询系统日志失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
static async getLatest(level = null, category = null, limit = 20) {
|
||||
let sql = 'SELECT * FROM system_logs WHERE 1=1';
|
||||
const params = [];
|
||||
|
||||
if (level) {
|
||||
sql += ' AND level = ?';
|
||||
params.push(level);
|
||||
}
|
||||
|
||||
if (category) {
|
||||
sql += ' AND category = ?';
|
||||
params.push(category);
|
||||
}
|
||||
|
||||
sql += ' ORDER BY timestamp DESC LIMIT ?';
|
||||
params.push(limit);
|
||||
|
||||
try {
|
||||
const logs = await db.all(sql, params);
|
||||
|
||||
// 解析details字段
|
||||
return logs.map(log => ({
|
||||
...log,
|
||||
details: log.details ? JSON.parse(log.details) : null
|
||||
}));
|
||||
} catch (error) {
|
||||
console.error('查询最新系统日志失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
static async getStatistics(days = 7) {
|
||||
const sql = `
|
||||
SELECT
|
||||
COUNT(*) as total,
|
||||
COUNT(CASE WHEN level = 'error' THEN 1 END) as error,
|
||||
COUNT(CASE WHEN level = 'warning' THEN 1 END) as warning,
|
||||
COUNT(CASE WHEN level = 'info' THEN 1 END) as info,
|
||||
COUNT(CASE WHEN level = 'debug' THEN 1 END) as debug
|
||||
FROM system_logs
|
||||
WHERE timestamp >= datetime('now', '-${days} days')
|
||||
`;
|
||||
|
||||
try {
|
||||
const stats = await db.get(sql);
|
||||
return {
|
||||
total: stats.total || 0,
|
||||
error: stats.error || 0,
|
||||
warning: stats.warning || 0,
|
||||
info: stats.info || 0,
|
||||
debug: stats.debug || 0
|
||||
};
|
||||
} catch (error) {
|
||||
console.error('查询系统日志统计失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
static async getHourlyStats(days = 1) {
|
||||
const sql = `
|
||||
SELECT
|
||||
strftime('%Y-%m-%d %H:00:00', timestamp) as hour,
|
||||
level,
|
||||
COUNT(*) as count
|
||||
FROM system_logs
|
||||
WHERE timestamp >= datetime('now', '-${days} days')
|
||||
GROUP BY hour, level
|
||||
ORDER BY hour DESC
|
||||
`;
|
||||
|
||||
try {
|
||||
const hourlyStats = await db.all(sql);
|
||||
return hourlyStats;
|
||||
} catch (error) {
|
||||
console.error('查询每小时统计失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
static async search(keyword, level = null, category = null, limit = 50) {
|
||||
let sql = 'SELECT * FROM system_logs WHERE message LIKE ?';
|
||||
const params = [`%${keyword}%`];
|
||||
|
||||
if (level) {
|
||||
sql += ' AND level = ?';
|
||||
params.push(level);
|
||||
}
|
||||
|
||||
if (category) {
|
||||
sql += ' AND category = ?';
|
||||
params.push(category);
|
||||
}
|
||||
|
||||
sql += ' ORDER BY timestamp DESC LIMIT ?';
|
||||
params.push(limit);
|
||||
|
||||
try {
|
||||
const logs = await db.all(sql, params);
|
||||
|
||||
// 解析details字段
|
||||
return logs.map(log => ({
|
||||
...log,
|
||||
details: log.details ? JSON.parse(log.details) : null
|
||||
}));
|
||||
} catch (error) {
|
||||
console.error('搜索系统日志失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
static async deleteOldRecords(days = 30) {
|
||||
const sql = 'DELETE FROM system_logs WHERE timestamp < datetime("now", "-' + days + ' days")';
|
||||
|
||||
try {
|
||||
const result = await db.run(sql);
|
||||
return result;
|
||||
} catch (error) {
|
||||
console.error('删除旧系统日志记录失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
static async count(level = null, category = null) {
|
||||
let sql = 'SELECT COUNT(*) as count FROM system_logs WHERE 1=1';
|
||||
const params = [];
|
||||
|
||||
if (level) {
|
||||
sql += ' AND level = ?';
|
||||
params.push(level);
|
||||
}
|
||||
|
||||
if (category) {
|
||||
sql += ' AND category = ?';
|
||||
params.push(category);
|
||||
}
|
||||
|
||||
try {
|
||||
const result = await db.get(sql, params);
|
||||
return result.count;
|
||||
} catch (error) {
|
||||
console.error('统计系统日志记录数量失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
static async getCategories() {
|
||||
const sql = 'SELECT DISTINCT category FROM system_logs WHERE category IS NOT NULL ORDER BY category';
|
||||
|
||||
try {
|
||||
const categories = await db.all(sql);
|
||||
return categories.map(row => row.category);
|
||||
} catch (error) {
|
||||
console.error('获取日志分类失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = LogsModel;
|
||||
178
src/database/models/proxy.js
Normal file
178
src/database/models/proxy.js
Normal file
@@ -0,0 +1,178 @@
|
||||
const db = require('../db');
|
||||
|
||||
class ProxyModel {
|
||||
static async initTable() {
|
||||
const sql = `
|
||||
CREATE TABLE IF NOT EXISTS proxies (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
ip TEXT NOT NULL,
|
||||
port INTEGER NOT NULL,
|
||||
location TEXT,
|
||||
speed INTEGER,
|
||||
last_check_time TEXT,
|
||||
is_valid INTEGER DEFAULT 1,
|
||||
response_time INTEGER,
|
||||
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
||||
UNIQUE(ip, port)
|
||||
)
|
||||
`;
|
||||
|
||||
try {
|
||||
await db.run(sql);
|
||||
console.log('代理表创建成功或已存在');
|
||||
} catch (error) {
|
||||
console.error('创建代理表失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
static async create(proxyData) {
|
||||
const sql = `
|
||||
INSERT OR REPLACE INTO proxies
|
||||
(ip, port, location, speed, last_check_time, is_valid, created_at, updated_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))
|
||||
`;
|
||||
|
||||
const params = [
|
||||
proxyData.ip,
|
||||
proxyData.port,
|
||||
proxyData.location || null,
|
||||
proxyData.speed || null,
|
||||
proxyData.last_check_time || null,
|
||||
proxyData.is_valid !== undefined ? proxyData.is_valid : 1
|
||||
];
|
||||
|
||||
try {
|
||||
const result = await db.run(sql, params);
|
||||
return result;
|
||||
} catch (error) {
|
||||
console.error('插入代理数据失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
static async findAll(validOnly = true) {
|
||||
let sql = 'SELECT * FROM proxies';
|
||||
const params = [];
|
||||
|
||||
if (validOnly) {
|
||||
sql += ' WHERE is_valid = 1';
|
||||
}
|
||||
|
||||
sql += ' ORDER BY response_time ASC, created_at DESC';
|
||||
|
||||
try {
|
||||
const proxies = await db.all(sql, params);
|
||||
return proxies;
|
||||
} catch (error) {
|
||||
console.error('查询代理数据失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
static async findRandom(validOnly = true, limit = 1) {
|
||||
let sql = 'SELECT * FROM proxies';
|
||||
const params = [];
|
||||
|
||||
if (validOnly) {
|
||||
sql += ' WHERE is_valid = 1';
|
||||
}
|
||||
|
||||
sql += ' ORDER BY RANDOM() LIMIT ?';
|
||||
params.push(limit);
|
||||
|
||||
try {
|
||||
const proxies = await db.all(sql, params);
|
||||
return limit === 1 ? proxies[0] : proxies;
|
||||
} catch (error) {
|
||||
console.error('查询随机代理失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
static async findByIpAndPort(ip, port) {
|
||||
const sql = 'SELECT * FROM proxies WHERE ip = ? AND port = ?';
|
||||
|
||||
try {
|
||||
const proxy = await db.get(sql, [ip, port]);
|
||||
return proxy;
|
||||
} catch (error) {
|
||||
console.error('根据IP和端口查询代理失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
static async updateValidity(ip, port, isValid, responseTime = null) {
|
||||
const sql = `
|
||||
UPDATE proxies
|
||||
SET is_valid = ?, response_time = ?, updated_at = datetime('now')
|
||||
WHERE ip = ? AND port = ?
|
||||
`;
|
||||
|
||||
try {
|
||||
const result = await db.run(sql, [isValid, responseTime, ip, port]);
|
||||
return result;
|
||||
} catch (error) {
|
||||
console.error('更新代理有效性失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
static async deleteInvalid() {
|
||||
const sql = 'DELETE FROM proxies WHERE is_valid = 0';
|
||||
|
||||
try {
|
||||
const result = await db.run(sql);
|
||||
return result;
|
||||
} catch (error) {
|
||||
console.error('删除无效代理失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
static async count(validOnly = true) {
|
||||
let sql = 'SELECT COUNT(*) as count FROM proxies';
|
||||
|
||||
if (validOnly) {
|
||||
sql += ' WHERE is_valid = 1';
|
||||
}
|
||||
|
||||
try {
|
||||
const result = await db.get(sql);
|
||||
return result.count;
|
||||
} catch (error) {
|
||||
console.error('统计代理数量失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
static async all(sql, params = []) {
|
||||
try {
|
||||
const proxies = await db.all(sql, params);
|
||||
return proxies;
|
||||
} catch (error) {
|
||||
console.error('执行SQL查询失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
static async findAllForValidation(limit = 50) {
|
||||
const sql = `
|
||||
SELECT * FROM proxies
|
||||
WHERE is_valid = 1
|
||||
ORDER BY updated_at ASC
|
||||
LIMIT ?
|
||||
`;
|
||||
|
||||
try {
|
||||
const proxies = await db.all(sql, [limit]);
|
||||
return proxies;
|
||||
} catch (error) {
|
||||
console.error('查询待验证代理失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = ProxyModel;
|
||||
411
src/routes/dashboard.js
Normal file
411
src/routes/dashboard.js
Normal file
@@ -0,0 +1,411 @@
|
||||
const express = require('express');
|
||||
const router = express.Router();
|
||||
const ProxyModel = require('../database/models/proxy');
|
||||
const HistoryModel = require('../database/models/history');
|
||||
const LogsModel = require('../database/models/logs');
|
||||
const ProxyValidator = require('../services/validator');
|
||||
const ProxyScraper = require('../services/scraper');
|
||||
const ProxyScheduler = require('../services/scheduler');
|
||||
|
||||
const validator = new ProxyValidator();
|
||||
const scraper = new ProxyScraper();
|
||||
|
||||
// 获取仪表板统计数据
|
||||
router.get('/stats', async (req, res) => {
|
||||
try {
|
||||
// 代理统计
|
||||
const proxyStats = await validator.getProxyStatistics();
|
||||
|
||||
// 执行历史统计
|
||||
const scrapeStats = await HistoryModel.getStatistics('scrape', 7);
|
||||
const validationStats = await HistoryModel.getStatistics('validation', 7);
|
||||
|
||||
// 系统日志统计
|
||||
const logStats = await LogsModel.getStatistics(7);
|
||||
|
||||
// 最近执行历史
|
||||
const latestScrape = await HistoryModel.getLatest('scrape', 3);
|
||||
const latestValidation = await HistoryModel.getLatest('validation', 3);
|
||||
|
||||
// 每日统计数据
|
||||
const dailyProxyStats = await getDailyProxyStats(7);
|
||||
const dailyTaskStats = await getDailyTaskStats(7);
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: {
|
||||
proxies: proxyStats,
|
||||
tasks: {
|
||||
scrape: scrapeStats,
|
||||
validation: validationStats
|
||||
},
|
||||
logs: logStats,
|
||||
latest: {
|
||||
scrape: latestScrape,
|
||||
validation: latestValidation
|
||||
},
|
||||
charts: {
|
||||
daily_proxies: dailyProxyStats,
|
||||
daily_tasks: dailyTaskStats
|
||||
},
|
||||
timestamp: new Date().toISOString()
|
||||
}
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('获取仪表板统计数据失败:', error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: '获取统计数据失败',
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// 获取实时系统状态
|
||||
router.get('/status', async (req, res) => {
|
||||
try {
|
||||
// 代理统计
|
||||
const proxyStats = await validator.getProxyStatistics();
|
||||
|
||||
// 当前时间
|
||||
const now = new Date();
|
||||
|
||||
// 今天的执行历史
|
||||
const todayScrape = await HistoryModel.getStatistics('scrape', 1);
|
||||
const todayValidation = await HistoryModel.getStatistics('validation', 1);
|
||||
|
||||
// 最近的日志
|
||||
const recentLogs = await LogsModel.getLatest(null, null, 5);
|
||||
|
||||
// 系统运行时间
|
||||
const uptime = process.uptime();
|
||||
|
||||
// 内存使用情况
|
||||
const memUsage = process.memoryUsage();
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: {
|
||||
timestamp: now.toISOString(),
|
||||
uptime: uptime,
|
||||
memory: {
|
||||
rss: Math.round(memUsage.rss / 1024 / 1024), // MB
|
||||
heapUsed: Math.round(memUsage.heapUsed / 1024 / 1024), // MB
|
||||
heapTotal: Math.round(memUsage.heapTotal / 1024 / 1024) // MB
|
||||
},
|
||||
proxies: proxyStats,
|
||||
today_tasks: {
|
||||
scrape: todayScrape,
|
||||
validation: todayValidation
|
||||
},
|
||||
recent_logs: recentLogs,
|
||||
next_runs: getNextRunTimes()
|
||||
}
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('获取实时状态失败:', error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: '获取状态失败',
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// 获取代理统计图表数据
|
||||
router.get('/charts/proxies', async (req, res) => {
|
||||
try {
|
||||
const days = parseInt(req.query.days) || 7;
|
||||
const chartData = await getDailyProxyStats(days);
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: chartData
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('获取代理图表数据失败:', error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: '获取图表数据失败',
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// 获取任务执行图表数据
|
||||
router.get('/charts/tasks', async (req, res) => {
|
||||
try {
|
||||
const days = parseInt(req.query.days) || 7;
|
||||
const chartData = await getDailyTaskStats(days);
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: chartData
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('获取任务图表数据失败:', error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: '获取图表数据失败',
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// 获取日志统计图表数据
|
||||
router.get('/charts/logs', async (req, res) => {
|
||||
try {
|
||||
const days = parseInt(req.query.days) || 7;
|
||||
const chartData = await LogsModel.getHourlyStats(days);
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: chartData
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('获取日志图表数据失败:', error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: '获取图表数据失败',
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// 快速操作接口
|
||||
router.post('/actions/scrape', async (req, res) => {
|
||||
try {
|
||||
// 创建调度器实例
|
||||
const scheduler = require('../services/scheduler');
|
||||
const ProxyScheduler = require('../services/scheduler');
|
||||
const schedulerInstance = new ProxyScheduler();
|
||||
|
||||
// 检查是否有抓取任务正在进行
|
||||
const ProxyScraper = require('../services/scraper');
|
||||
if (ProxyScraper.isScrapingInProgress()) {
|
||||
return res.status(409).json({
|
||||
success: false,
|
||||
error: '有抓取任务正在进行',
|
||||
message: '请等待当前抓取任务完成后再试'
|
||||
});
|
||||
}
|
||||
|
||||
// 创建执行历史记录
|
||||
const historyId = await createTaskHistory('scrape', '手动触发抓取任务');
|
||||
|
||||
// 异步执行定时抓取任务(与定时任务相同的逻辑)
|
||||
schedulerInstance.runScrapeTask().then(async (result) => {
|
||||
if (result.skipped) {
|
||||
await updateTaskHistory(historyId, 'skipped', result, result.message);
|
||||
await LogsModel.logInfo(`手动抓取任务跳过: ${result.message}`, 'manual_action', result);
|
||||
} else {
|
||||
await updateTaskHistory(historyId, 'success', {
|
||||
scraped: result.scraped,
|
||||
total: result.total,
|
||||
valid: result.valid
|
||||
});
|
||||
await LogsModel.logInfo(`手动抓取任务完成: 抓取${result.scraped}个代理`, 'manual_action', result);
|
||||
}
|
||||
}).catch(async (error) => {
|
||||
await updateTaskHistory(historyId, 'failed', null, error.message);
|
||||
await LogsModel.logError(`手动抓取任务失败: ${error.message}`, 'manual_action', { error: error.message });
|
||||
});
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
message: '已触发定时抓取任务',
|
||||
history_id: historyId
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('启动抓取任务失败:', error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: '启动抓取任务失败',
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
router.post('/actions/validate', async (req, res) => {
|
||||
try {
|
||||
const { limit = 50 } = req.body;
|
||||
|
||||
// 创建执行历史记录
|
||||
const historyId = await createTaskHistory('validation', '手动验证任务');
|
||||
|
||||
// 异步执行验证任务
|
||||
validator.validateDatabaseProxies(limit).then(async (result) => {
|
||||
await updateTaskHistory(historyId, 'success', {
|
||||
validated: result.validated,
|
||||
valid: result.valid,
|
||||
invalid: result.invalid
|
||||
});
|
||||
await LogsModel.logInfo(`手动验证任务完成: 验证${result.validated}个代理`, 'manual_action', result);
|
||||
}).catch(async (error) => {
|
||||
await updateTaskHistory(historyId, 'failed', null, error.message);
|
||||
await LogsModel.logError(`手动验证任务失败: ${error.message}`, 'manual_action', { error: error.message });
|
||||
});
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
message: `验证任务已启动,将验证 ${limit} 个代理`,
|
||||
history_id: historyId
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('启动验证任务失败:', error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: '启动验证任务失败',
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// 辅助函数
|
||||
|
||||
// 获取每日代理统计
|
||||
async function getDailyProxyStats(days) {
|
||||
const db = require('../database/db');
|
||||
|
||||
const sql = `
|
||||
SELECT
|
||||
DATE(created_at) as date,
|
||||
COUNT(*) as total_added,
|
||||
COUNT(CASE WHEN is_valid = 1 THEN 1 END) as valid_added
|
||||
FROM proxies
|
||||
WHERE created_at >= datetime('now', '-${days} days')
|
||||
GROUP BY DATE(created_at)
|
||||
ORDER BY date
|
||||
`;
|
||||
|
||||
try {
|
||||
const dailyStats = await db.all(sql);
|
||||
return dailyStats;
|
||||
} catch (error) {
|
||||
console.error('获取每日代理统计失败:', error);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
// 获取每日任务统计
|
||||
async function getDailyTaskStats(days) {
|
||||
try {
|
||||
const scrapeStats = await HistoryModel.getDailyStats('scrape', days);
|
||||
const validationStats = await HistoryModel.getDailyStats('validation', days);
|
||||
|
||||
// 合并数据
|
||||
const mergedStats = {};
|
||||
const dates = new Set();
|
||||
|
||||
scrapeStats.forEach(stat => {
|
||||
mergedStats[stat.date] = {
|
||||
date: stat.date,
|
||||
scrape_total: stat.total,
|
||||
scrape_success: stat.success,
|
||||
scrape_failed: stat.failed,
|
||||
validation_total: 0,
|
||||
validation_success: 0,
|
||||
validation_failed: 0
|
||||
};
|
||||
dates.add(stat.date);
|
||||
});
|
||||
|
||||
validationStats.forEach(stat => {
|
||||
if (mergedStats[stat.date]) {
|
||||
mergedStats[stat.date].validation_total = stat.total;
|
||||
mergedStats[stat.date].validation_success = stat.success;
|
||||
mergedStats[stat.date].validation_failed = stat.failed;
|
||||
} else {
|
||||
mergedStats[stat.date] = {
|
||||
date: stat.date,
|
||||
scrape_total: 0,
|
||||
scrape_success: 0,
|
||||
scrape_failed: 0,
|
||||
validation_total: stat.total,
|
||||
validation_success: stat.success,
|
||||
validation_failed: stat.failed
|
||||
};
|
||||
dates.add(stat.date);
|
||||
}
|
||||
});
|
||||
|
||||
return Object.values(mergedStats).sort((a, b) => new Date(a.date) - new Date(b.date));
|
||||
} catch (error) {
|
||||
console.error('获取每日任务统计失败:', error);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
// 获取下次执行时间
|
||||
function getNextRunTimes() {
|
||||
const now = new Date();
|
||||
const nextScrape = new Date(now);
|
||||
const nextValidation = new Date(now);
|
||||
const nextHealthCheck = new Date(now);
|
||||
|
||||
// 下次抓取时间(每小时整点)
|
||||
nextScrape.setHours(nextScrape.getHours() + 1, 0, 0, 0);
|
||||
|
||||
// 下次验证时间(每10分钟)
|
||||
const minutes = now.getMinutes();
|
||||
const nextMinute = Math.ceil((minutes + 1) / 10) * 10;
|
||||
nextValidation.setMinutes(nextMinute, 0, 0);
|
||||
|
||||
// 下次健康检查时间(每小时30分)
|
||||
if (now.getMinutes() < 30) {
|
||||
nextHealthCheck.setHours(now.getHours(), 30, 0, 0);
|
||||
} else {
|
||||
nextHealthCheck.setHours(now.getHours() + 1, 30, 0, 0);
|
||||
}
|
||||
|
||||
return {
|
||||
scrape: nextScrape.toISOString(),
|
||||
validation: nextValidation.toISOString(),
|
||||
healthCheck: nextHealthCheck.toISOString()
|
||||
};
|
||||
}
|
||||
|
||||
// 创建任务历史记录
|
||||
async function createTaskHistory(taskType, taskName) {
|
||||
try {
|
||||
const result = await HistoryModel.create({
|
||||
task_type: taskType,
|
||||
task_name: taskName,
|
||||
status: 'running',
|
||||
start_time: new Date().toISOString()
|
||||
});
|
||||
|
||||
return result.id;
|
||||
} catch (error) {
|
||||
console.error('创建任务历史记录失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
// 更新任务历史记录
|
||||
async function updateTaskHistory(id, status, details = null, errorMessage = null) {
|
||||
try {
|
||||
const updateData = {
|
||||
status: status,
|
||||
end_time: new Date().toISOString(),
|
||||
duration: null,
|
||||
details: details,
|
||||
error_message: errorMessage
|
||||
};
|
||||
|
||||
// 计算执行时长
|
||||
const history = await HistoryModel.findById(id);
|
||||
if (history && history.start_time) {
|
||||
const startTime = new Date(history.start_time);
|
||||
const endTime = new Date();
|
||||
updateData.duration = endTime.getTime() - startTime.getTime();
|
||||
}
|
||||
|
||||
await HistoryModel.update(id, updateData);
|
||||
} catch (error) {
|
||||
console.error('更新任务历史记录失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = router;
|
||||
337
src/routes/history.js
Normal file
337
src/routes/history.js
Normal file
@@ -0,0 +1,337 @@
|
||||
const express = require('express');
|
||||
const router = express.Router();
|
||||
const HistoryModel = require('../database/models/history');
|
||||
const LogsModel = require('../database/models/logs');
|
||||
|
||||
// 获取执行历史列表
|
||||
router.get('/', async (req, res) => {
|
||||
try {
|
||||
const taskType = req.query.taskType || null;
|
||||
const status = req.query.status || null;
|
||||
const limit = Math.min(parseInt(req.query.limit) || 50, 200);
|
||||
const offset = parseInt(req.query.offset) || 0;
|
||||
|
||||
const histories = await HistoryModel.findAll(taskType, status, limit, offset);
|
||||
const totalCount = await HistoryModel.count(taskType, status);
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: histories,
|
||||
pagination: {
|
||||
total: totalCount,
|
||||
limit: limit,
|
||||
offset: offset,
|
||||
hasMore: offset + limit < totalCount
|
||||
}
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('获取执行历史失败:', error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: '获取执行历史失败',
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// 获取执行历史详情
|
||||
router.get('/:id', async (req, res) => {
|
||||
try {
|
||||
const id = parseInt(req.params.id);
|
||||
|
||||
if (isNaN(id)) {
|
||||
return res.status(400).json({
|
||||
success: false,
|
||||
error: '无效的历史记录ID'
|
||||
});
|
||||
}
|
||||
|
||||
const history = await HistoryModel.findById(id);
|
||||
|
||||
if (!history) {
|
||||
return res.status(404).json({
|
||||
success: false,
|
||||
error: '未找到指定的历史记录'
|
||||
});
|
||||
}
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: history
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('获取执行历史详情失败:', error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: '获取执行历史详情失败',
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// 获取执行历史统计
|
||||
router.get('/stats/summary', async (req, res) => {
|
||||
try {
|
||||
const taskType = req.query.taskType || null;
|
||||
const days = Math.min(parseInt(req.query.days) || 7, 30);
|
||||
|
||||
const stats = await HistoryModel.getStatistics(taskType, days);
|
||||
const dailyStats = await HistoryModel.getDailyStats(taskType, days);
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: {
|
||||
summary: stats,
|
||||
daily: dailyStats
|
||||
}
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('获取执行历史统计失败:', error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: '获取执行历史统计失败',
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// 获取系统日志列表
|
||||
router.get('/logs/system', async (req, res) => {
|
||||
try {
|
||||
const level = req.query.level || null;
|
||||
const category = req.query.category || null;
|
||||
const limit = Math.min(parseInt(req.query.limit) || 100, 500);
|
||||
const offset = parseInt(req.query.offset) || 0;
|
||||
|
||||
const logs = await LogsModel.findAll(level, category, limit, offset);
|
||||
const totalCount = await LogsModel.count(level, category);
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: logs,
|
||||
pagination: {
|
||||
total: totalCount,
|
||||
limit: limit,
|
||||
offset: offset,
|
||||
hasMore: offset + limit < totalCount
|
||||
}
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('获取系统日志失败:', error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: '获取系统日志失败',
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// 搜索系统日志
|
||||
router.get('/logs/search', async (req, res) => {
|
||||
try {
|
||||
const keyword = req.query.keyword;
|
||||
const level = req.query.level || null;
|
||||
const category = req.query.category || null;
|
||||
const limit = Math.min(parseInt(req.query.limit) || 50, 200);
|
||||
|
||||
if (!keyword || keyword.trim().length === 0) {
|
||||
return res.status(400).json({
|
||||
success: false,
|
||||
error: '搜索关键词不能为空'
|
||||
});
|
||||
}
|
||||
|
||||
const logs = await LogsModel.search(keyword.trim(), level, category, limit);
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: logs,
|
||||
count: logs.length
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('搜索系统日志失败:', error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: '搜索系统日志失败',
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// 获取日志统计
|
||||
router.get('/logs/stats', async (req, res) => {
|
||||
try {
|
||||
const days = Math.min(parseInt(req.query.days) || 7, 30);
|
||||
|
||||
const stats = await LogsModel.getStatistics(days);
|
||||
const hourlyStats = await LogsModel.getHourlyStats(days);
|
||||
const categories = await LogsModel.getCategories();
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: {
|
||||
summary: stats,
|
||||
hourly: hourlyStats,
|
||||
categories: categories
|
||||
}
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('获取日志统计失败:', error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: '获取日志统计失败',
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// 获取日志分类
|
||||
router.get('/logs/categories', async (req, res) => {
|
||||
try {
|
||||
const categories = await LogsModel.getCategories();
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: categories
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('获取日志分类失败:', error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: '获取日志分类失败',
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// 清理旧记录
|
||||
router.delete('/cleanup', async (req, res) => {
|
||||
try {
|
||||
const { days = 30, type = 'all' } = req.body;
|
||||
|
||||
if (days < 1 || days > 365) {
|
||||
return res.status(400).json({
|
||||
success: false,
|
||||
error: '保留天数必须在1-365之间'
|
||||
});
|
||||
}
|
||||
|
||||
let deletedHistory = 0;
|
||||
let deletedLogs = 0;
|
||||
|
||||
if (type === 'all' || type === 'history') {
|
||||
const historyResult = await HistoryModel.deleteOldRecords(days);
|
||||
deletedHistory = historyResult.changes || 0;
|
||||
}
|
||||
|
||||
if (type === 'all' || type === 'logs') {
|
||||
const logsResult = await LogsModel.deleteOldRecords(days);
|
||||
deletedLogs = logsResult.changes || 0;
|
||||
}
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
message: `清理完成,删除了 ${deletedHistory} 条历史记录和 ${deletedLogs} 条日志记录`,
|
||||
data: {
|
||||
deleted_history: deletedHistory,
|
||||
deleted_logs: deletedLogs,
|
||||
days: days
|
||||
}
|
||||
});
|
||||
|
||||
// 记录清理操作日志
|
||||
await LogsModel.logInfo(
|
||||
`手动清理旧记录:删除${deletedHistory}条历史记录和${deletedLogs}条日志记录`,
|
||||
'cleanup',
|
||||
{ deleted_history: deletedHistory, deleted_logs: deletedLogs, days: days },
|
||||
'manual_action'
|
||||
);
|
||||
} catch (error) {
|
||||
console.error('清理旧记录失败:', error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: '清理旧记录失败',
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// 获取任务类型统计
|
||||
router.get('/task-types', async (req, res) => {
|
||||
try {
|
||||
const sql = `
|
||||
SELECT DISTINCT task_type
|
||||
FROM execution_history
|
||||
ORDER BY task_type
|
||||
`;
|
||||
|
||||
const taskTypes = await HistoryModel.all(sql);
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: taskTypes.map(row => row.task_type)
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('获取任务类型失败:', error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: '获取任务类型失败',
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// 导出历史记录
|
||||
router.get('/export', async (req, res) => {
|
||||
try {
|
||||
const taskType = req.query.taskType || null;
|
||||
const status = req.query.status || null;
|
||||
const format = req.query.format || 'json';
|
||||
|
||||
const histories = await HistoryModel.findAll(taskType, status, 1000, 0);
|
||||
|
||||
if (format === 'csv') {
|
||||
// 生成CSV格式
|
||||
const csvHeader = 'ID,任务类型,任务名称,状态,开始时间,结束时间,执行时长(ms),结果摘要,错误信息\n';
|
||||
const csvData = histories.map(history => {
|
||||
return [
|
||||
history.id,
|
||||
history.task_type,
|
||||
history.task_name,
|
||||
history.status,
|
||||
history.start_time,
|
||||
history.end_time || '',
|
||||
history.duration || '',
|
||||
`"${(history.result_summary || '').replace(/"/g, '""')}"`,
|
||||
`"${(history.error_message || '').replace(/"/g, '""')}"`
|
||||
].join(',');
|
||||
}).join('\n');
|
||||
|
||||
const csv = csvHeader + csvData;
|
||||
|
||||
res.setHeader('Content-Type', 'text/csv');
|
||||
res.setHeader('Content-Disposition', `attachment; filename=execution_history_${new Date().toISOString().slice(0, 10)}.csv`);
|
||||
res.send(csv);
|
||||
} else {
|
||||
// JSON格式
|
||||
res.setHeader('Content-Type', 'application/json');
|
||||
res.setHeader('Content-Disposition', `attachment; filename=execution_history_${new Date().toISOString().slice(0, 10)}.json`);
|
||||
res.json({
|
||||
success: true,
|
||||
export_time: new Date().toISOString(),
|
||||
filters: { taskType, status },
|
||||
count: histories.length,
|
||||
data: histories
|
||||
});
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('导出历史记录失败:', error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: '导出历史记录失败',
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
module.exports = router;
|
||||
485
src/routes/proxies.js
Normal file
485
src/routes/proxies.js
Normal file
@@ -0,0 +1,485 @@
|
||||
const express = require('express');
|
||||
const router = express.Router();
|
||||
const ProxyModel = require('../database/models/proxy');
|
||||
const ProxyValidator = require('../services/validator');
|
||||
const ProxyScraper = require('../services/scraper');
|
||||
|
||||
const validator = new ProxyValidator();
|
||||
const scraper = new ProxyScraper();
|
||||
|
||||
// 获取所有可用代理
|
||||
router.get('/', async (req, res) => {
|
||||
try {
|
||||
const limit = parseInt(req.query.limit) || 100;
|
||||
const offset = parseInt(req.query.offset) || 0;
|
||||
const sortBy = req.query.sortBy || 'response_time';
|
||||
const order = req.query.order || 'ASC';
|
||||
|
||||
let sql = 'SELECT * FROM proxies WHERE is_valid = 1';
|
||||
const params = [];
|
||||
|
||||
// 添加排序
|
||||
const allowedSortFields = ['response_time', 'created_at', 'updated_at', 'ip', 'port'];
|
||||
if (allowedSortFields.includes(sortBy)) {
|
||||
sql += ` ORDER BY ${sortBy} ${order.toUpperCase() === 'DESC' ? 'DESC' : 'ASC'}`;
|
||||
} else {
|
||||
sql += ' ORDER BY response_time ASC';
|
||||
}
|
||||
|
||||
// 添加分页
|
||||
sql += ' LIMIT ? OFFSET ?';
|
||||
params.push(limit, offset);
|
||||
|
||||
const proxies = await ProxyModel.all(sql, params);
|
||||
const totalCount = await ProxyModel.count(true);
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: proxies,
|
||||
pagination: {
|
||||
total: totalCount,
|
||||
limit: limit,
|
||||
offset: offset,
|
||||
hasMore: offset + limit < totalCount
|
||||
}
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('获取代理列表失败:', error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: '获取代理列表失败',
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// 获取随机可用代理
|
||||
router.get('/random', async (req, res) => {
|
||||
try {
|
||||
const count = Math.min(parseInt(req.query.count) || 1, 10); // 最多返回10个
|
||||
|
||||
if (count === 1) {
|
||||
const proxy = await ProxyModel.findRandom(true, 1);
|
||||
if (!proxy) {
|
||||
return res.status(404).json({
|
||||
success: false,
|
||||
error: '没有可用的代理'
|
||||
});
|
||||
}
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: proxy
|
||||
});
|
||||
} else {
|
||||
const proxies = await ProxyModel.findRandom(true, count);
|
||||
res.json({
|
||||
success: true,
|
||||
data: proxies,
|
||||
count: proxies.length
|
||||
});
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('获取随机代理失败:', error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: '获取随机代理失败',
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// 获取代理统计信息
|
||||
router.get('/stats', async (req, res) => {
|
||||
try {
|
||||
const stats = await validator.getProxyStatistics();
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: stats
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('获取统计信息失败:', error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: '获取统计信息失败',
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// 手动验证代理
|
||||
router.post('/verify', async (req, res) => {
|
||||
try {
|
||||
const { ip, port } = req.body;
|
||||
|
||||
if (!ip || !port) {
|
||||
return res.status(400).json({
|
||||
success: false,
|
||||
error: 'IP和端口不能为空'
|
||||
});
|
||||
}
|
||||
|
||||
const result = await validator.validateSingleProxy(ip, port);
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: result
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('验证代理失败:', error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: '验证代理失败',
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// 批量验证代理
|
||||
router.post('/verify-batch', async (req, res) => {
|
||||
try {
|
||||
const { proxies } = req.body;
|
||||
|
||||
if (!Array.isArray(proxies) || proxies.length === 0) {
|
||||
return res.status(400).json({
|
||||
success: false,
|
||||
error: '代理列表不能为空'
|
||||
});
|
||||
}
|
||||
|
||||
// 验证格式
|
||||
const validProxies = proxies.filter(p => p.ip && p.port);
|
||||
if (validProxies.length === 0) {
|
||||
return res.status(400).json({
|
||||
success: false,
|
||||
error: '没有有效的代理格式'
|
||||
});
|
||||
}
|
||||
|
||||
const results = await validator.validateMultipleProxies(validProxies, 3);
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: {
|
||||
validated: results.length,
|
||||
valid: results.filter(r => r.isValid).length,
|
||||
invalid: results.filter(r => !r.isValid).length,
|
||||
results: results
|
||||
}
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('批量验证代理失败:', error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: '批量验证代理失败',
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// 手动触发抓取任务
|
||||
router.post('/scrape', async (req, res) => {
|
||||
try {
|
||||
const { pages = 40 } = req.body;
|
||||
|
||||
// if (pages < 1 || pages > 10) {
|
||||
// return res.status(400).json({
|
||||
// success: false,
|
||||
// error: '页数必须在1-10之间'
|
||||
// });
|
||||
// }
|
||||
|
||||
// 检查是否有抓取任务正在进行
|
||||
const ProxyScraper = require('../services/scraper');
|
||||
if (ProxyScraper.isScrapingInProgress()) {
|
||||
return res.status(409).json({
|
||||
success: false,
|
||||
error: '有抓取任务正在进行',
|
||||
message: '请等待当前抓取任务完成后再试'
|
||||
});
|
||||
}
|
||||
|
||||
console.log(`手动触发抓取任务,页数: ${pages}`);
|
||||
|
||||
// 异步执行抓取任务
|
||||
scraper.scrapeMultiplePages(pages).then(result => {
|
||||
console.log('手动抓取任务完成:', result);
|
||||
}).catch(error => {
|
||||
console.error('手动抓取任务失败:', error);
|
||||
});
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
message: `抓取任务已启动,将抓取前 ${pages} 页`
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('触发抓取任务失败:', error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: '触发抓取任务失败',
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// 手动触发验证任务
|
||||
router.post('/validate-all', async (req, res) => {
|
||||
try {
|
||||
const { limit = 50 } = req.body;
|
||||
|
||||
if (limit < 1 || limit > 200) {
|
||||
return res.status(400).json({
|
||||
success: false,
|
||||
error: '验证数量必须在1-200之间'
|
||||
});
|
||||
}
|
||||
|
||||
console.log(`手动触发验证任务,数量: ${limit}`);
|
||||
|
||||
// 异步执行验证任务
|
||||
validator.validateDatabaseProxies(limit).then(result => {
|
||||
console.log('手动验证任务完成:', result);
|
||||
}).catch(error => {
|
||||
console.error('手动验证任务失败:', error);
|
||||
});
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
message: `验证任务已启动,将验证 ${limit} 个代理`
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('触发验证任务失败:', error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: '触发验证任务失败',
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// 根据IP和端口查找代理
|
||||
router.get('/search', async (req, res) => {
|
||||
try {
|
||||
const { ip, port } = req.query;
|
||||
|
||||
if (!ip || !port) {
|
||||
return res.status(400).json({
|
||||
success: false,
|
||||
error: 'IP和端口参数不能为空'
|
||||
});
|
||||
}
|
||||
|
||||
const proxy = await ProxyModel.findByIpAndPort(ip, port);
|
||||
|
||||
if (!proxy) {
|
||||
return res.status(404).json({
|
||||
success: false,
|
||||
error: '未找到指定的代理'
|
||||
});
|
||||
}
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: proxy
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('查找代理失败:', error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: '查找代理失败',
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// 删除无效代理
|
||||
router.delete('/cleanup', async (req, res) => {
|
||||
try {
|
||||
const result = await ProxyModel.deleteInvalid();
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
message: `已删除 ${result.changes} 个无效代理`
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('清理无效代理失败:', error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: '清理无效代理失败',
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// 检查抓取任务状态
|
||||
router.get('/scraping-status', (req, res) => {
|
||||
try {
|
||||
const ProxyScraper = require('../services/scraper');
|
||||
const isInProgress = ProxyScraper.isScrapingInProgress();
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: {
|
||||
isScrapingInProgress: isInProgress,
|
||||
message: isInProgress ? '有抓取任务正在进行' : '当前没有抓取任务'
|
||||
}
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('获取抓取状态失败:', error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: '获取抓取状态失败',
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// 强制清除抓取标志(用于异常情况)
|
||||
router.post('/clear-scraping-flag', (req, res) => {
|
||||
try {
|
||||
const ProxyScraper = require('../services/scraper');
|
||||
ProxyScraper.clearScrapingFlag();
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
message: '已强制清除抓取进行中标志'
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('清除抓取标志失败:', error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: '清除抓取标志失败',
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// 导入代理JSON
|
||||
router.post('/import', async (req, res) => {
|
||||
try {
|
||||
const { proxies } = req.body;
|
||||
|
||||
if (!Array.isArray(proxies) || proxies.length === 0) {
|
||||
return res.status(400).json({
|
||||
success: false,
|
||||
error: '代理列表不能为空'
|
||||
});
|
||||
}
|
||||
|
||||
console.log(`开始导入 ${proxies.length} 个代理`);
|
||||
|
||||
// 验证格式并转换为标准格式
|
||||
const validProxies = [];
|
||||
const invalidProxies = [];
|
||||
|
||||
for (const proxy of proxies) {
|
||||
let ip, port;
|
||||
|
||||
// 支持多种字段名
|
||||
if (proxy.ip_address && proxy.port) {
|
||||
ip = proxy.ip_address;
|
||||
port = proxy.port;
|
||||
} else if (proxy.ip && proxy.port) {
|
||||
ip = proxy.ip;
|
||||
port = proxy.port;
|
||||
} else if (proxy.host && proxy.port) {
|
||||
ip = proxy.host;
|
||||
port = proxy.port;
|
||||
} else {
|
||||
invalidProxies.push(proxy);
|
||||
continue;
|
||||
}
|
||||
|
||||
// 验证IP和端口格式
|
||||
if (/^\d+\.\d+\.\d+\.\d+$/.test(ip) && /^\d+$/.test(port)) {
|
||||
const portNum = parseInt(port);
|
||||
if (portNum > 0 && portNum <= 65535) {
|
||||
validProxies.push({
|
||||
ip: ip,
|
||||
port: portNum,
|
||||
location: proxy.location || proxy.area || null,
|
||||
speed: proxy.speed || null,
|
||||
last_check_time: new Date().toISOString().slice(0, 19).replace('T', ' ')
|
||||
});
|
||||
} else {
|
||||
invalidProxies.push(proxy);
|
||||
}
|
||||
} else {
|
||||
invalidProxies.push(proxy);
|
||||
}
|
||||
}
|
||||
|
||||
if (validProxies.length === 0) {
|
||||
return res.status(400).json({
|
||||
success: false,
|
||||
error: '没有有效的代理格式',
|
||||
details: {
|
||||
total: proxies.length,
|
||||
valid: 0,
|
||||
invalid: invalidProxies.length
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
console.log(`验证完成,有效代理: ${validProxies.length},无效代理: ${invalidProxies.length}`);
|
||||
|
||||
// 立即验证代理
|
||||
const ProxyValidator = require('../services/validator');
|
||||
const validator = new ProxyValidator();
|
||||
|
||||
const validationResult = await validator.validateMultipleProxies(validProxies, 10);
|
||||
|
||||
// 保存验证通过的代理到数据库
|
||||
let savedCount = 0;
|
||||
for (const result of validationResult) {
|
||||
if (result.isValid) {
|
||||
try {
|
||||
await ProxyModel.create({
|
||||
ip: result.ip,
|
||||
port: result.port,
|
||||
location: null,
|
||||
speed: null,
|
||||
last_check_time: new Date().toISOString().slice(0, 19).replace('T', ' '),
|
||||
is_valid: 1,
|
||||
response_time: result.responseTime
|
||||
});
|
||||
savedCount++;
|
||||
console.log(`✓ 保存代理: ${result.ip}:${result.port} - ${result.responseTime}ms`);
|
||||
} catch (error) {
|
||||
if (!error.message.includes('UNIQUE constraint failed')) {
|
||||
console.warn(`保存代理 ${result.ip}:${result.port} 失败:`, error.message);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const validCount = validationResult.filter(r => r.isValid).length;
|
||||
|
||||
console.log(`导入完成: 总数 ${proxies.length},格式有效 ${validProxies.length},验证通过 ${validCount},保存成功 ${savedCount}`);
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: {
|
||||
total: proxies.length,
|
||||
format_valid: validProxies.length,
|
||||
format_invalid: invalidProxies.length,
|
||||
validated: validationResult.length,
|
||||
valid: validCount,
|
||||
invalid: validationResult.length - validCount,
|
||||
saved: savedCount,
|
||||
results: validationResult
|
||||
},
|
||||
message: `导入完成:保存了 ${savedCount} 个可用代理`
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('导入代理失败:', error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: '导入代理失败',
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
module.exports = router;
|
||||
398
src/services/scheduler.js
Normal file
398
src/services/scheduler.js
Normal file
@@ -0,0 +1,398 @@
|
||||
const cron = require('node-cron');
|
||||
const ProxyScraper = require('./scraper');
|
||||
const ProxyValidator = require('./validator');
|
||||
const ProxyModel = require('../database/models/proxy');
|
||||
const HistoryModel = require('../database/models/history');
|
||||
const LogsModel = require('../database/models/logs');
|
||||
|
||||
class ProxyScheduler {
|
||||
constructor() {
|
||||
this.scraper = new ProxyScraper();
|
||||
this.validator = new ProxyValidator();
|
||||
this.tasks = new Map();
|
||||
}
|
||||
|
||||
async log(message) {
|
||||
const timestamp = new Date().toISOString().slice(0, 19).replace('T', ' ');
|
||||
console.log(`[${timestamp}] ${message}`);
|
||||
}
|
||||
|
||||
// 抓取任务 - 每小时执行一次
|
||||
async runScrapeTask() {
|
||||
let historyId = null;
|
||||
|
||||
try {
|
||||
await this.log('开始执行定时抓取任务');
|
||||
|
||||
// 检查是否有抓取任务正在进行
|
||||
if (this.scraper.constructor.isScrapingInProgress()) {
|
||||
await this.log('⚠️ 有抓取任务正在进行,跳过本次定时抓取任务');
|
||||
return {
|
||||
scraped: 0,
|
||||
total: 0,
|
||||
valid: 0,
|
||||
skipped: true,
|
||||
message: '有抓取任务正在进行,跳过本次执行'
|
||||
};
|
||||
}
|
||||
|
||||
// 创建执行历史记录
|
||||
historyId = await HistoryModel.create({
|
||||
task_type: 'scrape',
|
||||
task_name: '定时抓取任务',
|
||||
status: 'running',
|
||||
start_time: new Date().toISOString()
|
||||
});
|
||||
|
||||
// 抓取代理
|
||||
const scrapeResult = await this.scraper.scrape();
|
||||
|
||||
// 更新历史记录
|
||||
const duration = Date.now() - new Date(historyId ? await this.getHistoryStartTime(historyId) : Date.now());
|
||||
await HistoryModel.update(historyId, {
|
||||
status: 'success',
|
||||
end_time: new Date().toISOString(),
|
||||
duration: duration,
|
||||
result_summary: `抓取 ${scrapeResult.scraped} 个代理,可用 ${scrapeResult.valid} 个`,
|
||||
details: scrapeResult
|
||||
});
|
||||
|
||||
if (scrapeResult.skipped) {
|
||||
await this.log(`定时抓取任务跳过: ${scrapeResult.message}`);
|
||||
|
||||
// 更新历史记录
|
||||
if (historyId) {
|
||||
await HistoryModel.update(historyId, {
|
||||
status: 'skipped',
|
||||
end_time: new Date().toISOString(),
|
||||
result_summary: scrapeResult.message,
|
||||
details: scrapeResult
|
||||
});
|
||||
}
|
||||
} else {
|
||||
await this.log(`定时抓取任务完成: 抓取 ${scrapeResult.scraped} 个,总数 ${scrapeResult.total},可用 ${scrapeResult.valid}`);
|
||||
|
||||
// 获取当前统计信息
|
||||
const stats = await this.validator.getProxyStatistics();
|
||||
await this.log(`当前代理状态: 总数 ${stats.total},可用 ${stats.valid},无效 ${stats.invalid},可用率 ${stats.validRate}`);
|
||||
|
||||
// 记录系统日志
|
||||
await LogsModel.logInfo(
|
||||
`定时抓取任务完成: 抓取${scrapeResult.scraped}个代理,可用${scrapeResult.valid}个`,
|
||||
'scheduler',
|
||||
scrapeResult,
|
||||
'automated_task'
|
||||
);
|
||||
}
|
||||
|
||||
return scrapeResult;
|
||||
} catch (error) {
|
||||
const errorMessage = `定时抓取任务失败: ${error.message}`;
|
||||
await this.log(errorMessage);
|
||||
|
||||
// 更新历史记录
|
||||
if (historyId) {
|
||||
const duration = Date.now() - new Date(await this.getHistoryStartTime(historyId));
|
||||
await HistoryModel.update(historyId, {
|
||||
status: 'failed',
|
||||
end_time: new Date().toISOString(),
|
||||
duration: duration,
|
||||
error_message: error.message
|
||||
});
|
||||
}
|
||||
|
||||
// 记录错误日志
|
||||
await LogsModel.logError(
|
||||
errorMessage,
|
||||
'scheduler',
|
||||
{ error: error.message, stack: error.stack },
|
||||
'automated_task'
|
||||
);
|
||||
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
// 验证任务 - 每10分钟执行一次
|
||||
async runValidationTask() {
|
||||
let historyId = null;
|
||||
|
||||
try {
|
||||
await this.log('开始执行定时验证任务');
|
||||
|
||||
// 创建执行历史记录
|
||||
historyId = await HistoryModel.create({
|
||||
task_type: 'validation',
|
||||
task_name: '定时验证任务',
|
||||
status: 'running',
|
||||
start_time: new Date().toISOString()
|
||||
});
|
||||
|
||||
// 验证数据库中的代理(每次验证50个)
|
||||
const validationResult = await this.validator.validateDatabaseProxies(50);
|
||||
|
||||
// 更新历史记录
|
||||
const duration = Date.now() - new Date(historyId ? await this.getHistoryStartTime(historyId) : Date.now());
|
||||
await HistoryModel.update(historyId, {
|
||||
status: 'success',
|
||||
end_time: new Date().toISOString(),
|
||||
duration: duration,
|
||||
result_summary: `验证 ${validationResult.validated} 个代理,有效 ${validationResult.valid} 个`,
|
||||
details: validationResult
|
||||
});
|
||||
|
||||
await this.log(`定时验证任务完成: 验证 ${validationResult.validated} 个,有效 ${validationResult.valid},无效 ${validationResult.invalid}`);
|
||||
|
||||
// 清理无效代理
|
||||
if (validationResult.invalid > 0) {
|
||||
await this.log(`已清理 ${validationResult.invalid} 个无效代理`);
|
||||
}
|
||||
|
||||
// 获取当前统计信息
|
||||
const stats = await this.validator.getProxyStatistics();
|
||||
await this.log(`当前代理状态: 总数 ${stats.total},可用 ${stats.valid},无效 ${stats.invalid},可用率 ${stats.validRate}`);
|
||||
|
||||
// 记录系统日志
|
||||
await LogsModel.logInfo(
|
||||
`定时验证任务完成: 验证${validationResult.validated}个代理,有效${validationResult.valid}个`,
|
||||
'scheduler',
|
||||
validationResult,
|
||||
'automated_task'
|
||||
);
|
||||
|
||||
return validationResult;
|
||||
} catch (error) {
|
||||
const errorMessage = `定时验证任务失败: ${error.message}`;
|
||||
await this.log(errorMessage);
|
||||
|
||||
// 更新历史记录
|
||||
if (historyId) {
|
||||
const duration = Date.now() - new Date(await this.getHistoryStartTime(historyId));
|
||||
await HistoryModel.update(historyId, {
|
||||
status: 'failed',
|
||||
end_time: new Date().toISOString(),
|
||||
duration: duration,
|
||||
error_message: error.message
|
||||
});
|
||||
}
|
||||
|
||||
// 记录错误日志
|
||||
await LogsModel.logError(
|
||||
errorMessage,
|
||||
'scheduler',
|
||||
{ error: error.message, stack: error.stack },
|
||||
'automated_task'
|
||||
);
|
||||
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
// 健康检查任务 - 每小时执行一次
|
||||
async runHealthCheckTask() {
|
||||
try {
|
||||
await this.log('开始执行健康检查任务');
|
||||
|
||||
const stats = await this.validator.getProxyStatistics();
|
||||
|
||||
await this.log(`代理池健康状态: 总数 ${stats.total},可用 ${stats.valid},无效 ${stats.invalid},可用率 ${stats.validRate}`);
|
||||
|
||||
// 如果可用代理太少,触发紧急抓取
|
||||
if (stats.valid < 10) {
|
||||
await this.log('可用代理数量过少,触发紧急抓取任务');
|
||||
await this.runScrapeTask();
|
||||
}
|
||||
|
||||
return stats;
|
||||
} catch (error) {
|
||||
await this.log(`健康检查任务失败: ${error.message}`);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
// 启动所有定时任务
|
||||
start() {
|
||||
this.log('启动代理服务定时任务调度器');
|
||||
|
||||
// 每小时的第0分钟执行抓取任务(异步执行,不阻塞其他任务)
|
||||
const scrapeTask = cron.schedule('0 * * * *', async () => {
|
||||
try {
|
||||
// 使用 setImmediate 确保任务异步执行,不阻塞事件循环
|
||||
setImmediate(async () => {
|
||||
try {
|
||||
await this.runScrapeTask();
|
||||
} catch (error) {
|
||||
this.log(`定时抓取任务异常: ${error.message}`);
|
||||
}
|
||||
});
|
||||
} catch (error) {
|
||||
this.log(`定时抓取任务启动异常: ${error.message}`);
|
||||
}
|
||||
}, {
|
||||
scheduled: false,
|
||||
timezone: 'Asia/Shanghai'
|
||||
});
|
||||
|
||||
// 每10分钟执行验证任务(异步执行,不阻塞其他任务)
|
||||
const validationTask = cron.schedule('*/10 * * * *', async () => {
|
||||
try {
|
||||
// 使用 setImmediate 确保任务异步执行,不阻塞事件循环
|
||||
setImmediate(async () => {
|
||||
try {
|
||||
await this.runValidationTask();
|
||||
} catch (error) {
|
||||
this.log(`定时验证任务异常: ${error.message}`);
|
||||
}
|
||||
});
|
||||
} catch (error) {
|
||||
this.log(`定时验证任务启动异常: ${error.message}`);
|
||||
}
|
||||
}, {
|
||||
scheduled: false,
|
||||
timezone: 'Asia/Shanghai'
|
||||
});
|
||||
|
||||
// 每小时的第30分钟执行健康检查(异步执行,不阻塞其他任务)
|
||||
const healthCheckTask = cron.schedule('30 * * * *', async () => {
|
||||
try {
|
||||
// 使用 setImmediate 确保任务异步执行,不阻塞事件循环
|
||||
setImmediate(async () => {
|
||||
try {
|
||||
await this.runHealthCheckTask();
|
||||
} catch (error) {
|
||||
this.log(`健康检查任务异常: ${error.message}`);
|
||||
}
|
||||
});
|
||||
} catch (error) {
|
||||
this.log(`健康检查任务启动异常: ${error.message}`);
|
||||
}
|
||||
}, {
|
||||
scheduled: false,
|
||||
timezone: 'Asia/Shanghai'
|
||||
});
|
||||
|
||||
// 保存任务引用
|
||||
this.tasks.set('scrape', scrapeTask);
|
||||
this.tasks.set('validation', validationTask);
|
||||
this.tasks.set('healthCheck', healthCheckTask);
|
||||
|
||||
// 启动所有任务
|
||||
this.tasks.forEach((task, name) => {
|
||||
task.start();
|
||||
this.log(`定时任务已启动: ${name}`);
|
||||
});
|
||||
|
||||
this.log('所有定时任务已启动');
|
||||
this.printNextRunTimes();
|
||||
}
|
||||
|
||||
// 停止所有定时任务
|
||||
stop() {
|
||||
this.log('停止所有定时任务');
|
||||
|
||||
this.tasks.forEach((task, name) => {
|
||||
task.stop();
|
||||
this.log(`定时任务已停止: ${name}`);
|
||||
});
|
||||
|
||||
this.tasks.clear();
|
||||
this.log('定时任务调度器已停止');
|
||||
}
|
||||
|
||||
// 手动执行任务
|
||||
async runTask(taskName) {
|
||||
this.log(`手动执行任务: ${taskName}`);
|
||||
|
||||
switch (taskName) {
|
||||
case 'scrape':
|
||||
return await this.runScrapeTask();
|
||||
case 'validation':
|
||||
return await this.runValidationTask();
|
||||
case 'healthCheck':
|
||||
return await this.runHealthCheckTask();
|
||||
default:
|
||||
throw new Error(`未知的任务名称: ${taskName}`);
|
||||
}
|
||||
}
|
||||
|
||||
// 获取任务状态
|
||||
getStatus() {
|
||||
const status = {};
|
||||
|
||||
this.tasks.forEach((task, name) => {
|
||||
status[name] = {
|
||||
running: task.running || false,
|
||||
scheduled: task.scheduled || false
|
||||
};
|
||||
});
|
||||
|
||||
return {
|
||||
tasks: status,
|
||||
taskCount: this.tasks.size
|
||||
};
|
||||
}
|
||||
|
||||
// 打印下次执行时间
|
||||
printNextRunTimes() {
|
||||
const now = new Date();
|
||||
|
||||
this.log('定时任务执行计划:');
|
||||
this.log(`- 抓取任务: 每小时整点执行 (下次: ${this.getNextHourTime(now)})`);
|
||||
this.log(`- 验证任务: 每10分钟执行 (下次: ${this.getNext10MinuteTime(now)})`);
|
||||
this.log(`- 健康检查: 每小时30分执行 (下次: ${this.getNextHour30Time(now)})`);
|
||||
}
|
||||
|
||||
getNextHourTime(now) {
|
||||
const next = new Date(now);
|
||||
next.setHours(next.getHours() + 1, 0, 0, 0);
|
||||
return next.toTimeString().slice(0, 5);
|
||||
}
|
||||
|
||||
getNext10MinuteTime(now) {
|
||||
const next = new Date(now);
|
||||
const minutes = next.getMinutes();
|
||||
const nextMinute = Math.ceil((minutes + 1) / 10) * 10;
|
||||
next.setMinutes(nextMinute, 0, 0);
|
||||
return next.toTimeString().slice(0, 5);
|
||||
}
|
||||
|
||||
getNextHour30Time(now) {
|
||||
const next = new Date(now);
|
||||
if (now.getMinutes() < 30) {
|
||||
next.setHours(now.getHours(), 30, 0, 0);
|
||||
} else {
|
||||
next.setHours(now.getHours() + 1, 30, 0, 0);
|
||||
}
|
||||
return next.toTimeString().slice(0, 5);
|
||||
}
|
||||
|
||||
// 获取历史记录开始时间
|
||||
async getHistoryStartTime(historyId) {
|
||||
try {
|
||||
const history = await HistoryModel.findById(historyId);
|
||||
return history ? new Date(history.start_time).getTime() : Date.now();
|
||||
} catch (error) {
|
||||
return Date.now();
|
||||
}
|
||||
}
|
||||
|
||||
// 获取系统统计信息
|
||||
async getSystemStats() {
|
||||
try {
|
||||
const proxyStats = await this.validator.getProxyStatistics();
|
||||
const taskStatus = this.getStatus();
|
||||
|
||||
return {
|
||||
timestamp: new Date().toISOString(),
|
||||
proxies: proxyStats,
|
||||
scheduler: taskStatus,
|
||||
uptime: process.uptime()
|
||||
};
|
||||
} catch (error) {
|
||||
this.log(`获取系统统计信息失败: ${error.message}`);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = ProxyScheduler;
|
||||
596
src/services/scraper.js
Normal file
596
src/services/scraper.js
Normal file
@@ -0,0 +1,596 @@
|
||||
const axios = require('axios');
|
||||
const cheerio = require('cheerio');
|
||||
const ProxyModel = require('../database/models/proxy');
|
||||
|
||||
// 全局变量:标记是否有抓取任务正在进行
|
||||
let isScrapingInProgress = false;
|
||||
|
||||
class ProxyScraper {
|
||||
constructor() {
|
||||
this.baseUrl = 'https://www.kuaidaili.com/free/inha';
|
||||
this.userAgents = [
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Edge/91.0.864.59'
|
||||
];
|
||||
this.currentProxyIndex = 0;
|
||||
this.localProxies = [];
|
||||
}
|
||||
|
||||
getRandomUserAgent() {
|
||||
return this.userAgents[Math.floor(Math.random() * this.userAgents.length)];
|
||||
}
|
||||
|
||||
async sleep(ms) {
|
||||
return new Promise(resolve => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
// 获取本地数据库中的有效代理
|
||||
async loadLocalProxies() {
|
||||
try {
|
||||
console.log('正在加载本地代理数据库...');
|
||||
this.localProxies = await ProxyModel.findAll(true);
|
||||
this.currentProxyIndex = 0;
|
||||
console.log(`成功加载 ${this.localProxies.length} 个本地代理`);
|
||||
|
||||
if (this.localProxies.length > 0) {
|
||||
console.log('将使用本地代理进行抓取');
|
||||
} else {
|
||||
console.log('本地无可用代理,将使用直连');
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('加载本地代理失败:', error);
|
||||
this.localProxies = [];
|
||||
}
|
||||
}
|
||||
|
||||
// 获取下一个本地代理(循环使用)
|
||||
getNextLocalProxy() {
|
||||
if (this.localProxies.length === 0) {
|
||||
return null; // 没有本地代理,返回null使用直连
|
||||
}
|
||||
|
||||
const proxy = this.localProxies[this.currentProxyIndex];
|
||||
this.currentProxyIndex = (this.currentProxyIndex + 1) % this.localProxies.length;
|
||||
|
||||
console.log(`使用本地代理: ${proxy.ip}:${proxy.port}`);
|
||||
return {
|
||||
host: proxy.ip,
|
||||
port: proxy.port,
|
||||
protocol: 'http'
|
||||
};
|
||||
}
|
||||
|
||||
// 测试代理是否可用(用于抓取)
|
||||
async testProxyForScraping(proxyConfig) {
|
||||
try {
|
||||
const response = await axios.get('https://www.baidu.com', {
|
||||
proxy: proxyConfig,
|
||||
timeout: 10000,
|
||||
headers: {
|
||||
'User-Agent': this.getRandomUserAgent()
|
||||
},
|
||||
validateStatus: (status) => status === 200
|
||||
});
|
||||
return response.status === 200 && response.data.includes('百度');
|
||||
} catch (error) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// 获取可用的代理配置
|
||||
async getWorkingProxy() {
|
||||
if (this.localProxies.length === 0) {
|
||||
return null; // 无本地代理,使用直连
|
||||
}
|
||||
|
||||
// 尝试几个代理,找到可用的
|
||||
for (let i = 0; i < Math.min(5, this.localProxies.length); i++) {
|
||||
const proxyConfig = this.getNextLocalProxy();
|
||||
|
||||
if (await this.testProxyForScraping(proxyConfig)) {
|
||||
console.log(`✓ 代理 ${proxyConfig.host}:${proxyConfig.port} 可用`);
|
||||
return proxyConfig;
|
||||
} else {
|
||||
console.log(`✗ 代理 ${proxyConfig.host}:${proxyConfig.port} 不可用,尝试下一个`);
|
||||
}
|
||||
}
|
||||
|
||||
console.log('测试的本地代理都不可用,使用直连');
|
||||
return null; // 所有测试的代理都不可用,使用直连
|
||||
}
|
||||
|
||||
// 验证抓取到的代理
|
||||
async validateScrapedProxies(proxies, concurrency = 20) {
|
||||
if (!proxies || proxies.length === 0) {
|
||||
return [];
|
||||
}
|
||||
|
||||
console.log(`开始验证 ${proxies.length} 个抓取到的代理,并发数: ${concurrency}`);
|
||||
const validProxies = [];
|
||||
|
||||
// 分批处理,避免过多并发连接
|
||||
for (let i = 0; i < proxies.length; i += concurrency) {
|
||||
const batch = proxies.slice(i, i + concurrency);
|
||||
const batchPromises = batch.map(proxy => this.validateScrapedProxy(proxy));
|
||||
|
||||
try {
|
||||
const batchResults = await Promise.all(batchPromises);
|
||||
|
||||
// 收集验证通过的代理
|
||||
batchResults.forEach(result => {
|
||||
if (result.isValid) {
|
||||
validProxies.push({
|
||||
...result.proxy,
|
||||
is_valid: 1,
|
||||
response_time: result.responseTime
|
||||
});
|
||||
console.log(`✓ 代理验证通过: ${result.proxy.ip}:${result.proxy.port} - ${result.responseTime}ms`);
|
||||
} else {
|
||||
console.log(`✗ 代理验证失败: ${result.proxy.ip}:${result.proxy.port} - ${result.error || '连接失败'}`);
|
||||
}
|
||||
});
|
||||
|
||||
// 批次间延迟,避免请求过于频繁
|
||||
if (i + concurrency < proxies.length) {
|
||||
await this.sleep(1000);
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
console.error(`批量验证第 ${Math.floor(i / concurrency) + 1} 批时发生错误:`, error);
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`代理验证完成,${validProxies.length}/${proxies.length} 个代理验证通过`);
|
||||
return validProxies;
|
||||
}
|
||||
|
||||
// 验证单个抓取到的代理
|
||||
async validateScrapedProxy(proxy, retryCount = 2) {
|
||||
const testUrls = [
|
||||
'https://www.baidu.com',
|
||||
];
|
||||
|
||||
for (let attempt = 1; attempt <= retryCount; attempt++) {
|
||||
for (const testUrl of testUrls) {
|
||||
const startTime = Date.now();
|
||||
|
||||
try {
|
||||
const proxyConfig = {
|
||||
host: proxy.ip,
|
||||
port: proxy.port,
|
||||
protocol: 'http'
|
||||
};
|
||||
|
||||
const response = await axios.get(testUrl, {
|
||||
proxy: proxyConfig,
|
||||
timeout: 10000, // 3秒超时
|
||||
headers: {
|
||||
'User-Agent': this.getRandomUserAgent(),
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
||||
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
|
||||
'Connection': 'keep-alive'
|
||||
},
|
||||
validateStatus: (status) => status >= 200 && status < 300 // 接受2xx状态码
|
||||
});
|
||||
|
||||
const responseTime = Date.now() - startTime;
|
||||
let isValid = false;
|
||||
|
||||
// 检查响应内容
|
||||
if (response.status === 200) {
|
||||
if (testUrl.includes('baidu.com')) {
|
||||
isValid = response.data.includes('百度');
|
||||
} else if (testUrl.includes('httpbin.org')) {
|
||||
isValid = response.data.includes('origin');
|
||||
} else if (testUrl.includes('google.com')) {
|
||||
isValid = response.data.includes('google');
|
||||
} else {
|
||||
isValid = true; // 对于其他URL,只要能连接就认为有效
|
||||
}
|
||||
}
|
||||
|
||||
if (isValid) {
|
||||
console.log(`✓ 代理验证通过: ${proxy.ip}:${proxy.port} - ${testUrl} - ${responseTime}ms`);
|
||||
return {
|
||||
proxy: proxy,
|
||||
isValid: true,
|
||||
responseTime: responseTime,
|
||||
error: null,
|
||||
testUrl: testUrl
|
||||
};
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
const responseTime = Date.now() - startTime;
|
||||
// 如果是最后一次尝试,返回失败
|
||||
if (attempt === retryCount && testUrl === testUrls[testUrls.length - 1]) {
|
||||
return {
|
||||
proxy: proxy,
|
||||
isValid: false,
|
||||
responseTime: responseTime,
|
||||
error: error.message,
|
||||
testUrl: testUrl
|
||||
};
|
||||
}
|
||||
// 否则继续尝试下一个URL或重试
|
||||
await this.sleep(500);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 所有尝试都失败
|
||||
return {
|
||||
proxy: proxy,
|
||||
isValid: false,
|
||||
responseTime: 0,
|
||||
error: 'All validation attempts failed',
|
||||
testUrl: null
|
||||
};
|
||||
}
|
||||
|
||||
async fetchPage(pageNum, retryCount = 3) {
|
||||
const url = pageNum === 1 ? `${this.baseUrl}/` : `${this.baseUrl}/${pageNum}/`;
|
||||
|
||||
console.log(`正在抓取第 ${pageNum} 页: ${url}`);
|
||||
|
||||
for (let attempt = 1; attempt <= retryCount; attempt++) {
|
||||
let proxyConfig = null;
|
||||
let proxyUsed = '';
|
||||
|
||||
try {
|
||||
const userAgent = this.getRandomUserAgent();
|
||||
|
||||
// 获取可用代理配置(每次请求都尝试不同的代理)
|
||||
proxyConfig = await this.getWorkingProxy();
|
||||
|
||||
const requestConfig = {
|
||||
headers: {
|
||||
'User-Agent': userAgent,
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
|
||||
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
|
||||
'Accept-Encoding': 'gzip, deflate, br',
|
||||
'Connection': 'keep-alive',
|
||||
'Upgrade-Insecure-Requests': '1',
|
||||
'Sec-Fetch-Dest': 'document',
|
||||
'Sec-Fetch-Mode': 'navigate',
|
||||
'Sec-Fetch-Site': 'none',
|
||||
'Sec-Fetch-User': '?1',
|
||||
'Cache-Control': 'max-age=0',
|
||||
'sec-ch-ua': '"Google Chrome";v="119", "Chromium";v="119", "Not?A_Brand";v="24"',
|
||||
'sec-ch-ua-mobile': '?0',
|
||||
'sec-ch-ua-platform': '"Windows"',
|
||||
'Referer': 'https://www.kuaidaili.com/free/',
|
||||
'Origin': 'https://www.kuaidaili.com'
|
||||
},
|
||||
timeout: 15000,
|
||||
validateStatus: (status) => status === 200,
|
||||
maxRedirects: 3
|
||||
};
|
||||
|
||||
// 如果有可用代理,添加到请求配置中
|
||||
if (proxyConfig) {
|
||||
requestConfig.proxy = proxyConfig;
|
||||
proxyUsed = `代理 ${proxyConfig.host}:${proxyConfig.port}`;
|
||||
console.log(`使用代理 ${proxyConfig.host}:${proxyConfig.port} 抓取第 ${pageNum} 页`);
|
||||
} else {
|
||||
proxyUsed = '直连';
|
||||
console.log(`使用直连抓取第 ${pageNum} 页`);
|
||||
}
|
||||
|
||||
const response = await axios.get(url, requestConfig);
|
||||
|
||||
// 检查响应内容
|
||||
const content = response.data;
|
||||
if (content.includes('访问过于频繁') || content.includes('请稍后再试')) {
|
||||
console.log(`第 ${pageNum} 页被限制访问,延长等待时间`);
|
||||
if (attempt < retryCount) {
|
||||
await this.sleep(5000 * attempt);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`第 ${pageNum} 页获取成功,内容长度: ${content.length}`);
|
||||
return content;
|
||||
} catch (error) {
|
||||
console.error(`第 ${attempt} 次尝试抓取第 ${pageNum} 页失败 (${proxyUsed}):`, error.message);
|
||||
|
||||
if (attempt === retryCount) {
|
||||
throw new Error(`抓取第 ${pageNum} 页失败,已重试 ${retryCount} 次: ${error.message}`);
|
||||
}
|
||||
|
||||
// 递增等待时间
|
||||
await this.sleep(3000 * attempt);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extractProxiesFromHtml(html) {
|
||||
try {
|
||||
const $ = cheerio.load(html);
|
||||
const proxies = [];
|
||||
|
||||
console.log('开始解析HTML,页面标题:', $('title').text());
|
||||
|
||||
// 方法1: 尝试从JavaScript变量中提取
|
||||
const scripts = $('script').map((i, elem) => $(elem).html()).get();
|
||||
console.log(`找到 ${scripts.length} 个script标签`);
|
||||
|
||||
for (let i = 0; i < scripts.length; i++) {
|
||||
const script = scripts[i];
|
||||
|
||||
// 尝试多种匹配模式 - 优先使用 const 模式
|
||||
const patterns = [
|
||||
/const fpsList = (\[[\s\S]*?\]);/,
|
||||
/var fpsList = (\[[\s\S]*?\]);/,
|
||||
/let fpsList = (\[[\s\S]*?\]);/,
|
||||
/fpsList = (\[[\s\S]*?\]);/,
|
||||
/var\s+fpsList\s*=\s*(\[[\s\S]*?\]);/,
|
||||
/const\s+fpsList\s*=\s*(\[[\s\S]*?\]);/,
|
||||
/let\s+fpsList\s*=\s*(\[[\s\S]*?\]);/
|
||||
];
|
||||
|
||||
for (const pattern of patterns) {
|
||||
const match = script.match(pattern);
|
||||
if (match) {
|
||||
try {
|
||||
console.log('找到JavaScript变量,尝试解析...');
|
||||
const fpsList = eval(match[1]);
|
||||
console.log(`从JavaScript变量中提取到 ${fpsList.length} 个代理`);
|
||||
|
||||
if (Array.isArray(fpsList) && fpsList.length > 0) {
|
||||
return fpsList.map(item => ({
|
||||
ip: item.ip,
|
||||
port: parseInt(item.port), // 确保端口是数字
|
||||
location: item.location || null,
|
||||
speed: item.speed ? parseInt(item.speed) : null,
|
||||
last_check_time: item.last_check_time || new Date().toISOString().slice(0, 19).replace('T', ' ')
|
||||
}));
|
||||
}
|
||||
} catch (e) {
|
||||
console.warn('解析JavaScript变量失败:', e.message);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 方法2: 尝试从表格中提取 - 使用多种选择器
|
||||
const tableSelectors = [
|
||||
'table tbody tr',
|
||||
'.table tbody tr',
|
||||
'#list table tbody tr',
|
||||
'.list table tbody tr',
|
||||
'table tr',
|
||||
'.proxy-table tbody tr'
|
||||
];
|
||||
|
||||
for (const selector of tableSelectors) {
|
||||
const rows = $(selector);
|
||||
console.log(`尝试选择器 "${selector}": 找到 ${rows.length} 行`);
|
||||
|
||||
if (rows.length > 0) {
|
||||
rows.each((i, row) => {
|
||||
try {
|
||||
const cols = $(row).find('td');
|
||||
|
||||
// 尝试多种列结构
|
||||
let ip, port, location, speed;
|
||||
|
||||
if (cols.length >= 2) {
|
||||
// 标准格式:IP、端口、位置、匿名度、类型、位置、速度、最后验证时间
|
||||
ip = $(cols[0]).text().trim();
|
||||
port = $(cols[1]).text().trim();
|
||||
|
||||
if (cols.length >= 5) {
|
||||
location = $(cols[4]).text().trim();
|
||||
}
|
||||
if (cols.length >= 6) {
|
||||
speed = $(cols[5]).text().trim();
|
||||
}
|
||||
|
||||
// 清理和验证数据
|
||||
ip = ip.replace(/[^\d\.]/g, '');
|
||||
port = port.replace(/[^\d]/g, '');
|
||||
|
||||
if (ip && port && /^\d+\.\d+\.\d+\.\d+$/.test(ip) && /^\d+$/.test(port)) {
|
||||
const portNum = parseInt(port);
|
||||
if (portNum > 0 && portNum <= 65535) {
|
||||
proxies.push({
|
||||
ip: ip,
|
||||
port: portNum,
|
||||
location: location || null,
|
||||
speed: speed ? parseInt(speed.replace(/[^\d]/g, '')) : null,
|
||||
last_check_time: new Date().toISOString().slice(0, 19).replace('T', ' ')
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
console.warn(`解析第 ${i} 行失败:`, e.message);
|
||||
}
|
||||
});
|
||||
|
||||
if (proxies.length > 0) {
|
||||
console.log(`从表格中提取到 ${proxies.length} 个代理`);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 方法3: 尝试从JSON数据中提取
|
||||
const jsonPatterns = [
|
||||
/"data":\s*(\[[\s\S]*?\])/,
|
||||
/"proxies":\s*(\[[\s\S]*?\])/,
|
||||
/window\.__INITIAL_STATE__\s*=\s*({[\s\S]*?});/
|
||||
];
|
||||
|
||||
const pageContent = $.html();
|
||||
for (const pattern of jsonPatterns) {
|
||||
const match = pageContent.match(pattern);
|
||||
if (match) {
|
||||
try {
|
||||
console.log('找到JSON数据,尝试解析...');
|
||||
const data = JSON.parse(match[1]);
|
||||
// 处理JSON数据格式
|
||||
if (Array.isArray(data)) {
|
||||
const jsonProxies = data.map(item => ({
|
||||
ip: item.ip || item.IP,
|
||||
port: item.port || item.PORT,
|
||||
location: item.location || item.area || null,
|
||||
speed: item.speed || null,
|
||||
last_check_time: new Date().toISOString().slice(0, 19).replace('T', ' ')
|
||||
})).filter(p => p.ip && p.port);
|
||||
|
||||
if (jsonProxies.length > 0) {
|
||||
console.log(`从JSON数据中提取到 ${jsonProxies.length} 个代理`);
|
||||
return jsonProxies;
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
console.warn('解析JSON数据失败:', e.message);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`所有提取方法都失败,最终提取到 ${proxies.length} 个代理`);
|
||||
|
||||
// 如果没有找到代理,输出页面内容用于调试
|
||||
if (proxies.length === 0) {
|
||||
console.log('页面内容片段:', pageContent.substring(0, 500) + '...');
|
||||
}
|
||||
|
||||
return proxies;
|
||||
} catch (error) {
|
||||
console.error('提取代理数据失败:', error);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
async scrapePage(pageNum) {
|
||||
try {
|
||||
const html = await this.fetchPage(pageNum);
|
||||
const proxies = this.extractProxiesFromHtml(html);
|
||||
|
||||
if (proxies.length === 0) {
|
||||
console.warn(`第 ${pageNum} 页没有提取到代理数据`);
|
||||
return [];
|
||||
}
|
||||
|
||||
console.log(`第 ${pageNum} 页处理完成,获取到 ${proxies.length} 个代理,开始验证...`);
|
||||
|
||||
// 立即验证当前页面的所有代理
|
||||
const validatedProxies = await this.validateScrapedProxies(proxies);
|
||||
|
||||
let savedCount = 0;
|
||||
for (const proxy of validatedProxies) {
|
||||
try {
|
||||
await ProxyModel.create(proxy);
|
||||
savedCount++;
|
||||
console.log(`✓ 保存验证通过的代理: ${proxy.ip}:${proxy.port}`);
|
||||
} catch (error) {
|
||||
// 忽略重复插入错误
|
||||
if (!error.message.includes('UNIQUE constraint failed')) {
|
||||
console.warn(`保存代理 ${proxy.ip}:${proxy.port} 失败:`, error.message);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`第 ${pageNum} 页验证完成,验证通过 ${validatedProxies.length}/${proxies.length} 个代理,保存了 ${savedCount} 个新代理`);
|
||||
|
||||
return validatedProxies;
|
||||
|
||||
} catch (error) {
|
||||
console.error(`处理第 ${pageNum} 页失败:`, error);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
async scrapeMultiplePages(maxPages = 40) {
|
||||
console.log(`开始抓取前 ${maxPages} 页代理IP`);
|
||||
|
||||
// 首先加载本地代理
|
||||
await this.loadLocalProxies();
|
||||
|
||||
const allValidatedProxies = [];
|
||||
|
||||
for (let page = 1; page <= maxPages; page++) {
|
||||
try {
|
||||
const validatedProxies = await this.scrapePage(page);
|
||||
allValidatedProxies.push(...validatedProxies);
|
||||
|
||||
// 页面间延迟,避免请求过于频繁
|
||||
if (page < maxPages) {
|
||||
await this.sleep(3000 + Math.random() * 2000);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error(`抓取第 ${page} 页失败,跳过此页:`, error);
|
||||
}
|
||||
}
|
||||
|
||||
const totalValidated = allValidatedProxies.length;
|
||||
console.log(`抓取完成,共获取到 ${totalValidated} 个验证通过的代理IP`);
|
||||
|
||||
return totalValidated;
|
||||
}
|
||||
|
||||
async scrape() {
|
||||
console.log('开始执行代理IP抓取任务');
|
||||
|
||||
// 检查是否有抓取任务正在进行
|
||||
if (isScrapingInProgress) {
|
||||
console.log('⚠️ 有抓取任务正在进行,跳过本次抓取');
|
||||
const totalCount = await ProxyModel.count();
|
||||
const validCount = await ProxyModel.count(true);
|
||||
|
||||
return {
|
||||
scraped: 0,
|
||||
total: totalCount,
|
||||
valid: validCount,
|
||||
skipped: true
|
||||
};
|
||||
}
|
||||
|
||||
// 设置抓取进行中标志
|
||||
isScrapingInProgress = true;
|
||||
console.log('🚀 开始抓取,设置抓取进行中标志');
|
||||
|
||||
try {
|
||||
const validatedCount = await this.scrapeMultiplePages(40);
|
||||
|
||||
// 获取数据库中的总代理数量
|
||||
const totalCount = await ProxyModel.count();
|
||||
const validCount = await ProxyModel.count(true);
|
||||
|
||||
console.log(`抓取任务完成。数据库中共有 ${totalCount} 个代理,其中 ${validCount} 个可用`);
|
||||
|
||||
return {
|
||||
scraped: validatedCount,
|
||||
total: totalCount,
|
||||
valid: validCount,
|
||||
skipped: false
|
||||
};
|
||||
} catch (error) {
|
||||
console.error('代理抓取任务失败:', error);
|
||||
throw error;
|
||||
} finally {
|
||||
// 无论成功还是失败,都要清除抓取进行中标志
|
||||
isScrapingInProgress = false;
|
||||
console.log('✅ 清除抓取进行中标志');
|
||||
}
|
||||
}
|
||||
|
||||
// 静态方法:检查是否有抓取任务正在进行
|
||||
static isScrapingInProgress() {
|
||||
return isScrapingInProgress;
|
||||
}
|
||||
|
||||
// 静态方法:强制清除抓取标志(用于异常情况)
|
||||
static clearScrapingFlag() {
|
||||
isScrapingInProgress = false;
|
||||
console.log('🔧 强制清除抓取进行中标志');
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = ProxyScraper;
|
||||
202
src/services/validator.js
Normal file
202
src/services/validator.js
Normal file
@@ -0,0 +1,202 @@
|
||||
const axios = require('axios');
|
||||
const ProxyModel = require('../database/models/proxy');
|
||||
|
||||
class ProxyValidator {
|
||||
constructor() {
|
||||
this.testUrl = 'https://www.baidu.com';
|
||||
this.timeout = 10000; // 3秒超时
|
||||
this.userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36';
|
||||
}
|
||||
|
||||
async validateProxy(ip, port) {
|
||||
const startTime = Date.now();
|
||||
const proxy = {
|
||||
host: ip,
|
||||
port: port,
|
||||
protocol: 'http'
|
||||
};
|
||||
|
||||
console.log(`正在验证代理 ${ip}:${port}`);
|
||||
|
||||
try {
|
||||
const response = await axios.get(this.testUrl, {
|
||||
proxy: proxy,
|
||||
timeout: this.timeout,
|
||||
headers: {
|
||||
'User-Agent': this.userAgent,
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
||||
'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
|
||||
'Accept-Encoding': 'gzip, deflate',
|
||||
'Connection': 'keep-alive'
|
||||
},
|
||||
validateStatus: (status) => status === 200
|
||||
});
|
||||
|
||||
const responseTime = Date.now() - startTime;
|
||||
const isValid = response.status === 200 && response.data.includes('百度');
|
||||
|
||||
if (isValid) {
|
||||
console.log(`✓ 代理 ${ip}:${port} 验证成功,响应时间: ${responseTime}ms`);
|
||||
} else {
|
||||
console.log(`✗ 代理 ${ip}:${port} 验证失败,响应不正确`);
|
||||
}
|
||||
|
||||
// 更新数据库中的验证结果
|
||||
await ProxyModel.updateValidity(ip, port, isValid ? 1 : 0, responseTime);
|
||||
|
||||
return {
|
||||
ip: ip,
|
||||
port: port,
|
||||
isValid: isValid,
|
||||
responseTime: responseTime,
|
||||
error: null
|
||||
};
|
||||
} catch (error) {
|
||||
const responseTime = Date.now() - startTime;
|
||||
console.log(`✗ 代理 ${ip}:${port} 验证失败:`, error.message);
|
||||
|
||||
// 更新数据库中的验证结果
|
||||
await ProxyModel.updateValidity(ip, port, 0, responseTime);
|
||||
|
||||
return {
|
||||
ip: ip,
|
||||
port: port,
|
||||
isValid: false,
|
||||
responseTime: responseTime,
|
||||
error: error.message
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
async validateSingleProxy(ip, port) {
|
||||
try {
|
||||
const result = await this.validateProxy(ip, port);
|
||||
return result;
|
||||
} catch (error) {
|
||||
console.error(`验证代理 ${ip}:${port} 时发生错误:`, error);
|
||||
return {
|
||||
ip: ip,
|
||||
port: port,
|
||||
isValid: false,
|
||||
responseTime: null,
|
||||
error: error.message
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
async validateMultipleProxies(proxies, concurrency = 20) {
|
||||
console.log(`开始批量验证 ${proxies.length} 个代理,并发数: ${concurrency}`);
|
||||
const results = [];
|
||||
|
||||
// 分批处理,避免过多并发连接
|
||||
for (let i = 0; i < proxies.length; i += concurrency) {
|
||||
const batch = proxies.slice(i, i + concurrency);
|
||||
const batchPromises = batch.map(proxy =>
|
||||
this.validateProxy(proxy.ip, proxy.port)
|
||||
);
|
||||
|
||||
try {
|
||||
const batchResults = await Promise.all(batchPromises);
|
||||
results.push(...batchResults);
|
||||
} catch (error) {
|
||||
console.error(`批量验证第 ${Math.floor(i / concurrency) + 1} 批时发生错误:`, error);
|
||||
}
|
||||
|
||||
// 批次间延迟
|
||||
if (i + concurrency < proxies.length) {
|
||||
await this.sleep(1000);
|
||||
}
|
||||
}
|
||||
|
||||
const validCount = results.filter(r => r.isValid).length;
|
||||
console.log(`批量验证完成,${validCount}/${results.length} 个代理可用`);
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
async validateDatabaseProxies(limit = 50) {
|
||||
console.log(`开始验证数据库中的代理IP,限制数量: ${limit}`);
|
||||
|
||||
try {
|
||||
// 获取待验证的代理(优先验证最久未验证的)
|
||||
const proxies = await ProxyModel.findAllForValidation(limit);
|
||||
|
||||
if (proxies.length === 0) {
|
||||
console.log('没有需要验证的代理');
|
||||
return { validated: 0, valid: 0, invalid: 0 };
|
||||
}
|
||||
|
||||
console.log(`找到 ${proxies.length} 个待验证的代理`);
|
||||
|
||||
const results = await this.validateMultipleProxies(proxies, 10);
|
||||
|
||||
// 清理无效代理
|
||||
const invalidCount = results.filter(r => !r.isValid).length;
|
||||
if (invalidCount > 0) {
|
||||
await ProxyModel.deleteInvalid();
|
||||
console.log(`已删除 ${invalidCount} 个无效代理`);
|
||||
}
|
||||
|
||||
const validCount = results.filter(r => r.isValid).length;
|
||||
|
||||
return {
|
||||
validated: results.length,
|
||||
valid: validCount,
|
||||
invalid: invalidCount,
|
||||
results: results
|
||||
};
|
||||
} catch (error) {
|
||||
console.error('验证数据库代理失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
async getProxyStatistics() {
|
||||
try {
|
||||
const totalCount = await ProxyModel.count(false);
|
||||
const validCount = await ProxyModel.count(true);
|
||||
const invalidCount = totalCount - validCount;
|
||||
|
||||
return {
|
||||
total: totalCount,
|
||||
valid: validCount,
|
||||
invalid: invalidCount,
|
||||
validRate: totalCount > 0 ? ((validCount / totalCount) * 100).toFixed(2) + '%' : '0%'
|
||||
};
|
||||
} catch (error) {
|
||||
console.error('获取代理统计信息失败:', error);
|
||||
return {
|
||||
total: 0,
|
||||
valid: 0,
|
||||
invalid: 0,
|
||||
validRate: '0%'
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
async sleep(ms) {
|
||||
return new Promise(resolve => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
// 验证新抓取的代理
|
||||
async validateNewProxies(proxies) {
|
||||
if (!proxies || proxies.length === 0) {
|
||||
return { validated: 0, valid: 0, invalid: 0 };
|
||||
}
|
||||
|
||||
console.log(`开始验证新抓取的 ${proxies.length} 个代理`);
|
||||
const results = await this.validateMultipleProxies(proxies, 2);
|
||||
|
||||
const validCount = results.filter(r => r.isValid).length;
|
||||
console.log(`新代理验证完成,${validCount}/${results.length} 个代理可用`);
|
||||
|
||||
return {
|
||||
validated: results.length,
|
||||
valid: validCount,
|
||||
invalid: results.length - validCount,
|
||||
results: results
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = ProxyValidator;
|
||||
Reference in New Issue
Block a user