diff --git a/README.en.md b/README.en.md index ca6baff..b84a3f7 100644 --- a/README.en.md +++ b/README.en.md @@ -43,7 +43,7 @@ graph TB Nginx --> Express["Express Backend
31 Routes | 20+ Services | JWT Auth"] React <-->|"WebSocket Real-time"| Express Express --> SQLite[("SQLite Database
39 Tables | AES-256 Encryption")] - Express --> LLM["🤖 LLM API
Doubao | OpenAI"] + Express --> LLM["🤖 LLM Model Pool
Doubao | DeepSeek | Qwen
OpenAI | ZhiPu | Local Models"] Express --> SSH["🖥️ SSH Remote Servers"] Express --> Webhook["🚨 Alert Webhook
Prometheus | Zabbix"] Express --> Notify["📬 Notifications
Email | WeCom | DingTalk"] @@ -55,6 +55,8 @@ graph TB - **Multi-Agent Collaboration** — 9 preset IT operations Agents with custom creation support, covering alerts, diagnostics, inspection, compliance, and more - **Visual Workflow** — Drag-and-drop orchestration with serial/parallel/conditional branching and real-time WebSocket progress pushing +- **HITL Human Approval** — Workflow supports approval nodes, pauses execution for human confirmation, supports auto-reject/wait on timeout, approval requests auto-push to WeCom/DingTalk/Email +- **AI Intelligent Remediation Loop** — Alerts trigger AI analysis → Auto-generate structured remediation commands → Approval node confirmation → Auto-execute remediation → Verify results and feedback - **Web SSH Terminal** — Interactive remote terminal based on xterm.js with real-time I/O, window auto-resize, and bidirectional communication - **Host Management Enhancement** — Multi-level group tree structure, CSV/JSON bulk import, automatic SSH information collection (CPU/Memory/Disk/OS) - **Data Import/Export** — Bulk server import via CSV/JSON, export alerts, audit logs, and report data @@ -95,13 +97,36 @@ Multi-layered security design to protect your servers and data: ## Supported AI Models -| Type | Provider/Framework | Support Status | Recommended Scenario | -|------|------------|---------|---------| -| **Domestic Cloud API** | VolcEngine · Doubao | ✅ Fully Supported | Recommended for users in China | -| **International Cloud API** | OpenAI (GPT-4o, etc.) | ✅ Fully Supported | Users with external network access | -| **Local Deployment** | Ollama / LM Studio / vLLM | ✅ Fully Supported | High data security requirements | +The project supports most mainstream large language models worldwide, managed through a unified AI model pool with primary-backup degradation chains. -**Recommended Local Models**: Qwen2.5, Llama3, DeepSeek-Coder, Yi, ChatGLM, Phi-3, etc. (OpenAI-compatible). +| Type | Provider/Model | Integration | Recommended Scenario | +|------|------------|---------|---------| +| **Domestic Cloud API** | VolcEngine · Doubao | Native API | Recommended for users in China | +| **Domestic Cloud API** | Alibaba Cloud · Qwen | OpenAI Compatible | Enterprise applications in China | +| **Domestic Cloud API** | DeepSeek | OpenAI Compatible | Strong code generation and reasoning | +| **Domestic Cloud API** | ZhiPu AI (GLM-4) | OpenAI Compatible | Excellent Chinese understanding | +| **Domestic Cloud API** | Moonshot · Kimi | OpenAI Compatible | Long text processing | +| **Domestic Cloud API** | Baidu · Wenxin | OpenAI Compatible | Enterprise applications in China | +| **Domestic Cloud API** | 01.AI (Yi) | OpenAI Compatible | Open source models | +| **Domestic Cloud API** | Baichuan | OpenAI Compatible | Open source models | +| **International Cloud API** | OpenAI (GPT-4o, o1, o3) | Native API | Users with external network access | +| **International Cloud API** | Anthropic Claude | OpenAI Compatible Layer | Complex reasoning tasks | +| **International Cloud API** | Meta Llama | Ollama/vLLM | Open source models | +| **International Cloud API** | Mistral | OpenAI Compatible | Open source models | +| **Local Deployment** | Ollama | OpenAI Compatible | High data security requirements | +| **Local Deployment** | LM Studio | OpenAI Compatible | Desktop local models | +| **Local Deployment** | vLLM | OpenAI Compatible | High-performance inference | +| **Local Deployment** | Other OpenAI Compatible | OpenAI Compatible | Any compatible service | + +**Recommended Local Models**: Qwen2.5, Llama3, DeepSeek-Coder, Yi, ChatGLM, Phi-3, Mistral, etc. + +**Features**: +- ✅ Unified AI model pool management, support adding unlimited models +- ✅ Primary-backup model degradation chain (primary_model_id + fallback_model_id) +- ✅ Independent circuit breaker per provider, preventing single point of failure +- ✅ Drag-and-drop sorting to define priority +- ✅ Model connectivity testing +- ✅ API Key inheritance mechanism to reduce duplicate configuration ## Tech Stack @@ -345,6 +370,25 @@ System overview displaying servers, alerts, tasks, and other core metrics. - Report template management - Report viewing and download +### Approval Center (HITL) + +- Workflow supports human approval nodes, can be drag-and-dropped in workflow editor +- Approval node configuration: approval description, timeout, timeout behavior (auto-reject/continue waiting) +- Unified approval center page showing pending, approved, and rejected approval requests +- Approval actions: approve/reject (with reason required) +- Approval requests auto-push notifications (WeCom, DingTalk, Email) +- Support quick approval from mobile devices +- WebSocket real-time push of approval status changes + +### AI Remediation Records + +- AI analysis of alerts auto-generates structured remediation commands (JSON format) +- Auto-creates remediation workflow: [Approval Node] → [Execute Remediation Agent Node] +- Auto-sets approval timeout based on risk level (low: 30min, medium: 1hr, high: 2hr) +- Displays complete remediation process: diagnosis report, remediation commands, risk level, execution status +- Supports viewing execution results and error information +- Deep integration with alert and task systems + ## Project Structure ``` diff --git a/README.md b/README.md index 24e4857..4ad8af2 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ graph TB Nginx --> Express["Express 后端
31个路由 | 20+个服务 | JWT认证"] React <-->|"WebSocket 实时通信"| Express Express --> SQLite[("SQLite 数据库
39张表 | AES-256加密")] - Express --> LLM["🤖 LLM API
豆包 | OpenAI"] + Express --> LLM["🤖 LLM 模型池
豆包 | DeepSeek | 通义千问
OpenAI | 智谱 | 本地模型"] Express --> SSH["🖥️ SSH 远程服务器"] Express --> Webhook["🚨 告警 Webhook
Prometheus | Zabbix"] Express --> Notify["📬 通知渠道
邮件 | 企业微信 | 钉钉"] @@ -55,6 +55,8 @@ graph TB - **多 Agent 协作** — 9 个预设运维 Agent,支持自定义创建,覆盖告警、诊断、巡检、变更等场景 - **可视化工作流** — 拖拽式编排,支持串行/并行/条件分支,实时 WebSocket 推送执行进度 +- **HITL 人工审批** — 工作流支持审批节点,暂停执行等待人工确认,支持超时自动拒绝/等待,审批请求自动推送企业微信/钉钉/邮箱 +- **AI 智能修复闭环** — 告警自动触发 AI 分析 → 自动生成结构化修复命令 → 审批节点确认 → 自动执行修复 → 验证结果反馈 - **Web SSH 终端** — 基于 xterm.js 的交互式远程终端,支持实时输入输出、窗口自适应、双向实时通信 - **主机管理增强** — 多级分组树形结构、CSV/JSON 批量导入、SSH 自动信息采集(CPU/内存/磁盘/OS) - **数据导入导出** — 支持 CSV/JSON 格式批量导入服务器,导出告警、审计日志、报表数据 @@ -95,13 +97,36 @@ graph TB ## 支持的 AI 模型 -| 类型 | 提供商/框架 | 支持情况 | 推荐场景 | -|------|------------|---------|---------| -| **国内云 API** | 火山引擎 · 豆包 (Doubao) | ✅ 完全支持 | 国内用户推荐,稳定快速 | -| **国际云 API** | OpenAI (GPT-4o 等) | ✅ 完全支持 | 有外网环境用户 | -| **本地部署** | Ollama / LM Studio / vLLM | ✅ 完全支持 | 数据安全要求高,内网部署 | +项目支持国内外绝大多数主流大模型,通过 AI 模型池统一管理,支持主备降级链。 -**本地模型推荐**:Qwen2.5、Llama3、DeepSeek-Coder、Yi、ChatGLM、Phi-3 等开源大模型(兼容 OpenAI 接口)。 +| 类型 | 提供商/模型 | 接入方式 | 推荐场景 | +|------|------------|---------|---------| +| **国内云 API** | 火山引擎 · 豆包 (Doubao) | 原生 API | 国内用户推荐,稳定快速 | +| **国内云 API** | 阿里云 · 通义千问 (Qwen) | OpenAI 兼容 | 国内企业级应用 | +| **国内云 API** | DeepSeek (深度求索) | OpenAI 兼容 | 代码生成、推理能力强 | +| **国内云 API** | 智谱 AI (GLM-4) | OpenAI 兼容 | 中文理解优秀 | +| **国内云 API** | Moonshot · Kimi | OpenAI 兼容 | 长文本处理 | +| **国内云 API** | 百度 · 文心一言 | OpenAI 兼容 | 国内企业应用 | +| **国内云 API** | 零一万物 (Yi) | OpenAI 兼容 | 开源模型 | +| **国内云 API** | 百川智能 (Baichuan) | OpenAI 兼容 | 开源模型 | +| **国际云 API** | OpenAI (GPT-4o, o1, o3) | 原生 API | 有外网环境用户 | +| **国际云 API** | Anthropic Claude | OpenAI 兼容层 | 复杂推理任务 | +| **国际云 API** | Meta Llama | Ollama/vLLM | 开源模型 | +| **国际云 API** | Mistral | OpenAI 兼容 | 开源模型 | +| **本地部署** | Ollama | OpenAI 兼容 | 数据安全要求高,内网部署 | +| **本地部署** | LM Studio | OpenAI 兼容 | 桌面端本地模型 | +| **本地部署** | vLLM | OpenAI 兼容 | 高性能推理服务 | +| **本地部署** | 其他 OpenAI 兼容接口 | OpenAI 兼容 | 任意兼容服务 | + +**本地模型推荐**:Qwen2.5、Llama3、DeepSeek-Coder、Yi、ChatGLM、Phi-3、Mistral 等开源大模型。 + +**特性**: +- ✅ AI 模型池统一管理,支持添加任意数量模型 +- ✅ 主备模型降级链(primary_model_id + fallback_model_id) +- ✅ 每个提供商独立熔断器,防止单点故障 +- ✅ 拖拽排序定义优先级 +- ✅ 模型连通性测试验证 +- ✅ API Key 继承机制,减少重复配置 ## 技术栈 @@ -350,6 +375,25 @@ npm run dev - 报告模板管理 - 报告查看与下载 +### 审批中心(HITL) + +- 工作流支持人工审批节点,可在工作流编排中拖拽添加 +- 审批节点支持配置:审批说明、超时时间、超时行为(自动拒绝/继续等待) +- 统一审批中心页面,展示待审批、已通过、已拒绝的审批请求 +- 审批操作:通过/拒绝(需填写原因) +- 审批请求自动推送通知(企业微信、钉钉、邮箱) +- 支持从手机移动端快速审批 +- WebSocket 实时推送审批状态变更 + +### AI 修复记录 + +- AI 分析告警后自动生成结构化修复命令(JSON 格式) +- 自动创建修复工作流:[审批节点] → [执行修复 Agent 节点] +- 根据风险等级自动设置审批超时时间(low: 30分钟, medium: 1小时, high: 2小时) +- 展示完整修复流程:诊断报告、修复命令、风险等级、执行状态 +- 支持查看执行结果和错误信息 +- 与告警、任务系统深度集成 + ## 项目结构 ``` @@ -357,15 +401,15 @@ npm run dev │ └── src/ │ ├── app.ts # Express 应用入口 │ ├── models/database.ts # SQLite 数据库初始化和预设数据 -│ ├── routes/ # API 路由(31 个模块) -│ ├── services/ # 业务逻辑(20+ 个服务) +│ ├── routes/ # API 路由(32 个模块) +│ ├── services/ # 业务逻辑(20+ 个服务,含 aiRemediationService) │ ├── middleware/ # 中间件(6 个:auth, errorHandler, rateLimiter, validation, trace, commandFilter) │ ├── websocket/ # WebSocket 实时通信 │ └── utils/ # 工具函数 ├── frontend/ │ └── src/ │ ├── App.tsx # React 应用入口 -│ ├── pages/ # 页面组件(27 个) +│ ├── pages/ # 页面组件(28 个,含 Approvals、AiRemediations) │ ├── components/ # 通用组件 │ ├── contexts/ # React Context │ ├── hooks/ # 自定义 Hooks diff --git a/backend/src/app.ts b/backend/src/app.ts index 7e69f70..a1e04f0 100644 --- a/backend/src/app.ts +++ b/backend/src/app.ts @@ -47,6 +47,8 @@ import sshKeyRoutes from './routes/sshKeyRoutes'; import topologyRoutes from './routes/topologyRoutes'; import changeRoutes from './routes/changeRoutes'; import aiModelRoutes from './routes/aiModelRoutes'; +import approvalRoutes from './routes/approvalRoutes'; +import aiRemediationRoutes from './routes/aiRemediationRoutes'; import { schedulerService } from './services/schedulerService'; import { reportService } from './services/reportService'; import { copilotService } from './services/copilotService'; @@ -71,6 +73,7 @@ import { alertAutoAnalyzer } from './services/alertAutoAnalyzer'; import { alertCorrelationService } from './services/alertCorrelationService'; import { setServerInstances } from './services/restartService'; import { checkDbskiterAvailability } from './services/dbskiterService'; +import { timeoutApproval } from './services/workflowExecutor'; import { queueService } from './services/queueService'; import importExportRouter from './routes/importExportRoutes'; import alertAutoRouter from './routes/alertAutoRoutes'; @@ -153,6 +156,7 @@ async function initializeApp() { initTokenBlacklist(); startCircuitBreakerCleanup(); + startApprovalTimeoutChecker(); logger.info('✅ Application initialization complete'); } @@ -259,6 +263,8 @@ app.use('/api/ssh-keys', rateLimiter, sshKeyRoutes); app.use('/api/topology', rateLimiter, topologyRoutes); app.use('/api/changes', rateLimiter, changeRoutes); app.use('/api/ai-models', rateLimiter, aiModelRoutes); +app.use('/api/approvals', rateLimiter, approvalRoutes); +app.use('/api/ai-remediations', rateLimiter, aiRemediationRoutes); app.use('/api', rateLimiter, alertAutoRouter); app.use('/api', rateLimiter, linkageRouter); app.use('/api', rateLimiter, networkDiscoveryRouter); @@ -270,6 +276,32 @@ app.use(errorHandler); const PORT = env.PORT; const HOST = process.env.HOST || '0.0.0.0'; +// 审批超时检查器 +let approvalTimeoutInterval: NodeJS.Timeout | null = null; + +function startApprovalTimeoutChecker() { + // 每 30 秒检查一次超时的审批请求 + approvalTimeoutInterval = setInterval(async () => { + try { + const expiredApprovals = db.prepare(` + SELECT id FROM approval_requests + WHERE status = 'pending' + AND timeout_at IS NOT NULL + AND timeout_at < datetime('now', 'localtime') + `).all() as Array<{ id: string }>; + + for (const approval of expiredApprovals) { + logger.info(`⏰ Approval ${approval.id} timed out, processing...`); + await timeoutApproval(approval.id); + } + } catch (error) { + logger.error('Error in approval timeout checker:', error); + } + }, 30000); + + logger.info('✅ Approval timeout checker started (checking every 30s)'); +} + // 等待数据库初始化完成后再启动 HTTP 服务器,避免竞态 async function startServer() { await initializeApp(); @@ -294,6 +326,11 @@ const gracefulShutdown = async (signal: string) => { process.exit(1); }, 30000); + // 停止审批超时检查器 + if (approvalTimeoutInterval) { + clearInterval(approvalTimeoutInterval); + } + try { await Promise.all([ new Promise((resolve) => httpServer.close(() => { diff --git a/backend/src/models/migrations/index.ts b/backend/src/models/migrations/index.ts index 99bfa9b..358b194 100644 --- a/backend/src/models/migrations/index.ts +++ b/backend/src/models/migrations/index.ts @@ -13,6 +13,7 @@ import v013NetworkDiscovery from './v013_network_discovery'; import v014AlertCorrelation from './v014_alert_correlation'; import v015NotificationColumns from './v015_notification_columns'; import v016DatabasesTable from './v016_databases_table'; +import v017ApprovalRequests from './v017_approval_requests'; // v009 / v010 导出的不是 Migration 对象,手动包装 const v009NetworkCompleteCoverage: Migration = { @@ -48,6 +49,7 @@ export const ALL_MIGRATIONS: Migration[] = [ v014AlertCorrelation, v015NotificationColumns, v016DatabasesTable, + v017ApprovalRequests, ]; export function createMigrationManager(db: any): MigrationManager { diff --git a/backend/src/models/migrations/v017_approval_requests.ts b/backend/src/models/migrations/v017_approval_requests.ts new file mode 100644 index 0000000..b0e44fb --- /dev/null +++ b/backend/src/models/migrations/v017_approval_requests.ts @@ -0,0 +1,40 @@ +import { Migration } from './migrationFramework'; + +const v017ApprovalRequests: Migration = { + id: '20260614000017', + version: 17, + name: 'approval_requests', + description: 'Add approval_requests table for HITL workflow', + + up: async (db: any) => { + db.exec(` + CREATE TABLE IF NOT EXISTS approval_requests ( + id TEXT PRIMARY KEY, + task_id TEXT NOT NULL, + node_id TEXT NOT NULL, + node_label TEXT NOT NULL, + description TEXT, + status TEXT NOT NULL DEFAULT 'pending', + requested_by TEXT, + approved_by TEXT, + approved_at DATETIME, + reject_reason TEXT, + timeout_at DATETIME, + timeout_action TEXT DEFAULT 'reject', + created_at DATETIME DEFAULT (datetime('now','localtime')), + updated_at DATETIME DEFAULT (datetime('now','localtime')), + FOREIGN KEY (task_id) REFERENCES tasks(id) ON DELETE CASCADE + ); + + CREATE INDEX IF NOT EXISTS idx_approval_task ON approval_requests(task_id); + CREATE INDEX IF NOT EXISTS idx_approval_status ON approval_requests(status); + CREATE INDEX IF NOT EXISTS idx_approval_created ON approval_requests(created_at DESC); + `); + }, + + down: async (db: any) => { + db.exec(`DROP TABLE IF EXISTS approval_requests`); + } +}; + +export default v017ApprovalRequests; diff --git a/backend/src/routes/aiRemediationRoutes.ts b/backend/src/routes/aiRemediationRoutes.ts new file mode 100644 index 0000000..745fa09 --- /dev/null +++ b/backend/src/routes/aiRemediationRoutes.ts @@ -0,0 +1,47 @@ +import { Router, Request, Response } from 'express'; +import { aiRemediationService } from '../services/aiRemediationService'; +import { authenticateToken } from '../middleware/auth'; + +const router = Router(); + +// 获取所有 AI 修复记录 +router.get('/', authenticateToken, (req: Request, res: Response) => { + try { + const limit = parseInt(req.query.limit as string) || 50; + const records = aiRemediationService.listRecords(limit); + res.json({ success: true, data: records }); + } catch (error) { + console.error('Failed to list AI remediations:', error); + res.status(500).json({ success: false, message: 'Failed to list AI remediations' }); + } +}); + +// 根据 ID 获取 AI 修复记录 +router.get('/:id', authenticateToken, (req: Request, res: Response) => { + try { + const record = aiRemediationService.getRecord(req.params.id); + if (!record) { + return res.status(404).json({ success: false, message: 'AI remediation not found' }); + } + res.json({ success: true, data: record }); + } catch (error) { + console.error('Failed to get AI remediation:', error); + res.status(500).json({ success: false, message: 'Failed to get AI remediation' }); + } +}); + +// 根据告警 ID 获取 AI 修复记录 +router.get('/alert/:alertId', authenticateToken, (req: Request, res: Response) => { + try { + const record = aiRemediationService.getByAlertId(req.params.alertId); + if (!record) { + return res.status(404).json({ success: false, message: 'AI remediation not found for this alert' }); + } + res.json({ success: true, data: record }); + } catch (error) { + console.error('Failed to get AI remediation by alert:', error); + res.status(500).json({ success: false, message: 'Failed to get AI remediation' }); + } +}); + +export default router; diff --git a/backend/src/routes/approvalRoutes.ts b/backend/src/routes/approvalRoutes.ts new file mode 100644 index 0000000..59feb8d --- /dev/null +++ b/backend/src/routes/approvalRoutes.ts @@ -0,0 +1,126 @@ +import { Router, Request, Response } from 'express'; +import db from '../models/database'; +import { requireRole } from '../middleware/auth'; +import { resumeWorkflow, rejectWorkflow } from '../services/workflowExecutor'; +import type { ApprovalRequest } from '../types'; + +const router = Router(); + +// 查询审批列表 +router.get('/', requireRole('admin', 'operator'), (req: Request, res: Response) => { + try { + const { status, limit } = req.query; + let query = 'SELECT * FROM approval_requests'; + const params: unknown[] = []; + + if (status) { + query += ' WHERE status = ?'; + params.push(status); + } + + query += ' ORDER BY created_at DESC'; + + if (limit) { + query += ' LIMIT ?'; + params.push(parseInt(limit as string)); + } + + const approvals = db.prepare(query).all(...params) as ApprovalRequest[]; + res.json({ success: true, data: approvals }); + } catch (error) { + res.status(500).json({ success: false, error: 'Failed to fetch approvals' }); + } +}); + +// 查询待审批数量(用于前端角标) +router.get('/pending/count', requireRole('admin', 'operator'), (_req: Request, res: Response) => { + try { + const result = db.prepare("SELECT COUNT(*) as count FROM approval_requests WHERE status = 'pending'").get() as { count: number }; + res.json({ success: true, data: { count: result.count } }); + } catch (error) { + res.status(500).json({ success: false, error: 'Failed to count pending approvals' }); + } +}); + +// 查询审批详情 +router.get('/:id', requireRole('admin', 'operator'), (req: Request, res: Response) => { + try { + const approval = db.prepare('SELECT * FROM approval_requests WHERE id = ?').get(req.params.id) as ApprovalRequest | undefined; + if (!approval) { + return res.status(404).json({ success: false, error: 'Approval not found' }); + } + res.json({ success: true, data: approval }); + } catch (error) { + res.status(500).json({ success: false, error: 'Failed to fetch approval' }); + } +}); + +// 审批通过 +router.post('/:id/approve', requireRole('admin', 'operator'), async (req: Request, res: Response) => { + try { + const { id } = req.params; + const { comment } = req.body; + const userId = (req as any).user?.id || 'unknown'; + + const approval = db.prepare('SELECT * FROM approval_requests WHERE id = ?').get(id) as ApprovalRequest | undefined; + if (!approval) { + return res.status(404).json({ success: false, error: 'Approval not found' }); + } + + if (approval.status !== 'pending') { + return res.status(400).json({ success: false, error: `Approval already ${approval.status}` }); + } + + // 异步恢复工作流,不阻塞响应 + res.json({ success: true, message: 'Approval granted, resuming workflow' }); + + // 恢复工作流执行(异步) + setImmediate(async () => { + try { + await resumeWorkflow(approval.task_id, id, userId, comment); + } catch (error) { + console.error('Failed to resume workflow:', error); + } + }); + } catch (error) { + res.status(500).json({ success: false, error: 'Failed to approve' }); + } +}); + +// 审批拒绝 +router.post('/:id/reject', requireRole('admin', 'operator'), async (req: Request, res: Response) => { + try { + const { id } = req.params; + const { reason } = req.body; + const userId = (req as any).user?.id || 'unknown'; + + if (!reason || typeof reason !== 'string' || reason.trim().length === 0) { + return res.status(400).json({ success: false, error: 'Reject reason is required' }); + } + + const approval = db.prepare('SELECT * FROM approval_requests WHERE id = ?').get(id) as ApprovalRequest | undefined; + if (!approval) { + return res.status(404).json({ success: false, error: 'Approval not found' }); + } + + if (approval.status !== 'pending') { + return res.status(400).json({ success: false, error: `Approval already ${approval.status}` }); + } + + // 异步拒绝工作流,不阻塞响应 + res.json({ success: true, message: 'Approval rejected, workflow terminated' }); + + // 拒绝工作流(异步) + setImmediate(async () => { + try { + await rejectWorkflow(approval.task_id, id, userId, reason); + } catch (error) { + console.error('Failed to reject workflow:', error); + } + }); + } catch (error) { + res.status(500).json({ success: false, error: 'Failed to reject' }); + } +}); + +export default router; diff --git a/backend/src/services/aiRemediationService.ts b/backend/src/services/aiRemediationService.ts new file mode 100644 index 0000000..91613af --- /dev/null +++ b/backend/src/services/aiRemediationService.ts @@ -0,0 +1,608 @@ +/** + * ============================================================================= + * ITOps Agent Platform - AI 修复服务 + * ============================================================================= + * + * 将 AI 分析结果转化为可执行的修复工作流,并走审批流程 + * + * 工作流结构: + * [审批节点] → [执行修复 Agent 节点] → [验证结果 Agent 节点] + * + * 流程: + * AI 分析完成 → 提取修复命令 → 生成临时工作流 → 执行工作流 + * → 遇到审批节点暂停 → 等待人工审批 → 审批通过 → 执行修复命令 + * → 验证修复结果 → 反馈通知 + 审计日志 + * ============================================================================= + */ + +import { randomUUID } from 'crypto'; +import db from '../models/database'; +import { logger } from '../utils/logger'; +import { executeWorkflow } from './workflowExecutor'; +import { WorkflowNode, WorkflowEdge, WorkflowParsed } from '../types'; + +interface AiRemediationInput { + alertId: string; + alertTitle: string; + alertContent: string; + alertSeverity: string; + deviceId: string; + deviceName: string; + deviceIp: string; + deviceType: 'server' | 'network_device'; + diagnosis: string; + remediationCommands: string[]; + riskLevel: 'low' | 'medium' | 'high'; +} + +interface AiRemediationRecord { + id: string; + alert_id: string; + device_id: string; + device_name: string; + device_ip: string; + task_id: string | null; + workflow_id: string | null; + diagnosis: string; + remediation_commands: string[]; + risk_level: 'low' | 'medium' | 'high'; + status: 'pending' | 'waiting_approval' | 'approved' | 'rejected' | 'executing' | 'completed' | 'failed'; + execution_result?: string; + error_message?: string; + created_at: string; + updated_at: string; +} + +class AiRemediationService { + private initialized = false; + + /** 初始化数据库表 */ + private ensureTable(): void { + if (this.initialized) return; + + db.exec(` + CREATE TABLE IF NOT EXISTS ai_remediations ( + id TEXT PRIMARY KEY, + alert_id TEXT NOT NULL, + device_id TEXT, + device_name TEXT, + device_ip TEXT, + task_id TEXT, + workflow_id TEXT, + diagnosis TEXT, + remediation_commands TEXT, + risk_level TEXT CHECK(risk_level IN ('low', 'medium', 'high')), + status TEXT CHECK(status IN ('pending', 'waiting_approval', 'approved', 'rejected', 'executing', 'completed', 'failed')), + execution_result TEXT, + error_message TEXT, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + FOREIGN KEY (alert_id) REFERENCES alerts(id), + FOREIGN KEY (task_id) REFERENCES tasks(id) + ) + `); + + this.initialized = true; + logger.info('✅ AI Remediation Service initialized'); + } + + /** + * 根据 AI 分析结果创建修复工作流并执行 + * 这是断点连接的核心方法 + */ + async createAndExecute(input: AiRemediationInput): Promise { + this.ensureTable(); + + const id = randomUUID(); + const now = new Date().toISOString(); + + const record: AiRemediationRecord = { + id, + alert_id: input.alertId, + device_id: input.deviceId, + device_name: input.deviceName, + device_ip: input.deviceIp, + task_id: null, + workflow_id: null, + diagnosis: input.diagnosis, + remediation_commands: input.remediationCommands, + risk_level: input.riskLevel, + status: 'pending', + created_at: now, + updated_at: now, + }; + + try { + // 1. 保存修复记录 + this.saveRecord(record); + logger.info(`🔧 [AI Remediation] Created record ${id} for alert ${input.alertId}`); + + // 2. 生成修复工作流 + const { workflow, workflowParsed } = this.generateRemediationWorkflow(input, id); + + // 3. 保存工作流到数据库 + const workflowId = this.saveWorkflow(workflow); + record.workflow_id = workflowId; + record.workflow_id = workflowId; + + // 4. 创建任务 + const taskId = randomUUID(); + db.prepare(` + INSERT INTO tasks (id, workflow_id, name, status, context, created_at) + VALUES (?, ?, ?, 'pending', ?, datetime('now','localtime')) + `).run( + taskId, + workflowId, + `AI 修复: ${input.alertTitle}`, + JSON.stringify({ + alert_id: input.alertId, + device_id: input.deviceId, + device_ip: input.deviceIp, + remediation_id: id, + risk_level: input.riskLevel, + }) + ); + record.task_id = taskId; + + // 5. 更新记录状态 + record.status = 'waiting_approval'; + this.updateRecord(record); + + // 6. 异步执行工作流(会在审批节点暂停) + setImmediate(async () => { + try { + await executeWorkflow(taskId, workflowParsed, undefined, { + alert_id: input.alertId, + device_id: input.deviceId, + device_ip: input.deviceIp, + remediation_id: id, + risk_level: input.riskLevel, + }); + } catch (err) { + logger.error(`[AI Remediation] Workflow execution failed:`, err); + record.status = 'failed'; + record.error_message = err instanceof Error ? err.message : String(err); + this.updateRecord(record); + } + }); + + logger.info(`✅ [AI Remediation] Workflow created and executing: taskId=${taskId}, workflowId=${workflowId}`); + return record; + + } catch (err) { + logger.error(`[AI Remediation] Failed to create remediation:`, err); + record.status = 'failed'; + record.error_message = err instanceof Error ? err.message : String(err); + this.updateRecord(record); + return record; + } + } + + /** + * 生成修复工作流 + * 结构:[审批节点] → [执行修复 Agent 节点] → [验证结果 Agent 节点] + * [回滚节点](断开连接,验证失败时由 finalizeWorkflow 自动触发) + */ + private generateRemediationWorkflow( + input: AiRemediationInput, + remediationId: string + ): { workflow: any; workflowParsed: WorkflowParsed } { + const approvalNodeId = randomUUID(); + const executionNodeId = randomUUID(); + const verificationNodeId = randomUUID(); + const rollbackNodeId = randomUUID(); + + // 审批节点配置 + const timeoutSeconds = input.riskLevel === 'high' ? 7200 : input.riskLevel === 'medium' ? 3600 : 1800; + + // 构建修复命令的 prompt + const commandsText = input.remediationCommands.map((cmd, i) => `${i + 1}. ${cmd}`).join('\n'); + const executionPrompt = `你是一个运维执行专家。请在设备 ${input.deviceName}(${input.deviceIp}) 上执行以下修复命令: + +${commandsText} + +执行要求: +1. 按顺序执行每个命令 +2. 每个命令执行后检查返回码 +3. 如果命令失败,记录错误信息但继续执行后续命令 +4. 最后汇总执行结果 + +告警信息: +- 告警标题: ${input.alertTitle} +- 告警级别: ${input.alertSeverity} +- 风险等级: ${input.riskLevel} + +AI 诊断结果: +${input.diagnosis.substring(0, 1000)} + +请开始执行修复命令。`; + + // 验证节点 prompt:根据修复命令生成对应的验证逻辑 + const verificationPrompt = this.generateVerificationPrompt(input, commandsText); + + // 回滚节点 prompt:生成修复命令的逆向操作 + const rollbackPrompt = this.generateRollbackPrompt(input, commandsText); + + // 节点定义 + const nodes: WorkflowNode[] = [ + { + id: approvalNodeId, + type: 'approval', + position: { x: 100, y: 200 }, + data: { + label: `审批修复方案 (${input.riskLevel.toUpperCase()} 风险)`, + description: `AI 建议对 ${input.deviceName}(${input.deviceIp}) 执行修复操作,共 ${input.remediationCommands.length} 条命令`, + approvalConfig: { + description: `修复方案:\n${commandsText}\n\n风险等级: ${input.riskLevel}\n目标设备: ${input.deviceName}(${input.deviceIp})`, + timeout: timeoutSeconds, + timeoutAction: 'reject' as const, + approvers: ['admin'], + }, + }, + }, + { + id: executionNodeId, + type: 'agent', + position: { x: 400, y: 200 }, + data: { + label: '执行修复命令', + agentId: 'server-command-agent', + avatar: '🔧', + description: '在目标设备上执行 AI 建议的修复命令', + prompt: executionPrompt, + inputKey: 'approval_result', + outputKey: 'execution_result', + }, + }, + { + id: verificationNodeId, + type: 'agent', + position: { x: 700, y: 200 }, + data: { + label: '验证修复结果', + agentId: 'server-command-agent', + avatar: '✅', + description: '验证修复命令是否成功执行,检查系统状态是否恢复正常', + prompt: verificationPrompt, + inputKey: 'execution_result', + outputKey: 'verification_result', + }, + }, + { + id: rollbackNodeId, + type: 'agent', + position: { x: 700, y: 400 }, + data: { + label: '自动回滚', + agentId: 'server-command-agent', + avatar: '↩️', + description: '验证失败时自动执行回滚操作,恢复系统到修复前状态', + prompt: rollbackPrompt, + inputKey: 'verification_result', + outputKey: 'rollback_result', + }, + }, + ]; + + // 边定义:审批 → 执行 → 验证 + // 回滚节点断开连接,由 finalizeWorkflow 在验证失败时自动触发 + const edges: WorkflowEdge[] = [ + { + id: `edge-${approvalNodeId}-${executionNodeId}`, + source: approvalNodeId, + target: executionNodeId, + animated: true, + }, + { + id: `edge-${executionNodeId}-${verificationNodeId}`, + source: executionNodeId, + target: verificationNodeId, + animated: true, + }, + ]; + + const workflow = { + id: randomUUID(), + name: `AI 修复工作流: ${input.alertTitle}`, + description: `AI 自动生成的修复工作流,针对告警: ${input.alertTitle}`, + nodes: JSON.stringify(nodes), + edges: JSON.stringify(edges), + agent_configs: JSON.stringify({}), + is_template: 0, + created_at: new Date().toISOString(), + updated_at: new Date().toISOString(), + }; + + const workflowParsed: WorkflowParsed = { + id: workflow.id, + name: workflow.name, + description: workflow.description, + nodes, + edges, + agent_configs: {}, + is_template: 0, + created_at: workflow.created_at, + updated_at: workflow.updated_at, + }; + + return { workflow, workflowParsed }; + } + + /** + * 根据修复命令生成验证 prompt + * 智能推断需要执行的验证命令 + */ + private generateVerificationPrompt(input: AiRemediationInput, commandsText: string): string { + // 根据修复命令推断验证逻辑 + const verificationCmds: string[] = []; + const lowerCmds = input.remediationCommands.map(c => c.toLowerCase()); + + // 服务重启类 → 检查服务状态 + if (lowerCmds.some(c => c.includes('systemctl restart') || c.includes('service') || c.includes('restart'))) { + const services = input.remediationCommands + .filter(c => /systemctl\s+(restart|start|stop)/i.test(c)) + .map(c => { + const match = c.match(/systemctl\s+(?:restart|start|stop)\s+(\S+)/i); + return match ? match[1] : null; + }) + .filter(Boolean); + for (const svc of [...new Set(services)]) { + verificationCmds.push(`systemctl status ${svc} --no-pager`); + verificationCmds.push(`systemctl is-active ${svc}`); + } + if (verificationCmds.length === 0) { + verificationCmds.push('systemctl list-units --failed --no-pager'); + } + } + + // 磁盘清理类 → 检查磁盘空间 + if (lowerCmds.some(c => c.includes('rm ') || c.includes('clean') || c.includes('du ') || c.includes('disk'))) { + verificationCmds.push('df -h'); + } + + // 内存相关 → 检查内存 + if (lowerCmds.some(c => c.includes('memory') || c.includes('swap') || c.includes('oom') || c.includes('free'))) { + verificationCmds.push('free -m'); + } + + // CPU 相关 → 检查负载 + if (lowerCmds.some(c => c.includes('cpu') || c.includes('kill') || c.includes('top') || c.includes('nice'))) { + verificationCmds.push('uptime'); + verificationCmds.push('top -bn1 | head -5'); + } + + // 网络相关 → 检查网络连通性 + if (lowerCmds.some(c => c.includes('network') || c.includes('iptables') || c.includes('firewall') || c.includes('nginx'))) { + verificationCmds.push('ss -tlnp 2>/dev/null || netstat -tlnp 2>/dev/null'); + } + + // Docker 相关 → 检查容器状态 + if (lowerCmds.some(c => c.includes('docker'))) { + verificationCmds.push('docker ps --format "table {{.Names}}\t{{.Status}}"'); + } + + // 如果无法推断,使用通用验证 + if (verificationCmds.length === 0) { + verificationCmds.push('uptime'); + verificationCmds.push('systemctl list-units --failed --no-pager 2>/dev/null || echo "no systemctl"'); + verificationCmds.push('dmesg -T | tail -10 2>/dev/null || echo "no dmesg"'); + } + + const verificationCmdsText = verificationCmds.map((cmd, i) => `${i + 1}. ${cmd}`).join('\n'); + + return `你是一个运维验证专家。修复命令已在设备 ${input.deviceName}(${input.deviceIp}) 上执行完毕。 +请执行以下验证命令,确认修复是否成功: + +${verificationCmdsText} + +验证要求: +1. 依次执行上述验证命令 +2. 分析每条命令的输出,判断相关指标是否恢复正常 +3. 对比修复前的告警信息:${input.alertTitle} +4. 输出验证结论: + - ✅ 修复成功:指标恢复正常 + - ⚠️ 部分恢复:部分指标改善但仍有异常 + - ❌ 修复失败:指标未改善或恶化 + +告警原始信息: +- 告警标题: ${input.alertTitle} +- 告警级别: ${input.alertSeverity} +- 执行的修复命令: +${commandsText} + +请开始验证。`; + } + + /** + * 根据修复命令生成回滚 prompt + * 智能推断需要执行的回滚命令 + */ + private generateRollbackPrompt(input: AiRemediationInput, commandsText: string): string { + const rollbackCmds: string[] = []; + const lowerCmds = input.remediationCommands.map(c => c.toLowerCase()); + + // 服务重启类 → 停止服务 + if (lowerCmds.some(c => c.includes('systemctl start') || c.includes('systemctl restart'))) { + const services = input.remediationCommands + .filter(c => /systemctl\s+(start|restart)/i.test(c)) + .map(c => { + const match = c.match(/systemctl\s+(?:start|restart)\s+(\S+)/i); + return match ? match[1] : null; + }) + .filter(Boolean); + for (const svc of [...new Set(services)]) { + rollbackCmds.push(`systemctl stop ${svc}`); + } + } + + // 服务停止类 → 启动服务 + if (lowerCmds.some(c => c.includes('systemctl stop'))) { + const services = input.remediationCommands + .filter(c => /systemctl\s+stop/i.test(c)) + .map(c => { + const match = c.match(/systemctl\s+stop\s+(\S+)/i); + return match ? match[1] : null; + }) + .filter(Boolean); + for (const svc of [...new Set(services)]) { + rollbackCmds.push(`systemctl start ${svc}`); + } + } + + // 配置备份类 → 恢复配置 + if (lowerCmds.some(c => c.includes('cp') && c.includes('.bak'))) { + const backups = input.remediationCommands + .filter(c => /cp\s+\S+\s+\S+\.bak/i.test(c)) + .map(c => { + const match = c.match(/cp\s+(\S+)\s+(\S+)\.bak/i); + return match ? { original: match[1], backup: match[2] } : null; + }) + .filter((bk): bk is { original: string; backup: string } => bk !== null); + for (const bk of backups) { + rollbackCmds.push(`cp ${bk.backup}.bak ${bk.original}`); + } + } + + // Docker 容器类 → 停止/删除容器 + if (lowerCmds.some(c => c.includes('docker run') || c.includes('docker start'))) { + const containers = input.remediationCommands + .filter(c => /docker\s+(run|start)\s+.*?(-n|--name)\s+(\S+)/i.test(c)) + .map(c => { + const match = c.match(/docker\s+(?:run|start)\s+.*?(?:-n|--name)\s+(\S+)/i); + return match ? match[1] : null; + }) + .filter(Boolean); + for (const container of [...new Set(containers)]) { + rollbackCmds.push(`docker stop ${container}`); + rollbackCmds.push(`docker rm ${container}`); + } + } + + // 防火墙规则类 → 删除规则 + if (lowerCmds.some(c => c.includes('iptables -A') || c.includes('firewall-cmd --add'))) { + rollbackCmds.push('# 注意:防火墙规则回滚需要手动确认'); + rollbackCmds.push('iptables -L -n --line-numbers'); + rollbackCmds.push('firewall-cmd --list-all'); + } + + // 如果无法推断,提供通用回滚指导 + if (rollbackCmds.length === 0) { + rollbackCmds.push('# 无法自动推断回滚命令,请执行以下检查:'); + rollbackCmds.push('systemctl list-units --failed --no-pager'); + rollbackCmds.push('dmesg -T | tail -20'); + rollbackCmds.push('journalctl -xe --no-pager | tail -50'); + } + + const rollbackCmdsText = rollbackCmds.map((cmd, i) => `${i + 1}. ${cmd}`).join('\n'); + + return `你是一个运维回滚专家。修复命令在设备 ${input.deviceName}(${input.deviceIp}) 上执行后验证失败,需要执行回滚操作。 + +请执行以下回滚命令,恢复系统到修复前状态: + +${rollbackCmdsText} + +回滚要求: +1. 按顺序执行回滚命令 +2. 每个命令执行后检查返回码 +3. 如果回滚命令失败,记录错误但继续执行后续回滚 +4. 最后汇总回滚结果 + +原始修复命令(供参考): +${commandsText} + +告警信息: +- 告警标题: ${input.alertTitle} +- 告警级别: ${input.alertSeverity} + +请开始执行回滚。`; + } + + /** 保存工作流到数据库 */ + private saveWorkflow(workflow: any): string { + db.prepare(` + INSERT INTO workflows (id, name, description, nodes, edges, agent_configs, is_template, created_at, updated_at) + VALUES (@id, @name, @description, @nodes, @edges, @agent_configs, @is_template, @created_at, @updated_at) + `).run(workflow); + return workflow.id; + } + + /** 保存修复记录 */ + private saveRecord(record: AiRemediationRecord): void { + db.prepare(` + INSERT INTO ai_remediations ( + id, alert_id, device_id, device_name, device_ip, task_id, workflow_id, + diagnosis, remediation_commands, risk_level, status, execution_result, + error_message, created_at, updated_at + ) VALUES ( + @id, @alert_id, @device_id, @device_name, @device_ip, @task_id, @workflow_id, + @diagnosis, @remediation_commands, @risk_level, @status, @execution_result, + @error_message, @created_at, @updated_at + ) + `).run({ + ...record, + remediation_commands: JSON.stringify(record.remediation_commands), + }); + } + + /** 更新修复记录 */ + private updateRecord(record: AiRemediationRecord): void { + db.prepare(` + UPDATE ai_remediations SET + task_id = @task_id, + workflow_id = @workflow_id, + status = @status, + execution_result = @execution_result, + error_message = @error_message, + updated_at = @updated_at + WHERE id = @id + `).run({ + ...record, + remediation_commands: JSON.stringify(record.remediation_commands), + }); + } + + /** 获取修复记录 */ + getRecord(id: string): AiRemediationRecord | null { + this.ensureTable(); + const row = db.prepare('SELECT * FROM ai_remediations WHERE id = ?').get(id) as any; + if (!row) return null; + return { + ...row, + remediation_commands: JSON.parse(row.remediation_commands || '[]'), + }; + } + + /** 根据告警 ID 获取修复记录 */ + getByAlertId(alertId: string): AiRemediationRecord | null { + this.ensureTable(); + const row = db.prepare('SELECT * FROM ai_remediations WHERE alert_id = ? ORDER BY created_at DESC LIMIT 1').get(alertId) as any; + if (!row) return null; + return { + ...row, + remediation_commands: JSON.parse(row.remediation_commands || '[]'), + }; + } + + /** 获取所有修复记录 */ + listRecords(limit: number = 50): AiRemediationRecord[] { + this.ensureTable(); + const rows = db.prepare('SELECT * FROM ai_remediations ORDER BY created_at DESC LIMIT ?').all(limit) as any[]; + return rows.map(row => ({ + ...row, + remediation_commands: JSON.parse(row.remediation_commands || '[]'), + })); + } + + /** 更新修复状态(由工作流执行器调用) */ + updateStatus(remediationId: string, status: AiRemediationRecord['status'], result?: string): void { + this.ensureTable(); + const record = this.getRecord(remediationId); + if (!record) return; + record.status = status; + if (result) record.execution_result = result; + record.updated_at = new Date().toISOString(); + this.updateRecord(record); + } +} + +export const aiRemediationService = new AiRemediationService(); diff --git a/backend/src/services/alertAutoAnalyzer.ts b/backend/src/services/alertAutoAnalyzer.ts index c329ff5..c06c6c4 100644 --- a/backend/src/services/alertAutoAnalyzer.ts +++ b/backend/src/services/alertAutoAnalyzer.ts @@ -510,7 +510,7 @@ class AlertAutoAnalyzer { } /** AI 分析诊断输出 */ - private async aiAnalyze(alertTitle: string, alertContent: string, rawOutput: string): Promise<{ diagnosis: string; summary: string }> { + private async aiAnalyze(alertTitle: string, alertContent: string, rawOutput: string): Promise<{ diagnosis: string; summary: string; remediationCommands?: string[]; riskLevel?: 'low' | 'medium' | 'high' }> { // ── 查知识库获取相关历史方案 ── let knowledgeContext = ''; try { @@ -527,15 +527,76 @@ class AlertAutoAnalyzer { } } catch { /* 知识库表可能不存在 */ } - const systemPrompt = '你是一个网络运维专家。根据告警信息和设备诊断输出,判断根因并给出修复建议。先写一行摘要(50字内),再写详细诊断。4. 参考历史知识库中的方案,优先推荐已验证的修复方式'; - const prompt = `## 告警信息\n**标题**: ${alertTitle}\n**内容**: ${alertContent || '(无详细内容)'}${knowledgeContext}\n\n## 设备诊断输出\n${rawOutput.substring(0, 8000)}\n\n## 要求\n1. 判断根因\n2. 分析异常指标\n3. 给出修复建议\n4. 参考历史知识库中的方案,优先推荐已验证的修复方式`; + const systemPrompt = `你是一个网络运维专家。根据告警信息和设备诊断输出,判断根因并给出修复建议。 +你需要返回两部分内容: +1. 诊断报告(自然语言) +2. 修复命令(JSON 格式,可执行) + +输出格式要求: +- 第一行:摘要(50字内) +- 然后:详细诊断报告 +- 最后:一个 JSON 代码块,包含修复命令 + +JSON 格式示例: +\`\`\`json +{ + "remediation_commands": [ + "systemctl restart nginx", + "journalctl -u nginx --no-pager -n 50" + ], + "risk_level": "medium", + "description": "重启 Nginx 服务并检查日志" +} +\`\`\` + +risk_level 说明: +- low: 只读操作、查看日志、检查状态 +- medium: 重启服务、清理临时文件 +- high: 删除数据、修改配置、影响业务`; + + const prompt = `## 告警信息 +**标题**: ${alertTitle} +**内容**: ${alertContent || '(无详细内容)'} +${knowledgeContext} + +## 设备诊断输出 +${rawOutput.substring(0, 8000)} + +## 要求 +1. 判断根因 +2. 分析异常指标 +3. 给出修复建议 +4. **必须**在诊断报告最后输出一个 JSON 代码块,包含可执行的修复命令 +5. 修复命令应该是具体的 shell 命令,可以直接在设备上执行 +6. 评估风险等级(low/medium/high) +7. 参考历史知识库中的方案,优先推荐已验证的修复方式`; try { const text = await generateCompletion(prompt, systemPrompt, 0.3); // 第一行为摘要 const lines = text.trim().split('\n'); const summary = lines[0].replace(/^[#*]*\s*/, '').substring(0, 100); - return { diagnosis: text, summary }; + + // 提取 JSON 代码块中的修复命令 + let remediationCommands: string[] | undefined; + let riskLevel: 'low' | 'medium' | 'high' | undefined; + + const jsonMatch = text.match(/```json\s*([\s\S]*?)```/); + if (jsonMatch && jsonMatch[1]) { + try { + const jsonData = JSON.parse(jsonMatch[1].trim()); + if (Array.isArray(jsonData.remediation_commands)) { + remediationCommands = jsonData.remediation_commands; + } + if (['low', 'medium', 'high'].includes(jsonData.risk_level)) { + riskLevel = jsonData.risk_level; + } + } catch (parseErr) { + logger.warn('Failed to parse remediation JSON:', parseErr); + } + } + + return { diagnosis: text, summary, remediationCommands, riskLevel }; } catch (err: any) { logger.error('AI analysis failed:', err); return { @@ -622,7 +683,7 @@ class AlertAutoAnalyzer { // AI 分析 logger.info(`🤖 AI 分析告警: ${alert.title}`); - const { diagnosis, summary } = await this.aiAnalyze(alert.title, alert.content || '', rawOutput); + const { diagnosis, summary, remediationCommands, riskLevel } = await this.aiAnalyze(alert.title, alert.content || '', rawOutput); record.diagnosis = diagnosis; record.summary = summary; record.status = 'completed'; @@ -632,8 +693,36 @@ class AlertAutoAnalyzer { logger.info(`✅ 告警自动分析完成: ${alertId} → ${summary}`); - // ── SSH 设备 → 自动触发修复工作流 ── - if (device.auth_method === 'ssh') { + // ── AI 修复工作流(优先使用 AI 建议的修复命令) ── + if (device.auth_method === 'ssh' && remediationCommands && remediationCommands.length > 0) { + try { + logger.info(`🔧 [AI Remediation] AI 建议了 ${remediationCommands.length} 条修复命令,创建修复工作流`); + + // 动态导入避免循环依赖 + const { aiRemediationService } = await import('./aiRemediationService'); + + const remediation = await aiRemediationService.createAndExecute({ + alertId, + alertTitle: alert.title, + alertContent: alert.content || '', + alertSeverity: alert.severity || 'medium', + deviceId: device.id, + deviceName: device.name, + deviceIp: device.ip_address, + deviceType: device.device_type, + diagnosis, + remediationCommands, + riskLevel: riskLevel || 'medium', + }); + + if (remediation) { + logger.info(`✅ [AI Remediation] 修复工作流已创建: taskId=${remediation.task_id}, 等待审批`); + } + } catch (remediationErr: any) { + logger.error(`❌ [AI Remediation] 创建修复工作流失败: ${remediationErr.message}`, remediationErr); + } + } else if (device.auth_method === 'ssh') { + // AI 没有给出修复命令,尝试匹配预设策略 try { const matching = await remediationService.matchAlertToPolicies({ id: alertId, diff --git a/backend/src/services/workflowExecutor.ts b/backend/src/services/workflowExecutor.ts index 9f37377..0e24eac 100644 --- a/backend/src/services/workflowExecutor.ts +++ b/backend/src/services/workflowExecutor.ts @@ -3,12 +3,15 @@ import db, { getIOInstance } from '../models/database'; import { logger } from '../utils/logger'; import { executeAgentNode, getThinkingSteps } from './agentExecutor'; import { reportService } from './reportService'; +import { notificationService } from './notificationService'; +import { createAuditLog } from './auditService'; import { WorkflowNode, WorkflowEdge, NodeResult, TaskLogEntry, - WorkflowParsed + WorkflowParsed, + ExecutionContext } from '../types'; function calculateTextSimilarity(text1: string, text2: string): number { @@ -36,21 +39,17 @@ function isDuplicateKnowledgeBase(content: string, similarityThreshold: number = } } -interface ExecutionVariable { - name: string; - value: unknown; -} - -interface ExecutionContext { - variables: Record; - previousResults: Array<{ nodeId: string; status: string; output?: string; error?: string }>; - metadata: { - taskId: string; - workflowName: string; - currentNodeId?: string; - executionDepth: number; - startTime: string; - }; +/** 保存到 tasks.context 的执行上下文(用于审批恢复) */ +interface PersistedExecutionState { + workflowId: string; + workflowName: string; + initialInput?: string; + executionOrder: string[]; + nodes: WorkflowNode[]; + edges: WorkflowEdge[]; + nodeResults: Record; + executionContext: ExecutionContext; + pausedAtIndex: number; } export async function executeWorkflow( @@ -64,6 +63,7 @@ export async function executeWorkflow( let executionDepth = 0; const nodeResults: Record = {}; let nodes: WorkflowNode[] = []; + let edges: WorkflowEdge[] = []; let executionOrder: string[] = []; const startTime = new Date().toISOString(); const executionContext: ExecutionContext = { @@ -81,7 +81,7 @@ export async function executeWorkflow( logger.info('🔄 Starting workflow execution:', { taskId, workflowName: workflow.name, context }); nodes = Array.isArray(workflow.nodes) ? workflow.nodes : JSON.parse(workflow.nodes as unknown as string || '[]') as WorkflowNode[]; - const edges = Array.isArray(workflow.edges) ? workflow.edges : JSON.parse(workflow.edges as unknown as string || '[]') as WorkflowEdge[]; + edges = Array.isArray(workflow.edges) ? workflow.edges : JSON.parse(workflow.edges as unknown as string || '[]') as WorkflowEdge[]; executionOrder = topologicalSort(nodes, edges); if (executionOrder.length === 0) { @@ -100,177 +100,585 @@ export async function executeWorkflow( io?.to(`task:${taskId}`).emit('task:started', { taskId, executionOrder }); - for (const nodeId of executionOrder) { - if (executionDepth++ >= MAX_EXECUTION_DEPTH) { - logger.error(`❌ Workflow ${workflow.name} exceeded maximum execution depth`); - break; + await executeFromIndex( + taskId, workflow, nodes, edges, executionOrder, nodeResults, + executionContext, 0, initialInput, executionDepth, MAX_EXECUTION_DEPTH + ); + } catch (error: unknown) { + const errorMessage = error instanceof Error ? error.message : String(error); + db.prepare(` + UPDATE tasks + SET status = ?, end_time = datetime('now','localtime'), current_node_id = NULL + WHERE id = ? + `).run('failed', taskId); + + try { + await generateWorkflowExecutionReport(taskId, workflow, nodes, nodeResults, executionOrder, 'failed', errorMessage); + } catch (reportError) { + logger.error('Failed to generate workflow report (failed case):', reportError); + } + + io?.to(`task:${taskId}`).emit('task:failed', { + taskId, + error: errorMessage + }); + } +} + +/** + * 从指定索引开始执行工作流节点。 + * 遇到审批节点时暂停并保存状态,返回 'paused'。 + * 全部执行完返回 'completed'。 + */ +async function executeFromIndex( + taskId: string, + workflow: WorkflowParsed, + nodes: WorkflowNode[], + edges: WorkflowEdge[], + executionOrder: string[], + nodeResults: Record, + executionContext: ExecutionContext, + startIndex: number, + initialInput: string | undefined, + executionDepth: number, + MAX_EXECUTION_DEPTH: number +): Promise<'completed' | 'paused'> { + const io = getIOInstance(); + + for (let i = startIndex; i < executionOrder.length; i++) { + if (executionDepth++ >= MAX_EXECUTION_DEPTH) { + logger.error(`❌ Workflow ${workflow.name} exceeded maximum execution depth`); + break; + } + + const task = db.prepare('SELECT status FROM tasks WHERE id = ?').get(taskId) as { status: string } | undefined; + if (task?.status === 'cancelled') { + break; + } + + const nodeId = executionOrder[i]; + const node = nodes.find((n) => n.id === nodeId); + if (!node) continue; + + // ---- 审批节点处理 ---- + if (node.type === 'approval') { + logger.info(`🛑 Approval node ${nodeId} (${node.data.label}) reached, pausing workflow`); + + const approvalConfig = node.data.approvalConfig || { + description: node.data.label, + timeout: 3600, + timeoutAction: 'reject' as const, + approvers: ['admin'] + }; + + const approvalId = randomUUID(); + const timeoutAt = approvalConfig.timeout > 0 + ? new Date(Date.now() + approvalConfig.timeout * 1000).toISOString() + : null; + + db.prepare(` + INSERT INTO approval_requests (id, task_id, node_id, node_label, description, status, timeout_at, timeout_action) + VALUES (?, ?, ?, ?, ?, 'pending', ?, ?) + `).run(approvalId, taskId, nodeId, node.data.label, approvalConfig.description, timeoutAt, approvalConfig.timeoutAction); + + // 保存执行上下文到 tasks.context + const persistedState: PersistedExecutionState = { + workflowId: workflow.id, + workflowName: workflow.name, + initialInput, + executionOrder, + nodes, + edges, + nodeResults: { ...nodeResults }, + executionContext: { + ...executionContext, + previousResults: [...executionContext.previousResults], + metadata: { ...executionContext.metadata } + }, + pausedAtIndex: i + }; + db.prepare('UPDATE tasks SET status = ?, current_node_id = ?, context = ? WHERE id = ?') + .run('waiting_approval', nodeId, JSON.stringify(persistedState), taskId); + + io?.to(`task:${taskId}`).emit('task:node:started', { nodeId, nodeName: node.data.label }); + io?.to(`task:${taskId}`).emit('task:approval:requested', { + taskId, + approvalId, + nodeId, + nodeLabel: node.data.label, + description: approvalConfig.description, + timeout: approvalConfig.timeout, + timeoutAt + }); + io?.emit('approval:new', { + approvalId, + taskId, + nodeLabel: node.data.label, + description: approvalConfig.description + }); + + addTaskLog(taskId, { type: 'output', content: `⏸️ 等待审批: ${node.data.label} — ${approvalConfig.description}`, nodeId }); + + // 发送通知到企业微信/钉钉/邮箱 + try { + await notificationService.sendNotification({ + type: 'approval_request', + title: `⏸️ 工作流审批请求: ${node.data.label}`, + content: `**工作流**: ${workflow.name}\n**节点**: ${node.data.label}\n**说明**: ${approvalConfig.description}\n**超时**: ${approvalConfig.timeout}秒\n**任务ID**: ${taskId}\n**审批ID**: ${approvalId}\n\n请登录系统进入审批中心处理`, + related_task_id: taskId, + }); + logger.info('✅ 审批通知已发送'); + } catch (notifyError) { + logger.warn('⚠️ 审批通知发送失败(非致命错误):', notifyError); } - const task = db.prepare('SELECT status FROM tasks WHERE id = ?').get(taskId) as { status: string } | undefined; - if (task?.status === 'cancelled') { - break; + + logger.info(`✅ Approval request ${approvalId} created for task ${taskId}`); + return 'paused'; + } + + // ---- Agent 节点处理(原有逻辑) ---- + if (node.type !== 'agent') continue; + + logger.info(`🤖 Processing node ${nodeId}:`, node.data); + + io?.to(`task:${taskId}`).emit('task:node:started', { + nodeId, + nodeName: node.data.label + }); + + try { + const previousResults = Object.values(nodeResults).map((r) => r.output).filter(Boolean).join('\n\n'); + const input = previousResults || initialInput || '请开始执行任务'; + + executionContext.metadata.currentNodeId = nodeId; + executionContext.metadata.executionDepth = executionDepth; + executionContext.previousResults.push({ + nodeId, + status: 'running', + output: undefined, + error: undefined + }); + + const thinkingProcess = getThinkingSteps(node.data.label); + for (const step of thinkingProcess) { + await delay(300); + io?.to(`task:${taskId}`).emit('task:node:thinking', { + taskId, + nodeId, + content: step + }); + addTaskLog(taskId, { type: 'thinking', content: step, nodeId }); } + + logger.info(`🤖 Calling executeAgentNode with agentId: ${node.data.agentId} context:`, executionContext.variables); - const node = nodes.find((n) => n.id === nodeId); - if (!node || node.type !== 'agent') continue; - - logger.info(`🤖 Processing node ${nodeId}:`, node.data); + if (!node.data.agentId) { + throw new Error(`Node ${nodeId} is missing agentId`); + } - io?.to(`task:${taskId}`).emit('task:node:started', { + const output = await executeAgentNode(node.data.agentId, input, executionContext.variables); + + nodeResults[nodeId] = { + status: 'success', + output, + metadata: { + thinkingProcess: thinkingProcess.join('\n'), + executionTime: Date.now() + } + }; + + const lastResultIdx = executionContext.previousResults.findIndex(r => r.nodeId === nodeId && r.status === 'running'); + if (lastResultIdx !== -1) { + executionContext.previousResults[lastResultIdx] = { + nodeId, + status: 'success', + output + }; + } + + io?.to(`task:${taskId}`).emit('task:node:output', { taskId, nodeId, output }); + io?.to(`task:${taskId}`).emit('task:node:completed', { taskId, nodeId, status: 'success', output }); + addTaskLog(taskId, { type: 'output', content: output, nodeId }); + + } catch (error: unknown) { + const errorMessage = error instanceof Error ? error.message : String(error); + nodeResults[nodeId] = { + status: 'failed', + error: errorMessage + }; + + io?.to(`task:${taskId}`).emit('task:node:completed', { + taskId, nodeId, - nodeName: node.data.label + status: 'failed', + error: errorMessage }); - + addTaskLog(taskId, { type: 'error', content: errorMessage, nodeId }); + + if (!node.data.allowFailure) { + throw error; + } + } + } + + // 所有节点执行完成 + await finalizeWorkflow(taskId, workflow, nodes, nodeResults, executionOrder, 'completed'); + return 'completed'; +} + +/** + * 审批通过后恢复工作流执行 + */ +export async function resumeWorkflow( + taskId: string, + approvalId: string, + approvedBy: string, + comment?: string +): Promise { + const io = getIOInstance(); + + const task = db.prepare('SELECT * FROM tasks WHERE id = ?').get(taskId) as { status: string; context?: string; workflow_id?: string } | undefined; + if (!task || task.status !== 'waiting_approval') { + throw new Error(`Task ${taskId} is not waiting for approval`); + } + + const persistedState = JSON.parse(task.context || '{}') as PersistedExecutionState; + if (!persistedState.executionOrder || persistedState.pausedAtIndex === undefined) { + throw new Error(`Task ${taskId} has no saved execution context`); + } + + // 更新审批记录 + db.prepare(` + UPDATE approval_requests + SET status = 'approved', approved_by = ?, approved_at = datetime('now','localtime'), updated_at = datetime('now','localtime') + WHERE id = ? + `).run(approvedBy, approvalId); + + // 通知审批结果 + io?.to(`task:${taskId}`).emit('task:approval:resolved', { + taskId, + approvalId, + status: 'approved', + approvedBy, + comment + }); + + addTaskLog(taskId, { type: 'output', content: `✅ 审批通过 by ${approvedBy}${comment ? `: ${comment}` : ''}`, nodeId: persistedState.executionOrder[persistedState.pausedAtIndex] }); + + // 恢复执行 + db.prepare('UPDATE tasks SET status = ?, current_node_id = NULL WHERE id = ?') + .run('running', taskId); + + const workflow = db.prepare('SELECT * FROM workflows WHERE id = ?').get(persistedState.workflowId) as Record | undefined; + const workflowParsed: WorkflowParsed = workflow ? { + id: workflow.id as string, + name: workflow.name as string, + description: workflow.description as string, + nodes: persistedState.nodes, + edges: persistedState.edges, + agent_configs: JSON.parse((workflow.agent_configs as string) || '{}'), + is_template: workflow.is_template as number, + created_at: workflow.created_at as string, + updated_at: workflow.updated_at as string, + } : { + id: persistedState.workflowId, + name: persistedState.workflowName, + nodes: persistedState.nodes, + edges: persistedState.edges, + agent_configs: {}, + is_template: 0, + created_at: '', + updated_at: '', + }; + + try { + await executeFromIndex( + taskId, + workflowParsed, + persistedState.nodes, + persistedState.edges, + persistedState.executionOrder, + persistedState.nodeResults, + persistedState.executionContext, + persistedState.pausedAtIndex + 1, + persistedState.initialInput, + persistedState.executionContext.metadata.executionDepth, + 50 + ); + } catch (error: unknown) { + const errorMessage = error instanceof Error ? error.message : String(error); + await finalizeWorkflow(taskId, workflowParsed, persistedState.nodes, persistedState.nodeResults, persistedState.executionOrder, 'failed', errorMessage); + io?.to(`task:${taskId}`).emit('task:failed', { taskId, error: errorMessage }); + } +} + +/** + * 审批拒绝,终止工作流 + */ +export async function rejectWorkflow( + taskId: string, + approvalId: string, + rejectedBy: string, + reason: string +): Promise { + const io = getIOInstance(); + + db.prepare(` + UPDATE approval_requests + SET status = 'rejected', approved_by = ?, reject_reason = ?, approved_at = datetime('now','localtime'), updated_at = datetime('now','localtime') + WHERE id = ? + `).run(rejectedBy, reason, approvalId); + + db.prepare(` + UPDATE tasks + SET status = 'failed', end_time = datetime('now','localtime'), current_node_id = NULL + WHERE id = ? + `).run(taskId); + + io?.to(`task:${taskId}`).emit('task:approval:resolved', { + taskId, + approvalId, + status: 'rejected', + approvedBy: rejectedBy, + comment: reason + }); + + addTaskLog(taskId, { type: 'error', content: `❌ 审批拒绝 by ${rejectedBy}: ${reason}` }); + + io?.to(`task:${taskId}`).emit('task:failed', { + taskId, + error: `审批被拒绝: ${reason}` + }); +} + +/** + * 审批超时处理 + */ +export async function timeoutApproval(approvalId: string): Promise { + const approval = db.prepare('SELECT * FROM approval_requests WHERE id = ?').get(approvalId) as { + id: string; task_id: string; node_id: string; timeout_action: string; + } | undefined; + if (!approval) return; + + db.prepare(` + UPDATE approval_requests + SET status = 'timeout', updated_at = datetime('now','localtime') + WHERE id = ? + `).run(approvalId); + + if (approval.timeout_action === 'reject') { + await rejectWorkflow(approval.task_id, approvalId, 'system', '审批超时自动拒绝'); + } +} + +async function finalizeWorkflow( + taskId: string, + workflow: WorkflowParsed, + nodes: WorkflowNode[], + nodeResults: Record, + executionOrder: string[], + status: 'completed' | 'failed', + errorMessage?: string +) { + const io = getIOInstance(); + + db.prepare(` + UPDATE tasks + SET status = ?, end_time = datetime('now','localtime'), + node_results = ?, current_node_id = NULL + WHERE id = ? + `).run(status, JSON.stringify(nodeResults), taskId); + + // 故障案例自动存入知识库 + try { + const failedNodes = Object.entries(nodeResults) + .filter(([_, result]) => result.status === 'failed') + .map(([nodeId, result]) => { + const node = nodes.find((n) => n.id === nodeId); + return { ...result, nodeId, node }; + }); + + if (failedNodes.length > 0) { + failedNodes.forEach((nodeResult) => { + const title = `${workflow.name} - 故障案例`; + const content = `**故障节点**: ${nodeResult.node?.data?.label || nodeResult.nodeId}\n**错误**: ${nodeResult.error}\n**分析时间**: ${new Date().toISOString()}`; + + const duplicateId = isDuplicateKnowledgeBase(content); + if (duplicateId) { + logger.info(`ℹ️ 跳过重复的故障案例,已存在相似条目: ${duplicateId}`); + return; + } + + db.prepare(` + INSERT INTO knowledge_base (id, title, category, content, created_at) + VALUES (?, ?, ?, ?, datetime('now','localtime')) + `).run(randomUUID(), title, '故障处理', content); + }); + logger.info('✅ 故障案例已自动存入知识库'); + } + } catch (insertError) { + logger.error('Failed to insert into knowledge_base:', insertError); + } + + try { + await generateWorkflowExecutionReport(taskId, workflow, nodes, nodeResults, executionOrder, status, errorMessage); + } catch (reportError) { + logger.error('Failed to generate workflow report:', reportError); + } + + // ── 验证失败时自动回滚 ── + // 检测验证节点是否失败,如果失败且存在回滚节点,则自动执行回滚 + const verificationNode = nodes.find(n => + n.data?.label?.includes('验证') && n.type === 'agent' + ); + const verificationFailed = verificationNode && + nodeResults[verificationNode.id]?.status === 'failed'; + + if (verificationFailed) { + logger.warn(`⚠️ 验证节点 "${verificationNode.data.label}" 执行失败,尝试自动回滚...`); + const rollbackNode = nodes.find(n => + n.data?.label?.includes('回滚') && n.type === 'agent' + ); + + if (rollbackNode) { try { - const previousResults = Object.values(nodeResults).map((r) => r.output).filter(Boolean).join('\n\n'); - const input = previousResults || initialInput || '请开始执行任务'; - - executionContext.metadata.currentNodeId = nodeId; - executionContext.metadata.executionDepth = executionDepth; - executionContext.previousResults.push({ - nodeId, - status: 'running', - output: undefined, - error: undefined + addTaskLog(taskId, { + type: 'output', + content: '⚠️ 验证失败,正在执行自动回滚...', + nodeId: rollbackNode.id, }); - - // 显示思考进度 - const thinkingProcess = getThinkingSteps(node.data.label); - for (const step of thinkingProcess) { - await delay(300); - io?.to(`task:${taskId}`).emit('task:node:thinking', { - taskId, - nodeId, - content: step - }); - addTaskLog(taskId, { type: 'thinking', content: step, nodeId }); - } - - logger.info(`🤖 Calling executeAgentNode with agentId: ${node.data.agentId} context:`, context); - const output = await executeAgentNode(node.data.agentId, input, context); - - nodeResults[nodeId] = { + io?.to(`task:${taskId}`).emit('task:node:started', { + taskId, nodeId: rollbackNode.id, nodeName: rollbackNode.data.label, + }); + + const rollbackOutput = await executeAgentNode( + rollbackNode.data.agentId || 'server-command-agent', + rollbackNode.data.prompt || '执行回滚操作', + {} + ); + + nodeResults[rollbackNode.id] = { status: 'success', - output, - metadata: { - thinkingProcess: thinkingProcess.join('\n'), - executionTime: Date.now() - } + output: rollbackOutput, + metadata: { executionTime: Date.now() }, }; - - const lastResultIdx = executionContext.previousResults.findIndex(r => r.nodeId === nodeId && r.status === 'running'); - if (lastResultIdx !== -1) { - executionContext.previousResults[lastResultIdx] = { - nodeId, - status: 'success', - output - }; - } - + io?.to(`task:${taskId}`).emit('task:node:output', { - taskId, - nodeId, - output + taskId, nodeId: rollbackNode.id, output: rollbackOutput, }); - io?.to(`task:${taskId}`).emit('task:node:completed', { - taskId, - nodeId, - status: 'success', - output + taskId, nodeId: rollbackNode.id, status: 'success', output: rollbackOutput, + }); + addTaskLog(taskId, { + type: 'output', + content: `✅ 自动回滚完成: ${rollbackOutput.substring(0, 200)}`, + nodeId: rollbackNode.id, }); - - addTaskLog(taskId, { type: 'output', content: output, nodeId }); - - } catch (error: unknown) { - const errorMessage = error instanceof Error ? error.message : String(error); - nodeResults[nodeId] = { + + // 更新 ai_remediations 记录状态 + try { + const taskCtx = db.prepare('SELECT context FROM tasks WHERE id = ?').get(taskId) as { context?: string } | undefined; + const ctx = taskCtx?.context ? JSON.parse(taskCtx.context) : {}; + if (ctx.remediation_id) { + db.prepare(` + UPDATE ai_remediations SET status = 'failed', execution_result = ?, updated_at = datetime('now','localtime') + WHERE id = ? + `).run(JSON.stringify({ verification: 'failed', rollback: 'executed', rollback_output: rollbackOutput.substring(0, 500) }), ctx.remediation_id); + } + } catch { /* ai_remediations 表可能不存在 */ } + + logger.info(`✅ 自动回滚执行完成 (task: ${taskId})`); + } catch (rollbackError) { + const rollbackErrMsg = rollbackError instanceof Error ? rollbackError.message : String(rollbackError); + nodeResults[rollbackNode.id] = { status: 'failed', - error: errorMessage + error: rollbackErrMsg, + metadata: { executionTime: Date.now() }, }; - - io?.to(`task:${taskId}`).emit('task:node:completed', { - taskId, - nodeId, - status: 'failed', - error: errorMessage + addTaskLog(taskId, { + type: 'error', + content: `❌ 自动回滚失败: ${rollbackErrMsg}`, + nodeId: rollbackNode.id, }); - - addTaskLog(taskId, { type: 'error', content: errorMessage, nodeId }); - - if (!node.data.allowFailure) { - throw error; - } - } - } - - db.prepare(` - UPDATE tasks - SET status = ?, end_time = datetime('now','localtime'), - node_results = ?, current_node_id = NULL - WHERE id = ? - `).run('completed', JSON.stringify(nodeResults), taskId); - - try { - const failedNodes = Object.entries(nodeResults) - .filter(([_, result]) => result.status === 'failed') - .map(([nodeId, result]) => { - const node = nodes.find((n) => n.id === nodeId); - return { ...result, nodeId, node }; - }); - - if (failedNodes.length > 0) { - failedNodes.forEach((nodeResult) => { - const title = `${workflow.name} - 故障案例`; - const content = `**故障节点**: ${nodeResult.node?.data?.label || nodeResult.nodeId}\n**错误**: ${nodeResult.error}\n**分析时间**: ${new Date().toISOString()}`; - - const duplicateId = isDuplicateKnowledgeBase(content); - if (duplicateId) { - logger.info(`ℹ️ 跳过重复的故障案例,已存在相似条目: ${duplicateId}`); - return; + logger.error(`❌ 自动回滚执行失败: ${rollbackErrMsg}`); + + // 更新 ai_remediations 记录 + try { + const taskCtx = db.prepare('SELECT context FROM tasks WHERE id = ?').get(taskId) as { context?: string } | undefined; + const ctx = taskCtx?.context ? JSON.parse(taskCtx.context) : {}; + if (ctx.remediation_id) { + db.prepare(` + UPDATE ai_remediations SET status = 'failed', error_message = ?, updated_at = datetime('now','localtime') + WHERE id = ? + `).run(`验证失败且回滚失败: ${rollbackErrMsg}`, ctx.remediation_id); } - - db.prepare(` - INSERT INTO knowledge_base (id, title, category, content, created_at) - VALUES (?, ?, ?, ?, datetime('now','localtime')) - `).run(randomUUID(), title, '故障处理', content); - }); - - logger.info('✅ 故障案例已自动存入知识库'); + } catch { /* ai_remediations 表可能不存在 */ } } - } catch (insertError) { - logger.error('Failed to insert into knowledge_base:', insertError); - } - try { - await generateWorkflowExecutionReport(taskId, workflow, nodes, nodeResults, executionOrder, 'completed'); - } catch (reportError) { - logger.error('Failed to generate workflow report:', reportError); + // 更新 tasks 表中的 node_results(包含回滚节点结果) + db.prepare('UPDATE tasks SET node_results = ? WHERE id = ?') + .run(JSON.stringify(nodeResults), taskId); } - - io?.to(`task:${taskId}`).emit('task:completed', { - taskId, - status: 'completed', - nodeResults - }); - } catch (error: unknown) { - const errorMessage = error instanceof Error ? error.message : String(error); - db.prepare(` - UPDATE tasks - SET status = ?, end_time = datetime('now','localtime'), current_node_id = NULL - WHERE id = ? - `).run('failed', taskId); - + } + + // ── 反馈通知 ── + const successCount = Object.values(nodeResults).filter(r => r.status === 'success').length; + const failedCount = Object.values(nodeResults).filter(r => r.status === 'failed').length; + + try { + await notificationService.sendTaskNotification( + { id: taskId, name: workflow.name, workflow_id: workflow.id }, + status + ); + } catch (notifyError) { + logger.warn('⚠️ 工作流完成通知发送失败(非致命错误):', notifyError); + } + + // 验证失败回滚的额外通知 + if (verificationFailed) { try { - await generateWorkflowExecutionReport(taskId, workflow, nodes, nodeResults, executionOrder, 'failed', errorMessage); - } catch (reportError) { - logger.error('Failed to generate workflow report (failed case):', reportError); + await notificationService.sendNotification({ + type: 'remediation_rollback', + title: `⚠️ AI 修复验证失败并已回滚: ${workflow.name}`, + content: `**工作流**: ${workflow.name}\n**验证结果**: 失败\n**回滚操作**: 已自动执行\n**任务ID**: ${taskId}\n\n请登录系统查看详细信息`, + related_task_id: taskId, + }); + } catch (notifyError) { + logger.warn('⚠️ 回滚通知发送失败:', notifyError); } - - io?.to(`task:${taskId}`).emit('task:failed', { - taskId, - error: errorMessage + } + + // ── 审计日志 ── + createAuditLog({ + action: status === 'completed' ? 'workflow_completed' : 'workflow_failed', + resource_type: 'task', + resource_id: taskId, + details: { + workflowName: workflow.name, + workflowId: workflow.id, + successCount, + failedCount, + verificationFailed: !!verificationFailed, + errorMessage: errorMessage || null, + }, + }); + + if (verificationFailed) { + createAuditLog({ + action: 'remediation_rollback_triggered', + resource_type: 'task', + resource_id: taskId, + details: { + workflowName: workflow.name, + reason: '验证节点执行失败,自动触发回滚', + rollbackResult: nodeResults[nodes.find(n => n.data?.label?.includes('回滚'))?.id || '']?.status || 'unknown', + }, }); } + + if (status === 'completed') { + io?.to(`task:${taskId}`).emit('task:completed', { taskId, status: 'completed', nodeResults }); + } } async function generateWorkflowExecutionReport( @@ -307,8 +715,8 @@ async function generateWorkflowExecutionReport( const executionOrderDesc = executionOrder.map((nodeId, index) => { const node = nodes.find(n => n.id === nodeId); const nodeResult = nodeResults[nodeId]; - const status = nodeResult?.status || 'pending'; - return `${index + 1}. ${node?.data?.label || nodeId} (${status})`; + const nodeStatus = nodeResult?.status || 'pending'; + return `${index + 1}. ${node?.data?.label || nodeId} (${nodeStatus})`; }).join('\n'); const nodeDetails = executionOrder.map((nodeId, index) => { diff --git a/backend/src/types/index.ts b/backend/src/types/index.ts index bfe6c2d..25af767 100644 --- a/backend/src/types/index.ts +++ b/backend/src/types/index.ts @@ -1,10 +1,23 @@ +export interface ApprovalConfig { + description: string; + timeout: number; + timeoutAction: 'reject' | 'wait'; + approvers: string[]; +} + export interface WorkflowNode { id: string; type: string; data: { label: string; - agentId: string; + agentId?: string; allowFailure?: boolean; + approvalConfig?: ApprovalConfig; + description?: string; + avatar?: string; + prompt?: string; + inputKey?: string; + outputKey?: string; }; position: { x: number; @@ -16,6 +29,7 @@ export interface WorkflowEdge { id: string; source: string; target: string; + animated?: boolean; } export interface Workflow { @@ -78,6 +92,35 @@ export interface Task { logs?: string; } +export interface ApprovalRequest { + id: string; + task_id: string; + node_id: string; + node_label: string; + description: string; + status: 'pending' | 'approved' | 'rejected' | 'timeout'; + requested_by: string; + approved_by?: string; + approved_at?: string; + reject_reason?: string; + timeout_at?: string; + timeout_action: 'reject' | 'wait'; + created_at: string; + updated_at: string; +} + +export interface ExecutionContext { + variables: Record; + previousResults: Array<{ nodeId: string; status: string; output?: string; error?: string }>; + metadata: { + taskId: string; + workflowName: string; + currentNodeId?: string; + executionDepth: number; + startTime: string; + }; +} + export interface CommandExecutionResult { success: boolean; stdout: string; diff --git a/docs-assets/images/126.png b/docs-assets/images/126.png new file mode 100644 index 0000000..8a53c6a Binary files /dev/null and b/docs-assets/images/126.png differ diff --git a/docs/ARCHITECTURE_DIAGRAM.md b/docs/ARCHITECTURE_DIAGRAM.md index 2a18adc..24deee0 100644 --- a/docs/ARCHITECTURE_DIAGRAM.md +++ b/docs/ARCHITECTURE_DIAGRAM.md @@ -41,7 +41,7 @@ graph TB end subgraph External["🌐 外部服务"] - LLM["🤖 LLM API
豆包 Doubao | OpenAI"] + LLM["🤖 LLM 模型池
豆包 | DeepSeek | 通义千问
OpenAI | 智谱 | 本地模型"] SSH["🖥️ SSH
远程服务器"] Alerts["🚨 告警源
Prometheus | Zabbix | 通用Webhook"] Notify["📬 通知渠道
Webhook | 邮件 | 企业微信 | 钉钉"] diff --git a/docs/AUTO_REMEDIATION_DESIGN.md b/docs/AUTO_REMEDIATION_DESIGN.md index 6a582ca..7c6398f 100644 --- a/docs/AUTO_REMEDIATION_DESIGN.md +++ b/docs/AUTO_REMEDIATION_DESIGN.md @@ -1,8 +1,9 @@ # 自动修复策略引擎 - 技术设计方案 -> 版本:v1.0 -> 日期:2025-05-25 -> 作者:AI Assistant +> 版本:v1.1 +> 日期:2026-06-14 +> 作者:AI Assistant +> 更新:补充 AI 自动修复工作流实现细节 --- @@ -16,6 +17,7 @@ - **智能修复决策**:基于历史修复记录和 AI 分析,推荐最佳修复方案 - **分级执行策略**:支持自动执行、审批后执行、仅建议三种模式 - **效果验证闭环**:修复后自动验证问题是否真正解决 +- **自动回滚机制**:验证失败时自动执行回滚操作,恢复系统到修复前状态 ### 1.2 与现有系统的关系 @@ -30,6 +32,12 @@ │ 修复效果验证 │ ◀────────── │ Agent 执行 │ │ (Verification) │ 返回结果 │ (Executor) │ └──────────────────┘ └─────────────┘ + │ + ▼ + ┌──────────────────┐ + │ 自动回滚机制 │ + │ (Auto-Rollback) │ + └──────────────────┘ ``` --- diff --git a/docs/HITL_APPROVAL_DESIGN.md b/docs/HITL_APPROVAL_DESIGN.md new file mode 100644 index 0000000..98cd451 --- /dev/null +++ b/docs/HITL_APPROVAL_DESIGN.md @@ -0,0 +1,379 @@ +# HITL 审批节点(Human-in-the-Loop)功能设计文档 + +## 1. 背景与目标 + +当前工作流执行器(`workflowExecutor.ts`)采用纯串行执行模式,所有 Agent 节点一旦启动便自动执行到底。在生产运维环境中,运维主管无法在关键操作前进行人工确认,导致该功能无法落地企业场景。 + +**目标**:在工作流中新增 `approval` 节点类型,执行到该节点时暂停工作流,等待人工审批(批准/拒绝)后继续执行,实现 Human-in-the-Loop 闭环。 + +## 2. 功能范围 + +| 能力 | 说明 | +|------|------| +| 审批节点类型 | 工作流编辑器中新增 `approval` 节点,与现有 `agent` 节点并列 | +| 工作流暂停/恢复 | 执行到审批节点时暂停,审批通过后继续执行后续节点 | +| 审批超时 | 可配置审批超时时间,超时后自动拒绝或保持等待 | +| WebSocket 实时推送 | 审批请求通过 WebSocket 实时推送到前端 | +| REST API 审批操作 | 提供审批通过/拒绝的 REST 接口 | +| 通知渠道集成 | 审批请求推送到企业微信/钉钉(复用现有通知系统) | +| 审计记录 | 审批操作记录到审计日志 | + +## 3. 数据模型变更 + +### 3.1 WorkflowNode 类型扩展 + +```typescript +// backend/src/types/index.ts + +// 现有 WorkflowNode.data 扩展 +export interface WorkflowNode { + id: string; + type: string; // 'agent' | 'approval' + data: { + label: string; + agentId?: string; // agent 节点使用 + allowFailure?: boolean; + // ---- 审批节点新增字段 ---- + approvalConfig?: { + description: string; // 审批说明(展示给审批人) + timeout: number; // 超时时间(秒),默认 3600 + timeoutAction: 'reject' | 'wait'; // 超时行为:自动拒绝 or 继续等待 + approvers: string[]; // 审批人角色列表,如 ['admin', 'operator'] + requireAll: boolean; // 是否需要所有审批人同意(暂不实现,预留) + }; + }; + position: { x: number; y: number }; +} +``` + +### 3.2 新增 approval_requests 表 + +```sql +CREATE TABLE IF NOT EXISTS approval_requests ( + id TEXT PRIMARY KEY, + task_id TEXT NOT NULL, + node_id TEXT NOT NULL, + node_label TEXT NOT NULL, + description TEXT, + status TEXT NOT NULL DEFAULT 'pending', -- pending | approved | rejected | timeout + requested_by TEXT, -- 触发审批的任务创建者 + approved_by TEXT, -- 审批操作人 + approved_at DATETIME, + reject_reason TEXT, + timeout_at DATETIME, + timeout_action TEXT DEFAULT 'reject', + created_at DATETIME DEFAULT (datetime('now','localtime')), + updated_at DATETIME DEFAULT (datetime('now','localtime')), + FOREIGN KEY (task_id) REFERENCES tasks(id) ON DELETE CASCADE +); + +CREATE INDEX IF NOT EXISTS idx_approval_task ON approval_requests(task_id); +CREATE INDEX IF NOT EXISTS idx_approval_status ON approval_requests(status); +CREATE INDEX IF NOT EXISTS idx_approval_created ON approval_requests(created_at DESC); +``` + +### 3.3 tasks 表状态扩展 + +现有 `tasks.status` 字段新增 `waiting_approval` 状态: + +``` +pending → running → waiting_approval → running → completed + ↘ failed +``` + +## 4. 后端改动 + +### 4.1 工作流执行器改造(workflowExecutor.ts) + +**核心逻辑变更**:在节点执行循环中,遇到 `approval` 类型节点时暂停循环,写入审批记录,等待恢复信号。 + +``` +当前流程: + for (nodeId of executionOrder) → executeAgentNode → 继续下一个 + +改造后流程: + for (nodeId of executionOrder) { + if (node.type === 'approval') { + → 创建 approval_request 记录 + → 更新 task.status = 'waiting_approval' + → WebSocket 推送 task:approval:requested + → 通知渠道推送 + → 暂停循环(return / break) + } + if (node.type === 'agent') { + → executeAgentNode(不变) + } + } +``` + +**恢复执行**:审批通过后,从暂停的节点继续执行后续节点。实现方式: + +- 方案 A(推荐):将 `executeWorkflow` 拆分为可恢复函数,接收 `resumeFromNodeId` 参数 +- 方案 B:使用 Promise + EventEmitter 等待审批结果 + +**采用方案 A**:将执行循环提取为独立函数,审批通过后由 API 调用重新进入循环。 + +```typescript +// 伪代码 +export async function executeWorkflow(taskId, workflow, initialInput?, context?) { + // ... 现有初始化逻辑 ... + await executeFromNode(taskId, workflow, nodes, executionOrder, nodeResults, startIdx, context); +} + +async function executeFromNode(taskId, workflow, nodes, executionOrder, nodeResults, startIdx, context) { + for (let i = startIdx; i < executionOrder.length; i++) { + const nodeId = executionOrder[i]; + const node = nodes.find(n => n.id === nodeId); + + if (node.type === 'approval') { + // 创建审批请求,暂停执行 + const approvalId = createApprovalRequest(taskId, node); + // 更新任务状态 + updateTaskStatus(taskId, 'waiting_approval', nodeId); + // 推送 WebSocket + emitApprovalRequested(taskId, node, approvalId); + // 保存执行上下文到 tasks 表(用于恢复) + saveExecutionContext(taskId, { executionIndex: i, nodeResults, context }); + return; // 暂停 + } + + // agent 节点执行(现有逻辑不变) + await executeAgentNode(...); + } + // 所有节点执行完成 +} + +export async function resumeWorkflow(taskId: string, approved: boolean, approverId: string, reason?: string) { + // 读取保存的执行上下文 + const ctx = loadExecutionContext(taskId); + if (approved) { + // 记录审批结果,继续执行下一个节点 + await executeFromNode(taskId, workflow, nodes, executionOrder, nodeResults, ctx.executionIndex + 1, context); + } else { + // 拒绝,标记任务失败 + updateTaskStatus(taskId, 'failed'); + } +} +``` + +### 4.2 新增审批 API 路由(approvalRoutes.ts) + +| 方法 | 路径 | 说明 | 权限 | +|------|------|------|------| +| GET | `/api/approvals` | 查询审批列表(支持 status 过滤) | admin, operator | +| GET | `/api/approvals/:id` | 查询审批详情 | admin, operator | +| POST | `/api/approvals/:id/approve` | 审批通过 | admin, operator | +| POST | `/api/approvals/:id/reject` | 审批拒绝 | admin, operator | +| GET | `/api/approvals/pending/count` | 待审批数量(用于前端角标) | admin, operator | + +**审批通过请求体**: +```json +POST /api/approvals/:id/approve +{ + "comment": "确认可以执行" // 可选 +} +``` + +**审批拒绝请求体**: +```json +POST /api/approvals/:id/reject +{ + "reason": "当前时间段不允许变更" // 必填 +} +``` + +### 4.3 WebSocket 事件 + +| 事件 | 方向 | 数据 | 说明 | +|------|------|------|------| +| `task:approval:requested` | Server → Client | `{ taskId, approvalId, nodeId, nodeLabel, description, timeout }` | 审批请求 | +| `task:approval:resolved` | Server → Client | `{ taskId, approvalId, status, approvedBy, comment }` | 审批结果 | +| `approval:new` | Server → Client | `{ approvalId, taskId, nodeLabel, description }` | 全局广播新审批(用于导航栏角标) | + +### 4.4 审批超时处理 + +在 `app.ts` 启动时注册定时检查任务: + +```typescript +// 每 30 秒检查一次超时的审批请求 +setInterval(() => { + const expired = db.prepare(` + SELECT * FROM approval_requests + WHERE status = 'pending' AND timeout_at IS NOT NULL AND timeout_at < datetime('now','localtime') + `).all(); + + for (const req of expired) { + if (req.timeout_action === 'reject') { + rejectApproval(req.id, null, '审批超时自动拒绝'); + } + // timeout_action === 'wait' 则不做处理,继续等待 + } +}, 30000); +``` + +### 4.5 通知集成 + +审批请求创建后,复用现有 `notificationService.ts` 发送通知: + +```typescript +await notificationService.send({ + type: 'approval_request', + title: `工作流审批: ${nodeLabel}`, + content: description, + metadata: { approvalId, taskId } +}); +``` + +## 5. 前端改动 + +### 5.1 工作流编辑器(WorkflowEditor.tsx) + +在节点面板中新增「审批节点」类型: + +- 节点样式:盾牌图标 + 黄色/橙色背景,与 Agent 节点(蓝色)区分 +- 配置面板: + - 审批说明(textarea) + - 超时时间(数字输入,单位:分钟,默认 60) + - 超时行为(下拉:自动拒绝 / 继续等待) + - 审批人角色(多选:admin / operator) + +### 5.2 审批管理页面(新增 ApprovalCenter.tsx) + +独立页面,展示所有审批请求: + +- 待审批列表(卡片式) + - 显示:工作流名称、节点名称、审批说明、发起时间、倒计时 + - 操作:通过(绿色按钮)/ 拒绝(红色按钮,弹窗填写原因) +- 历史审批记录(表格) + - 状态筛选:待审批 / 已通过 / 已拒绝 / 已超时 + +### 5.3 任务执行页面改造 + +- 任务详情中,审批节点显示为橙色「等待审批」状态 +- 审批倒计时实时显示 +- 可直接在任务详情页操作审批 + +### 5.4 导航栏审批角标 + +- 定时轮询 `/api/approvals/pending/count` +- 有待审批时显示红色数字角标 + +## 6. 数据库迁移 + +新增迁移文件 `v017_approval_requests.ts`: + +```typescript +// backend/src/models/migrations/v017_approval_requests.ts +import { Migration } from './migrationFramework'; + +const v017ApprovalRequests: Migration = { + id: '20260614000017', + version: 17, + name: 'approval_requests', + description: 'Add approval_requests table for HITL workflow', + up: async (db) => { + db.exec(` + CREATE TABLE IF NOT EXISTS approval_requests ( + id TEXT PRIMARY KEY, + task_id TEXT NOT NULL, + node_id TEXT NOT NULL, + node_label TEXT NOT NULL, + description TEXT, + status TEXT NOT NULL DEFAULT 'pending', + requested_by TEXT, + approved_by TEXT, + approved_at DATETIME, + reject_reason TEXT, + timeout_at DATETIME, + timeout_action TEXT DEFAULT 'reject', + created_at DATETIME DEFAULT (datetime('now','localtime')), + updated_at DATETIME DEFAULT (datetime('now','localtime')), + FOREIGN KEY (task_id) REFERENCES tasks(id) ON DELETE CASCADE + ); + CREATE INDEX IF NOT EXISTS idx_approval_task ON approval_requests(task_id); + CREATE INDEX IF NOT EXISTS idx_approval_status ON approval_requests(status); + CREATE INDEX IF NOT EXISTS idx_approval_created ON approval_requests(created_at DESC); + `); + }, + down: async (db) => { + db.exec(`DROP TABLE IF EXISTS approval_requests`); + } +}; +export default v017ApprovalRequests; +``` + +## 7. 文件变更清单 + +| 文件 | 变更类型 | 说明 | +|------|----------|------| +| `backend/src/types/index.ts` | 修改 | WorkflowNode 增加 approvalConfig 字段,新增 ApprovalRequest 类型 | +| `backend/src/models/migrations/v017_approval_requests.ts` | 新增 | 数据库迁移 | +| `backend/src/models/migrations/index.ts` | 修改 | 注册新迁移 | +| `backend/src/services/workflowExecutor.ts` | 修改 | 支持审批节点暂停/恢复 | +| `backend/src/routes/approvalRoutes.ts` | 新增 | 审批 CRUD + 操作 API | +| `backend/src/app.ts` | 修改 | 注册审批路由 + 超时检查定时器 | +| `backend/src/websocket/handler.ts` | 修改 | 新增审批相关 WebSocket 事件 | +| `backend/src/services/notificationService.ts` | 修改 | 支持审批通知类型 | +| `frontend/src/pages/ApprovalCenter.tsx` | 新增 | 审批中心页面 | +| `frontend/src/pages/WorkflowEditor.tsx` | 修改 | 新增审批节点类型和配置面板 | +| `frontend/src/pages/TaskDetail.tsx` | 修改 | 审批节点状态展示和操作 | +| `frontend/src/components/ApprovalNode.tsx` | 新增 | 审批节点可视化组件 | +| `frontend/src/App.tsx` | 修改 | 新增审批中心路由 | + +## 8. 执行流程图 + +``` +用户创建工作流(含审批节点) + │ + ▼ + POST /api/tasks(启动任务) + │ + ▼ + executeWorkflow() 开始执行 + │ + ▼ + ┌─ 遍历执行顺序 ─────────────────────┐ + │ │ + │ agent 节点 → executeAgentNode() │ + │ │ │ + │ ▼ │ + │ approval 节点? │ + │ ├─ 是 → 创建审批记录 │ + │ │ task.status = │ + │ │ 'waiting_approval' │ + │ │ WebSocket 推送审批请求 │ + │ │ 发送通知 │ + │ │ return(暂停执行) │ + │ │ │ + │ └─ 否 → 继续下一个节点 │ + │ │ + └──────────────────────────────────────┘ + │ + ┌───────────┘(等待审批) + ▼ + POST /api/approvals/:id/approve + │ + ▼ + resumeWorkflow(taskId) + │ + ▼ + 从暂停位置继续执行后续节点 + │ + ▼ + 所有节点完成 → task.status = 'completed' +``` + +## 9. 安全考量 + +- 审批操作需要 JWT 认证,仅 `admin` 和 `operator` 角色可操作 +- `viewer` 角色只能查看审批列表,不能操作 +- 审批操作记录到 `audit_logs` 表 +- 审批超时默认行为为自动拒绝,防止任务无限挂起 +- 审批接口做幂等处理,同一审批请求不可重复操作 + +## 10. 后续扩展(本期不实现) + +- 多级审批链(串行/并行审批人) +- 移动端审批(企业微信/钉钉卡片交互) +- 审批条件表达式(根据上下文自动判断是否需要审批) +- 审批统计报表 diff --git a/docs/ZABBIX_AUTO_REMEDIATION_GUIDE.md b/docs/ZABBIX_AUTO_REMEDIATION_GUIDE.md new file mode 100644 index 0000000..3d04f44 --- /dev/null +++ b/docs/ZABBIX_AUTO_REMEDIATION_GUIDE.md @@ -0,0 +1,522 @@ +# Zabbix 告警自动修复使用指南 + +> 版本:v1.0\ +> 日期:2026-06-14\ +> 适用版本:ITOps Agent Platform v3.0.6+ + +*** + +## 一、功能概述 + +### 1.1 核心能力 + +系统支持从 Zabbix 告警接收、AI 智能分析、自动修复、人工审批、结果验证到自动回滚的完整闭环。 + +### 1.2 完整链路 + +``` +Zabbix 告警 + ↓ +[1] Webhook 接收(签名验证 + IP 白名单) + ↓ +[2] AI 自动分析(SSH 诊断 + LLM 分析) + ↓ +[3] 生成结构化修复建议(JSON 格式) + ↓ +[4] 自动创建修复工作流(4 节点) + ↓ +[5] 发送审批通知(企业微信/钉钉/邮箱) + ↓ +[6] 人工审批(审批中心页面) + ↓ +[7] 审批通过 → 执行修复命令 + ↓ +[8] 验证修复结果 + ↓ +[9] 反馈通知 + 记录审计日志 + ↓ +[10] 验证失败 → 自动回滚 +``` + +*** + +## 二、系统架构 + +### 2.1 工作流结构 + +AI 自动生成的修复工作流包含 4 个节点: + +``` +┌──────────────┐ ┌──────────────┐ ┌──────────────┐ +│ 审批节点 │ ──▶ │ 执行修复节点 │ ──▶ │ 验证结果节点 │ +│ (Approval) │ │ (Execution) │ │ (Verification)│ +└──────────────┘ └──────────────┘ └──────────────┘ + │ + 验证失败 + │ + ▼ + ┌──────────────┐ + │ 自动回滚节点 │ + │ (Rollback) │ + └──────────────┘ +``` + +| 节点 | 类型 | 说明 | +| ------ | ---------- | ----------------------------------- | +| 审批节点 | `approval` | 等待人工审批,支持超时自动拒绝 | +| 执行修复节点 | `agent` | 使用 `server-command-agent` 执行修复命令 | +| 验证结果节点 | `agent` | 验证修复是否成功,检查系统状态 | +| 自动回滚节点 | `agent` | 断开连接,验证失败时由 `finalizeWorkflow` 自动触发 | + +### 2.2 数据流 + +``` +告警数据 → 设备识别 → SSH/SNMP 诊断 → LLM 分析 + ↓ +修复命令 JSON → 工作流创建 → 审批暂停 + ↓ +审批通过 → 执行修复 → 验证结果 + ↓ +成功 → 通知 + 审计日志 +失败 → 自动回滚 → 通知 + 审计日志 +``` + +*** + +## 三、配置步骤 + +### 3.1 Zabbix 端配置 + +#### 3.1.1 创建告警媒介 + +1. 登录 Zabbix 管理界面 +2. 进入 **管理 → 报警媒介类型 → 创建媒介类型** +3. 配置如下: + +| 配置项 | 值 | +| ------------ | ----------------------------------------------- | +| 名称 | `ITOps Webhook` | +| 类型 | `Webhook` | +| URL | `http://:3001/api/webhooks/zabbix` | +| 请求方法 | `POST` | +| Content-Type | `application/json` | + +#### 3.1.2 配置 Webhook 参数 + +```json +{ + "alertid": "{ALERT.ID}", + "host": "{HOST.NAME}", + "ip": "{HOST.IP}", + "trigger": "{TRIGGER.NAME}", + "severity": "{TRIGGER.SEVERITY}", + "status": "{TRIGGER.STATUS}", + "description": "{TRIGGER.DESCRIPTION}", + "value": "{TRIGGER.VALUE}", + "event_id": "{EVENT.ID}", + "time": "{EVENT.DATE} {EVENT.TIME}" +} +``` + +#### 3.1.3 配置告警动作 + +1. 进入 **配置 → 动作 → 创建动作** +2. 条件:选择需要自动修复的触发器 +3. 操作:选择 `ITOps Webhook` 媒介类型 +4. 发送到:选择接收用户组 + +### 3.2 系统端配置 + +#### 3.2.1 环境变量配置 + +在 `.env` 文件中配置: + +```bash +# Webhook 安全配置 +WEBHOOK_IP_WHITELIST=192.168.1.100,10.0.0.0/8 # Zabbix 服务器 IP +WEBHOOK_SECRET=your-webhook-secret-key # HMAC 签名密钥(可选) + +# 通知渠道配置 +WECHAT_WEBHOOK_URL=https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=xxx +DINGTALK_WEBHOOK_URL=https://oapi.dingtalk.com/robot/send?access_token=xxx +SMTP_HOST=smtp.example.com +SMTP_PORT=465 +SMTP_USER=alert@example.com +SMTP_PASSWORD=your-password +NOTIFICATION_EMAIL=admin@example.com + +# AI 分析配置 +LLM_API_KEY=your-api-key +LLM_API_BASE=https://api.example.com/v1 +LLM_MODEL=gpt-4 + +# 审批超时配置(秒) +APPROVAL_TIMEOUT_HIGH=7200 # 高风险 2 小时 +APPROVAL_TIMEOUT_MEDIUM=3600 # 中风险 1 小时 +APPROVAL_TIMEOUT_LOW=1800 # 低风险 30 分钟 +``` + +#### 3.2.2 设备信息配置 + +确保以下表中包含目标设备信息: + +- `network_devices`:网络设备(交换机、路由器) +- `servers`:服务器 + +AI 分析时会根据告警 IP 自动查找设备,获取 SSH 凭证进行诊断。 + +*** + +## 四、AI 自动分析 + +### 4.1 诊断方式 + +| 方式 | 条件 | 说明 | +| ------- | ---------- | ------------ | +| SSH 诊断 | 设备有 SSH 凭证 | 登录设备执行诊断命令 | +| SNMP 诊断 | 无 SSH 凭证 | 使用 SNMP 巡检数据 | + +### 4.2 厂商适配 + +系统支持以下厂商的自动诊断命令: + +| 厂商 | 诊断命令示例 | +| --------- | ------------------------------------------- | +| 华为 | `display cpu-usage`, `display memory-usage` | +| 思科 | `show cpu`, `show memory` | +| H3C | `display cpu-usage`, `display memory` | +| 锐捷 | `show cpu`, `show memory` | +| 中兴 | `show cpu`, `show memory` | +| Linux 服务器 | `top -bn1`, `free -m`, `df -h` | + +### 4.3 LLM 分析输出 + +AI 分析后生成结构化 JSON: + +```json +{ + "diagnosis": "CPU 使用率过高,由进程 java (PID: 12345) 占用 95%", + "summary": "Java 进程 CPU 占用异常", + "remediationCommands": [ + "kill -9 12345", + "systemctl restart myapp" + ], + "riskLevel": "medium" +} +``` + +*** + +## 五、审批流程 + +### 5.1 审批通知 + +审批请求创建后,系统会通过以下渠道发送通知: + +| 渠道 | 配置项 | 说明 | +| ------- | ------------------------------- | ------------- | +| 企业微信 | `WECHAT_WEBHOOK_URL` | Markdown 格式消息 | +| 钉钉 | `DINGTALK_WEBHOOK_URL` | Markdown 格式消息 | +| 邮箱 | `SMTP_*` + `NOTIFICATION_EMAIL` | HTML 邮件 | +| Webhook | 自定义 URL | JSON 格式推送 | + +### 5.2 审批操作 + +#### 审批中心页面 + +访问 `/approval-center` 页面,可以看到: + +- 待审批列表 +- 审批详情(修复方案、风险等级、目标设备) +- 操作按钮:通过 / 拒绝 + +#### API 操作 + +```bash +# 审批通过 +POST /api/approvals/{id}/approve +Authorization: Bearer +{ + "comment": "同意执行" +} + +# 审批拒绝 +POST /api/approvals/{id}/reject +Authorization: Bearer +{ + "reason": "当前时间段不允许变更" +} +``` + +### 5.3 审批超时 + +| 风险等级 | 默认超时 | 超时行为 | +| ---- | ----- | ---- | +| 高风险 | 2 小时 | 自动拒绝 | +| 中风险 | 1 小时 | 自动拒绝 | +| 低风险 | 30 分钟 | 自动拒绝 | + +*** + +## 六、验证机制 + +### 6.1 智能验证命令 + +系统根据修复命令自动推断验证命令: + +| 修复类型 | 验证命令 | +| ----------------------------- | ----------------------------------------------------------------- | +| `systemctl restart ` | `systemctl status `, `systemctl is-active ` | +| 磁盘清理(`rm`, `clean`) | `df -h` | +| 内存相关(`memory`, `swap`) | `free -m` | +| CPU 相关(`kill`, `top`) | `uptime`, `top -bn1 \| head -5` | +| 网络相关(`iptables`, `nginx`) | `ss -tlnp` | +| Docker 相关 | `docker ps --format "table {{.Names}}\t{{.Status}}"` | +| 无法推断 | `uptime`, `systemctl list-units --failed`, `dmesg -T \| tail -10` | + +### 6.2 验证结论 + +验证节点输出三种结论: + +- ✅ **修复成功**:指标恢复正常 +- ⚠️ **部分恢复**:部分指标改善但仍有异常 +- ❌ **修复失败**:指标未改善或恶化 + +*** + +## 七、自动回滚 + +### 7.1 触发条件 + +当验证节点执行失败时,系统自动触发回滚: + +1. 检测验证节点状态为 `failed` +2. 查找工作流中的回滚节点(label 包含 "回滚") +3. 执行回滚命令 +4. 更新 `ai_remediations` 表状态 +5. 发送回滚通知 +6. 记录审计日志 + +### 7.2 智能回滚命令 + +系统根据修复命令自动推断回滚命令: + +| 修复命令 | 回滚命令 | +| ------------------------------- | ------------------------------------------------------- | +| `systemctl start ` | `systemctl stop ` | +| `systemctl restart ` | `systemctl stop ` | +| `systemctl stop ` | `systemctl start ` | +| `cp .bak` | `cp .bak ` | +| `docker run --name ` | `docker stop && docker rm ` | +| `iptables -A ...` | 列出当前规则(需手动确认) | +| 无法推断 | `systemctl list-units --failed`, `dmesg -T \| tail -20` | + +### 7.3 回滚通知 + +回滚执行后,系统发送额外通知: + +```json +{ + "type": "remediation_rollback", + "title": "⚠️ AI 修复验证失败并已回滚: ", + "content": "**工作流**: ...\n**验证结果**: 失败\n**回滚操作**: 已自动执行\n**任务ID**: ...", + "related_task_id": "" +} +``` + +*** + +## 八、审计日志 + +### 8.1 日志记录 + +系统在工作流完成时自动记录审计日志: + +| 事件 | action | 说明 | +| ----- | -------------------------------- | ---------- | +| 工作流完成 | `workflow_completed` | 所有节点执行成功 | +| 工作流失败 | `workflow_failed` | 存在节点执行失败 | +| 触发回滚 | `remediation_rollback_triggered` | 验证失败触发自动回滚 | + +### 8.2 日志详情 + +```json +{ + "action": "workflow_completed", + "resource_type": "task", + "resource_id": "", + "details": { + "workflowName": "AI 修复工作流: CPU 使用率过高", + "workflowId": "", + "successCount": 3, + "failedCount": 0, + "verificationFailed": false, + "errorMessage": null + } +} +``` + +*** + +## 九、WebSocket 实时事件 + +### 9.1 任务执行事件 + +| 事件 | 数据 | 说明 | +| --------------------- | ------------------------------------ | ------ | +| `task:node:started` | `{ taskId, nodeId, nodeName }` | 节点开始执行 | +| `task:node:output` | `{ taskId, nodeId, output }` | 节点输出 | +| `task:node:completed` | `{ taskId, nodeId, status, output }` | 节点完成 | +| `task:completed` | `{ taskId, status, nodeResults }` | 任务完成 | +| `task:failed` | `{ taskId, error }` | 任务失败 | + +### 9.2 审批事件 + +| 事件 | 数据 | 说明 | +| ------------------------- | ----------------------------------- | ---- | +| `task:approval:requested` | `{ taskId, approvalId, nodeLabel }` | 审批请求 | +| `task:approval:resolved` | `{ taskId, approvalId, status }` | 审批结果 | + +*** + +## 十、常见问题 + +### Q1:Zabbix 告警未触发自动修复? + +**排查步骤**: + +1. 检查 Webhook 是否收到请求:查看后端日志 `docker logs backend` +2. 检查 IP 白名单:确认 `WEBHOOK_IP_WHITELIST` 包含 Zabbix 服务器 IP +3. 检查设备信息:确认 `network_devices` 或 `servers` 表中有对应 IP 的设备 +4. 检查 SSH 凭证:确认设备有有效的 SSH 登录信息 + +### Q2:AI 分析未生成修复命令? + +**可能原因**: + +1. LLM API 配置错误:检查 `LLM_API_KEY` 和 `LLM_API_BASE` +2. 诊断数据不足:SSH 连接失败或 SNMP 数据不完整 +3. AI 判断无需修复:告警级别较低或问题不明确 + +### Q3:审批通知未收到? + +**排查步骤**: + +1. 检查通知渠道配置:确认 `WECHAT_WEBHOOK_URL` 等环境变量正确 +2. 检查网络连接:确保后端能访问通知服务 API +3. 查看后端日志:搜索 `notification` 相关错误 + +### Q4:验证节点判断修复失败? + +**可能原因**: + +1. 修复命令未生效:需要更长时间或需要重启服务 +2. 验证命令不匹配:系统推断的验证命令不适用于当前场景 +3. 系统状态未恢复:修复后需要等待一段时间才能验证 + +### Q5:回滚执行失败? + +**处理方式**: + +1. 查看回滚日志:在任务详情页查看回滚节点输出 +2. 手动回滚:根据修复命令手动执行逆向操作 +3. 检查设备连通性:确认 SSH 连接正常 + +*** + +## 十一、最佳实践 + +### 11.1 告警配置 + +- 为关键告警配置自动修复,非关键告警使用仅建议模式 +- 设置合理的告警阈值,避免频繁触发 +- 配置告警依赖,减少重复告警 + +### 11.2 审批策略 + +- 高风险操作设置较长审批超时(2 小时) +- 低风险操作设置较短审批超时(30 分钟) +- 定期审查审批记录,优化审批策略 + +### 11.3 验证策略 + +- 验证命令应覆盖修复操作的核心指标 +- 验证超时设置合理(默认 120 秒) +- 验证失败时及时通知运维人员 + +### 11.4 回滚策略 + +- 修复前自动备份配置文件 +- 回滚命令应经过测试验证 +- 关键操作建议人工确认回滚 + +*** + +## 十二、API 参考 + +### 12.1 Webhook 接收 + +```http +POST /api/webhooks/zabbix +Content-Type: application/json +X-Webhook-Signature: + +{ + "alertid": "12345", + "host": "server-01", + "ip": "192.168.1.100", + "trigger": "CPU usage > 90%", + "severity": "high", + "status": "PROBLEM", + "description": "CPU usage is above 90% for 5 minutes", + "value": "1", + "event_id": "67890", + "time": "2026-06-14 10:30:00" +} +``` + +### 12.2 审批管理 + +```http +# 查询待审批列表 +GET /api/approvals?status=pending + +# 审批通过 +POST /api/approvals/{id}/approve + +# 审批拒绝 +POST /api/approvals/{id}/reject +``` + +### 12.3 任务管理 + +```http +# 查询任务列表 +GET /api/tasks + +# 查询任务详情 +GET /api/tasks/{id} + +# 查询任务日志 +GET /api/tasks/{id}/logs +``` + +*** + +## 十三、相关文件 + +| 文件 | 说明 | +| ---------------------------------------------- | ------------ | +| `backend/src/routes/webhookRoutes.ts` | Webhook 接收路由 | +| `backend/src/services/alertAutoAnalyzer.ts` | AI 自动分析服务 | +| `backend/src/services/aiRemediationService.ts` | AI 修复工作流生成 | +| `backend/src/services/workflowExecutor.ts` | 工作流执行引擎 | +| `backend/src/services/notificationService.ts` | 通知服务 | +| `backend/src/services/auditService.ts` | 审计日志服务 | +| `backend/src/routes/approvalRoutes.ts` | 审批管理路由 | + +*** + +**文档版本**:v1.0\ +**最后更新**:2026-06-14\ +**维护者**:谭策 diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index a12cbe6..bc4df29 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -50,6 +50,8 @@ const AIModels = lazy(() => import('./pages/AIModels')); const SNMPPage = lazy(() => import('./pages/SNMP')); const NetworkDiscoveryPage = lazy(() => import('./pages/NetworkDiscovery')); const AlertCorrelationGroupsPage = lazy(() => import('./pages/AlertCorrelationGroups')); +const Approvals = lazy(() => import('./pages/Approvals')); +const AiRemediations = lazy(() => import('./pages/AiRemediations')); const FrontendTests = lazy(() => import('./pages/FrontendTests')); const NotFound = lazy(() => import('./pages/NotFound')); @@ -131,6 +133,8 @@ function App() { } /> } /> } /> + } /> + } /> } /> diff --git a/frontend/src/components/layout/Layout.tsx b/frontend/src/components/layout/Layout.tsx index ed5a0f4..b3bc898 100644 --- a/frontend/src/components/layout/Layout.tsx +++ b/frontend/src/components/layout/Layout.tsx @@ -85,6 +85,7 @@ const navigationGroups = [ { name: 'Agent管理', href: '/agents', icon: Bot }, { name: '工作流', href: '/workflows', icon: GitBranch }, { name: '任务执行', href: '/tasks', icon: Play }, + { name: '审批中心', href: '/approvals', icon: ShieldCheck }, { name: '脚本中心', href: '/scripts', icon: FileCode }, { name: '定时任务', href: '/scheduled-tasks', icon: Clock }, ] @@ -113,6 +114,7 @@ const navigationGroups = [ { name: '修复效果仪表盘', href: '/remediation-dashboard', icon: BarChart3 }, { name: '修复执行记录', href: '/remediation-executions', icon: ListChecks }, { name: '自愈工作台', href: '/remediation-workbench', icon: Workflow }, + { name: 'AI 修复记录', href: '/ai-remediations', icon: Lightbulb }, ] }, { diff --git a/frontend/src/pages/AiRemediations.tsx b/frontend/src/pages/AiRemediations.tsx new file mode 100644 index 0000000..5ebfd2e --- /dev/null +++ b/frontend/src/pages/AiRemediations.tsx @@ -0,0 +1,222 @@ +import { useState } from 'react'; +import { useQuery } from '@tanstack/react-query'; +import { Bot, AlertTriangle, Shield, Clock, CheckCircle, XCircle, Loader2, Terminal, ChevronDown, ChevronRight } from 'lucide-react'; +import api from '../lib/api'; + +interface AiRemediation { + id: string; + alert_id: string; + device_id: string; + device_name: string; + device_ip: string; + task_id: string | null; + workflow_id: string | null; + diagnosis: string; + remediation_commands: string[]; + risk_level: 'low' | 'medium' | 'high'; + status: 'pending' | 'waiting_approval' | 'approved' | 'rejected' | 'executing' | 'completed' | 'failed'; + execution_result?: string; + error_message?: string; + created_at: string; + updated_at: string; +} + +export default function AiRemediations() { + const [expandedId, setExpandedId] = useState(null); + + const { data: remediations, isLoading } = useQuery({ + queryKey: ['ai-remediations'], + queryFn: async () => { + const res = await api.get('/api/ai-remediations?limit=50'); + return res.data.data as AiRemediation[]; + }, + refetchInterval: 5000, // Auto-refresh every 5 seconds + }); + + const getStatusBadge = (status: string) => { + const badges: Record = { + pending: { icon: Clock, color: 'bg-gray-500/10 text-gray-500 border-gray-500/30', label: '待处理' }, + waiting_approval: { icon: Shield, color: 'bg-yellow-500/10 text-yellow-500 border-yellow-500/30', label: '等待审批' }, + approved: { icon: CheckCircle, color: 'bg-green-500/10 text-green-500 border-green-500/30', label: '已批准' }, + rejected: { icon: XCircle, color: 'bg-red-500/10 text-red-500 border-red-500/30', label: '已拒绝' }, + executing: { icon: Loader2, color: 'bg-blue-500/10 text-blue-500 border-blue-500/30', label: '执行中' }, + completed: { icon: CheckCircle, color: 'bg-green-500/10 text-green-500 border-green-500/30', label: '已完成' }, + failed: { icon: XCircle, color: 'bg-red-500/10 text-red-500 border-red-500/30', label: '失败' }, + }; + const badge = badges[status] || badges.pending; + const Icon = badge.icon; + return ( + + + {badge.label} + + ); + }; + + const getRiskBadge = (risk: string) => { + const colors: Record = { + low: 'bg-green-500/10 text-green-600 border-green-500/30', + medium: 'bg-yellow-500/10 text-yellow-600 border-yellow-500/30', + high: 'bg-red-500/10 text-red-600 border-red-500/30', + }; + const labels: Record = { low: '低风险', medium: '中风险', high: '高风险' }; + return ( + + {labels[risk] || risk} + + ); + }; + + const formatTime = (dateStr: string) => { + return new Date(dateStr).toLocaleString('zh-CN', { + month: '2-digit', day: '2-digit', hour: '2-digit', minute: '2-digit', second: '2-digit', + }); + }; + + return ( +
+ {/* Header */} +
+
+ +
+

AI 修复记录

+

AI 自动生成的修复方案及执行状态

+
+
+
+ 共 {remediations?.length || 0} 条记录 +
+
+ + {/* List */} + {isLoading ? ( +
+ +

加载中...

+
+ ) : !remediations || remediations.length === 0 ? ( +
+ +

暂无 AI 修复记录

+

当 AI 分析告警后自动生成修复方案时,会显示在这里

+
+ ) : ( +
+ {remediations.map((rem) => ( +
+ {/* Summary Row */} +
setExpandedId(expandedId === rem.id ? null : rem.id)} + > +
+
+
+

+ {rem.device_name} ({rem.device_ip}) +

+ {getStatusBadge(rem.status)} + {getRiskBadge(rem.risk_level)} +
+

+ {rem.diagnosis?.split('\n')[0] || 'AI 诊断中...'} +

+
+ 修复命令: {rem.remediation_commands?.length || 0} 条 + 创建: {formatTime(rem.created_at)} + {rem.task_id && 任务: {rem.task_id.slice(0, 8)}...} +
+
+
+ {expandedId === rem.id ? : } +
+
+
+ + {/* Expanded Detail */} + {expandedId === rem.id && ( +
+ {/* Diagnosis */} +
+

+ + AI 诊断报告 +

+
+                      {rem.diagnosis || '暂无诊断结果'}
+                    
+
+ + {/* Commands */} +
+

+ + 修复命令 ({rem.remediation_commands?.length || 0}) +

+
+ {rem.remediation_commands?.map((cmd, i) => ( +
+ {i + 1}. + {cmd} +
+ ))} + {(!rem.remediation_commands || rem.remediation_commands.length === 0) && ( +

AI 未提供修复命令

+ )} +
+
+ + {/* Execution Result */} + {rem.execution_result && ( +
+

执行结果

+
+                        {rem.execution_result}
+                      
+
+ )} + + {/* Error */} + {rem.error_message && ( +
+

错误信息

+
+                        {rem.error_message}
+                      
+
+ )} + + {/* Links */} + +
+ )} +
+ ))} +
+ )} +
+ ); +} diff --git a/frontend/src/pages/Approvals.tsx b/frontend/src/pages/Approvals.tsx new file mode 100644 index 0000000..ca166ce --- /dev/null +++ b/frontend/src/pages/Approvals.tsx @@ -0,0 +1,251 @@ +import { useState } from 'react'; +import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query'; +import { Shield, CheckCircle, XCircle, Clock, AlertCircle, Check, X } from 'lucide-react'; +import api from '../lib/api'; +import { useToast } from '../contexts/ToastContext'; + +interface ApprovalRequest { + id: string; + task_id: string; + node_id: string; + node_label: string; + description: string; + status: 'pending' | 'approved' | 'rejected' | 'timeout'; + requested_by: string; + approved_by?: string; + approved_at?: string; + reject_reason?: string; + timeout_at?: string; + timeout_action: 'reject' | 'wait'; + created_at: string; + updated_at: string; +} + +export default function Approvals() { + const [filter, setFilter] = useState<'all' | 'pending' | 'approved' | 'rejected'>('pending'); + const [rejectModal, setRejectModal] = useState<{ open: boolean; approvalId: string | null }>({ + open: false, + approvalId: null, + }); + const [rejectReason, setRejectReason] = useState(''); + const toast = useToast(); + const queryClient = useQueryClient(); + + const { data: approvals, isLoading } = useQuery({ + queryKey: ['approvals', filter], + queryFn: async () => { + const params = filter !== 'all' ? `?status=${filter}` : ''; + const res = await api.get(`/api/approvals${params}`); + return res.data.data as ApprovalRequest[]; + }, + }); + + const approveMutation = useMutation({ + mutationFn: async (approvalId: string) => { + await api.post(`/api/approvals/${approvalId}/approve`, { comment: '审批通过' }); + }, + onSuccess: () => { + toast.success('审批已通过'); + queryClient.invalidateQueries({ queryKey: ['approvals'] }); + }, + onError: () => { + toast.error('审批失败'); + }, + }); + + const rejectMutation = useMutation({ + mutationFn: async ({ approvalId, reason }: { approvalId: string; reason: string }) => { + await api.post(`/api/approvals/${approvalId}/reject`, { reason }); + }, + onSuccess: () => { + toast.success('审批已拒绝'); + queryClient.invalidateQueries({ queryKey: ['approvals'] }); + setRejectModal({ open: false, approvalId: null }); + setRejectReason(''); + }, + onError: () => { + toast.error('拒绝失败'); + }, + }); + + const getStatusBadge = (status: string) => { + const badges = { + pending: { icon: Clock, color: 'bg-yellow-500/10 text-yellow-500 border-yellow-500/30', label: '待审批' }, + approved: { icon: CheckCircle, color: 'bg-green-500/10 text-green-500 border-green-500/30', label: '已通过' }, + rejected: { icon: XCircle, color: 'bg-red-500/10 text-red-500 border-red-500/30', label: '已拒绝' }, + timeout: { icon: AlertCircle, color: 'bg-gray-500/10 text-gray-500 border-gray-500/30', label: '已超时' }, + }; + const badge = badges[status as keyof typeof badges] || badges.pending; + const Icon = badge.icon; + return ( + + + {badge.label} + + ); + }; + + const formatTime = (dateStr: string) => { + const date = new Date(dateStr); + return date.toLocaleString('zh-CN', { + month: '2-digit', + day: '2-digit', + hour: '2-digit', + minute: '2-digit', + }); + }; + + return ( +
+ {/* Header */} +
+
+ +
+

审批中心

+

管理工作流审批请求

+
+
+
+ + {/* Filter Tabs */} +
+ {[ + { key: 'pending', label: '待审批' }, + { key: 'approved', label: '已通过' }, + { key: 'rejected', label: '已拒绝' }, + { key: 'all', label: '全部' }, + ].map((tab) => ( + + ))} +
+ + {/* Approval List */} + {isLoading ? ( +
加载中...
+ ) : !approvals || approvals.length === 0 ? ( +
+ +

暂无审批记录

+
+ ) : ( +
+ {approvals.map((approval) => ( +
+
+
+
+

+ {approval.node_label} +

+ {getStatusBadge(approval.status)} +
+

{approval.description}

+
+ 任务ID: {approval.task_id.slice(0, 8)}... + 发起人: {approval.requested_by || '系统'} + 发起时间: {formatTime(approval.created_at)} + {approval.timeout_at && approval.status === 'pending' && ( + + 超时: {formatTime(approval.timeout_at)} + + )} +
+ {approval.approved_by && ( +
+ {approval.status === 'approved' ? ( + + ✓ 由 {approval.approved_by} 于 {formatTime(approval.approved_at!)} 通过 + + ) : approval.status === 'rejected' ? ( + + ✗ 由 {approval.approved_by} 于 {formatTime(approval.approved_at!)} 拒绝 + {approval.reject_reason && `: ${approval.reject_reason}`} + + ) : null} +
+ )} +
+ + {approval.status === 'pending' && ( +
+ + +
+ )} +
+
+ ))} +
+ )} + + {/* Reject Modal */} + {rejectModal.open && ( +
+
+

拒绝审批

+