"""
Health Check Blueprint

Provides API health monitoring and system status endpoints.
"""

import os
import time
import logging
from flask import Blueprint, request

from ..app import Config, stats_collector
from ..shared import task_status, TASK_STATUS_PENDING, TASK_STATUS_PROCESSING
from ..shared import _format_response, log_performance, logger

# Blueprint that serves every route under the /health prefix.
health_bp = Blueprint('health', __name__, url_prefix='/health')


@health_bp.route('', methods=['GET'])
@log_performance
def health_check():
    """API health check.

    Gathers storage writability, task-queue counters and the statistics
    summary into a single report and returns it through
    ``_format_response``.

    Status codes passed to ``_format_response``:
        200 -- both folders are writable and the error rate is at most 10%.
        503 -- a folder is not writable or the error rate exceeds 10%;
               the report's ``status`` is ``"degraded"``.
        500 -- building the report raised an exception.

    More than 20 active tasks is recorded under ``issues`` but does not,
    on its own, change the status away from ``"healthy"``.
    """
    logger.debug("Health check requested")

    try:
        # Storage: can the process write to both working directories?
        can_write_uploads = os.access(Config.UPLOAD_FOLDER, os.W_OK)
        can_write_outputs = os.access(Config.OUTPUT_FOLDER, os.W_OK)

        # Tasks that are still pending or currently being processed.
        in_flight_states = (TASK_STATUS_PENDING, TASK_STATUS_PROCESSING)
        busy_count = sum(
            1
            for entry in task_status.values()
            if entry.get("status") in in_flight_states
        )

        # Aggregated statistics from the collector.
        summary = stats_collector.get_summary()

        # The report is assembled step by step; key order matches the
        # order in which the sections are added below.
        overview = summary['summary']
        report = {
            "status": "healthy",
            "version": "1.0.0",
            "timestamp": time.time(),
            "uptime": overview['uptime_formatted'],
            "storage": {
                "uploads_writable": can_write_uploads,
                "outputs_writable": can_write_outputs,
            },
        }

        task_stats = summary['tasks']
        report["tasks"] = {
            "active_count": busy_count,
            "total_tracked": len(task_status),
            "total_processed": (
                task_stats['total_completed'] + task_stats['total_failed']
            ),
            "success_rate_percent": task_stats['success_rate_percent'],
        }

        report["performance"] = {
            "requests_per_second": overview['requests_per_second'],
            "avg_response_time_ms": summary['performance']['avg_response_time_ms'],
            "error_rate_percent": overview['error_rate_percent'],
        }

        # Evaluate the metrics and collect human-readable problems.
        error_rate = overview['error_rate_percent']
        problems = []

        if not can_write_uploads:
            problems.append("上传文件夹不可写")
        if not can_write_outputs:
            problems.append("输出文件夹不可写")

        # High load threshold: reported as an issue only, it does not
        # flip the service to degraded.
        if busy_count > 20:
            problems.append(f"活跃任务数量过多 ({busy_count})")

        # High error rate threshold: this one does degrade the service.
        error_rate_too_high = error_rate > 10
        if error_rate_too_high:
            problems.append(f"错误率过高 ({error_rate:.1f}%)")

        healthy = (
            can_write_uploads
            and can_write_outputs
            and not error_rate_too_high
        )

        # Pick the HTTP status, response message and log level together.
        if healthy:
            report["status"] = "healthy"
            http_status, message, log_level = 200, "健康检查完成", logging.DEBUG
        else:
            report["status"] = "degraded"
            # 503 Service Unavailable for degraded
            http_status, message, log_level = 503, "服务不可用", logging.WARNING

        if problems:
            report["issues"] = problems
            # Surface every detected problem in the log as a warning.
            for problem in problems:
                logger.warning(f"Health check issue: {problem}")

        logger.log(
            log_level,
            f"Health check: {report['status']} (active tasks: {busy_count})",
        )

        return _format_response(http_status, message, report)

    except Exception as exc:
        detail = str(exc)
        logger.error(f"Health check failed: {detail}", exc_info=True)
        failure_payload = {
            "status": "unhealthy",
            "error": detail,
            "timestamp": time.time(),
        }
        return _format_response(500, "健康检查失败", failure_payload)