增加web_api
This commit is contained in:
94
src/gasflux/blueprints/health.py
Normal file
94
src/gasflux/blueprints/health.py
Normal file
@ -0,0 +1,94 @@
|
||||
"""
|
||||
Health Check Blueprint
|
||||
Provides API health monitoring and system status endpoints.
|
||||
"""
|
||||
|
||||
import os
|
||||
import time
|
||||
import logging
|
||||
from flask import Blueprint, request
|
||||
|
||||
from ..app import Config, stats_collector
|
||||
from ..shared import task_status, TASK_STATUS_PENDING, TASK_STATUS_PROCESSING
|
||||
from ..shared import _format_response, log_performance, logger
|
||||
|
||||
# Create blueprint
|
||||
# Blueprint grouping the health-monitoring routes; it is created with the '/health' URL prefix.
health_bp = Blueprint('health', __name__, url_prefix='/health')
|
||||
|
||||
|
||||
# Thresholds applied by health_check() when grading the service.
_HIGH_LOAD_ACTIVE_TASKS = 20  # more active tasks than this is reported as high load
_MAX_ERROR_RATE_PERCENT = 10  # an error rate above this marks the service as degraded


def _evaluate_health(uploads_writable, outputs_writable, active_tasks, error_rate_percent):
    """Grade the collected health metrics.

    Args:
        uploads_writable: Whether the upload folder passed the write-access check.
        outputs_writable: Whether the output folder passed the write-access check.
        active_tasks: Number of tasks currently pending or processing.
        error_rate_percent: Error rate reported by the stats collector.

    Returns:
        A ``(is_healthy, issues)`` tuple: ``issues`` is the list of
        human-readable problem descriptions found, and ``is_healthy`` is
        False when at least one of them is considered degrading.
    """
    issues = []
    is_healthy = True

    if not uploads_writable:
        issues.append("上传文件夹不可写")
        is_healthy = False
    if not outputs_writable:
        issues.append("输出文件夹不可写")
        is_healthy = False
    if active_tasks > _HIGH_LOAD_ACTIVE_TASKS:
        # High load is reported as an issue but does not, on its own,
        # switch the service to "degraded".
        issues.append(f"活跃任务数量过多 ({active_tasks})")
    if error_rate_percent > _MAX_ERROR_RATE_PERCENT:
        issues.append(f"错误率过高 ({error_rate_percent:.1f}%)")
        is_healthy = False

    return is_healthy, issues


@health_bp.route('', methods=['GET'])
@log_performance
def health_check():
    """Report API health: storage access, task load and request statistics.

    Checks write access to the configured upload and output folders, counts
    the tracked tasks whose status is pending or processing, and reads a
    summary from the stats collector.

    Returns:
        The value produced by ``_format_response``:
        * code 200 with status "healthy" when no degrading issue was found;
        * code 503 with status "degraded" when a folder is not writable or
          the error rate exceeds the threshold;
        * code 500 with status "unhealthy" when the check itself raised.
        An ``issues`` list is included in the payload whenever any issue
        (including high load) was detected.
    """
    logger.debug("Health check requested")

    try:
        # Check storage accessibility
        uploads_writable = os.access(Config.UPLOAD_FOLDER, os.W_OK)
        outputs_writable = os.access(Config.OUTPUT_FOLDER, os.W_OK)

        # Count active tasks without building an intermediate list
        active_states = (TASK_STATUS_PENDING, TASK_STATUS_PROCESSING)
        active_tasks = sum(
            1 for task in task_status.values() if task.get("status") in active_states
        )

        # Get basic stats for health check
        stats_summary = stats_collector.get_summary()

        health_data = {
            "status": "healthy",
            "version": "1.0.0",
            "timestamp": time.time(),
            "uptime": stats_summary['summary']['uptime_formatted'],
            "storage": {
                "uploads_writable": uploads_writable,
                "outputs_writable": outputs_writable,
            },
            "tasks": {
                "active_count": active_tasks,
                "total_tracked": len(task_status),
                "total_processed": (
                    stats_summary['tasks']['total_completed']
                    + stats_summary['tasks']['total_failed']
                ),
                "success_rate_percent": stats_summary['tasks']['success_rate_percent'],
            },
            "performance": {
                "requests_per_second": stats_summary['summary']['requests_per_second'],
                "avg_response_time_ms": stats_summary['performance']['avg_response_time_ms'],
                "error_rate_percent": stats_summary['summary']['error_rate_percent'],
            },
        }

        # Determine health status based on metrics
        is_healthy, issues = _evaluate_health(
            uploads_writable,
            outputs_writable,
            active_tasks,
            stats_summary['summary']['error_rate_percent'],
        )

        health_data["status"] = "healthy" if is_healthy else "degraded"
        if issues:
            health_data["issues"] = issues

        # Log warnings for potential issues
        for issue in issues:
            logger.warning(f"Health check issue: {issue}")

        # Keep routine healthy checks at DEBUG; only degraded results are raised to WARNING.
        status_level = logging.DEBUG if is_healthy else logging.WARNING
        logger.log(status_level, f"Health check: {health_data['status']} (active tasks: {active_tasks})")

        status_code = 200 if is_healthy else 503  # 503 Service Unavailable for degraded
        message = "健康检查完成" if is_healthy else "服务不可用"
        return _format_response(status_code, message, health_data)

    except Exception as e:
        # Boundary handler: any failure while collecting metrics is logged
        # with its traceback and reported as an unhealthy result.
        # NOTE(review): the raw exception text is returned to the caller.
        logger.error(f"Health check failed: {str(e)}", exc_info=True)
        return _format_response(500, "健康检查失败", {
            "status": "unhealthy",
            "error": str(e),
            "timestamp": time.time(),
        })
|
||||
Reference in New Issue
Block a user