重构: 切换存储至SQLite,启用INI配置与API Key校验

This commit is contained in:
2026-02-09 17:10:11 +08:00
parent d5edbc0723
commit b9828a1b13
30 changed files with 2721 additions and 612 deletions

209
src/gasflux/janitor.py Normal file
View File

@ -0,0 +1,209 @@
"""
Janitor Module
Handles background cleanup of expired tasks and their output directories.
"""
import threading
import time
import shutil
from pathlib import Path
import sqlite3
from flask import current_app
def start_janitor(app, interval_seconds=30):
    """Start the background janitor thread for cleaning up expired tasks.

    Spawns a daemon thread that runs :func:`cleanup_expired_tasks` inside the
    application context in an endless loop, sleeping between cycles.

    Args:
        app: The Flask application whose context (config, logger, DB path)
            the worker runs under.
        interval_seconds: Seconds to sleep between cleanup cycles. Defaults
            to 30, preserving the original fixed cadence.
    """
    def worker():
        with app.app_context():
            while True:
                try:
                    cleanup_expired_tasks()
                except Exception as e:
                    app.logger.error(f"Janitor cleanup error: {str(e)}", exc_info=True)
                finally:
                    # Always sleep before the next cycle, even after a failure,
                    # so a persistent error cannot turn into a busy loop.
                    time.sleep(interval_seconds)

    # Daemon thread: it must not keep the interpreter alive on shutdown.
    thread = threading.Thread(target=worker, daemon=True, name="janitor")
    thread.start()
    app.logger.info("Janitor thread started for background cleanup")
def cleanup_expired_tasks():
    """Clean up tasks that have exceeded their deletion time.

    Finds rows whose ``delete_after_at`` has passed (using the same
    ``+8 hours`` offset the writer used), removes their output and upload
    directories from disk, and hard-deletes the rows from the ``tasks``
    table. When the ``JANITOR_DRY_RUN`` config flag is truthy, only logs
    what would be deleted and leaves both the filesystem AND the database
    untouched.
    """
    db_path = _get_db_path()
    # JANITOR_DRY_RUN may arrive as a bool or as the strings 'true'/'false'
    # (e.g. when read from an INI file).
    dry_run_config = current_app.config.get('JANITOR_DRY_RUN', 'false')
    if isinstance(dry_run_config, str):
        dry_run = dry_run_config.lower() == 'true'
    else:
        dry_run = bool(dry_run_config)
    conn = None
    try:
        conn = sqlite3.connect(str(db_path), check_same_thread=False)
        conn.row_factory = sqlite3.Row
        conn.execute("PRAGMA foreign_keys=ON")
        # Log current time for debugging (same +8h offset used when
        # delete_after_at was written).
        current_time = conn.execute("SELECT datetime('now', '+8 hours')").fetchone()[0]
        current_app.logger.info(f"Janitor cleanup check at: {current_time}")
        # Debug: list tasks that have a deletion deadline scheduled.
        debug_rows = conn.execute("""
            SELECT task_id, delete_after_at, downloaded_at
            FROM tasks
            WHERE delete_after_at IS NOT NULL
              AND deleted_at IS NULL
        """).fetchall()
        if debug_rows:
            current_app.logger.info(f"Found {len(debug_rows)} tasks with delete_after_at set:")
            for row in debug_rows:
                current_app.logger.info(f"  Task {row['task_id']}: delete_at={row['delete_after_at']}, downloaded_at={row['downloaded_at']}")
        # Find tasks whose deadline has passed and that are not yet deleted.
        rows = conn.execute("""
            SELECT task_id, output_dir
            FROM tasks
            WHERE delete_after_at IS NOT NULL
              AND deleted_at IS NULL
              AND delete_after_at <= datetime('now', '+8 hours')
        """).fetchall()
        if not rows:
            return  # No tasks to clean up
        current_app.logger.info(f"Janitor found {len(rows)} expired tasks to clean up")
        # Note: Path('') is truthy (it becomes Path('.')), so guard on the
        # raw config value rather than on the constructed Path.
        upload_folder = current_app.config.get('UPLOAD_FOLDER')
        output_folder = current_app.config.get('OUTPUT_FOLDER')
        upload_base = Path(upload_folder) if upload_folder else None
        output_base = Path(output_folder) if output_folder else None
        for row in rows:
            task_id = row['task_id']
            output_dir = row['output_dir']
            try:
                delete_targets = []
                # output_dir recorded in the database, if any.
                if output_dir:
                    delete_targets.append(Path(output_dir))
                # Fallback: conventional outputs/<task_id> location.
                if output_base is not None:
                    derived_output_dir = output_base / task_id
                    if derived_output_dir not in delete_targets:
                        delete_targets.append(derived_output_dir)
                # Also remove uploads/<task_id>.
                if upload_base is not None:
                    delete_targets.append(upload_base / task_id)
                if dry_run:
                    for tgt in delete_targets:
                        if tgt:
                            current_app.logger.info(f"[DRY RUN] Would delete task {task_id} path: {tgt}")
                    # BUGFIX: in dry-run mode, skip the database delete too.
                    # Previously the row was hard-deleted even in dry runs.
                    continue
                # Actually delete the directories on disk.
                for tgt in delete_targets:
                    try:
                        if tgt and tgt.exists():
                            shutil.rmtree(tgt, ignore_errors=True)
                            current_app.logger.info(f"Deleted path for task {task_id}: {tgt}")
                        else:
                            current_app.logger.warning(f"Path not found for task {task_id}: {tgt}")
                    except Exception as e:
                        current_app.logger.error(f"Failed to delete path {tgt} for task {task_id}: {e}")
                # Hard delete from database (commit per task so one failure
                # does not roll back earlier successful deletions).
                conn.execute(
                    "DELETE FROM tasks WHERE task_id = ?",
                    (task_id,)
                )
                conn.commit()
                current_app.logger.info(f"Hard deleted task {task_id} from database")
            except Exception as e:
                current_app.logger.error(f"Failed to delete task {task_id}: {str(e)}", exc_info=True)
                # Continue with other tasks even if one fails
    except Exception as e:
        current_app.logger.error(f"Database error during cleanup: {str(e)}", exc_info=True)
    finally:
        if conn is not None:
            conn.close()
def reconcile_tasks_on_startup():
    """Reconcile task states on application startup.

    Three repairs, in order:
      1. Backfill ``delete_after_at`` (downloaded_at + 10 minutes) for tasks
         that were marked downloaded but never got a deletion deadline
         (e.g. the app crashed between the two writes).
      2. Hard-delete rows whose recorded output directory no longer exists
         on disk (manually removed directories).
      3. Backfill a NULL/empty ``output_dir`` with the conventional
         ``OUTPUT_FOLDER/<task_id>`` path.
    """
    db_path = _get_db_path()
    conn = None
    try:
        conn = sqlite3.connect(str(db_path), check_same_thread=False)
        conn.execute("PRAGMA foreign_keys=ON")
        # 1) Fix downloaded tasks that are missing a deletion deadline.
        cur = conn.execute("""
            UPDATE tasks
            SET delete_after_at = COALESCE(delete_after_at, datetime(downloaded_at, '+10 minutes'))
            WHERE downloaded_at IS NOT NULL
              AND deleted_at IS NULL
              AND delete_after_at IS NULL
        """)
        # Use the statement's own rowcount rather than conn.total_changes,
        # which would count every change made on this connection.
        updated_count = cur.rowcount
        if updated_count > 0:
            current_app.logger.info(f"Startup reconciliation: Fixed delete_after_at for {updated_count} downloaded tasks")
        # 2) Drop rows whose output directory has vanished from disk.
        rows = conn.execute("""
            SELECT task_id, output_dir
            FROM tasks
            WHERE output_dir IS NOT NULL
              AND deleted_at IS NULL
        """).fetchall()
        orphaned_count = 0
        for task_id_from_db, output_dir_from_db in rows:
            if not Path(output_dir_from_db).exists():
                conn.execute(
                    "DELETE FROM tasks WHERE task_id = ?",
                    (task_id_from_db,)
                )
                orphaned_count += 1
        if orphaned_count > 0:
            # Message reflects the actual hard delete performed above.
            current_app.logger.info(f"Startup reconciliation: Removed {orphaned_count} orphaned tasks (directories not found)")
        # 3) Backfill output_dir for rows with NULL, using OUTPUT_FOLDER/<task_id>.
        # Guard on the raw config value: Path('') is truthy (it is Path('.')),
        # so testing the constructed Path would never skip an unset folder.
        output_folder = current_app.config.get('OUTPUT_FOLDER')
        if output_folder:
            output_base = Path(output_folder)
            rows2 = conn.execute("""
                SELECT task_id FROM tasks
                WHERE (output_dir IS NULL OR output_dir = '')
                  AND deleted_at IS NULL
            """).fetchall()
            for (task_id_from_db,) in rows2:
                guess = output_base / task_id_from_db
                conn.execute("UPDATE tasks SET output_dir = ? WHERE task_id = ?", (str(guess), task_id_from_db))
                current_app.logger.info(f"Backfilled output_dir for task {task_id_from_db}: {guess}")
        conn.commit()
    except Exception as e:
        current_app.logger.error(f"Startup reconciliation error: {str(e)}", exc_info=True)
    finally:
        if conn is not None:
            conn.close()
def _get_db_path():
    """Resolve the SQLite database file path from the current app's config."""
    # Imported lazily to avoid a circular import at module load time.
    from .db import get_db_path as _resolve_db_path
    return _resolve_db_path(current_app)