Fix multi-worker temp file issue with file-based storage

- New temp_storage.py module stores files on disk instead of in-memory
- Multiple Gunicorn workers can now share temp files
- Startup cleanup removes leftover files from previous runs
- Dockerfile creates temp_files directory
- Added temp_files/ to .gitignore

Previously encode preview worked but download failed with "File expired"
because each worker had its own in-memory TEMP_FILES dict.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Aaron D. Lee
2026-01-05 22:40:42 -05:00
parent d8eb7b0160
commit 6a5b12f98e
4 changed files with 257 additions and 48 deletions

View File

@@ -83,6 +83,7 @@ from flask import (
)
from PIL import Image
from ssl_utils import ensure_certs
import temp_storage
os.environ["NUMPY_MADVISE_HUGEPAGE"] = "0"
os.environ["OMP_NUM_THREADS"] = "1"
@@ -257,9 +258,10 @@ def require_setup():
return None
# Temporary file storage for sharing (file_id -> {data, timestamp, filename})
TEMP_FILES: dict[str, dict] = {}
THUMBNAIL_FILES: dict[str, bytes] = {}
# DEPRECATED: In-memory storage replaced by file-based temp_storage module
# Kept for backwards compatibility during transition
TEMP_FILES: dict[str, dict] = {} # Not used - see temp_storage.py
THUMBNAIL_FILES: dict[str, bytes] = {} # Not used - see temp_storage.py
# ============================================================================
@@ -397,16 +399,7 @@ def generate_thumbnail(image_data: bytes, size: tuple = THUMBNAIL_SIZE) -> bytes
def cleanup_temp_files():
"""Remove expired temporary files."""
now = time.time()
expired = [
fid for fid, info in TEMP_FILES.items() if now - info["timestamp"] > TEMP_FILE_EXPIRY
]
for fid in expired:
TEMP_FILES.pop(fid, None)
# Also clean up corresponding thumbnail
thumb_id = f"{fid}_thumb"
THUMBNAIL_FILES.pop(thumb_id, None)
temp_storage.cleanup_expired(TEMP_FILE_EXPIRY)
def allowed_image(filename: str) -> bool:
@@ -563,13 +556,11 @@ def generate():
if not qr_too_large:
qr_token = secrets.token_urlsafe(16)
cleanup_temp_files()
TEMP_FILES[qr_token] = {
"data": creds.rsa_key_pem.encode(),
temp_storage.save_temp_file(qr_token, creds.rsa_key_pem.encode(), {
"filename": "rsa_key.pem",
"timestamp": time.time(),
"type": "rsa_key",
"compress": qr_needs_compression,
}
})
# v3.2.0: Single passphrase instead of daily phrases
return render_template(
@@ -606,10 +597,10 @@ def generate_qr(token):
if not HAS_QRCODE:
return "QR code support not available", 501
if token not in TEMP_FILES:
file_info = temp_storage.get_temp_file(token)
if not file_info:
return "Token expired or invalid", 404
file_info = TEMP_FILES[token]
if file_info.get("type") != "rsa_key":
return "Invalid token type", 400
@@ -630,10 +621,10 @@ def generate_qr_download(token):
if not HAS_QRCODE:
return "QR code support not available", 501
if token not in TEMP_FILES:
file_info = temp_storage.get_temp_file(token)
if not file_info:
return "Token expired or invalid", 404
file_info = TEMP_FILES[token]
if file_info.get("type") != "rsa_key":
return "Invalid token type", 400
@@ -933,17 +924,15 @@ def _run_encode_job(job_id: str, encode_params: dict) -> None:
# Store result
file_id = secrets.token_urlsafe(16)
TEMP_FILES[file_id] = {
"data": encode_result.stego_data,
temp_storage.save_temp_file(file_id, encode_result.stego_data, {
"filename": filename,
"timestamp": time.time(),
"embed_mode": embed_mode,
"output_format": dct_output_format if embed_mode == "dct" else "png",
"color_mode": dct_color_mode if embed_mode == "dct" else None,
"mime_type": output_mime,
"channel_mode": encode_result.channel_mode,
"channel_fingerprint": encode_result.channel_fingerprint,
}
})
_store_job(
job_id,
@@ -1212,10 +1201,8 @@ def encode_page():
# Store temporarily
file_id = secrets.token_urlsafe(16)
cleanup_temp_files()
TEMP_FILES[file_id] = {
"data": encode_result.stego_data,
temp_storage.save_temp_file(file_id, encode_result.stego_data, {
"filename": filename,
"timestamp": time.time(),
"embed_mode": embed_mode,
"output_format": dct_output_format if embed_mode == "dct" else "png",
"color_mode": dct_color_mode if embed_mode == "dct" else None,
@@ -1223,7 +1210,7 @@ def encode_page():
# Channel info (v4.0.0)
"channel_mode": encode_result.channel_mode,
"channel_fingerprint": encode_result.channel_fingerprint,
}
})
return redirect(url_for("encode_result", file_id=file_id))
@@ -1290,19 +1277,18 @@ def encode_progress(job_id):
@app.route("/encode/result/<file_id>")
@login_required
def encode_result(file_id):
if file_id not in TEMP_FILES:
file_info = temp_storage.get_temp_file(file_id)
if not file_info:
flash("File expired or not found. Please encode again.", "error")
return redirect(url_for("encode_page"))
file_info = TEMP_FILES[file_id]
# Generate thumbnail
thumbnail_data = generate_thumbnail(file_info["data"])
thumbnail_id = None
if thumbnail_data:
thumbnail_id = f"{file_id}_thumb"
THUMBNAIL_FILES[thumbnail_id] = thumbnail_data
temp_storage.save_thumbnail(thumbnail_id, thumbnail_data)
return render_template(
"encode_result.html",
@@ -1322,22 +1308,23 @@ def encode_result(file_id):
@login_required
def encode_thumbnail(thumb_id):
"""Serve thumbnail image."""
if thumb_id not in THUMBNAIL_FILES:
thumb_data = temp_storage.get_thumbnail(thumb_id)
if not thumb_data:
return "Thumbnail not found", 404
return send_file(
io.BytesIO(THUMBNAIL_FILES[thumb_id]), mimetype="image/jpeg", as_attachment=False
io.BytesIO(thumb_data), mimetype="image/jpeg", as_attachment=False
)
@app.route("/encode/download/<file_id>")
@login_required
def encode_download(file_id):
if file_id not in TEMP_FILES:
file_info = temp_storage.get_temp_file(file_id)
if not file_info:
flash("File expired or not found.", "error")
return redirect(url_for("encode_page"))
file_info = TEMP_FILES[file_id]
mime_type = file_info.get("mime_type", "image/png")
return send_file(
@@ -1352,10 +1339,10 @@ def encode_download(file_id):
@login_required
def encode_file_route(file_id):
"""Serve file for Web Share API."""
if file_id not in TEMP_FILES:
file_info = temp_storage.get_temp_file(file_id)
if not file_info:
return "Not found", 404
file_info = TEMP_FILES[file_id]
mime_type = file_info.get("mime_type", "image/png")
return send_file(
@@ -1370,11 +1357,11 @@ def encode_file_route(file_id):
@login_required
def encode_cleanup(file_id):
"""Manually cleanup a file after sharing."""
TEMP_FILES.pop(file_id, None)
temp_storage.delete_temp_file(file_id)
# Also cleanup thumbnail if exists
thumb_id = f"{file_id}_thumb"
THUMBNAIL_FILES.pop(thumb_id, None)
temp_storage.delete_thumbnail(thumb_id)
return jsonify({"status": "ok"})
@@ -1497,12 +1484,10 @@ def decode_page():
cleanup_temp_files()
filename = decode_result.filename or "decoded_file"
TEMP_FILES[file_id] = {
"data": decode_result.file_data,
temp_storage.save_temp_file(file_id, decode_result.file_data, {
"filename": filename,
"mime_type": decode_result.mime_type,
"timestamp": time.time(),
}
})
return render_template(
"decode.html",
@@ -1559,11 +1544,11 @@ def decode_page():
@login_required
def decode_download(file_id):
"""Download decoded file."""
if file_id not in TEMP_FILES:
file_info = temp_storage.get_temp_file(file_id)
if not file_info:
flash("File expired or not found.", "error")
return redirect(url_for("decode_page"))
file_info = TEMP_FILES[file_id]
mime_type = file_info.get("mime_type", "application/octet-stream")
return send_file(
@@ -2320,6 +2305,12 @@ def admin_user_password_reset():
if __name__ == "__main__":
base_dir = Path(__file__).parent
# Clean up any leftover temp files from previous runs
temp_storage.init(base_dir / "temp_files")
cleaned = temp_storage.cleanup_all()
if cleaned > 0:
print(f"Cleaned up {cleaned} leftover temp files from previous run")
# HTTPS configuration
ssl_context = None
if app.config.get("HTTPS_ENABLED", False):

View File

@@ -0,0 +1,214 @@
"""
File-based Temporary Storage
Stores temp files on disk instead of in-memory dict.
This allows multiple Gunicorn workers to share temp files
and lets files survive service restarts within the expiry window, unless
cleanup_all() is run at startup (it deletes every leftover file).
Files are stored in a temp directory with:
- {file_id}.data - The actual file data
- {file_id}.json - Metadata (filename, timestamp, mime_type, etc.)
IMPORTANT: This module ONLY manages files in the temp_files/ directory.
It does NOT touch instance/ (auth database) or any other directories.
"""
import json
import os
import shutil
import time
from pathlib import Path
from threading import Lock
# Fallback storage location: a "temp_files" folder next to this module
DEFAULT_TEMP_DIR = Path(__file__).with_name("temp_files")
# Active storage directory; init() may point it somewhere else
_temp_dir: Path = DEFAULT_TEMP_DIR
# Serializes write/delete operations between threads of this process
_lock = Lock()
def init(temp_dir: Path | str | None = None):
"""Initialize temp storage with optional custom directory."""
global _temp_dir
_temp_dir = Path(temp_dir) if temp_dir else DEFAULT_TEMP_DIR
_temp_dir.mkdir(parents=True, exist_ok=True)
def _data_path(file_id: str) -> Path:
    """Return where the payload for ``file_id`` lives: ``<temp dir>/<file_id>.data``."""
    filename = f"{file_id}.data"
    return _temp_dir.joinpath(filename)
def _meta_path(file_id: str) -> Path:
    """Return where the metadata for ``file_id`` lives: ``<temp dir>/<file_id>.json``."""
    filename = f"{file_id}.json"
    return _temp_dir.joinpath(filename)
def _thumb_path(thumb_id: str) -> Path:
    """Return where the thumbnail ``thumb_id`` lives: ``<temp dir>/<thumb_id>.thumb``."""
    filename = f"{thumb_id}.thumb"
    return _temp_dir.joinpath(filename)
def save_temp_file(file_id: str, data: bytes, metadata: dict) -> None:
    """
    Save a temp file with its metadata.

    The caller's ``metadata`` dict is not modified: a copy is stored, with a
    "timestamp" (current time) added when the caller did not supply one.

    Args:
        file_id: Unique identifier for the file
        data: File contents as bytes
        metadata: Dict with filename, mime_type, timestamp, etc.
            Must be JSON-serializable.

    Raises:
        TypeError, ValueError: If ``metadata`` cannot be serialized to JSON.
            Raised before anything is written to disk.
        OSError: If writing the data or metadata file fails.
    """
    init()  # Ensure directory exists

    # Work on a copy so the caller's dict is left untouched.
    record = dict(metadata)
    record.setdefault("timestamp", time.time())
    # Serialize up front: an unserializable value then fails before any file
    # is written instead of leaving a .data file without metadata behind.
    payload = json.dumps(record)

    meta_file = _meta_path(file_id)
    # Per-process scratch name. It does not end in ".json", so the "*.json"
    # glob used by cleanup_expired never picks it up.
    scratch = meta_file.with_name(f"{meta_file.name}.{os.getpid()}.tmp")

    with _lock:
        # Data first: the metadata file only appears once the data is complete.
        _data_path(file_id).write_bytes(data)
        # _lock only covers threads of this process; other worker processes
        # may read concurrently. Publish the metadata with an atomic rename so
        # they never observe half-written JSON (which cleanup_expired would
        # treat as corrupted and delete).
        try:
            scratch.write_text(payload)
            os.replace(scratch, meta_file)
        except OSError:
            # Best-effort removal of the scratch file; re-raise the real error.
            try:
                scratch.unlink(missing_ok=True)
            except OSError:
                pass
            raise
def get_temp_file(file_id: str) -> dict | None:
    """
    Get a temp file and its metadata.

    Args:
        file_id: Identifier the file was saved under.

    Returns:
        Dict with 'data' (bytes) and all metadata fields, or None if the
        file is missing, unreadable, or its metadata is not a valid JSON
        object.
    """
    init()
    try:
        # Read directly instead of checking exists() first: another worker
        # can delete the files between a check and the read. Missing files
        # surface as FileNotFoundError (an OSError).
        metadata = json.loads(_meta_path(file_id).read_text())
        if not isinstance(metadata, dict):
            # Valid JSON but not an object; it cannot be merged into the result.
            return None
        data = _data_path(file_id).read_bytes()
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError
        # (metadata that is not valid text).
        return None
    return {"data": data, **metadata}
def has_temp_file(file_id: str) -> bool:
    """Report whether both the data file and the metadata file for ``file_id`` exist."""
    init()
    required = (_data_path(file_id), _meta_path(file_id))
    return all(path.exists() for path in required)
def delete_temp_file(file_id: str) -> None:
    """Remove the data and metadata files for ``file_id``; files already gone are ignored."""
    init()
    targets = (_data_path(file_id), _meta_path(file_id))
    with _lock:
        for target in targets:
            target.unlink(missing_ok=True)
def save_thumbnail(thumb_id: str, data: bytes) -> None:
    """Write the thumbnail bytes ``data`` to the file for ``thumb_id``."""
    init()
    destination = _thumb_path(thumb_id)
    with _lock:
        destination.write_bytes(data)
def get_thumbnail(thumb_id: str) -> bytes | None:
    """Return the stored bytes for ``thumb_id``, or None when absent or unreadable."""
    init()
    source = _thumb_path(thumb_id)
    if source.exists():
        try:
            return source.read_bytes()
        except OSError:
            # Fall through to the "not available" result below
            pass
    return None
def delete_thumbnail(thumb_id: str) -> None:
    """Remove the thumbnail file for ``thumb_id``; a file already gone is ignored."""
    init()
    target = _thumb_path(thumb_id)
    with _lock:
        target.unlink(missing_ok=True)
def cleanup_expired(max_age_seconds: float) -> int:
    """
    Delete expired temp files.

    An entry is expired when its metadata "timestamp" is more than
    ``max_age_seconds`` in the past (a missing timestamp counts as 0).
    Entries whose metadata cannot be read or parsed are treated as corrupted
    and removed too. Removing an entry deletes its data file, its metadata
    file and its ``{file_id}_thumb`` thumbnail, so no part is left orphaned.

    Args:
        max_age_seconds: Maximum age in seconds before expiry

    Returns:
        Number of entries deleted
    """
    init()
    now = time.time()
    deleted = 0

    with _lock:
        # Every stored entry has exactly one metadata file; use it as the index.
        for meta_file in _temp_dir.glob("*.json"):
            try:
                metadata = json.loads(meta_file.read_text())
                expired = now - metadata.get("timestamp", 0) > max_age_seconds
            except (OSError, ValueError, TypeError, AttributeError):
                # Unreadable file, invalid JSON/text, non-object JSON, or a
                # non-numeric timestamp: the entry is corrupted, remove it.
                expired = True

            if not expired:
                continue

            file_id = meta_file.stem
            try:
                _data_path(file_id).unlink(missing_ok=True)
                meta_file.unlink(missing_ok=True)
                # Also delete thumbnail if exists
                _thumb_path(f"{file_id}_thumb").unlink(missing_ok=True)
            except OSError:
                # Could not remove this entry (e.g. permissions); keep
                # sweeping the rest instead of aborting the whole cleanup.
                continue
            deleted += 1

    return deleted
def cleanup_all() -> int:
    """
    Remove every regular file in the temp directory. Call on service start/stop.

    Subdirectories are left in place.

    Returns:
        Number of files deleted
    """
    init()
    with _lock:
        leftovers = [entry for entry in _temp_dir.iterdir() if entry.is_file()]
        for entry in leftovers:
            entry.unlink(missing_ok=True)
    return len(leftovers)
def get_stats() -> dict:
    """
    Get temp storage statistics.

    Returns:
        Dict with:
        - "file_count": number of ``*.data`` files found in the temp directory
        - "total_size_bytes": combined size of those files that could be stat'ed
        - "temp_dir": the temp directory path as a string
    """
    init()
    files = list(_temp_dir.glob("*.data"))

    total_size = 0
    for data_file in files:
        try:
            total_size += data_file.stat().st_size
        except OSError:
            # The file vanished (or became unreadable) after the glob, e.g.
            # deleted by another worker's cleanup; skip it instead of failing.
            continue

    return {
        "file_count": len(files),
        "total_size_bytes": total_size,
        "temp_dir": str(_temp_dir),
    }