# NixOS module: runs ComfyUI in a GPU-enabled Docker container bound to a
# local address and publishes it through a Caddy virtual host with basic
# auth and TLS.
#
# Inputs read from `flake.config.services.instances`:
#   comfyui.domains.url0      - public host name of the Caddy virtual host
#   comfyui.ports.port0       - host port the container's 8188 is published on
#   comfyui.paths.path0       - host data directory mounted into the container
#   comfyui.ssl.cert / .key   - certificate and key passed to Caddy's `tls`
#   comfyui.sops.path0        - path handed to Caddy via a tmpfiles `Z` rule
#   web.localhost.address1    - local address the container port is bound to
{
  config,
  pkgs,
  flake,
  ...
}:
let
  inherit (flake.config.services.instances) comfyui web;
  service = comfyui;
  localhost = web.localhost.address1;
  host = service.domains.url0;
in
{
  # Enable Docker (nvidia-container-toolkit will auto-enable via graphics.nix)
  # NOTE(review): `enableNvidia` is reported as deprecated by recent NixOS
  # releases in favour of `hardware.nvidia-container-toolkit.enable`; that
  # path exposes GPUs through `--device=nvidia.com/gpu=...` rather than
  # `--runtime=nvidia`, so the container options below would need to change
  # together with it. Confirm against the NixOS release in use.
  virtualisation.docker = {
    enable = true;
    enableNvidia = true;
    autoPrune = {
      enable = true;
      dates = "weekly";
    };
  };

  # hardware.nvidia-container-toolkit.enable = true;

  # Create ComfyUI Docker container
  virtualisation.oci-containers = {
    backend = "docker";
    containers.comfyui = {
      image = "yanwk/comfyui-boot:cu126-slim";
      autoStart = true;
      # Publish the container's 8188 only on the local address; external
      # access goes through the Caddy virtual host below.
      ports = [
        "${localhost}:${toString service.ports.port0}:8188"
      ];
      volumes = [
        # Main data directory
        "${service.paths.path0}:/root"
        # Specific subdirectories for better control
        # NOTE(review): these map sub-paths of the parent mount onto the same
        # relative locations, so they look redundant - confirm intent.
        "${service.paths.path0}/models:/root/models"
        "${service.paths.path0}/custom_nodes:/root/custom_nodes"
        "${service.paths.path0}/output:/root/output"
        "${service.paths.path0}/input:/root/input"
        "${service.paths.path0}/user:/root/user"
      ];
      environment = {
        CLI_ARGS = "--listen 0.0.0.0 --port 8188 --preview-method auto --dont-print-server";
        NVIDIA_VISIBLE_DEVICES = "0";
        NVIDIA_DRIVER_CAPABILITIES = "compute,utility,graphics";
      };
      extraOptions = [
        "--runtime=nvidia"
        "--gpus=device=0"
        # Memory limits to prevent OOM
        # (memory-swap equal to memory means the container gets no swap)
        "--memory=32g"
        "--memory-swap=32g"
        "--shm-size=16g"
        # Security
        "--security-opt=no-new-privileges:true"
        # Network
        "--network=bridge"
        # Health check (requires curl inside the image)
        "--health-cmd=curl -f http://localhost:8188/ || exit 1"
        "--health-interval=30s"
        "--health-timeout=10s"
        "--health-retries=3"
      ];
    };
  };

  # Caddy reverse proxy with WebSocket support
  services.caddy = {
    virtualHosts = {
      "${host}" = {
        extraConfig = ''
          basicauth {
            {$CADDY_AUTH_USER} {$CADDY_AUTH_PASSWORD_HASH}
          }

          # Main reverse proxy. Caddy v2 proxies WebSocket upgrades and sets
          # Host / X-Forwarded-For / X-Forwarded-Proto on its own, so only
          # X-Real-IP is added explicitly.
          reverse_proxy ${localhost}:${toString service.ports.port0} {
            # {remote_host} is the client IP without the port
            header_up X-Real-IP {remote_host}

            # Longer timeouts for generation tasks
            transport http {
              read_timeout 300s
              write_timeout 300s
            }
          }

          tls ${service.ssl.cert} ${service.ssl.key}

          # Security headers
          header {
            Strict-Transport-Security "max-age=31536000; includeSubDomains; preload"
            X-Frame-Options "DENY"
            X-Content-Type-Options "nosniff"
            X-XSS-Protection "1; mode=block"
            Referrer-Policy "strict-origin-when-cross-origin"
            -Server
          }

          # Logging
          log {
            output file /var/log/caddy/comfyui-access.log
            format json
          }
        '';
      };
    };
  };

  # Setup directories with proper permissions
  # NOTE(review): the `Z` rule recursively applies caddy:caddy ownership and
  # mode 755 to the sops path; if that path holds secrets, a tighter mode may
  # be preferable - confirm.
  systemd.tmpfiles.rules = [
    "d ${service.paths.path0} 755 root root -"
    "Z ${service.sops.path0} 755 caddy caddy -"
    "d /var/log/caddy 755 caddy caddy -"
  ];

  # Kernel tuning for AI workloads
  # NOTE(review): shmmax is 64 GiB, while shmall (counted in pages) comes to
  # 16 GiB assuming 4 KiB pages - confirm the mismatch is intended.
  boot.kernel.sysctl = {
    "kernel.shmmax" = 68719476736;
    "kernel.shmall" = 4194304;
    "vm.swappiness" = 1;
    "vm.dirty_ratio" = 15;
    "vm.dirty_background_ratio" = 5;
  };

  # Ensure the container unit starts after the network and Docker are up
  systemd.services.docker-comfyui = {
    after = [
      "network-online.target"
      "docker.service"
    ];
    wants = [ "network-online.target" ];
    requires = [ "docker.service" ];
  };
}