dotfiles/modules/nixos/services/comfyui/default.nix
2025-10-01 19:51:55 -05:00

148 lines
3.9 KiB
Nix
Executable file

# NixOS module: runs ComfyUI in a Docker container and publishes it through
# a Caddy virtual host.
#
# Arguments:
#   config, pkgs - standard module arguments (not referenced in the visible body)
#   flake        - `flake.config.services.instances` supplies the per-service
#                  settings (ports, paths, domains, ssl, sops) read below
{
  config,
  pkgs,
  flake,
  ...
}:
let
  inherit (flake.config.services.instances) comfyui web;

  # Alias so the module body does not repeat the instance name.
  service = comfyui;

  # Hostname used as the Caddy virtual host key.
  host = service.domains.url0;

  # Address the container port is published on and that Caddy proxies to.
  localhost = web.localhost.address1;
in
{
# Docker daemon with NVIDIA support and a weekly automatic prune.
# Per the original note, nvidia-container-toolkit is expected to be enabled
# from graphics.nix (not visible from this file).
# NOTE(review): `enableNvidia` is reported as deprecated in recent NixOS
# releases in favour of `hardware.nvidia-container-toolkit.enable` — confirm
# against the NixOS release in use before switching.
virtualisation.docker = {
  enable = true;
  enableNvidia = true;
  autoPrune.enable = true;
  autoPrune.dates = "weekly";
};
# hardware.nvidia-container-toolkit.enable = true;
# ComfyUI container, managed through the oci-containers module with the
# Docker backend.
virtualisation.oci-containers = {
  backend = "docker";
  containers.comfyui = {
    image = "yanwk/comfyui-boot:cu126-slim";
    autoStart = true;

    # Publish the container's port 8188 only on the configured local address;
    # the Caddy virtual host in this module proxies to the same address/port.
    ports = [
      "${localhost}:${toString service.ports.port0}:8188"
    ];

    # A single bind mount of the data directory onto /root. Everything below
    # it (models, custom_nodes, output, input, user, ...) is already visible
    # through this mount. Mounting those sub-directories again at the same
    # relative location would only add extra mount points inside /root, which
    # makes rename/move across them fail with EXDEV.
    volumes = [
      "${service.paths.path0}:/root"
    ];

    environment = {
      # Arguments handed to ComfyUI by the image; it listens on all container
      # interfaces because the port is published from the host side.
      CLI_ARGS = "--listen 0.0.0.0 --port 8188 --preview-method auto --dont-print-server";
      NVIDIA_VISIBLE_DEVICES = "0";
      NVIDIA_DRIVER_CAPABILITIES = "compute,utility,graphics";
    };

    extraOptions = [
      # GPU access: NVIDIA runtime, first GPU only
      "--runtime=nvidia"
      "--gpus=device=0"
      # Memory limits to prevent OOM (memory-swap == memory, so no extra swap)
      "--memory=32g"
      "--memory-swap=32g"
      "--shm-size=16g"
      # Security
      "--security-opt=no-new-privileges:true"
      # Network
      "--network=bridge"
      # Health check
      # NOTE(review): requires `curl` inside the image — confirm it is present.
      "--health-cmd=curl -f http://localhost:8188/ || exit 1"
      "--health-interval=30s"
      "--health-timeout=10s"
      "--health-retries=3"
    ];
  };
};
# Caddy virtual host for ComfyUI: HTTP basic auth, TLS with the service's
# certificate/key pair, and a reverse proxy to the published container port.
services.caddy = {
  virtualHosts = {
    "${host}" = {
      # Notes on the Caddyfile snippet below:
      # - {$CADDY_AUTH_USER} / {$CADDY_AUTH_PASSWORD_HASH} are Caddy
      #   environment placeholders; they have to be provided through the
      #   Caddy service's environment (not set in this module).
      # - Caddy v2's reverse_proxy passes Host through, sets
      #   X-Forwarded-For / X-Forwarded-Proto itself, and proxies WebSocket
      #   upgrades without extra configuration, so no header_up lines are
      #   written for those. Do not add `{>Connection}`-style values: that is
      #   Caddy v1 placeholder syntax.
      # - X-Real-IP uses {remote_host} (bare IP); {remote} would expand to
      #   "host:port".
      # - NOTE(review): newer Caddy releases name the auth directive
      #   `basic_auth`; `basicauth` is kept unchanged here — confirm against
      #   the deployed Caddy version.
      extraConfig = ''
        basicauth {
          {$CADDY_AUTH_USER} {$CADDY_AUTH_PASSWORD_HASH}
        }
        # Reverse proxy to ComfyUI (WebSocket upgrades are handled by Caddy)
        reverse_proxy ${localhost}:${toString service.ports.port0} {
          header_up X-Real-IP {remote_host}
          # Longer timeouts for generation tasks
          transport http {
            read_timeout 300s
            write_timeout 300s
          }
        }
        tls ${service.ssl.cert} ${service.ssl.key}
        # Security headers
        header {
          Strict-Transport-Security "max-age=31536000; includeSubDomains; preload"
          X-Frame-Options "DENY"
          X-Content-Type-Options "nosniff"
          X-XSS-Protection "1; mode=block"
          Referrer-Policy "strict-origin-when-cross-origin"
          -Server
        }
        # Logging
        log {
          output file /var/log/caddy/comfyui-access.log
          format json
        }
      '';
    };
  };
};
# Directories and ownership required by this module (systemd-tmpfiles).
systemd.tmpfiles.rules = [
  # Host side of the container's /root bind mount.
  "d ${service.paths.path0} 0755 root root -"
  # Recursively hand the service's sops path to caddy. `Z` applies the mode to
  # every file and directory below the path, so it is kept closed to "other":
  # with 755 everything under it would become world-readable.
  # NOTE(review): presumably this path holds secrets only Caddy needs to
  # read — confirm no other user relies on access.
  "Z ${service.sops.path0} 0750 caddy caddy -"
  # Directory of the access log written by the Caddy virtual host.
  "d /var/log/caddy 0755 caddy caddy -"
];
# Kernel tuning for AI workloads: System V shared-memory limits plus
# swap/writeback behaviour.
boot.kernel.sysctl =
  let
    # Largest single shared-memory segment: 64 GiB, in bytes.
    shmMaxBytes = 68719476736;
    # kernel.shmall is counted in pages, not bytes.
    # Assumes 4 KiB pages — TODO confirm for the target architecture.
    pageSizeBytes = 4096;
  in
  {
    "kernel.shmmax" = shmMaxBytes;
    # The system-wide total must be at least shmmax / PAGE_SIZE, otherwise a
    # segment of the configured maximum size can never be allocated.
    "kernel.shmall" = shmMaxBytes / pageSizeBytes;
    # Strongly prefer keeping pages in RAM over swapping.
    "vm.swappiness" = 1;
    # Percentages of memory that may be dirty before synchronous (dirty_ratio)
    # and background (dirty_background_ratio) writeback start.
    "vm.dirty_ratio" = 15;
    "vm.dirty_background_ratio" = 5;
  };
# Extra ordering and dependencies for the `docker-comfyui` unit (presumably
# the unit generated for the container defined in this module): start it
# after the network is online and the Docker daemon is up, and tie it to
# docker.service.
systemd.services.docker-comfyui = {
  # Hard dependency: the unit needs the Docker daemon.
  requires = [ "docker.service" ];
  # Soft dependency: pull in network-online.target.
  wants = [ "network-online.target" ];
  # Ordering only; does not pull the listed units in by itself.
  after = [
    "network-online.target"
    "docker.service"
  ];
};
}