dotfiles/example/host/default.nix
2025-11-07 13:36:30 -06:00

335 lines
12 KiB
Nix
Executable file

# NixOS host-side module: receives the usual module arguments and sets up
# the shared values used by the configuration below.
{ config, lib, pkgs, ... }:
let
  # Account the `microvm@` units run as; it also owns the state directory.
  user = "microvm";
  group = "kvm";

  # Base directory that holds one subdirectory per MicroVM.
  stateDir = config.microvm.stateDir;

  # Package built from ../../pkgs/microvm-command.nix, added to the host's
  # system packages further down.
  microvmCommand = import ../../pkgs/microvm-command.nix { inherit pkgs; };
in
{
imports = [ ./options.nix ];
config = lib.mkIf config.microvm.host.enable {
# Two sanity checks per declared VM: a VM that carries an inline `config`
# (fully-declarative) must not also set `flake` or `updateFlake`.
assertions = lib.concatLists (
  lib.mapAttrsToList (vmName: vm: [
    {
      assertion = vm.config != null -> vm.flake == null;
      message = "vm ${vmName}: Fully-declarative VMs cannot also set a flake!";
    }
    {
      assertion = vm.config != null -> vm.updateFlake == null;
      message = "vm ${vmName}: Fully-declarative VMs cannot set a updateFlake!";
    }
  ]) config.microvm.vms
);
# System account used by the MicroVM services.
users.users.${user} = {
  inherit group;
  isSystemUser = true;
};
# Make the microvm command available system-wide.
environment.systemPackages = [ microvmCommand ];
# Load the `tun` kernel module on the host.
boot.kernelModules = [ "tun" ];
# On activation, make sure the state directory exists, belongs to the
# MicroVM user/group and is group-writable.
system.activationScripts.microvm-host = ''
  mkdir -p ${stateDir}
  chown ${user}:${group} ${stateDir}
  chmod g+w ${stateDir}
'';
# Lift the memlock limit for the MicroVM user; one entry each for the hard
# and the soft limit, in that order.
security.pam.loginLimits =
  map
    (type: {
      inherit type;
      domain = user;
      item = "memlock";
      value = "infinity";
    })
    [
      "hard"
      "soft"
    ];
systemd.services =
builtins.foldl'
(
  # Fold step: extends the accumulated unit set `result` with the units that
  # belong to the declared MicroVM called `name` (an installer unit plus
  # per-instance drop-ins for the template units).
  result: name:
  result
  // (
    let
      microvmConfig = config.microvm.vms.${name};
      inherit (microvmConfig) flake updateFlake;
      isFlake = flake != null;
      # Guest NixOS configuration: taken from the flake for flake-based
      # MicroVMs, otherwise from the inline declarative config.
      guestConfig =
        if isFlake then flake.nixosConfigurations.${name}.config else microvmConfig.config.config;
      runner = guestConfig.microvm.declaredRunner;
    in
    {
      "install-microvm-${name}" = {
        description = "Install MicroVM '${name}'";
        # Every unit listed here is gated by a ConditionPathExists on a file
        # below `current`, which this unit creates. The macvtap unit has to
        # be ordered after the installation as well, otherwise it can be
        # condition-skipped on the very first start.
        before = [
          "microvm@${name}.service"
          "microvm-tap-interfaces@${name}.service"
          "microvm-macvtap-interfaces@${name}.service"
          "microvm-pci-devices@${name}.service"
          "microvm-virtiofsd@${name}.service"
        ];
        partOf = [ "microvm@${name}.service" ];
        wantedBy = [ "microvms.target" ];
        # Run on every rebuild for fully-declarative MicroVMs and flake-based MicroVMs without updateFlake.
        # For MicroVMs with updateFlake set, only run on initial installation.
        unitConfig.ConditionPathExists = lib.mkIf (isFlake && updateFlake != null) "!${stateDir}/${name}";
        serviceConfig.Type = "oneshot";
        # Create the VM's state directory and point `current` at the runner.
        script = ''
          mkdir -p ${stateDir}/${name}
          cd ${stateDir}/${name}
          ln -sTf ${runner} current
          chown -h ${user}:${group} . current
        ''
        # Including the toplevel here is crucial to have the service definition
        # change when the host is rebuilt and the vm definition changed.
        + lib.optionalString (!isFlake) ''
          ln -sTf ${guestConfig.system.build.toplevel} toplevel
        ''
        # Declarative deployment requires storing just the flake
        + lib.optionalString isFlake ''
          echo '${if updateFlake != null then updateFlake else flake}' > flake
          chown -h ${user}:${group} flake
        '';
        serviceConfig.SyslogIdentifier = "install-microvm-${name}";
      };
      "microvm@${name}" = {
        # restartIfChanged is opt-out, so we have to include the definition unconditionally
        serviceConfig.X-RestartIfChanged = [
          ""
          microvmConfig.restartIfChanged
        ];
        path = lib.mkForce [ ];
        # If the given declarative microvm wants to be restarted on change,
        # We have to make sure this service group is restarted. To make sure
        # that this service is also changed when the microvm configuration changes,
        # we also have to include a trigger here.
        restartTriggers = [ guestConfig.system.build.toplevel ];
        overrideStrategy = "asDropin";
        # Per-VM service type, depending on whether this VM's runner
        # supports a notify socket.
        serviceConfig.Type =
          if guestConfig.microvm.declaredRunner.supportsNotifySocket then "notify" else "simple";
      };
      # The remaining drop-ins only carry the per-VM restartIfChanged setting.
      "microvm-tap-interfaces@${name}" = {
        serviceConfig.X-RestartIfChanged = [
          ""
          microvmConfig.restartIfChanged
        ];
        path = lib.mkForce [ ];
        overrideStrategy = "asDropin";
      };
      "microvm-pci-devices@${name}" = {
        serviceConfig.X-RestartIfChanged = [
          ""
          microvmConfig.restartIfChanged
        ];
        path = lib.mkForce [ ];
        overrideStrategy = "asDropin";
      };
      "microvm-virtiofsd@${name}" = {
        serviceConfig.X-RestartIfChanged = [
          ""
          microvmConfig.restartIfChanged
        ];
        path = lib.mkForce [ ];
        overrideStrategy = "asDropin";
      };
    }
  )
)
{
"microvm-tap-interfaces@" =
  let
    vmUnit = "microvm@%i.service";
    tapUp = "${stateDir}/%i/current/bin/tap-up";
  in
  {
    # Template unit: brings the TAP interfaces of instance %i up before the
    # VM unit starts and takes them down again on stop.
    description = "Setup MicroVM '%i' TAP interfaces";
    after = [ "network.target" ];
    before = [ vmUnit ];
    partOf = [ vmUnit ];
    # Only runs when the installed runner provides a tap-up script.
    unitConfig.ConditionPathExists = tapUp;
    restartIfChanged = false;
    serviceConfig.Type = "oneshot";
    serviceConfig.RemainAfterExit = true;
    serviceConfig.SyslogIdentifier = "microvm-tap-interfaces@%i";
    serviceConfig.ExecStart = tapUp;
    # Teardown uses the script found under `booted` rather than `current`.
    serviceConfig.ExecStop = "${stateDir}/%i/booted/bin/tap-down";
  };
"microvm-macvtap-interfaces@" =
  let
    vmUnit = "microvm@%i.service";
    macvtapUp = "${stateDir}/%i/current/bin/macvtap-up";
  in
  {
    # Template unit: sets up the MACVTAP interfaces of instance %i before the
    # VM unit starts and removes them again on stop.
    description = "Setup MicroVM '%i' MACVTAP interfaces";
    before = [ vmUnit ];
    partOf = [ vmUnit ];
    # Only runs when the installed runner provides a macvtap-up script.
    unitConfig.ConditionPathExists = macvtapUp;
    restartIfChanged = false;
    serviceConfig = {
      ExecStart = macvtapUp;
      # Teardown uses the script found under `booted` rather than `current`.
      ExecStop = "${stateDir}/%i/booted/bin/macvtap-down";
      RemainAfterExit = true;
      SyslogIdentifier = "microvm-macvtap-interfaces@%i";
      Type = "oneshot";
    };
  };
"microvm-pci-devices@" =
  let
    vmUnit = "microvm@%i.service";
    pciSetup = "${stateDir}/%i/current/bin/pci-setup";
  in
  {
    # Template unit: runs the runner's pci-setup script for instance %i
    # before the VM unit starts. There is no stop action.
    description = "Setup MicroVM '%i' devices for passthrough";
    before = [ vmUnit ];
    partOf = [ vmUnit ];
    # Only runs when the installed runner provides a pci-setup script.
    unitConfig.ConditionPathExists = pciSetup;
    restartIfChanged = false;
    serviceConfig.Type = "oneshot";
    serviceConfig.RemainAfterExit = true;
    serviceConfig.SyslogIdentifier = "microvm-pci-devices@%i";
    serviceConfig.ExecStart = pciSetup;
  };
"microvm-virtiofsd@" =
  let
    vmDir = "${stateDir}/%i";
    vmUnit = "microvm@%i.service";
    daemon = "${vmDir}/current/bin/virtiofsd-run";
    # Helper: `<script-name> <vm-name>`. Runs the named script from the VM's
    # `booted` tree when that exists, otherwise from `current`.
    bootedOrCurrent = pkgs.writeShellScript "microvm-runFromBootedOrCurrent" ''
      NAME="$1"
      VM="$2"
      cd "${stateDir}/$VM"
      if [ -e booted ]; then
      exec booted/bin/$NAME
      else
      exec current/bin/$NAME
      fi
    '';
  in
  {
    # Template unit: keeps the virtiofsd daemons of instance %i running.
    description = "VirtioFS daemons for MicroVM '%i'";
    after = [ "local-fs.target" ];
    before = [ vmUnit ];
    partOf = [ vmUnit ];
    # Only runs when the installed runner provides a virtiofsd-run script.
    unitConfig.ConditionPathExists = daemon;
    restartIfChanged = false;
    serviceConfig = {
      Type = "notify";
      NotifyAccess = "all";
      WorkingDirectory = vmDir;
      ExecStart = daemon;
      ExecStop = "${bootedOrCurrent} virtiofsd-shutdown %i";
      Restart = "always";
      RestartSec = "5s";
      LimitNOFILE = 1048576;
      PrivateTmp = "yes";
      SyslogIdentifier = "microvm-virtiofsd@%i";
    };
  };
# Template unit that runs the MicroVM instance %i as the unprivileged
# MicroVM user, after its interface/device/virtiofsd helper units.
"microvm@" = {
  description = "MicroVM '%i'";
  requires = [
    "microvm-tap-interfaces@%i.service"
    "microvm-macvtap-interfaces@%i.service"
    "microvm-pci-devices@%i.service"
    "microvm-virtiofsd@%i.service"
  ];
  after = [
    "network.target"
    "microvm-tap-interfaces@%i.service"
    "microvm-macvtap-interfaces@%i.service"
    "microvm-pci-devices@%i.service"
    "microvm-virtiofsd@%i.service"
  ];
  # Only runs when a runner has been installed for this instance.
  unitConfig.ConditionPathExists = "${stateDir}/%i/current/bin/microvm-run";
  restartIfChanged = false;
  # Remember which runner is being started: `booted` points at the same
  # target as `current` does right now, so the stop commands can still find
  # it if `current` is replaced while the VM runs. The substitution is quoted
  # so that an empty readlink result makes `ln` fail instead of silently
  # creating a link named after its own target.
  preStart = ''
    rm -f booted
    ln -s "$(readlink current)" booted
  '';
  # The post-stop step also runs when starting failed, in which case `booted`
  # may never have been created; `-f` keeps the cleanup from reporting a
  # spurious error then.
  postStop = ''
    rm -f booted
  '';
  serviceConfig = {
    Type = if config.microvm.host.useNotifySockets then "notify" else "simple";
    WorkingDirectory = "${stateDir}/%i";
    ExecStart = "${stateDir}/%i/current/bin/microvm-run";
    ExecStop = "${stateDir}/%i/booted/bin/microvm-shutdown";
    TimeoutSec = config.microvm.host.startupTimeout;
    Restart = "always";
    RestartSec = "5s";
    User = user;
    Group = group;
    SyslogIdentifier = "microvm@%i";
    LimitNOFILE = 1048576;
    NotifyAccess = "all";
    LimitMEMLOCK = "infinity";
  };
};
}
(builtins.attrNames config.microvm.vms);
# Names of all declared MicroVMs whose `autostart` flag is set.
microvm.autostart = builtins.attrNames (
  lib.filterAttrs (_: vm: vm.autostart) config.microvm.vms
);
# Target that pulls in every autostart MicroVM as part of multi-user.target.
systemd.targets.microvms = {
  wants = map (vmName: "microvm@${vmName}.service") config.microvm.autostart;
  wantedBy = [ "multi-user.target" ];
};
# Capability wrapper around qemu-bridge-helper, which creates tap interfaces
# and attaches them to a bridge for qemu whether or not qemu runs as root.
# Not defined when libvirtd is enabled.
security.wrappers.qemu-bridge-helper = lib.mkIf (!config.virtualisation.libvirtd.enable) {
  capabilities = "cap_net_admin+ep";
  group = "root";
  owner = "root";
  source = "${pkgs.qemu-utils}/libexec/qemu-bridge-helper";
};
# Default bridge ACL for qemu-bridge-helper. Override this file with your
# own bridge interfaces if you use a `type = "bridge"` interface.
environment.etc."qemu/bridge.conf".text = lib.mkDefault "allow all\n";
# Kernel Same-Page Merging is switched on by default.
hardware.ksm.enable = lib.mkDefault true;
# Activation-time check that lists installed MicroVMs whose runner still
# ships the old share/microvm/* marker files without the matching bin/*
# helper scripts. It only prints a message; nothing is modified.
# Expansions of $dir are quoted so that directory names containing
# whitespace or glob characters cannot break the tests.
# TODO: remove in 2026
system.activationScripts.microvm-update-check = ''
  if [ -d ${stateDir} ]; then
    _outdated_microvms=""
    for dir in ${stateDir}/*; do
      if [ -e "$dir/current/share/microvm/virtiofs" ] &&
         [ ! -e "$dir/current/bin/virtiofsd-run" ]; then
        _outdated_microvms="$_outdated_microvms $(basename "$dir")"
      elif [ -e "$dir/current/share/microvm/tap-interfaces" ] &&
           [ ! -e "$dir/current/bin/tap-up" ]; then
        _outdated_microvms="$_outdated_microvms $(basename "$dir")"
      elif [ -e "$dir/current/share/microvm/macvtap-interfaces" ] &&
           [ ! -e "$dir/current/bin/macvtap-up" ]; then
        _outdated_microvms="$_outdated_microvms $(basename "$dir")"
      elif [ -e "$dir/current/share/microvm/pci-devices" ] &&
           [ ! -e "$dir/current/bin/pci-setup" ]; then
        _outdated_microvms="$_outdated_microvms $(basename "$dir")"
      fi
    done
    if [ "$_outdated_microvms" != "" ]; then
      echo "The following MicroVMs must be updated to follow the new virtiofsd/tap/macvtap/pci setup scheme: $_outdated_microvms"
    fi
  fi
'';
};
}