111 lines
3.5 KiB
Nix
111 lines
3.5 KiB
Nix
|
import ./make-test-python.nix ({ pkgs, lib, ... }: rec {
|
||
|
name = "pacemaker";
|
||
|
meta = with pkgs.lib.maintainers; {
|
||
|
maintainers = [ astro ];
|
||
|
};
|
||
|
|
||
|
nodes =
|
||
|
let
|
||
|
node = i: {
|
||
|
networking.interfaces.eth1.ipv4.addresses = [ {
|
||
|
address = "192.168.0.${toString i}";
|
||
|
prefixLength = 24;
|
||
|
} ];
|
||
|
|
||
|
services.corosync = {
|
||
|
enable = true;
|
||
|
clusterName = "zentralwerk-network";
|
||
|
nodelist = lib.imap (i: name: {
|
||
|
nodeid = i;
|
||
|
inherit name;
|
||
|
ring_addrs = [
|
||
|
(builtins.head nodes.${name}.networking.interfaces.eth1.ipv4.addresses).address
|
||
|
];
|
||
|
}) (builtins.attrNames nodes);
|
||
|
};
|
||
|
environment.etc."corosync/authkey" = {
|
||
|
source = builtins.toFile "authkey"
|
||
|
# minimum length: 128 bytes
|
||
|
"testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttest";
|
||
|
mode = "0400";
|
||
|
};
|
||
|
|
||
|
services.pacemaker.enable = true;
|
||
|
|
||
|
# used for pacemaker resource
|
||
|
systemd.services.ha-cat = {
|
||
|
description = "Highly available netcat";
|
||
|
serviceConfig.ExecStart = "${pkgs.netcat}/bin/nc -l discard";
|
||
|
};
|
||
|
};
|
||
|
in {
|
||
|
node1 = node 1;
|
||
|
node2 = node 2;
|
||
|
node3 = node 3;
|
||
|
};
|
||
|
|
||
|
# sets up pacemaker with resources configuration, then crashes a
|
||
|
# node and waits for service restart on another node
|
||
|
testScript =
|
||
|
let
|
||
|
resources = builtins.toFile "cib-resources.xml" ''
|
||
|
<resources>
|
||
|
<primitive id="cat" class="systemd" type="ha-cat">
|
||
|
<operations>
|
||
|
<op id="stop-cat" name="start" interval="0" timeout="1s"/>
|
||
|
<op id="start-cat" name="start" interval="0" timeout="1s"/>
|
||
|
<op id="monitor-cat" name="monitor" interval="1s" timeout="1s"/>
|
||
|
</operations>
|
||
|
</primitive>
|
||
|
</resources>
|
||
|
'';
|
||
|
in ''
|
||
|
import re
|
||
|
import time
|
||
|
|
||
|
start_all()
|
||
|
|
||
|
${lib.concatMapStrings (node: ''
|
||
|
${node}.wait_until_succeeds("corosync-quorumtool")
|
||
|
${node}.wait_for_unit("pacemaker.service")
|
||
|
'') (builtins.attrNames nodes)}
|
||
|
|
||
|
# No STONITH device
|
||
|
node1.succeed("crm_attribute -t crm_config -n stonith-enabled -v false")
|
||
|
# Configure the cat resource
|
||
|
node1.succeed("cibadmin --replace --scope resources --xml-file ${resources}")
|
||
|
|
||
|
# wait until the service is started
|
||
|
while True:
|
||
|
output = node1.succeed("crm_resource -r cat --locate")
|
||
|
match = re.search("is running on: (.+)", output)
|
||
|
if match:
|
||
|
for machine in machines:
|
||
|
if machine.name == match.group(1):
|
||
|
current_node = machine
|
||
|
break
|
||
|
time.sleep(1)
|
||
|
|
||
|
current_node.log("Service running here!")
|
||
|
current_node.crash()
|
||
|
|
||
|
# pick another node that's still up
|
||
|
for machine in machines:
|
||
|
if machine.booted:
|
||
|
check_node = machine
|
||
|
# find where the service has been started next
|
||
|
while True:
|
||
|
output = check_node.succeed("crm_resource -r cat --locate")
|
||
|
match = re.search("is running on: (.+)", output)
|
||
|
# output will remain the old current_node until the crash is detected by pacemaker
|
||
|
if match and match.group(1) != current_node.name:
|
||
|
for machine in machines:
|
||
|
if machine.name == match.group(1):
|
||
|
next_node = machine
|
||
|
break
|
||
|
time.sleep(1)
|
||
|
|
||
|
next_node.log("Service migrated here!")
|
||
|
'';
|
||
|
})
|