From d5a23f4442c17d9a2151e3ab958403e75ad33b82 Mon Sep 17 00:00:00 2001 From: Masayuki Ishii Date: Fri, 20 Dec 2024 13:38:45 +0900 Subject: [PATCH 1/3] Add boot-ip-setter Signed-off-by: Masayuki Ishii --- Makefile | 2 +- constants.go | 18 + dctest/boot-ip-setter_test.go | 52 +++ dctest/join_remove_test.go | 30 ++ dctest/suites_test.go | 1 + debian/DEBIAN/postinst | 2 +- debian/DEBIAN/prerm | 2 +- .../lib/systemd/system/boot-ip-setter.service | 15 + docs/boot-ip-setter.md | 96 +++++ go.mod | 3 +- pkg/boot-ip-setter/ip.go | 183 ++++++++ pkg/boot-ip-setter/ip_test.go | 196 +++++++++ pkg/boot-ip-setter/main.go | 143 +++++++ pkg/boot-ip-setter/metrics.go | 84 ++++ pkg/boot-ip-setter/metrics_test.go | 101 +++++ pkg/boot-ip-setter/mock_test.go | 44 ++ pkg/boot-ip-setter/netif.go | 133 ++++++ pkg/boot-ip-setter/netif_test.go | 396 ++++++++++++++++++ pkg/boot-ip-setter/suites_test.go | 13 + 19 files changed, 1510 insertions(+), 4 deletions(-) create mode 100644 dctest/boot-ip-setter_test.go create mode 100644 debian/lib/systemd/system/boot-ip-setter.service create mode 100644 docs/boot-ip-setter.md create mode 100644 pkg/boot-ip-setter/ip.go create mode 100644 pkg/boot-ip-setter/ip_test.go create mode 100644 pkg/boot-ip-setter/main.go create mode 100644 pkg/boot-ip-setter/metrics.go create mode 100644 pkg/boot-ip-setter/metrics_test.go create mode 100644 pkg/boot-ip-setter/mock_test.go create mode 100644 pkg/boot-ip-setter/netif.go create mode 100644 pkg/boot-ip-setter/netif_test.go create mode 100644 pkg/boot-ip-setter/suites_test.go diff --git a/Makefile b/Makefile index 0f7df0c0e..5173f23c8 100644 --- a/Makefile +++ b/Makefile @@ -56,7 +56,7 @@ OP_WIN_ZIP = neco-operation-cli-windows_$(VERSION)_amd64.zip OP_MAC_ZIP = neco-operation-cli-mac_$(VERSION)_amd64.zip DEBBUILD_FLAGS = -Znone BIN_PKGS = ./pkg/neco -SBIN_PKGS = ./pkg/neco-updater ./pkg/neco-worker +SBIN_PKGS = ./pkg/neco-updater ./pkg/neco-worker ./pkg/boot-ip-setter OPDEB_BINNAMES = argocd hubble jsonnet 
jsonnetfmt jsonnet-lint kubectl kubeseal kustomize logcli stern tsh kubectl-moco kubectl-accurate amtool yq tempo-cli flamegraph.pl stackcollapse-perf.pl necoperf-cli necoip nsdump clusterdump cmctl vmalert-tool npv OPDEB_DOCNAMES = argocd hubble jsonnet kubectl kubeseal kustomize logcli stern teleport moco accurate alertmanager yq tempo flamegraph necoperf cmctl vmalert-tool diff --git a/constants.go b/constants.go index 62ea425f2..3458c5211 100644 --- a/constants.go +++ b/constants.go @@ -133,6 +133,24 @@ const ( NecoRebooterService = "neco-rebooter" ) +// Virtual IP +const ( + VirtualIPAddrDHCPServer1 = "10.71.255.1" + VirtualIPAddrDHCPServer2 = "10.71.255.2" + VirtualIPAddrDHCPServer3 = "10.71.255.3" + VirtualIPAddrDHCPServer4 = "10.71.255.4" + VirtualIPAddrDHCPServer5 = "10.71.255.5" + VirtualIPAddrActiveBootServer = "10.71.255.6" +) + +var DHCPServerAddressList = []string{ + VirtualIPAddrDHCPServer1, + VirtualIPAddrDHCPServer2, + VirtualIPAddrDHCPServer3, + VirtualIPAddrDHCPServer4, + VirtualIPAddrDHCPServer5, +} + // File locations var ( RackFile = filepath.Join(NecoDir, "rack") diff --git a/dctest/boot-ip-setter_test.go b/dctest/boot-ip-setter_test.go new file mode 100644 index 000000000..38f5dccbe --- /dev/null +++ b/dctest/boot-ip-setter_test.go @@ -0,0 +1,52 @@ +package dctest + +import ( + "github.com/cybozu-go/neco" + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +// testBootIPSetter tests the behavior of boot-ip-setter in bootstrapping +func testBootIPSetter() { + It("should set Virtual IPs to boot servers", func() { + expectedDHCPServerHostname := map[string]string{ + "10.71.255.1": "gcp0-boot-0", + "10.71.255.2": "gcp0-boot-1", + "10.71.255.3": "gcp0-boot-2", + } + expectedActiveBootServerHostname := []string{ + "gcp0-boot-0", + "gcp0-boot-1", + "gcp0-boot-2", + } + checkBootServerVirtualIPs(expectedDHCPServerHostname, expectedActiveBootServerHostname) + }) +} + +func checkBootServerVirtualIPs(expectedDHCPServerHostname map[string]string, expectedActiveBootServerHostname []string) { + machines, err := getSabakanMachines("--without-role=boot") + Expect(err).NotTo(HaveOccurred()) + + By("checking dhcp server addresses") + for _, m := range machines { + nodeIP := m.Spec.IPv4[0] + for _, vip := range neco.DHCPServerAddressList { + if host := expectedDHCPServerHostname[vip]; host != "" { + stdout, stderr, err := execAt(bootServers[0], "ckecli", "ssh", "cybozu@"+nodeIP, "--", "curl", "-m", "2", "-sS", "http://"+vip+":4192/hostname") + Expect(err).NotTo(HaveOccurred(), "from=%s, to=%s, stdout=%s, stderr=%s", nodeIP, vip, stdout, stderr) + Expect(string(stdout)).To(Equal(host)) + } else { + stdout, stderr, err := execAt(bootServers[0], "ckecli", "ssh", "cybozu@"+nodeIP, "--", "curl", "-m", "2", "-sS", "http://"+vip+":4192/hostname") + Expect(err).To(HaveOccurred(), "from=%s, to=%s, stdout=%s, stderr=%s", nodeIP, vip, stdout, stderr) + } + } + } + + By("checking active boot server address") + for _, m := range machines { + nodeIP := m.Spec.IPv4[0] + stdout, stderr, err := execAt(bootServers[0], "ckecli", "ssh", "cybozu@"+nodeIP, "--", "curl", "-m", "2", "-sS", "http://"+neco.VirtualIPAddrActiveBootServer+":4192/hostname") + Expect(err).NotTo(HaveOccurred(), "from=%s, stdout=%s, stderr=%s", nodeIP, stdout, stderr) + Expect(string(stdout)).To(BeElementOf(expectedActiveBootServerHostname)) + } +} diff 
--git a/dctest/join_remove_test.go b/dctest/join_remove_test.go index 2c9860e50..9afa36c0f 100644 --- a/dctest/join_remove_test.go +++ b/dctest/join_remove_test.go @@ -165,6 +165,22 @@ func testJoinRemove() { }).Should(Succeed()) }) + It("should set Virtual IPs to boot-3", func() { + expectedDHCPServerHostname := map[string]string{ + "10.71.255.1": "gcp0-boot-0", + "10.71.255.2": "gcp0-boot-1", + "10.71.255.3": "gcp0-boot-2", + "10.71.255.4": "gcp0-boot-3", + } + expectedActiveBootServerHostname := []string{ + "gcp0-boot-0", + "gcp0-boot-1", + "gcp0-boot-2", + "gcp0-boot-3", + } + checkBootServerVirtualIPs(expectedDHCPServerHostname, expectedActiveBootServerHostname) + }) + It("should remove boot-3", func() { By("Running neco leave 3") token := getVaultToken() @@ -203,6 +219,20 @@ func testJoinRemove() { time.Sleep(3 * time.Minute) }) + It("should remove virtual IPs from boot-3", func() { + expectedDHCPServerHostname := map[string]string{ + "10.71.255.1": "gcp0-boot-0", + "10.71.255.2": "gcp0-boot-1", + "10.71.255.3": "gcp0-boot-2", + } + expectedActiveBootServerHostname := []string{ + "gcp0-boot-0", + "gcp0-boot-1", + "gcp0-boot-2", + } + checkBootServerVirtualIPs(expectedDHCPServerHostname, expectedActiveBootServerHostname) + }) + It("should set state of boot-3 to unreachable", func() { By("Stopping boot-3") // In DCtest on CircleCI, ginkgo is executed in the operation pod, so you cannot use pmctl in this context. 
diff --git a/dctest/suites_test.go b/dctest/suites_test.go index 62ae010fd..a3657348c 100644 --- a/dctest/suites_test.go +++ b/dctest/suites_test.go @@ -52,6 +52,7 @@ var bootstrapSuite = func() { Context("init-data", testInitData) Context("etcdpasswd", testEtcdpasswd) Context("sabakan-state-setter", testSabakanStateSetter) + Context("boot-ip-setter", testBootIPSetter) Context("ignitions", testIgnitions) Context("cke", func() { testCKESetup() diff --git a/debian/DEBIAN/postinst b/debian/DEBIAN/postinst index d98cde6e2..903524a5c 100755 --- a/debian/DEBIAN/postinst +++ b/debian/DEBIAN/postinst @@ -1,6 +1,6 @@ #!/bin/sh -e -SERVICES="node-exporter neco-updater neco-worker sabakan-state-setter neco-rebooter cke cke-localproxy teleport-node" +SERVICES="node-exporter neco-updater neco-worker sabakan-state-setter neco-rebooter cke cke-localproxy teleport-node boot-ip-setter" TIMERS="docker-prune kill-old-login-sessions backup-cke-etcd export-unit-status trigger-reboot-all-nodes" configure() { diff --git a/debian/DEBIAN/prerm b/debian/DEBIAN/prerm index 065fa6086..306d2fff3 100755 --- a/debian/DEBIAN/prerm +++ b/debian/DEBIAN/prerm @@ -1,6 +1,6 @@ #!/bin/sh -e -SERVICES="neco-updater neco-worker node-exporter sabakan-state-setter neco-rebooter cke cke-localproxy teleport-node trigger-reboot-all-nodes" +SERVICES="neco-updater neco-worker node-exporter sabakan-state-setter neco-rebooter cke cke-localproxy teleport-node trigger-reboot-all-nodes boot-ip-setter" TIMERS="docker-prune kill-old-login-sessions backup-cke-etcd export-unit-status trigger-reboot-all-nodes" prerm() { diff --git a/debian/lib/systemd/system/boot-ip-setter.service b/debian/lib/systemd/system/boot-ip-setter.service new file mode 100644 index 000000000..f27180b0f --- /dev/null +++ b/debian/lib/systemd/system/boot-ip-setter.service @@ -0,0 +1,15 @@ +[Unit] +Description=boot ip setter +After=network-online.target sabakan.service +Wants=network-online.target sabakan.service +StartLimitIntervalSec=600s + 
+[Service] +Type=simple +Restart=on-failure +RestartForceExitStatus=SIGPIPE +RestartSec=30s +ExecStart=/usr/sbin/boot-ip-setter + +[Install] +WantedBy=multi-user.target diff --git a/docs/boot-ip-setter.md b/docs/boot-ip-setter.md new file mode 100644 index 000000000..f033ad311 --- /dev/null +++ b/docs/boot-ip-setter.md @@ -0,0 +1,96 @@ +boot-ip-setter +============== + +`boot-ip-setter` is a daemon program for handling virtual IP addresses on the boot servers. +It runs on active boot servers, where the neco package has been installed, and components such as etcd and sabakan are running. + +This program handles the following two types of virtual IP addresses for different uses: + +1. DHCP Server Address + + The IP address for Sabakan DHCP server. This address is used as the DHCP relay destination in the network switches. + + This program selects one of the five addresses from `10.71.255.1` to `10.71.255.5` and sets the address to its running server. + If multiple active boot servers exist, these addresses will be set without bias to each server. + +2. Active Boot Server Address + + The IP address for accessing one of the boot servers from inside the Kubernetes cluster. + The value is fixed at `10.71.255.6`. The same value is set for all active boot servers. + +This program decides whether or not to set these IPs based on the member list of the etcd cluster on boot servers and sets the IPs to the network interface. + + +## Usage (Options) + +```console +$ boot-ip-setter [OPTIONS] +``` + +| Option | Default value | Description | +| --------------- | -------------- | -------------------------------------------------------- | +| `-debug` | `false` | Show debug log or not. | +| `-interface` | `boot` | The target network interface that this program operates. | +| `-interval` | `1m` | The interval for periodic operation. | +| `-listen-addr` | `0.0.0.0:4192` | The listen address. | + + +## HTTP endpoint + +This program provides the following HTTP endpoints. 
+ +- `/hostname` + + This endpoint returns the hostname of the server that this program runs on. + This is mainly intended for use in testing or operational checks. + +- `/metrics` + + This endpoint returns the metrics. For details on metrics, please refer to the next section. + + +## Metrics + +This program provides the following metrics in the Prometheus format. +Besides this, it also outputs the metrics collected in the `GoCollector` and the `ProcessCollector` of the [Prometheus Go client library](https://github.com/prometheus/client_golang). + +| Name | Description | Type | Labels | +| ------------------------------------------------- | --------------------------------------------------- | ------- | ------------------- | +| `boot_ip_setter_hostname` | The hostname this program runs on. | Gauge | `hostname` | +| `boot_ip_setter_interface_address` | The IP address set to the target interface. | Gauge | `interface`, `ipv4` | +| `boot_ip_setter_interface_operation_errors_total` | The number of times the interface operation failed. | Counter | | + + +## Internals + +### Main process + +This program repeats the following actions in one-minute cycles by default. + +- Gets the member list of the etcd cluster on boot servers. +- Calculates the virtual IPs that should be set from the member list. +- Sets the IP address to the target network interface. If there are any unnecessary IPs on the interface, this program deletes them. + +This program doesn't advertise the IPs; it just sets IPs to the network interfaces. + +### Signal Handling + +This program terminates normally when receiving `SIGTERM` or `SIGINT`. + +### Error Handling + +This program handles errors as follows. + +- Connection failure to the etcd + + This program will terminate abnormally and delete the IPs on the target interface on exit. + These errors may be resolved by retrying. So this program terminates early and retries from the beginning. 
+ +- Operation failure of the network interface + + This program will count up the `boot_ip_setter_interface_operation_errors_total` metric. + These errors may not be recovered by restarting. So this program continues running and notifies errors by using metrics. + +- Other failure + + If an error other than the above occurs, this program will terminate abnormally and delete the IPs on the target interface on exit. diff --git a/go.mod b/go.mod index d7de8ba4d..f668db45b 100644 --- a/go.mod +++ b/go.mod @@ -37,6 +37,7 @@ require ( go.etcd.io/etcd/client/v3 v3.5.17 golang.org/x/crypto v0.31.0 golang.org/x/oauth2 v0.24.0 + golang.org/x/sync v0.10.0 golang.org/x/term v0.27.0 k8s.io/api v0.31.0 k8s.io/apimachinery v0.31.0 @@ -98,6 +99,7 @@ require ( github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/klauspost/compress v1.17.9 // indirect + github.com/kylelemons/godebug v1.1.0 // indirect github.com/magiconair/properties v1.8.7 // indirect github.com/mailru/easyjson v0.7.7 // indirect github.com/miekg/dns v1.1.41 // indirect @@ -132,7 +134,6 @@ require ( go.uber.org/zap v1.27.0 // indirect golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect golang.org/x/net v0.33.0 // indirect - golang.org/x/sync v0.10.0 // indirect golang.org/x/sys v0.28.0 // indirect golang.org/x/text v0.21.0 // indirect golang.org/x/time v0.5.0 // indirect diff --git a/pkg/boot-ip-setter/ip.go b/pkg/boot-ip-setter/ip.go new file mode 100644 index 000000000..0f2b1aa7d --- /dev/null +++ b/pkg/boot-ip-setter/ip.go @@ -0,0 +1,183 @@ +package main + +import ( + "context" + "fmt" + "log/slog" + "slices" + "strconv" + "strings" + "sync/atomic" + "time" + + "github.com/cybozu-go/neco" + clientv3 "go.etcd.io/etcd/client/v3" +) + +func runIPSetter(ctx context.Context, logger *slog.Logger, etcdClient *clientv3.Client, netif NetworkInterface, errorCounter *atomic.Int32, interval time.Duration, rack int) error { + ticker := time.NewTicker(interval) 
+ defer ticker.Stop() + + for { + logger.Debug("runOnce") + err := runOnce(ctx, logger, etcdClient, netif, errorCounter, rack) + if err != nil { + return err + } + + select { + case <-ticker.C: + case <-ctx.Done(): + return ctx.Err() + } + } +} + +func runOnce(ctx context.Context, logger *slog.Logger, etcdClient *clientv3.Client, netif NetworkInterface, errorCounter *atomic.Int32, rack int) error { + memberListResp, err := etcdClient.MemberList(ctx) + if err != nil { + return fmt.Errorf("failed to get member list: %w", err) + } + members := make([]string, 0, len(memberListResp.Members)) + for _, m := range memberListResp.Members { + members = append(members, m.Name) + } + + var nextAddrs []string + dhcpAddr, err := selectDHCPServerAddr(members, rack) + if err != nil { + return fmt.Errorf("failed to select dhcp server address: %w", err) + } + if dhcpAddr != "" { + nextAddrs = []string{dhcpAddr, neco.VirtualIPAddrActiveBootServer} + } + logger.Debug("expected addresses", "addrs", nextAddrs) + + currentAddrs, err := netif.ListAddrs() + if err != nil { + errorCounter.Add(1) + logger.Error("failed to list current addresses", "error", err) + return nil // ignore error + } + logger.Debug("current addresses", "addrs", currentAddrs) + + ops := decideOps(currentAddrs, nextAddrs) + logger.Debug("ops", "ops", ops) + + err = runOps(netif, ops) + if err != nil { + errorCounter.Add(1) + logger.Error("operation error", "error", err) + return nil // ignore error + } + + logger.Debug("operations completed") + return nil +} + +func selectDHCPServerAddr(etcdMembers []string, rack int) (string, error) { + if rack < 0 { + return "", fmt.Errorf("invalid rack number: %d", rack) + } + + var rackList []int + for _, name := range etcdMembers { + s, found := strings.CutPrefix(name, "boot-") + if !found { + return "", fmt.Errorf("failed to cut rack number from etcd member name: %s", name) + } + r, err := strconv.Atoi(s) + if err != nil { + return "", fmt.Errorf("failed to convert rack number: 
name=%s, %v", name, err) + } + rackList = append(rackList, r) + } + slices.Sort(rackList) + + if n, found := slices.BinarySearch(rackList, rack); found { + return neco.DHCPServerAddressList[n%len(neco.DHCPServerAddressList)], nil + } + return "", nil +} + +const ( + opUp = iota + opDown + opAdd + opDelete + opDeleteAll +) + +type op struct { + op int + address string +} + +func (o *op) String() string { + if o == nil { + return "nil" + } + switch o.op { + case opUp: + return "up" + case opDown: + return "down" + case opAdd: + return "add:" + o.address + case opDelete: + return "delete:" + o.address + case opDeleteAll: + return "deleteAll" + } + return "invalid" +} + +func decideOps(currentAddrs []string, nextAddrs []string) []*op { + if len(nextAddrs) == 0 { + return []*op{{op: opDeleteAll}, {op: opDown}} + } + + currentAddrMap := map[string]bool{} + nextAddrMap := map[string]bool{} + for _, a := range currentAddrs { + currentAddrMap[a] = true + } + for _, a := range nextAddrs { + nextAddrMap[a] = true + } + + ret := []*op{{op: opUp}} + for _, a := range nextAddrs { + if !currentAddrMap[a] { + ret = append(ret, &op{op: opAdd, address: a}) + } + } + for _, a := range currentAddrs { + if !nextAddrMap[a] { + ret = append(ret, &op{op: opDelete, address: a}) + } + } + return ret +} + +func runOps(netif NetworkInterface, ops []*op) error { + for _, op := range ops { + var err error + switch op.op { + case opUp: + err = netif.Up() + case opDown: + err = netif.Down() + case opAdd: + err = netif.AddAddr(op.address) + case opDelete: + err = netif.DeleteAddr(op.address) + case opDeleteAll: + err = netif.DeleteAllAddr() + } + if err != nil { + return fmt.Errorf("failed to run operation %s: %w", op.String(), err) + } + } + return nil +} diff --git a/pkg/boot-ip-setter/ip_test.go b/pkg/boot-ip-setter/ip_test.go new file mode 100644 index 000000000..4b730bd94 --- /dev/null +++ b/pkg/boot-ip-setter/ip_test.go @@ -0,0 +1,196 @@ +package main + +import ( + "errors" + + . 
"github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("IP Setter", func() { + Context("selectDHCPServerAddr", func() { + It("should select IP address", func() { + testCases := []struct { + etcdMembers []string + rackAndIP map[int]string // key: rack, value: expected IP + }{ + { + etcdMembers: []string{"boot-0", "boot-1", "boot-2"}, + rackAndIP: map[int]string{ + 0: "10.71.255.1", + 1: "10.71.255.2", + 2: "10.71.255.3", + 3: "", + }, + }, + { + etcdMembers: []string{"boot-1", "boot-5", "boot-9", "boot-10", "boot-99", "boot-100", "boot-999"}, + rackAndIP: map[int]string{ + 0: "", + 1: "10.71.255.1", + 2: "", + 4: "", + 5: "10.71.255.2", + 9: "10.71.255.3", + 10: "10.71.255.4", + 99: "10.71.255.5", + 100: "10.71.255.1", + 999: "10.71.255.2", + 1000: "", + }, + }, + } + for i, tc := range testCases { + for rack, expected := range tc.rackAndIP { + actual, err := selectDHCPServerAddr(tc.etcdMembers, rack) + Expect(err).NotTo(HaveOccurred(), "Test %d: member=%s, rack=%d", i, tc.etcdMembers, rack) + Expect(actual).To(Equal(expected), "Test %d: member=%s, rack=%d", i, tc.etcdMembers, rack) + } + } + }) + + It("should return error", func() { + testCases := []struct { + etcdMembers []string + rack int + }{ + { + etcdMembers: []string{"boot-0", "invalid-name", "boot-2"}, + rack: 0, + }, + { + etcdMembers: []string{"boot-0", "boot-1", "boot-invalidracknumber"}, + rack: 0, + }, + { + etcdMembers: []string{"boot-0", "boot-1", "boot-2"}, + rack: -1, + }, + } + for i, tc := range testCases { + _, err := selectDHCPServerAddr(tc.etcdMembers, tc.rack) + Expect(err).To(HaveOccurred(), "Test %d: member=%s, rack=%d", i, tc.etcdMembers, tc.rack) + } + }) + }) + + Context("decideOps", func() { + It("should decide operations", func() { + testCases := []struct { + currentAddrs []string + nextAddrs []string + expected []*op + }{ + { + currentAddrs: []string{}, + nextAddrs: []string{}, + expected: []*op{{op: opDeleteAll}, {op: opDown}}, + }, + { + currentAddrs: 
[]string{"10.0.0.1", "10.0.0.2"}, + nextAddrs: []string{}, + expected: []*op{{op: opDeleteAll}, {op: opDown}}, + }, + { + currentAddrs: []string{"10.0.0.1", "10.0.0.2"}, + nextAddrs: []string{"10.0.0.1", "10.0.0.2"}, + expected: []*op{{op: opUp}}, + }, + { + currentAddrs: []string{}, + nextAddrs: []string{"10.0.0.1", "10.0.0.2"}, + expected: []*op{{op: opUp}, {op: opAdd, address: "10.0.0.1"}, {op: opAdd, address: "10.0.0.2"}}, + }, + { + currentAddrs: []string{"10.0.0.1", "10.0.0.2"}, + nextAddrs: []string{"10.0.0.3", "10.0.0.2"}, + expected: []*op{{op: opUp}, {op: opAdd, address: "10.0.0.3"}, {op: opDelete, address: "10.0.0.1"}}, + }, + { + currentAddrs: []string{"10.0.0.1", "10.0.0.2", "10.0.0.3"}, + nextAddrs: []string{"10.0.0.2", "10.0.0.4"}, + expected: []*op{{op: opUp}, {op: opAdd, address: "10.0.0.4"}, {op: opDelete, address: "10.0.0.1"}, {op: opDelete, address: "10.0.0.3"}}, + }, + } + for i, tc := range testCases { + actual := decideOps(tc.currentAddrs, tc.nextAddrs) + Expect(actual).To(Equal(tc.expected), "Test %d", i) + } + }) + }) + + Context("runOps", func() { + It("should operate network interface", func() { + testCases := []struct { + ops []*op + result []string + }{ + { + ops: []*op{}, // nop + result: []string{}, + }, + { + ops: []*op{{op: opUp}}, + result: []string{"up"}, + }, + { + ops: []*op{{op: opDown}}, + result: []string{"down"}, + }, + { + ops: []*op{{op: opAdd, address: "10.0.0.1"}}, + result: []string{"add:10.0.0.1"}, + }, + { + ops: []*op{{op: opDelete, address: "10.0.0.2"}}, + result: []string{"delete:10.0.0.2"}, + }, + { + ops: []*op{{op: opDeleteAll}}, + result: []string{"deleteAll"}, + }, + { + ops: []*op{{op: opDeleteAll}, {op: opDown}}, + result: []string{"deleteAll", "down"}, + }, + { + ops: []*op{{op: opUp}, {op: opAdd, address: "10.0.0.2"}, {op: opDelete, address: "10.0.0.1"}}, + result: []string{"up", "add:10.0.0.2", "delete:10.0.0.1"}, + }, + } + for i, tc := range testCases { + netif := &mockNetIF{called: []string{}} + err := 
runOps(netif, tc.ops) + Expect(err).NotTo(HaveOccurred(), "Test %d", i) + Expect(netif.called).To(Equal(tc.result), "Test %d", i) + } + }) + + It("should return error", func() { + testCases := []struct { + ops []*op + }{ + { + ops: []*op{{op: opUp}}, + }, + { + ops: []*op{{op: opDown}}, + }, + { + ops: []*op{{op: opAdd}}, + }, + { + ops: []*op{{op: opDelete}}, + }, + { + ops: []*op{{op: opDeleteAll}}, + }, + } + for i, tc := range testCases { + netif := &mockNetIF{err: errors.New("test error")} + err := runOps(netif, tc.ops) + Expect(err).To(HaveOccurred(), "Test %d", i) + } + }) + }) +}) diff --git a/pkg/boot-ip-setter/main.go b/pkg/boot-ip-setter/main.go new file mode 100644 index 000000000..ac899248e --- /dev/null +++ b/pkg/boot-ip-setter/main.go @@ -0,0 +1,143 @@ +package main + +import ( + "context" + "errors" + "flag" + "fmt" + "log/slog" + "net/http" + "os" + "os/signal" + "sync/atomic" + "syscall" + "time" + + "github.com/cybozu-go/neco" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/collectors" + "github.com/prometheus/client_golang/prometheus/promhttp" + "golang.org/x/sync/errgroup" +) + +const ( + defaultInterface = "boot" + defaultInterval = 1 * time.Minute + defaultListenAddress = "0.0.0.0:4192" +) + +var ( + flagDebugLog = flag.Bool("debug", false, "Show debug log or not.") + flagInterface = flag.String("interface", defaultInterface, "The target network interface that this program operates.") + flagInterval = flag.Duration("interval", defaultInterval, "The interval for periodic operation.") + flagListenAddress = flag.String("listen-addr", defaultListenAddress, "The listen address.") +) + +func main() { + flag.Parse() + + logLevel := new(slog.LevelVar) + if *flagDebugLog { + logLevel.Set(slog.LevelDebug) + } + logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: logLevel})) + + ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGTERM, syscall.SIGINT) + defer 
stop() + + logger.Info("hello!", "interface", *flagInterface, "interval", *flagInterval, "listen address", *flagListenAddress) + + netif := NewInterface(*flagInterface) + err := subMain(ctx, logger, netif, *flagInterval, *flagListenAddress) + if err != nil && !errors.Is(err, context.Canceled) { + logger.Error("error exit", "error", err) + + // delete all addresses if this program ends abnormally + err := netif.DeleteAllAddr() + if err != nil { + logger.Error("failed to delete address on exit", "error", err) + } + err = netif.Down() + if err != nil { + logger.Error("failed to down interface on exit", "error", err) + } + + os.Exit(1) + } + + logger.Info("bye!") +} + +func subMain(ctx context.Context, logger *slog.Logger, netif NetworkInterface, interval time.Duration, listenAddr string) error { + hostname, err := os.Hostname() + if err != nil { + return fmt.Errorf("failed to get hostname: %w", err) + } + logger.Debug("succeeded to get hostname", "hostname", hostname) + + rack, err := neco.MyLRN() + if err != nil { + return fmt.Errorf("failed to get logical rack number: %w", err) + } + logger.Debug("succeeded to get logical rack number", "rack", rack) + + etcdClient, err := neco.EtcdClient() + if err != nil { + return fmt.Errorf("failed to create etcd client: %w", err) + } + defer etcdClient.Close() + logger.Debug("succeeded to create etcd client") + + errorCounter := &atomic.Int32{} + + hostnameHandler := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + fmt.Fprint(w, hostname) + }) + + registry := prometheus.NewRegistry() + registry.MustRegister( + collectors.NewGoCollector(), + collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}), + newCollector(logger.With("component", "metrics collector"), hostname, netif, errorCounter), + ) + metricsHandler := promhttp.HandlerFor(registry, promhttp.HandlerOpts{EnableOpenMetrics: true}) + + eg, ctx := errgroup.WithContext(ctx) + + eg.Go(func() error { + 
logger.Info("starting ip setter") + return runIPSetter(ctx, logger.With("component", "ip setter"), etcdClient, netif, errorCounter, interval, rack) + }) + + eg.Go(func() error { + logger.Info("starting http server") + return runHTTPServer(ctx, listenAddr, hostnameHandler, metricsHandler) + }) + + return eg.Wait() +} + +func runHTTPServer(ctx context.Context, listenAddr string, hostnameHandler, metricsHandler http.Handler) error { + mux := http.NewServeMux() + mux.Handle("/hostname", hostnameHandler) + mux.Handle("/metrics", metricsHandler) + server := &http.Server{Addr: listenAddr, Handler: mux} + + errCh := make(chan error) + go func() { + errCh <- server.ListenAndServe() + }() + select { + case err := <-errCh: + // ListenAndServe always returns a non-nil error. So no need for a nil check. + return fmt.Errorf("failed to listen: %w", err) + case <-ctx.Done(): + ctx2, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if err := server.Shutdown(ctx2); err != nil { + return fmt.Errorf("failed to shutdown: %w", err) + } + return ctx.Err() + } +} diff --git a/pkg/boot-ip-setter/metrics.go b/pkg/boot-ip-setter/metrics.go new file mode 100644 index 000000000..7e90dd873 --- /dev/null +++ b/pkg/boot-ip-setter/metrics.go @@ -0,0 +1,84 @@ +package main + +import ( + "log/slog" + "sync/atomic" + + "github.com/prometheus/client_golang/prometheus" +) + +type metricsCollector struct { + logger *slog.Logger + hostname string + netif NetworkInterface + errorCounter *atomic.Int32 + metrics *metrics +} + +type metrics struct { + hostnameGaugeVec *prometheus.GaugeVec + addressGaugeVec *prometheus.GaugeVec + errorCounter prometheus.Counter +} + +func newCollector(logger *slog.Logger, hostname string, netif NetworkInterface, errorCounter *atomic.Int32) prometheus.Collector { + c := &metricsCollector{ + logger: logger, + hostname: hostname, + netif: netif, + errorCounter: errorCounter, + } + c.metrics = &metrics{ + hostnameGaugeVec: 
prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: "boot_ip_setter", + Name: "hostname", + Help: "The hostname this program runs on.", + }, + []string{"hostname"}, + ), + addressGaugeVec: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: "boot_ip_setter", + Name: "interface_address", + Help: "The IP address set to the target interface.", + }, + []string{"interface", "ipv4"}, + ), + errorCounter: prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: "boot_ip_setter", + Name: "interface_operation_errors_total", + Help: "The number of times the interface operation failed.", + }, + ), + } + return c +} + +func (c *metricsCollector) Describe(ch chan<- *prometheus.Desc) { + c.metrics.hostnameGaugeVec.Describe(ch) + c.metrics.addressGaugeVec.Describe(ch) + c.metrics.errorCounter.Describe(ch) +} + +func (c *metricsCollector) Collect(ch chan<- prometheus.Metric) { + c.metrics.hostnameGaugeVec.Reset() + c.metrics.hostnameGaugeVec.WithLabelValues(c.hostname).Set(1) + c.metrics.hostnameGaugeVec.Collect(ch) + + addrs, err := c.netif.ListAddrs() + if err != nil { + c.errorCounter.Add(1) + c.logger.Error("failed to list addresses", "error", err) + } + c.metrics.addressGaugeVec.Reset() + for _, a := range addrs { + c.metrics.addressGaugeVec.WithLabelValues(c.netif.Name(), a).Set(1) + } + c.metrics.addressGaugeVec.Collect(ch) + + delta := c.errorCounter.Swap(0) + c.metrics.errorCounter.Add(float64(delta)) + c.metrics.errorCounter.Collect(ch) +} diff --git a/pkg/boot-ip-setter/metrics_test.go b/pkg/boot-ip-setter/metrics_test.go new file mode 100644 index 000000000..a7e2bd682 --- /dev/null +++ b/pkg/boot-ip-setter/metrics_test.go @@ -0,0 +1,101 @@ +package main + +import ( + "errors" + "log/slog" + "os" + "strings" + "sync/atomic" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" +) + +var _ = Describe("Metrics", Ordered, func() { + logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelDebug})) + var collector prometheus.Collector + var netif *mockNetIF + var errorCounter *atomic.Int32 + + BeforeAll(func() { + netif = &mockNetIF{addrs: []string{"10.0.0.1", "10.0.0.2"}} + errorCounter = &atomic.Int32{} + collector = newCollector(logger, "testhost", netif, errorCounter) + }) + + It("should return hostname", func() { + expected := ` + # HELP boot_ip_setter_hostname The hostname this program runs on. + # TYPE boot_ip_setter_hostname gauge + boot_ip_setter_hostname{hostname="testhost"} 1 + ` + Expect(testutil.CollectAndCompare(collector, strings.NewReader(expected), "boot_ip_setter_hostname")).NotTo(HaveOccurred()) + }) + + It("should update address metrics", func() { + expected := ` + # HELP boot_ip_setter_interface_address The IP address set to the target interface. + # TYPE boot_ip_setter_interface_address gauge + boot_ip_setter_interface_address{interface="mock",ipv4="10.0.0.1"} 1 + boot_ip_setter_interface_address{interface="mock",ipv4="10.0.0.2"} 1 + ` + err := testutil.CollectAndCompare(collector, strings.NewReader(expected), "boot_ip_setter_interface_address") + Expect(err).NotTo(HaveOccurred()) + + netif.addrs = []string{} + expected = `` + err = testutil.CollectAndCompare(collector, strings.NewReader(expected), "boot_ip_setter_interface_address") + Expect(err).NotTo(HaveOccurred()) + + netif.addrs = []string{"10.0.0.10", "10.0.0.11", "10.0.0.12"} + expected = ` + # HELP boot_ip_setter_interface_address The IP address set to the target interface. 
+ # TYPE boot_ip_setter_interface_address gauge + boot_ip_setter_interface_address{interface="mock",ipv4="10.0.0.10"} 1 + boot_ip_setter_interface_address{interface="mock",ipv4="10.0.0.11"} 1 + boot_ip_setter_interface_address{interface="mock",ipv4="10.0.0.12"} 1 + ` + err = testutil.CollectAndCompare(collector, strings.NewReader(expected), "boot_ip_setter_interface_address") + Expect(err).NotTo(HaveOccurred()) + }) + + It("should count up error metrics", func() { + expected := ` + # HELP boot_ip_setter_interface_operation_errors_total The number of times the interface operation failed. + # TYPE boot_ip_setter_interface_operation_errors_total counter + boot_ip_setter_interface_operation_errors_total 0 + ` + err := testutil.CollectAndCompare(collector, strings.NewReader(expected), "boot_ip_setter_interface_operation_errors_total") + Expect(err).NotTo(HaveOccurred()) + + errorCounter.Add(1) + expected = ` + # HELP boot_ip_setter_interface_operation_errors_total The number of times the interface operation failed. + # TYPE boot_ip_setter_interface_operation_errors_total counter + boot_ip_setter_interface_operation_errors_total 1 + ` + err = testutil.CollectAndCompare(collector, strings.NewReader(expected), "boot_ip_setter_interface_operation_errors_total") + Expect(err).NotTo(HaveOccurred()) + + errorCounter.Add(3) + expected = ` + # HELP boot_ip_setter_interface_operation_errors_total The number of times the interface operation failed. + # TYPE boot_ip_setter_interface_operation_errors_total counter + boot_ip_setter_interface_operation_errors_total 4 + ` + err = testutil.CollectAndCompare(collector, strings.NewReader(expected), "boot_ip_setter_interface_operation_errors_total") + Expect(err).NotTo(HaveOccurred()) + + // The counter is also incremented when NetworkInterface returns an error in the metrics collector. + netif.err = errors.New("metrics test") + expected = ` + # HELP boot_ip_setter_interface_operation_errors_total The number of times the interface operation failed. 
+ # TYPE boot_ip_setter_interface_operation_errors_total counter + boot_ip_setter_interface_operation_errors_total 5 + ` + err = testutil.CollectAndCompare(collector, strings.NewReader(expected), "boot_ip_setter_interface_operation_errors_total") + Expect(err).NotTo(HaveOccurred()) + }) +}) diff --git a/pkg/boot-ip-setter/mock_test.go b/pkg/boot-ip-setter/mock_test.go new file mode 100644 index 000000000..d59e3aa04 --- /dev/null +++ b/pkg/boot-ip-setter/mock_test.go @@ -0,0 +1,44 @@ +package main + +type mockNetIF struct { + addrs []string + called []string + err error +} + +func (n *mockNetIF) Name() string { + return "mock" +} + +func (n *mockNetIF) Up() error { + n.called = append(n.called, "up") + return n.err +} + +func (n *mockNetIF) Down() error { + n.called = append(n.called, "down") + return n.err +} + +func (n *mockNetIF) ListAddrs() ([]string, error) { + n.called = append(n.called, "list") + if n.err != nil { + return nil, n.err + } + return n.addrs, nil +} + +func (n *mockNetIF) AddAddr(addr string) error { + n.called = append(n.called, "add:"+addr) + return n.err +} + +func (n *mockNetIF) DeleteAddr(addr string) error { + n.called = append(n.called, "delete:"+addr) + return n.err +} + +func (n *mockNetIF) DeleteAllAddr() error { + n.called = append(n.called, "deleteAll") + return n.err +} diff --git a/pkg/boot-ip-setter/netif.go b/pkg/boot-ip-setter/netif.go new file mode 100644 index 000000000..bd95a5a0c --- /dev/null +++ b/pkg/boot-ip-setter/netif.go @@ -0,0 +1,133 @@ +package main + +import ( + "fmt" + "slices" + + "github.com/vishvananda/netlink" +) + +type NetworkInterface interface { + Name() string + Up() error + Down() error + ListAddrs() ([]string, error) + AddAddr(addr string) error + DeleteAddr(addr string) error + DeleteAllAddr() error +} + +type networkInterface struct { + linkName string +} + +func NewInterface(linkName string) NetworkInterface { + return &networkInterface{ + linkName: linkName, + } +} + +func (n *networkInterface) Name() 
string { + return n.linkName +} + +func (n *networkInterface) Up() error { + link, err := netlink.LinkByName(n.linkName) + if err != nil { + return fmt.Errorf("failed to find %s: %w", n.linkName, err) + } + + err = netlink.LinkSetUp(link) + if err != nil { + return fmt.Errorf("failed to up %s: %w", n.linkName, err) + } + return nil +} + +func (n *networkInterface) Down() error { + link, err := netlink.LinkByName(n.linkName) + if err != nil { + return fmt.Errorf("failed to find %s: %w", n.linkName, err) + } + + err = netlink.LinkSetDown(link) + if err != nil { + return fmt.Errorf("failed to down %s: %w", n.linkName, err) + } + return nil +} + +func (n *networkInterface) ListAddrs() ([]string, error) { + link, err := netlink.LinkByName(n.linkName) + if err != nil { + return nil, fmt.Errorf("failed to find %s: %w", n.linkName, err) + } + + addrList, err := netlink.AddrList(link, netlink.FAMILY_V4) + if err != nil { + return nil, fmt.Errorf("failed to list addresses on %s: %w", n.linkName, err) + } + + ret := []string{} + for _, a := range addrList { + ret = append(ret, a.IP.String()) + } + slices.Sort(ret) + return ret, nil +} + +func (n *networkInterface) AddAddr(addr string) error { + a, err := netlink.ParseAddr(addr + "/32") + if err != nil { + return err + } + + link, err := netlink.LinkByName(n.linkName) + if err != nil { + return fmt.Errorf("failed to find %s: %w", n.linkName, err) + } + + err = netlink.AddrAdd(link, a) + if err != nil { + return fmt.Errorf("failed to add %s to %s: %w", a.IP.String(), n.linkName, err) + } + return nil +} + +func (n *networkInterface) DeleteAddr(addr string) error { + a, err := netlink.ParseAddr(addr + "/32") + if err != nil { + return err + } + + link, err := netlink.LinkByName(n.linkName) + if err != nil { + return fmt.Errorf("failed to find %s: %w", n.linkName, err) + } + + err = netlink.AddrDel(link, a) + if err != nil { + return fmt.Errorf("failed to delete %s from %s: %w", a.IP.String(), n.linkName, err) + } + return nil +} 
+ +func (n *networkInterface) DeleteAllAddr() error { + link, err := netlink.LinkByName(n.linkName) + if err != nil { + return err + } + + addrList, err := netlink.AddrList(link, netlink.FAMILY_V4) + if err != nil { + return fmt.Errorf("failed to list addresses on %s: %w", n.linkName, err) + } + + for _, a := range addrList { + err = netlink.AddrDel(link, &a) + if err != nil { + return fmt.Errorf("failed to delete %s from %s: %w", a.IP.String(), n.linkName, err) + } + } + return nil +} diff --git a/pkg/boot-ip-setter/netif_test.go b/pkg/boot-ip-setter/netif_test.go new file mode 100644 index 000000000..3b0d1c631 --- /dev/null +++ b/pkg/boot-ip-setter/netif_test.go @@ -0,0 +1,396 @@ +package main + +import ( + "encoding/json" + "errors" + "fmt" + "os" + "os/exec" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("NetworkInterface", func() { + Context("operating interface", Ordered, func() { + // This test operates actual network interfaces on the host. + // If that is okay with you, please run this test with the environment variable "RUN_NETIF_TEST=yes". + // And this test cannot run on a CircleCI instance. Please run it manually. 
+ skip := true + if os.Getenv("RUN_NETIF_TEST") == "yes" { + skip = false + } + + const testInterface1 = "test-netif-1" + + BeforeEach(func() { + if skip { + Skip("RUN_NETIF_TEST is not set") + } + + DeferCleanup(func() { + ipLinkDelete(testInterface1) // ignore error + }) + + By("setting up target network interface") + err := ipLinkAdd(testInterface1) + Expect(err).NotTo(HaveOccurred()) + + ret, err := ipAddressShow(testInterface1) + Expect(err).NotTo(HaveOccurred()) + Expect(ret).To(HaveLen(1)) + Expect(ret[0].State).To(Equal("DOWN")) + Expect(ret[0].Addrs).To(HaveLen(0)) + }) + + It("should return correct information", func() { + if skip { + Skip("RUN_NETIF_TEST is not set") + } + + By("getting interface name") + netif := NewInterface(testInterface1) + Expect(netif.Name()).To(Equal(testInterface1)) + + By("listing addresses") + addrs, err := netif.ListAddrs() + Expect(err).NotTo(HaveOccurred()) + Expect(addrs).To(BeEmpty()) + + By("setting addresses by ip command") + err = ipAddressAdd(testInterface1, "192.168.0.100") + Expect(err).NotTo(HaveOccurred()) + err = ipAddressAdd(testInterface1, "192.168.0.101") + Expect(err).NotTo(HaveOccurred()) + + By("listing addresses") + addrs, err = netif.ListAddrs() + Expect(err).NotTo(HaveOccurred()) + Expect(addrs).To(ConsistOf("192.168.0.100", "192.168.0.101")) + }) + + It("should change operational state", func() { + if skip { + Skip("RUN_NETIF_TEST is not set") + } + + netif := NewInterface(testInterface1) + + By("enabling interface") + err := netif.Up() + Expect(err).NotTo(HaveOccurred()) + + ret, err := ipAddressShow(testInterface1) + Expect(err).NotTo(HaveOccurred()) + Expect(ret).To(HaveLen(1)) + // The operational state becomes "UNKNOWN", when a dummy interface is up. 
+ // https://serverfault.com/questions/629676/dummy-network-interface-in-linux + Expect(ret[0].State).To(Equal("UNKNOWN")) + + By("enabling interface again") + err = netif.Up() + Expect(err).NotTo(HaveOccurred()) + + ret, err = ipAddressShow(testInterface1) + Expect(err).NotTo(HaveOccurred()) + Expect(ret).To(HaveLen(1)) + Expect(ret[0].State).To(Equal("UNKNOWN")) + + By("disabling interface") + err = netif.Down() + Expect(err).NotTo(HaveOccurred()) + + ret, err = ipAddressShow(testInterface1) + Expect(err).NotTo(HaveOccurred()) + Expect(ret).To(HaveLen(1)) + Expect(ret[0].State).To(Equal("DOWN")) + + By("disabling interface again") + err = netif.Down() + Expect(err).NotTo(HaveOccurred()) + + ret, err = ipAddressShow(testInterface1) + Expect(err).NotTo(HaveOccurred()) + Expect(ret).To(HaveLen(1)) + Expect(ret[0].State).To(Equal("DOWN")) + }) + + It("should add address", func() { + if skip { + Skip("RUN_NETIF_TEST is not set") + } + + netif := NewInterface(testInterface1) + + By("adding address") + err := netif.AddAddr("192.168.0.100") + Expect(err).NotTo(HaveOccurred()) + + ret, err := ipAddressShow(testInterface1) + Expect(err).NotTo(HaveOccurred()) + Expect(ret[0].ipv4Addrs()).To(ConsistOf("192.168.0.100")) + + By("adding another address") + err = netif.AddAddr("192.168.0.101") + Expect(err).NotTo(HaveOccurred()) + + ret, err = ipAddressShow(testInterface1) + Expect(err).NotTo(HaveOccurred()) + Expect(ret[0].ipv4Addrs()).To(ConsistOf("192.168.0.100", "192.168.0.101")) + + By("adding address that has already set") // error case + err = netif.AddAddr("192.168.0.101") + Expect(err).To(HaveOccurred()) + }) + + It("should delete address", func() { + if skip { + Skip("RUN_NETIF_TEST is not set") + } + + By("setting addresses by ip command") + err := ipAddressAdd(testInterface1, "192.168.0.100") + Expect(err).NotTo(HaveOccurred()) + err = ipAddressAdd(testInterface1, "192.168.0.101") + Expect(err).NotTo(HaveOccurred()) + + netif := NewInterface(testInterface1) + + 
By("deleting address") + err = netif.DeleteAddr("192.168.0.100") + Expect(err).NotTo(HaveOccurred()) + + ret, err := ipAddressShow(testInterface1) + Expect(err).NotTo(HaveOccurred()) + Expect(ret[0].ipv4Addrs()).To(ConsistOf("192.168.0.101")) + + By("deleting address again") // error case + err = netif.DeleteAddr("192.168.0.100") + Expect(err).To(HaveOccurred()) + }) + + It("should delete all addresses", func() { + if skip { + Skip("RUN_NETIF_TEST is not set") + } + + By("setting addresses by ip command") + err := ipAddressAdd(testInterface1, "192.168.0.100") + Expect(err).NotTo(HaveOccurred()) + err = ipAddressAdd(testInterface1, "192.168.0.101") + Expect(err).NotTo(HaveOccurred()) + + netif := NewInterface(testInterface1) + + By("deleting all addresses") + err = netif.DeleteAllAddr() + Expect(err).NotTo(HaveOccurred()) + + ret, err := ipAddressShow(testInterface1) + Expect(err).NotTo(HaveOccurred()) + Expect(ret[0].ipv4Addrs()).To(BeEmpty()) + + By("deleting all addresses again") + err = netif.DeleteAllAddr() + Expect(err).NotTo(HaveOccurred()) + + ret, err = ipAddressShow(testInterface1) + Expect(err).NotTo(HaveOccurred()) + Expect(ret[0].ipv4Addrs()).To(BeEmpty()) + }) + }) + + Context("interface does not exist", Ordered, func() { + It("should return error", func() { + netif := NewInterface("test-netif-notfound") + + err := netif.Up() + Expect(err).To(HaveOccurred()) + + err = netif.Down() + Expect(err).To(HaveOccurred()) + + addrs, err := netif.ListAddrs() + Expect(err).To(HaveOccurred()) + Expect(addrs).To(BeEmpty()) + + err = netif.AddAddr("192.168.0.100") + Expect(err).To(HaveOccurred()) + + err = netif.DeleteAddr("192.168.0.100") + Expect(err).To(HaveOccurred()) + + err = netif.DeleteAllAddr() + Expect(err).To(HaveOccurred()) + }) + }) + +}) + +func ipLinkAdd(name string) error { + _, err := exec.Command("ip", "link", "add", name, "type", "dummy").Output() + var exitErr *exec.ExitError + if errors.As(err, &exitErr) { + return fmt.Errorf("%v, %s", err, 
string(exitErr.Stderr)) + } + return err +} + +func ipLinkDelete(name string) error { + _, err := exec.Command("ip", "link", "delete", name).Output() + var exitErr *exec.ExitError + if errors.As(err, &exitErr) { + return fmt.Errorf("%v, %s", err, string(exitErr.Stderr)) + } + return err +} + +func ipAddressAdd(name, addr string) error { + _, err := exec.Command("ip", "address", "add", addr, "dev", name).Output() + var exitErr *exec.ExitError + if errors.As(err, &exitErr) { + return fmt.Errorf("%v, %s", err, string(exitErr.Stderr)) + } + return err +} + +func ipAddressShow(name string) ([]*ipAddressShowResult, error) { + out, err := exec.Command("ip", "-j", "address", "show", name).Output() + var exitErr *exec.ExitError + if errors.As(err, &exitErr) { + return nil, fmt.Errorf("%w, %s", err, string(exitErr.Stderr)) + } + var ret []*ipAddressShowResult + err = json.Unmarshal(out, &ret) + if err != nil { + return nil, err + } + return ret, nil +} + +type ipAddressShowResult struct { + State string `json:"operstate"` + Addrs []*addrInfo `json:"addr_info"` +} + +type addrInfo struct { + Family string `json:"family"` + Local string `json:"local"` +} + +func (r *ipAddressShowResult) ipv4Addrs() []string { + if r == nil { + return nil + } + ret := []string{} + for _, a := range r.Addrs { + if a.Family == "inet" { + ret = append(ret, a.Local) + } + } + return ret +} + +// NOTE: The outputs of the ip command are as follows. + +// (1) After creating a dummy interface. +// +// $ sudo ip link add test type dummy +// $ ip -j address show test | jq . +// [ +// { +// "ifindex": 14, +// "ifname": "test", +// "flags": [ +// "BROADCAST", +// "NOARP" +// ], +// "mtu": 1500, +// "qdisc": "noop", +// "operstate": "DOWN", +// "group": "default", +// "txqlen": 1000, +// "link_type": "ether", +// "address": "c2:55:1b:fc:8c:9c", +// "broadcast": "ff:ff:ff:ff:ff:ff", +// "addr_info": [] +// } +// ] + +// (2) After adding an address to the interface. 
+// +// $ sudo ip address add 192.168.0.99 dev test +// $ ip -j address show test | jq . +// [ +// { +// "ifindex": 14, +// "ifname": "test", +// "flags": [ +// "BROADCAST", +// "NOARP" +// ], +// "mtu": 1500, +// "qdisc": "noop", +// "operstate": "DOWN", +// "group": "default", +// "txqlen": 1000, +// "link_type": "ether", +// "address": "c2:55:1b:fc:8c:9c", +// "broadcast": "ff:ff:ff:ff:ff:ff", +// "addr_info": [ +// { +// "family": "inet", +// "local": "192.168.0.99", +// "prefixlen": 32, +// "scope": "global", +// "label": "test", +// "valid_life_time": 4294967295, +// "preferred_life_time": 4294967295 +// } +// ] +// } +// ] + +// (3) After enabling the interface. +// +// $ sudo ip link set test up +// $ ip -j address show test | jq . +// [ +// { +// "ifindex": 14, +// "ifname": "test", +// "flags": [ +// "BROADCAST", +// "NOARP", +// "UP", +// "LOWER_UP" +// ], +// "mtu": 1500, +// "qdisc": "noqueue", +// "operstate": "UNKNOWN", +// "group": "default", +// "txqlen": 1000, +// "link_type": "ether", +// "address": "c2:55:1b:fc:8c:9c", +// "broadcast": "ff:ff:ff:ff:ff:ff", +// "addr_info": [ +// { +// "family": "inet", +// "local": "192.168.0.99", +// "prefixlen": 32, +// "scope": "global", +// "label": "test", +// "valid_life_time": 4294967295, +// "preferred_life_time": 4294967295 +// }, +// { +// "family": "inet6", +// "local": "fe80::c055:1bff:fefc:8c9c", +// "prefixlen": 64, +// "scope": "link", +// "valid_life_time": 4294967295, +// "preferred_life_time": 4294967295 +// } +// ] +// } +// ] diff --git a/pkg/boot-ip-setter/suites_test.go b/pkg/boot-ip-setter/suites_test.go new file mode 100644 index 000000000..c44b5a3ca --- /dev/null +++ b/pkg/boot-ip-setter/suites_test.go @@ -0,0 +1,13 @@ +package main + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +func TestBootIPSetter(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "boot-ip-setter") +} From 18adb1c1cffe29390f9177a7d478833c7aedaa1e Mon Sep 17 00:00:00 2001 From: Masayuki Ishii Date: Thu, 23 Jan 2025 15:13:54 +0900 Subject: [PATCH 2/3] menu: update dhcp relay setting Signed-off-by: Masayuki Ishii --- menu/cluster_yaml_generator.go | 9 +++----- menu/testdata/cluster.yml | 40 ++++++++++++++++++++++++++++++---- 2 files changed, 39 insertions(+), 10 deletions(-) diff --git a/menu/cluster_yaml_generator.go b/menu/cluster_yaml_generator.go index 8d089f4d6..1762f8f60 100644 --- a/menu/cluster_yaml_generator.go +++ b/menu/cluster_yaml_generator.go @@ -4,6 +4,7 @@ import ( "fmt" "io" + "github.com/cybozu-go/neco" "github.com/cybozu-go/placemat/v2/pkg/types" "k8s.io/apimachinery/pkg/runtime/serializer/json" "sigs.k8s.io/yaml" @@ -487,12 +488,8 @@ func (c *Cluster) createToRNetNs(rack *rack, tor *tor, torIdx int) *types.NetNSS fmt.Sprintf("--pid-file=/var/run/dnsmasq_%s.pid", name), "--log-facility=-", } - for _, r := range c.racks { - if r.name == rack.name { - continue - } - dnsmasqCommand = append(dnsmasqCommand, "--dhcp-relay") - dnsmasqCommand = append(dnsmasqCommand, fmt.Sprintf("%s,%s", tor.nodeAddress.IP.String(), r.bootNode.node0Address.IP.String())) + for _, ip := range neco.DHCPServerAddressList { + dnsmasqCommand = append(dnsmasqCommand, "--dhcp-relay", fmt.Sprintf("%s,%s", tor.nodeAddress.IP.String(), ip)) } torNs.Apps = append(torNs.Apps, &types.NetNSAppSpec{ Name: "dnsmasq", diff --git a/menu/testdata/cluster.yml b/menu/testdata/cluster.yml index b38d38df0..145939377 100644 --- a/menu/testdata/cluster.yml +++ b/menu/testdata/cluster.yml @@ -519,7 +519,15 @@ apps: - --pid-file=/var/run/dnsmasq_rack0-tor1.pid - --log-facility=- - --dhcp-relay - - 10.69.0.65,10.69.0.195 + - 10.69.0.65,10.71.255.1 + - --dhcp-relay + - 10.69.0.65,10.71.255.2 + - --dhcp-relay + - 10.69.0.65,10.71.255.3 + - --dhcp-relay + - 
10.69.0.65,10.71.255.4 + - --dhcp-relay + - 10.69.0.65,10.71.255.5 interfaces: - addresses: - 10.72.1.1/31 @@ -549,7 +557,15 @@ apps: - --pid-file=/var/run/dnsmasq_rack0-tor2.pid - --log-facility=- - --dhcp-relay - - 10.69.0.129,10.69.0.195 + - 10.69.0.129,10.71.255.1 + - --dhcp-relay + - 10.69.0.129,10.71.255.2 + - --dhcp-relay + - 10.69.0.129,10.71.255.3 + - --dhcp-relay + - 10.69.0.129,10.71.255.4 + - --dhcp-relay + - 10.69.0.129,10.71.255.5 interfaces: - addresses: - 10.72.1.3/31 @@ -579,7 +595,15 @@ apps: - --pid-file=/var/run/dnsmasq_rack1-tor1.pid - --log-facility=- - --dhcp-relay - - 10.69.1.1,10.69.0.3 + - 10.69.1.1,10.71.255.1 + - --dhcp-relay + - 10.69.1.1,10.71.255.2 + - --dhcp-relay + - 10.69.1.1,10.71.255.3 + - --dhcp-relay + - 10.69.1.1,10.71.255.4 + - --dhcp-relay + - 10.69.1.1,10.71.255.5 interfaces: - addresses: - 10.72.1.5/31 @@ -609,7 +633,15 @@ apps: - --pid-file=/var/run/dnsmasq_rack1-tor2.pid - --log-facility=- - --dhcp-relay - - 10.69.1.65,10.69.0.3 + - 10.69.1.65,10.71.255.1 + - --dhcp-relay + - 10.69.1.65,10.71.255.2 + - --dhcp-relay + - 10.69.1.65,10.71.255.3 + - --dhcp-relay + - 10.69.1.65,10.71.255.4 + - --dhcp-relay + - 10.69.1.65,10.71.255.5 interfaces: - addresses: - 10.72.1.7/31 From 3aecfb3fa347ed34ff658ff29ad4053f3e80c0a6 Mon Sep 17 00:00:00 2001 From: Masayuki Ishii Date: Fri, 24 Jan 2025 17:15:06 +0900 Subject: [PATCH 3/3] Fixed startup and shutdown messages --- pkg/boot-ip-setter/main.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/boot-ip-setter/main.go b/pkg/boot-ip-setter/main.go index ac899248e..dec9224f4 100644 --- a/pkg/boot-ip-setter/main.go +++ b/pkg/boot-ip-setter/main.go @@ -45,12 +45,12 @@ func main() { ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGTERM, syscall.SIGINT) defer stop() - logger.Info("hello!", "interface", *flagInterface, "interval", *flagInterval, "listen address", *flagListenAddress) + logger.Info("boot-ip-setter has started", "interface", 
*flagInterface, "interval", *flagInterval, "listen address", *flagListenAddress) netif := NewInterface(*flagInterface) err := subMain(ctx, logger, netif, *flagInterval, *flagListenAddress) if err != nil && !errors.Is(err, context.Canceled) { - logger.Error("error exit", "error", err) + logger.Error("boot-ip-setter has finished abnormally", "error", err) // delete all addresses if this program ends abnormally err := netif.DeleteAllAddr() @@ -65,7 +65,7 @@ func main() { os.Exit(1) } - logger.Info("bye!") + logger.Info("boot-ip-setter has finished") } func subMain(ctx context.Context, logger *slog.Logger, netif NetworkInterface, interval time.Duration, listenAddr string) error {