Skip to content

Commit

Permalink
Service checks
Browse files Browse the repository at this point in the history
  • Loading branch information
claudiubelu committed Dec 6, 2024
1 parent bb2be37 commit d1c6db8
Show file tree
Hide file tree
Showing 7 changed files with 130 additions and 4 deletions.
23 changes: 22 additions & 1 deletion src/k8s/pkg/k8sd/app/hooks_bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,17 @@ func (a *App) onBootstrapWorkerNode(ctx context.Context, s state.State, encodedT
return fmt.Errorf("failed after retry: %w", err)
}

log.Info("Checking worker node services")
if err := control.RetryFor(ctx, 3, 5*time.Second, func() error {
if err := snaputil.CheckWorkerServicesStates(ctx, snap, "active"); err != nil {
return fmt.Errorf("failed to ensure all worker services are active: %w", err)
}
return nil
}); err != nil {
return fmt.Errorf("failed after retry: %w", err)
}

log.Info("Worker node services are ready")
return nil
}

Expand Down Expand Up @@ -508,8 +519,18 @@ func (a *App) onBootstrapControlPlane(ctx context.Context, s state.State, bootst
if err := waitApiServerReady(ctx, snap); err != nil {
return fmt.Errorf("kube-apiserver did not become ready in time: %w", err)
}
log.Info("API server is ready - notify controllers")

log.Info("API server is ready - checking control plane services")
if err := control.RetryFor(ctx, 3, 5*time.Second, func() error {
if err := snaputil.CheckControlPlaneServicesStates(ctx, snap, "active"); err != nil {
return fmt.Errorf("failed to ensure all control plane services are active: %w", err)
}
return nil
}); err != nil {
return fmt.Errorf("failed after retry: %w", err)
}

log.Info("Control plane services are ready - notify controllers")
a.NotifyFeatureController(
cfg.Network.GetEnabled(),
cfg.Gateway.GetEnabled(),
Expand Down
12 changes: 12 additions & 0 deletions src/k8s/pkg/k8sd/app/hooks_join.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/canonical/k8s/pkg/k8sd/setup"
"github.com/canonical/k8s/pkg/k8sd/types"
"github.com/canonical/k8s/pkg/log"
snaputil "github.com/canonical/k8s/pkg/snap/util"
"github.com/canonical/k8s/pkg/utils"
"github.com/canonical/k8s/pkg/utils/control"
"github.com/canonical/k8s/pkg/utils/experimental/snapdconfig"
Expand Down Expand Up @@ -236,5 +237,16 @@ func (a *App) onPostJoin(ctx context.Context, s state.State, initConfig map[stri
return fmt.Errorf("failed to wait for kube-apiserver to become ready: %w", err)
}

log.Info("API server is ready - checking control plane services")
if err := control.RetryFor(ctx, 3, 5*time.Second, func() error {
if err := snaputil.CheckControlPlaneServicesStates(ctx, snap, "active"); err != nil {
return fmt.Errorf("failed to ensure all control plane services are active: %w", err)
}
return nil
}); err != nil {
return fmt.Errorf("failed after retry: %w", err)
}

log.Info("Control plane node services are ready")
return nil
}
7 changes: 4 additions & 3 deletions src/k8s/pkg/snap/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@ type Snap interface {
GID() int // GID is the group ID to set on config files.
Hostname() string // Hostname is the name of the node.

StartService(ctx context.Context, serviceName string) error // snapctl start $service
StopService(ctx context.Context, serviceName string) error // snapctl stop $service
RestartService(ctx context.Context, serviceName string) error // snapctl restart $service
StartService(ctx context.Context, serviceName string) error // snapctl start $service
StopService(ctx context.Context, serviceName string) error // snapctl stop $service
RestartService(ctx context.Context, serviceName string) error // snapctl restart $service
GetServiceState(ctx context.Context, serviceName string) (string, error) // snapctl services $service

SnapctlGet(ctx context.Context, args ...string) ([]byte, error) // snapctl get $args...
SnapctlSet(ctx context.Context, args ...string) error // snapctl set $args...
Expand Down
4 changes: 4 additions & 0 deletions src/k8s/pkg/snap/mock/mock.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,10 @@ func (s *Snap) RestartService(ctx context.Context, name string) error {
return s.RestartServiceErr
}

func (s *Snap) GetServiceState(ctx context.Context, name string) (string, error) {
return "active", nil
}

func (s *Snap) Refresh(ctx context.Context, opts types.RefreshOpts) (string, error) {
if len(s.RefreshCalledWith) == 0 {
s.RefreshCalledWith = []types.RefreshOpts{opts}
Expand Down
27 changes: 27 additions & 0 deletions src/k8s/pkg/snap/pebble.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
package snap

import (
"bytes"
"context"
"fmt"
"os/exec"
"path/filepath"
"strings"
"time"

"github.com/canonical/k8s/pkg/k8sd/types"
Expand Down Expand Up @@ -56,6 +58,31 @@ func (s *pebble) RestartService(ctx context.Context, name string) error {
return s.runCommand(ctx, []string{filepath.Join(s.snapDir, "bin", "pebble"), "restart", name})
}

// GetServiceState returns a k8s service state. The name can be either prefixed or not.
func (s *pebble) GetServiceState(ctx context.Context, name string) (string, error) {
var b bytes.Buffer
err := s.runCommand(ctx, []string{filepath.Join(s.snapDir, "bin", "pebble"), "services", name}, func(c *exec.Cmd) { c.Stdout = &b })
if err != nil {
return "", err
}

output := b.String()
// We're expecting output like this:
// Service Startup Current Since
// kubelet enabled inactive -
lines := strings.Split(output, "\n")
if len(lines) < 2 {
return "", fmt.Errorf("Unexpected output when checking service %s state", name)
}

fields := strings.Fields(lines[1])
if len(fields) < 3 || (!strings.EqualFold(stateActive, fields[2]) && !strings.EqualFold(stateInactive, fields[2])) {
return "", fmt.Errorf("Unexpected output when checking service %s state", name)
}

return fields[2], nil
}

func (s *pebble) Refresh(ctx context.Context, to types.RefreshOpts) (string, error) {
switch {
case to.Revision != "":
Expand Down
32 changes: 32 additions & 0 deletions src/k8s/pkg/snap/snap.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ import (
"k8s.io/cli-runtime/pkg/genericclioptions"
)

const (
stateActive = "active"
stateInactive = "inactive"
)

type SnapOpts struct {
SnapInstanceName string
SnapDir string
Expand Down Expand Up @@ -85,6 +90,33 @@ func (s *snap) RestartService(ctx context.Context, name string) error {
return s.runCommand(ctx, []string{"snapctl", "restart", serviceName(name)})
}

// GetServiceState returns a k8s service state. The name can be either prefixed or not.
func (s *snap) GetServiceState(ctx context.Context, name string) (string, error) {
log.FromContext(ctx).V(2).WithCallDepth(1).Info("Getting service state", "service", name)

var b bytes.Buffer
err := s.runCommand(ctx, []string{"snapctl", "services", serviceName(name)}, func(c *exec.Cmd) { c.Stdout = &b })
if err != nil {
return "", err
}

output := b.String()
// We're expecting output like this:
// Service Startup Current Notes
// k8s.kubelet enabled inactive -
lines := strings.Split(output, "\n")
if len(lines) < 2 {
return "", fmt.Errorf("Unexpected output when checking service %s state", name)
}

fields := strings.Fields(lines[1])
if len(fields) < 3 || (!strings.EqualFold(stateActive, fields[2]) && !strings.EqualFold(stateInactive, fields[2])) {
return "", fmt.Errorf("Unexpected output when checking service %s state", name)
}

return fields[2], nil
}

// Refresh refreshes the snap to a different track, revision or custom snap.
func (s *snap) Refresh(ctx context.Context, to types.RefreshOpts) (string, error) {
if s.Strict() {
Expand Down
29 changes: 29 additions & 0 deletions src/k8s/pkg/snap/util/services.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package snaputil
import (
"context"
"fmt"
"strings"

"github.com/canonical/k8s/pkg/snap"
)
Expand Down Expand Up @@ -98,6 +99,34 @@ func StopK8sDqliteServices(ctx context.Context, snap snap.Snap) error {
return nil
}

// CheckWorkerServicesStates checks if the worker services are in the expected state.
// CheckWorkerServicesStates will return on the first error or service state mismatch.
func CheckWorkerServicesStates(ctx context.Context, snap snap.Snap, state string) error {
return checkServicesStates(ctx, snap, workerServices, state)
}

// CheckControlPlaneServicesStates checks if the control plane services are in the expected state.
// CheckControlPlaneServicesStates will return on the first error or service state mismatch.
func CheckControlPlaneServicesStates(ctx context.Context, snap snap.Snap, state string) error {
return checkServicesStates(ctx, snap, controlPlaneServices, state)
}

// CheckK8sDqliteServices checks if the k8s-dqlite datastore service is in the expected state.
func CheckK8sDqliteServices(ctx context.Context, snap snap.Snap, state string) error {
return checkServicesStates(ctx, snap, []string{"k8s-dqlite"}, state)
}

func checkServicesStates(ctx context.Context, snap snap.Snap, services []string, state string) error {
for _, service := range services {
if actualState, err := snap.GetServiceState(ctx, service); err != nil {
return fmt.Errorf("failed to check service %s: %w", service, err)
} else if !strings.EqualFold(state, actualState) {
return fmt.Errorf("expected %s to be in state %s, but it is %s", service, state, actualState)
}
}
return nil
}

// ServiceArgsFromMap processes a map of string pointers and categorizes them into update and delete lists.
// - If the value pointer is nil, it adds the argument name to the delete list.
// - If the value pointer is not nil, it adds the argument and its value to the update map.
Expand Down

0 comments on commit d1c6db8

Please sign in to comment.