Skip to content

Commit

Permalink
chore: support debug shell for advanced development
Browse files Browse the repository at this point in the history
Support dropping into a very minimal debug shell.

```bash
sudo -E --preserve-env=HOME _out/talosctl-linux-amd64 cluster create --provisioner=qemu $REGISTRY_MIRROR_FLAGS --controlplanes=1 --workers=0 --with-bootloader=false --with-debug-shell
```

Co-authored-by: Dmitry Sharshakov <[email protected]>
Signed-off-by: Noel Georgi <[email protected]>
Signed-off-by: Dmitry Sharshakov <[email protected]>
  • Loading branch information
frezbo and dsseng committed Oct 19, 2024
1 parent c14b446 commit 1b22df4
Show file tree
Hide file tree
Showing 8 changed files with 108 additions and 0 deletions.
34 changes: 34 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ ARG TOOLS
ARG PKGS
ARG EXTRAS
ARG INSTALLER_ARCH
ARG DEBUG_TOOLS_SOURCE

ARG PKGS_PREFIX
ARG PKG_FHS
Expand Down Expand Up @@ -42,6 +43,8 @@ ARG PKG_CNI
ARG PKG_FLANNEL_CNI
ARG PKG_TALOSCTL_CNI_BUNDLE_INSTALL

ARG DEBUG_TOOLS_SOURCE

# Resolve package images using ${PKGS} to be used later in COPY --from=.

FROM ${PKG_FHS} AS pkg-fhs
Expand Down Expand Up @@ -140,6 +143,29 @@ FROM ${PKG_KERNEL} AS pkg-kernel
FROM --platform=amd64 ${PKG_KERNEL} AS pkg-kernel-amd64
FROM --platform=arm64 ${PKG_KERNEL} AS pkg-kernel-arm64

FROM --platform=amd64 ${TOOLS} as tools-amd64
FROM --platform=arm64 ${TOOLS} as tools-arm64

FROM scratch as pkg-debug-tools-scratch-amd64
FROM scratch as pkg-debug-tools-scratch-arm64

FROM scratch as pkg-debug-tools-bash-minimal-amd64
COPY --from=tools-amd64 /toolchain/bin/bash /toolchain/bin/bash
COPY --from=tools-amd64 /toolchain/lib/ld-musl-x86_64.so.1 /toolchain/toolchain/lib/ld-musl-x86_64.so.1
COPY --from=tools-amd64 /toolchain/bin/cat /toolchain/bin/cat
COPY --from=tools-amd64 /toolchain/bin/ls /toolchain/bin/ls
COPY --from=tools-amd64 /toolchain/bin/tee /toolchain/bin/tee

FROM scratch as pkg-debug-tools-bash-minimal-arm64
COPY --from=tools-arm64 /toolchain/bin/bash /toolchain/bin/bash
COPY --from=tools-arm64 /toolchain/lib/ld-musl-aarch64.so.1 /toolchain/toolchain/lib/ld-musl-aarch64.so.1
COPY --from=tools-arm64 /toolchain/bin/cat /toolchain/bin/cat
COPY --from=tools-arm64 /toolchain/bin/ls /toolchain/bin/ls
COPY --from=tools-arm64 /toolchain/bin/tee /toolchain/bin/tee

FROM pkg-debug-tools-${DEBUG_TOOLS_SOURCE}-amd64 as pkg-debug-tools-amd64
FROM pkg-debug-tools-${DEBUG_TOOLS_SOURCE}-arm64 as pkg-debug-tools-arm64

# Strip CNI package.

FROM scratch AS pkg-cni-stripped-amd64
Expand Down Expand Up @@ -651,6 +677,10 @@ COPY --link --from=pkg-kmod-amd64 /usr/lib/libkmod.* /rootfs/lib/
COPY --link --from=pkg-kmod-amd64 /usr/bin/kmod /rootfs/sbin/modprobe
COPY --link --from=modules-amd64 /lib/modules /rootfs/lib/modules
COPY --link --from=machined-build-amd64 /machined /rootfs/sbin/init

# this is a no-op as it copies from a scratch image when WITH_DEBUG_SHELL is not set
COPY --link --from=pkg-debug-tools-amd64 * /rootfs/

RUN <<END
# the orderly_poweroff call by the kernel will call '/sbin/poweroff'
ln /rootfs/sbin/init /rootfs/sbin/poweroff
Expand Down Expand Up @@ -721,6 +751,10 @@ COPY --link --from=pkg-kmod-arm64 /usr/lib/libkmod.* /rootfs/lib/
COPY --link --from=pkg-kmod-arm64 /usr/bin/kmod /rootfs/sbin/modprobe
COPY --link --from=modules-arm64 /lib/modules /rootfs/lib/modules
COPY --link --from=machined-build-arm64 /machined /rootfs/sbin/init

# this is a no-op as it copies from a scratch image when WITH_DEBUG_SHELL is not set
COPY --link --from=pkg-debug-tools-arm64 * /rootfs/

RUN <<END
# the orderly_poweroff call by the kernel will call '/sbin/poweroff'
ln /rootfs/sbin/init /rootfs/sbin/poweroff
Expand Down
8 changes: 8 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ CI_RELEASE_TAG := $(shell git log --oneline --format=%B -n 1 HEAD^2 -- 2>/dev/nu
ARTIFACTS := _out
TOOLS ?= ghcr.io/siderolabs/tools:v1.9.0-alpha.0-4-g2058296

DEBUG_TOOLS_SOURCE := scratch

PKGS_PREFIX ?= ghcr.io/siderolabs
PKGS ?= v1.9.0-alpha.0-24-gbe92da0
EXTRAS ?= v1.9.0-alpha.0-1-geab6e58
Expand Down Expand Up @@ -147,6 +149,11 @@ else
GO_LDFLAGS += -s -w
endif

ifneq (, $(filter $(WITH_DEBUG_SHELL), t true TRUE y yes 1))
# bash-minimal is a Dockerfile target that copies over the bash from siderolabs tools
DEBUG_TOOLS_SOURCE := bash-minimal
endif

GO_BUILDFLAGS_TALOSCTL := $(GO_BUILDFLAGS) -tags "$(GO_BUILDTAGS_TALOSCTL)"
GO_BUILDFLAGS += -tags "$(GO_BUILDTAGS)"

Expand All @@ -161,6 +168,7 @@ COMMON_ARGS += --progress=$(PROGRESS)
COMMON_ARGS += --platform=$(PLATFORM)
COMMON_ARGS += --push=$(PUSH)
COMMON_ARGS += --build-arg=TOOLS=$(TOOLS)
COMMON_ARGS += --build-arg=DEBUG_TOOLS_SOURCE=$(DEBUG_TOOLS_SOURCE)
COMMON_ARGS += --build-arg=PKGS=$(PKGS)
COMMON_ARGS += --build-arg=EXTRAS=$(EXTRAS)
COMMON_ARGS += --build-arg=GOFUMPT_VERSION=$(GOFUMPT_VERSION)
Expand Down
26 changes: 26 additions & 0 deletions cmd/talosctl/cmd/mgmt/cluster/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ const (
controlPlanePortFlag = "control-plane-port"
firewallFlag = "with-firewall"
tpm2EnabledFlag = "with-tpm2"
withDebugShellFlag = "with-debug-shell"

// The following flags are the gen options - the options that are only used in machine configuration (i.e., not during the qemu/docker provisioning).
// They are not applicable when no machine configuration is generated, hence mutually exclusive with the --input-dir flag.
Expand Down Expand Up @@ -190,6 +191,7 @@ var (
withUUIDHostnames bool
withSiderolinkAgent agentFlag
withJSONLogs bool
debugShellEnabled bool
)

// createCmd represents the cluster up command.
Expand Down Expand Up @@ -470,13 +472,20 @@ func create(ctx context.Context) error {
provision.WithBootlader(bootloaderEnabled),
provision.WithUEFI(uefiEnabled),
provision.WithTPM2(tpm2Enabled),
provision.WithDebugShell(debugShellEnabled),
provision.WithExtraUEFISearchPaths(extraUEFISearchPaths),
provision.WithTargetArch(targetArch),
provision.WithSiderolinkAgent(withSiderolinkAgent.IsEnabled()),
}

var configBundleOpts []bundle.Option

if debugShellEnabled {
if provisionerName != "qemu" {
return errors.New("debug shell only supported with qemu provisioner")
}
}

if ports != "" {
if provisionerName != docker {
return errors.New("exposed-ports flag only supported with docker provisioner")
Expand Down Expand Up @@ -968,6 +977,21 @@ func create(ctx context.Context) error {
return err
}

if debugShellEnabled {
	// Resolve the Talos directory once, before printing anything: the value
	// does not depend on the node, and a lookup failure must be reported to
	// the caller instead of being silently turned into a successful return.
	talosDir, err := clientconfig.GetTalosDirectory()
	if err != nil {
		return fmt.Errorf("getting talos directory: %w", err)
	}

	fmt.Println("You can now connect to debug shell on any node using these commands:")

	// Print one socat command per node, pointing at "<node name>.serial"
	// under the cluster's directory.
	for _, node := range request.Nodes {
		fmt.Printf("socat - UNIX-CONNECT:%s\n", filepath.Join(talosDir, "clusters", clusterName, node.Name+".serial"))
	}

	// Return early: the remaining steps of this function are skipped when the
	// debug shell is enabled.
	return nil
}

// No talosconfig in the bundle - skip the operations below
if bundleTalosconfig == nil {
return nil
Expand Down Expand Up @@ -1206,6 +1230,8 @@ func init() {
createCmd.Flags().BoolVar(&bootloaderEnabled, bootloaderEnabledFlag, true, "enable bootloader to load kernel and initramfs from disk image after install")
createCmd.Flags().BoolVar(&uefiEnabled, "with-uefi", true, "enable UEFI on x86_64 architecture")
createCmd.Flags().BoolVar(&tpm2Enabled, tpm2EnabledFlag, false, "enable TPM2 emulation support using swtpm")
createCmd.Flags().BoolVar(&debugShellEnabled, withDebugShellFlag, false, "drop talos into a maintenance shell on boot, this is for advanced debugging for developers only")
createCmd.Flags().MarkHidden("with-debug-shell") //nolint:errcheck
createCmd.Flags().StringSliceVar(&extraUEFISearchPaths, "extra-uefi-search-paths", []string{}, "additional search paths for UEFI firmware (only applies when UEFI is enabled)")
createCmd.Flags().StringSliceVar(&registryMirrors, registryMirrorFlag, []string{}, "list of registry mirrors to use in format: <registry host>=<mirror URL>")
createCmd.Flags().StringSliceVar(&registryInsecure, registryInsecureFlag, []string{}, "list of registry hostnames to skip TLS verification for")
Expand Down
11 changes: 11 additions & 0 deletions internal/pkg/mount/switchroot/switchroot.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"path/filepath"

"github.com/siderolabs/go-debug"
"github.com/siderolabs/go-procfs/procfs"
"golang.org/x/sys/unix"

"github.com/siderolabs/talos/internal/pkg/mount"
Expand All @@ -28,6 +29,8 @@ var preservedPaths = map[string]struct{}{

// Switch moves the rootfs to a specified directory. See
// https://github.com/karelzak/util-linux/blob/master/sys-utils/switch_root.c.
//
//nolint:gocyclo
func Switch(prefix string, mountpoints *mount.Points) (err error) {
log.Println("moving mounts to the new rootfs")

Expand Down Expand Up @@ -88,6 +91,14 @@ func Switch(prefix string, mountpoints *mount.Points) (err error) {
log.Printf("race detection enabled with halt_on_error=1")
}

if val := procfs.ProcCmdline().Get("talos.debugshell"); val != nil {
if err = unix.Exec("/bin/bash", []string{"/bin/bash"}, envv); err != nil {
return fmt.Errorf("error executing /bin/bash: %w", err)
}

return nil
}

if err = unix.Exec("/sbin/init", []string{"/sbin/init"}, envv); err != nil {
return fmt.Errorf("error executing /sbin/init: %w", err)
}
Expand Down
11 changes: 11 additions & 0 deletions pkg/provision/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,15 @@ func WithTPM2(enabled bool) Option {
}
}

// WithDebugShell returns an Option that records whether the debug shell is
// requested by storing the given value in Options.WithDebugShell.
//
// The returned Option always succeeds.
func WithDebugShell(enabled bool) Option {
	setDebugShell := func(opts *Options) error {
		opts.WithDebugShell = enabled

		return nil
	}

	return setDebugShell
}

// WithExtraUEFISearchPaths configures additional search paths to look for UEFI firmware.
func WithExtraUEFISearchPaths(extraUEFISearchPaths []string) Option {
return func(o *Options) error {
Expand Down Expand Up @@ -166,6 +175,8 @@ type Options struct {
UEFIEnabled bool
// Enable TPM2 emulation using swtpm.
TPM2Enabled bool
// Enable debug shell in the initramfs.
WithDebugShell bool
// Configure additional search paths to look for UEFI firmware.
ExtraUEFISearchPaths []string

Expand Down
9 changes: 9 additions & 0 deletions pkg/provision/providers/qemu/launch.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ type LaunchConfig struct {
NodeUUID uuid.UUID
BadRTC bool
ArchitectureData Arch
WithDebugShell bool

// Talos config
Config string
Expand Down Expand Up @@ -320,6 +321,14 @@ func launchVM(config *LaunchConfig) error {
"pause",
}

if config.WithDebugShell {
args = append(
args,
"-serial",
fmt.Sprintf("unix:%s/%s.serial,server,nowait", config.StatePath, config.Hostname),
)
}

var (
scsiAttached, ahciAttached, nvmeAttached bool
ahciBus int
Expand Down
5 changes: 5 additions & 0 deletions pkg/provision/providers/qemu/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@ func (p *provisioner) createNode(state *vm.State, clusterReq provision.ClusterRe
}
}

if opts.WithDebugShell {
cmdline.Append("talos.debugshell", "")
}

var nodeConfig string

if !nodeReq.SkipInjectingConfig {
Expand Down Expand Up @@ -157,6 +161,7 @@ func (p *provisioner) createNode(state *vm.State, clusterReq provision.ClusterRe
TFTPServer: nodeReq.TFTPServer,
IPXEBootFileName: nodeReq.IPXEBootFilename,
APIPort: apiPort,
WithDebugShell: opts.WithDebugShell,
}

if clusterReq.IPXEBootScript != "" {
Expand Down
4 changes: 4 additions & 0 deletions website/content/v1.9/advanced/developing-talos.md
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,10 @@ Specific tests can be run with `-test.run=TestIntegration/api.ResetSuite`.

`make <something> WITH_DEBUG=1` enables Go profiling and other debug features, useful for local development.

`make initramfs WITH_DEBUG_SHELL=true` adds bash and a minimal set of utilities to the initramfs for debugging purposes.
Combine it with the `--with-debug-shell` flag when creating a cluster to obtain shell access.
This is rarely needed, because in this mode the bash shell runs in place of machined.

## Destroying Cluster

```bash
Expand Down

0 comments on commit 1b22df4

Please sign in to comment.