From 08b0d93ea35e59b388b7acf0bdc7464346a83c3a Mon Sep 17 00:00:00 2001
From: Valentin Rothberg
Date: Mon, 22 May 2023 17:21:53 +0200
Subject: [PATCH] kube play: exit-code propagation

Implement a means of reflecting failed containers (i.e., those that
exited non-zero) to better integrate `kube play` with systemd.  The idea
is to have the main PID of `kube play` exit non-zero in a configurable
way such that systemd's restart policies can kick in.

When using the default sdnotify policy ("ignore"), the service container
acts as the main PID to further reduce the resource footprint.  In that
case, before stopping the service container, Podman will look up the
exit codes of all non-infra containers.  The service will then behave
according to the following three exit-code policies:

 - `none`: exit 0 and ignore containers (default)
 - `any`: exit non-zero if _any_ container did
 - `all`: exit non-zero if _all_ containers did

These values can be passed via a hidden `kube play
--service-exit-code-propagation` flag which can be used by tests and
later on by Quadlet.

When Podman acts as the main PID (i.e., when at least one container runs
with an sdnotify policy other than "ignore"), Podman will continue to
wait for the service container to exit and reflect its exit code.

Note that this commit also fixes a long-standing annoyance where the
service container exited non-zero.  The underlying issue was that the
service container had been stopped with SIGKILL instead of SIGTERM and
hence exited non-zero.  Fixing that was a prerequisite for the exit-code
propagation to work but also improves the integration of `kube play`
with systemd and hence Quadlet with systemd.

Jira: issues.redhat.com/browse/RUN-1776
Signed-off-by: Valentin Rothberg
---
 cmd/podman/kube/play.go                       |  22 ++--
 .../markdown/podman-container-inspect.1.md.in |  81 +++++++-------
 libpod/container_config.go                    |   2 +
 libpod/container_inspect.go                   |  49 ++++-----
 libpod/define/container_inspect.go            |  77 ++++++-------
 libpod/define/exit_code_propagation.go        |  54 ++++++++++
 libpod/options.go                             |   7 +-
 libpod/runtime_ctr.go                         |   4 +-
 libpod/service.go                             | 101 ++++++++++++++----
 pkg/domain/entities/play.go                   |   5 +
 pkg/domain/infra/abi/play.go                  |  11 +-
 test/system/250-systemd.bats                  |   3 +-
 test/system/252-quadlet.bats                  |   3 +-
 test/system/260-sdnotify.bats                 |  76 +++++++++++++
 14 files changed, 352 insertions(+), 143 deletions(-)
 create mode 100644 libpod/define/exit_code_propagation.go

diff --git a/cmd/podman/kube/play.go b/cmd/podman/kube/play.go
index 2292899744..e8bcc29cf6 100644
--- a/cmd/podman/kube/play.go
+++ b/cmd/podman/kube/play.go
@@ -181,18 +181,19 @@ func playFlags(cmd *cobra.Command) {
         flags.StringVar(&playOptions.ContextDir, contextDirFlagName, "", "Path to top level of context directory")
         _ = cmd.RegisterFlagCompletionFunc(contextDirFlagName, completion.AutocompleteDefault)
 
-        // NOTE: The service-container flag is marked as hidden as it
-        // is purely designed for running kube-play or play-kube in systemd units.
-        // It is not something users should need to know or care about.
-        //
-        // Having a flag rather than an env variable is cleaner.
-        serviceFlagName := "service-container"
-        flags.BoolVar(&playOptions.ServiceContainer, serviceFlagName, false, "Starts a service container before all pods")
-        _ = flags.MarkHidden("service-container")
-
         flags.StringVar(&playOptions.SignaturePolicy, "signature-policy", "", "`Pathname` of signature policy file (not usually used)")
         _ = flags.MarkHidden("signature-policy")
+
+        // The flags below are local-only and hidden since they are used
+        // solely in kube-play's systemd integration and are not intended
+        // for direct use by users.
+        serviceFlagName := "service-container"
+        flags.BoolVar(&playOptions.ServiceContainer, serviceFlagName, false, "Starts a service container before all pods")
+        _ = flags.MarkHidden(serviceFlagName)
+        exitFlagName := "service-exit-code-propagation"
+        flags.StringVar(&playOptions.ExitCodePropagation, exitFlagName, "", "Exit-code propagation of the service container")
+        _ = flags.MarkHidden(exitFlagName)
     }
 }
@@ -450,6 +451,9 @@ func kubeplay(body io.Reader) error {
     if err != nil {
         return err
     }
+    if report.ExitCode != nil {
+        registry.SetExitCode(int(*report.ExitCode))
+    }
     if err := printPlayReport(report); err != nil {
         return err
     }
diff --git a/docs/source/markdown/podman-container-inspect.1.md.in b/docs/source/markdown/podman-container-inspect.1.md.in
index a8b1a8bb1c..b9e650690a 100644
--- a/docs/source/markdown/podman-container-inspect.1.md.in
+++ b/docs/source/markdown/podman-container-inspect.1.md.in
@@ -20,46 +20,47 @@ The keys of the returned JSON can be used as the values for the --format flag (s
 
 Valid placeholders for the Go template are listed below:
 
-| **Placeholder**   | **Description**    |
-| ----------------- | ------------------ |
-| .AppArmorProfile  | AppArmor profile (string) |
-| .Args             | Command-line arguments (array of strings) |
-| .BoundingCaps     | Bounding capability set (array of strings) |
-| .Config ...       | Structure with config info |
-| .ConmonPidFile    | Path to file containing conmon pid (string) |
-| .Created          | Container creation time (string, ISO3601) |
-| .Dependencies     | Dependencies (array of strings) |
-| .Driver           | Storage driver (string) |
-| .EffectiveCaps    | Effective capability set (array of strings) |
-| .ExecIDs          | Exec IDs (array of strings) |
-| .GraphDriver ...  | Further details of graph driver (struct) |
-| .HostConfig ...   | Host config details (struct) |
-| .HostnamePath     | Path to file containing hostname (string) |
-| .HostsPath        | Path to container /etc/hosts file (string) |
-| .ID               | Container ID (full 64-char hash) |
-| .Image            | Container image ID (64-char hash) |
-| .ImageDigest      | Container image digest (sha256:+64-char hash) |
-| .ImageName        | Container image name (string) |
-| .IsInfra          | Is this an infra container? (string: true/false) |
-| .IsService        | Is this a service container? (string: true/false) |
-| .MountLabel       | SELinux label of mount (string) |
-| .Mounts           | Mounts (array of strings) |
-| .Name             | Container name (string) |
-| .Namespace        | Container namespace (string) |
-| .NetworkSettings ... | Network settings (struct) |
-| .OCIConfigPath    | Path to OCI config file (string) |
-| .OCIRuntime       | OCI runtime name (string) |
-| .Path             | Path to container command (string) |
-| .PidFile          | Path to file containing container PID (string) |
-| .Pod              | Parent pod (string) |
-| .ProcessLabel     | SELinux label of process (string) |
-| .ResolvConfPath   | Path to container's resolv.conf file (string) |
-| .RestartCount     | Number of times container has been restarted (int) |
-| .Rootfs           | Container rootfs (string) |
-| .SizeRootFs       | Size of rootfs, in bytes [1] |
-| .SizeRw           | Size of upper (R/W) container layer, in bytes [1] |
-| .State ...        | Container state info (struct) |
-| .StaticDir        | Path to container metadata dir (string) |
+| **Placeholder**          | **Description**                                     |
+| ------------------------ | --------------------------------------------------- |
+| .AppArmorProfile         | AppArmor profile (string)                           |
+| .Args                    | Command-line arguments (array of strings)           |
+| .BoundingCaps            | Bounding capability set (array of strings)          |
+| .Config ...              | Structure with config info                          |
+| .ConmonPidFile           | Path to file containing conmon pid (string)         |
+| .Created                 | Container creation time (string, ISO3601)           |
+| .Dependencies            | Dependencies (array of strings)                     |
+| .Driver                  | Storage driver (string)                             |
+| .EffectiveCaps           | Effective capability set (array of strings)         |
+| .ExecIDs                 | Exec IDs (array of strings)                         |
+| .GraphDriver ...         | Further details of graph driver (struct)            |
+| .HostConfig ...          | Host config details (struct)                        |
+| .HostnamePath            | Path to file containing hostname (string)           |
+| .HostsPath               | Path to container /etc/hosts file (string)          |
+| .ID                      | Container ID (full 64-char hash)                    |
+| .Image                   | Container image ID (64-char hash)                   |
+| .ImageDigest             | Container image digest (sha256:+64-char hash)       |
+| .ImageName               | Container image name (string)                       |
+| .IsInfra                 | Is this an infra container? (string: true/false)    |
+| .IsService               | Is this a service container? (string: true/false)   |
+| .KubeExitCodePropagation | Kube exit-code propagation (string)                 |
+| .MountLabel              | SELinux label of mount (string)                     |
+| .Mounts                  | Mounts (array of strings)                           |
+| .Name                    | Container name (string)                             |
+| .Namespace               | Container namespace (string)                        |
+| .NetworkSettings ...     | Network settings (struct)                           |
+| .OCIConfigPath           | Path to OCI config file (string)                    |
+| .OCIRuntime              | OCI runtime name (string)                           |
+| .Path                    | Path to container command (string)                  |
+| .PidFile                 | Path to file containing container PID (string)      |
+| .Pod                     | Parent pod (string)                                 |
+| .ProcessLabel            | SELinux label of process (string)                   |
+| .ResolvConfPath          | Path to container's resolv.conf file (string)       |
+| .RestartCount            | Number of times container has been restarted (int)  |
+| .Rootfs                  | Container rootfs (string)                           |
+| .SizeRootFs              | Size of rootfs, in bytes [1]                        |
+| .SizeRw                  | Size of upper (R/W) container layer, in bytes [1]   |
+| .State ...               | Container state info (struct)                       |
+| .StaticDir               | Path to container metadata dir (string)             |
 
 [1] This format specifier requires the **--size** option
 
diff --git a/libpod/container_config.go b/libpod/container_config.go
index 6aabc817ac..42b565dbb4 100644
--- a/libpod/container_config.go
+++ b/libpod/container_config.go
@@ -360,6 +360,8 @@ type ContainerMiscConfig struct {
     CgroupParent string `json:"cgroupParent"`
     // GroupEntry specifies arbitrary data to append to a file.
     GroupEntry string `json:"group_entry,omitempty"`
+    // KubeExitCodePropagation of the service container.
+    KubeExitCodePropagation define.KubeExitCodePropagation `json:"kubeExitCodePropagation"`
     // LogPath log location
     LogPath string `json:"logPath"`
     // LogTag is the tag used for logging
diff --git a/libpod/container_inspect.go b/libpod/container_inspect.go
index 0da2a4d059..d6f404c701 100644
--- a/libpod/container_inspect.go
+++ b/libpod/container_inspect.go
@@ -141,30 +141,31 @@ func (c *Container) getContainerInspectData(size bool, driverData *define.Driver
             CheckpointLog:  runtimeInfo.CheckpointLog,
             RestoreLog:     runtimeInfo.RestoreLog,
         },
-        Image:           config.RootfsImageID,
-        ImageName:       config.RootfsImageName,
-        Namespace:       config.Namespace,
-        Rootfs:          config.Rootfs,
-        Pod:             config.Pod,
-        ResolvConfPath:  resolvPath,
-        HostnamePath:    hostnamePath,
-        HostsPath:       hostsPath,
-        StaticDir:       config.StaticDir,
-        OCIRuntime:      config.OCIRuntime,
-        ConmonPidFile:   config.ConmonPidFile,
-        PidFile:         config.PidFile,
-        Name:            config.Name,
-        RestartCount:    int32(runtimeInfo.RestartCount),
-        Driver:          driverData.Name,
-        MountLabel:      config.MountLabel,
-        ProcessLabel:    config.ProcessLabel,
-        AppArmorProfile: ctrSpec.Process.ApparmorProfile,
-        ExecIDs:         execIDs,
-        GraphDriver:     driverData,
-        Mounts:          inspectMounts,
-        Dependencies:    c.Dependencies(),
-        IsInfra:         c.IsInfra(),
-        IsService:       c.IsService(),
+        Image:                   config.RootfsImageID,
+        ImageName:               config.RootfsImageName,
+        Namespace:               config.Namespace,
+        Rootfs:                  config.Rootfs,
+        Pod:                     config.Pod,
+        ResolvConfPath:          resolvPath,
+        HostnamePath:            hostnamePath,
+        HostsPath:               hostsPath,
+        StaticDir:               config.StaticDir,
+        OCIRuntime:              config.OCIRuntime,
+        ConmonPidFile:           config.ConmonPidFile,
+        PidFile:                 config.PidFile,
+        Name:                    config.Name,
+        RestartCount:            int32(runtimeInfo.RestartCount),
+        Driver:                  driverData.Name,
+        MountLabel:              config.MountLabel,
+        ProcessLabel:            config.ProcessLabel,
+        AppArmorProfile:         ctrSpec.Process.ApparmorProfile,
+        ExecIDs:                 execIDs,
+        GraphDriver:             driverData,
+        Mounts:                  inspectMounts,
+        Dependencies:            c.Dependencies(),
+        IsInfra:                 c.IsInfra(),
+        IsService:               c.IsService(),
+        KubeExitCodePropagation: config.KubeExitCodePropagation.String(),
     }
     if config.RootfsImageID != "" { // May not be set if the container was created with --rootfs
diff --git a/libpod/define/container_inspect.go b/libpod/define/container_inspect.go
index 750ddf1aeb..0309a8dde0 100644
--- a/libpod/define/container_inspect.go
+++ b/libpod/define/container_inspect.go
@@ -654,44 +654,45 @@ type InspectNetworkSettings struct {
 // compatible with `docker inspect` JSON, but additional fields have been added
 // as required to share information not in the original output.
 type InspectContainerData struct {
-    ID              string                      `json:"Id"`
-    Created         time.Time                   `json:"Created"`
-    Path            string                      `json:"Path"`
-    Args            []string                    `json:"Args"`
-    State           *InspectContainerState      `json:"State"`
-    Image           string                      `json:"Image"`
-    ImageDigest     string                      `json:"ImageDigest"`
-    ImageName       string                      `json:"ImageName"`
-    Rootfs          string                      `json:"Rootfs"`
-    Pod             string                      `json:"Pod"`
-    ResolvConfPath  string                      `json:"ResolvConfPath"`
-    HostnamePath    string                      `json:"HostnamePath"`
-    HostsPath       string                      `json:"HostsPath"`
-    StaticDir       string                      `json:"StaticDir"`
-    OCIConfigPath   string                      `json:"OCIConfigPath,omitempty"`
-    OCIRuntime      string                      `json:"OCIRuntime,omitempty"`
-    ConmonPidFile   string                      `json:"ConmonPidFile"`
-    PidFile         string                      `json:"PidFile"`
-    Name            string                      `json:"Name"`
-    RestartCount    int32                       `json:"RestartCount"`
-    Driver          string                      `json:"Driver"`
-    MountLabel      string                      `json:"MountLabel"`
-    ProcessLabel    string                      `json:"ProcessLabel"`
-    AppArmorProfile string                      `json:"AppArmorProfile"`
-    EffectiveCaps   []string                    `json:"EffectiveCaps"`
-    BoundingCaps    []string                    `json:"BoundingCaps"`
-    ExecIDs         []string                    `json:"ExecIDs"`
-    GraphDriver     *DriverData                 `json:"GraphDriver"`
-    SizeRw          *int64                      `json:"SizeRw,omitempty"`
-    SizeRootFs      int64                       `json:"SizeRootFs,omitempty"`
-    Mounts          []InspectMount              `json:"Mounts"`
-    Dependencies    []string                    `json:"Dependencies"`
-    NetworkSettings *InspectNetworkSettings     `json:"NetworkSettings"`
-    Namespace       string                      `json:"Namespace"`
-    IsInfra         bool                        `json:"IsInfra"`
-    IsService       bool                        `json:"IsService"`
-    Config          *InspectContainerConfig     `json:"Config"`
-    HostConfig      *InspectContainerHostConfig `json:"HostConfig"`
+    ID                      string                      `json:"Id"`
+    Created                 time.Time                   `json:"Created"`
+    Path                    string                      `json:"Path"`
+    Args                    []string                    `json:"Args"`
+    State                   *InspectContainerState      `json:"State"`
+    Image                   string                      `json:"Image"`
+    ImageDigest             string                      `json:"ImageDigest"`
+    ImageName               string                      `json:"ImageName"`
+    Rootfs                  string                      `json:"Rootfs"`
+    Pod                     string                      `json:"Pod"`
+    ResolvConfPath          string                      `json:"ResolvConfPath"`
+    HostnamePath            string                      `json:"HostnamePath"`
+    HostsPath               string                      `json:"HostsPath"`
+    StaticDir               string                      `json:"StaticDir"`
+    OCIConfigPath           string                      `json:"OCIConfigPath,omitempty"`
+    OCIRuntime              string                      `json:"OCIRuntime,omitempty"`
+    ConmonPidFile           string                      `json:"ConmonPidFile"`
+    PidFile                 string                      `json:"PidFile"`
+    Name                    string                      `json:"Name"`
+    RestartCount            int32                       `json:"RestartCount"`
+    Driver                  string                      `json:"Driver"`
+    MountLabel              string                      `json:"MountLabel"`
+    ProcessLabel            string                      `json:"ProcessLabel"`
+    AppArmorProfile         string                      `json:"AppArmorProfile"`
+    EffectiveCaps           []string                    `json:"EffectiveCaps"`
+    BoundingCaps            []string                    `json:"BoundingCaps"`
+    ExecIDs                 []string                    `json:"ExecIDs"`
+    GraphDriver             *DriverData                 `json:"GraphDriver"`
+    SizeRw                  *int64                      `json:"SizeRw,omitempty"`
+    SizeRootFs              int64                       `json:"SizeRootFs,omitempty"`
+    Mounts                  []InspectMount              `json:"Mounts"`
+    Dependencies            []string                    `json:"Dependencies"`
+    NetworkSettings         *InspectNetworkSettings     `json:"NetworkSettings"`
+    Namespace               string                      `json:"Namespace"`
+    IsInfra                 bool                        `json:"IsInfra"`
+    IsService               bool                        `json:"IsService"`
+    KubeExitCodePropagation string                      `json:"KubeExitCodePropagation"`
+    Config                  *InspectContainerConfig     `json:"Config"`
+    HostConfig              *InspectContainerHostConfig `json:"HostConfig"`
 }
 
 // InspectExecSession contains information about a given exec session.
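The `.KubeExitCodePropagation` inspect field added above can be queried at runtime. For illustration, a minimal sketch (the container name is hypothetical; per the test conventions later in this patch, service containers are named after the first 12 characters of the YAML file's SHA-256):

```bash
# Print the exit-code propagation policy recorded on a kube-play service
# container; one of "none", "any", or "all".
podman container inspect --format '{{.KubeExitCodePropagation}}' 0123abcd4567-service
```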
diff --git a/libpod/define/exit_code_propagation.go b/libpod/define/exit_code_propagation.go
new file mode 100644
index 0000000000..eff69b1c0e
--- /dev/null
+++ b/libpod/define/exit_code_propagation.go
@@ -0,0 +1,54 @@
+package define
+
+import "fmt"
+
+// KubeExitCodePropagation defines an exit policy of kube workloads.
+type KubeExitCodePropagation int
+
+const (
+    // Invalid exit policy for a proper type system.
+    KubeExitCodePropagationInvalid KubeExitCodePropagation = iota
+    // Exit 0 regardless of any failed containers.
+    KubeExitCodePropagationNone
+    // Exit non-zero if all containers failed.
+    KubeExitCodePropagationAll
+    // Exit non-zero if any container failed.
+    KubeExitCodePropagationAny
+
+    // String representations.
+    strKubeECPInvalid = "invalid"
+    strKubeECPNone    = "none"
+    strKubeECPAll     = "all"
+    strKubeECPAny     = "any"
+)
+
+// Parse the specified kube exit-code propagation.  Return an error if an
+// unsupported value is specified.
+func ParseKubeExitCodePropagation(value string) (KubeExitCodePropagation, error) {
+    switch value {
+    case strKubeECPNone, "":
+        return KubeExitCodePropagationNone, nil
+    case strKubeECPAll:
+        return KubeExitCodePropagationAll, nil
+    case strKubeECPAny:
+        return KubeExitCodePropagationAny, nil
+    default:
+        return KubeExitCodePropagationInvalid, fmt.Errorf("unsupported exit-code propagation %q", value)
+    }
+}
+
+// Return the string representation of the KubeExitCodePropagation.
+func (k KubeExitCodePropagation) String() string {
+    switch k {
+    case KubeExitCodePropagationNone:
+        return strKubeECPNone
+    case KubeExitCodePropagationAll:
+        return strKubeECPAll
+    case KubeExitCodePropagationAny:
+        return strKubeECPAny
+    case KubeExitCodePropagationInvalid:
+        return strKubeECPInvalid
+    default:
+        return "unknown value"
+    }
+}
diff --git a/libpod/options.go b/libpod/options.go
index a974caeebf..54cc5b0ae6 100644
--- a/libpod/options.go
+++ b/libpod/options.go
@@ -1579,15 +1579,16 @@ func withIsInfra() CtrCreateOption {
 }
 
 // WithIsService allows us to differentiate between service containers and other containers
-// within the container config
-func WithIsService() CtrCreateOption {
+// within the container config. It also sets the exit-code propagation of the
+// service container.
+func WithIsService(ecp define.KubeExitCodePropagation) CtrCreateOption {
     return func(ctr *Container) error {
         if ctr.valid {
             return define.ErrCtrFinalized
         }
 
         ctr.config.IsService = true
-
+        ctr.config.KubeExitCodePropagation = ecp
         return nil
     }
 }
diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go
index e13fb66654..563832f457 100644
--- a/libpod/runtime_ctr.go
+++ b/libpod/runtime_ctr.go
@@ -957,11 +957,11 @@ func (r *Runtime) evictContainer(ctx context.Context, idOrName string, removeVol
     }
 
     if c.IsService() {
-        canStop, err := c.canStopServiceContainer()
+        report, err := c.canStopServiceContainer()
         if err != nil {
             return id, err
         }
-        if !canStop {
+        if !report.canBeStopped {
             return id, fmt.Errorf("container %s is the service container of pod(s) %s and cannot be removed without removing the pod(s)", c.ID(), strings.Join(c.state.Service.Pods, ","))
         }
     }
diff --git a/libpod/service.go b/libpod/service.go
index 976ccd88bf..a4b66eae9f 100644
--- a/libpod/service.go
+++ b/libpod/service.go
@@ -7,6 +7,7 @@ import (
 
     "github.com/containers/podman/v4/libpod/define"
     "github.com/sirupsen/logrus"
+    "golang.org/x/sys/unix"
 )
 
 // A service consists of one or more pods.  The service container is started
@@ -59,17 +60,29 @@ func (c *Container) IsService() bool {
     return c.config.IsService
 }
 
+// serviceContainerReport bundles information when checking whether a service
+// container can be stopped.
+type serviceContainerReport struct {
+    // Indicates whether the service container can be stopped or not.
+    canBeStopped bool
+    // Number of all known containers below the service container.
+    numContainers int
+    // Number of containers below the service container that exited
+    // non-zero.
+    failedContainers int
+}
+
 // canStopServiceContainerLocked checks whether all pods of the service are stopped.
 // Note that the method acquires the container lock.
-func (c *Container) canStopServiceContainerLocked() (bool, error) {
+func (c *Container) canStopServiceContainerLocked() (*serviceContainerReport, error) {
     c.lock.Lock()
     defer c.lock.Unlock()
     if err := c.syncContainer(); err != nil {
-        return false, err
+        return nil, err
     }
 
     if !c.IsService() {
-        return false, fmt.Errorf("internal error: checking service: container %s is not a service container", c.ID())
+        return nil, fmt.Errorf("internal error: checking service: container %s is not a service container", c.ID())
     }
 
     return c.canStopServiceContainer()
@@ -77,14 +90,15 @@ func (c *Container) canStopServiceContainerLocked() (bool, error) {
 
 // canStopServiceContainer checks whether all pods of the service are stopped.
 // Note that the method expects the container to be locked.
-func (c *Container) canStopServiceContainer() (bool, error) {
+func (c *Container) canStopServiceContainer() (*serviceContainerReport, error) {
+    report := serviceContainerReport{canBeStopped: true}
     for _, id := range c.state.Service.Pods {
         pod, err := c.runtime.LookupPod(id)
         if err != nil {
             if errors.Is(err, define.ErrNoSuchPod) {
                 continue
             }
-            return false, err
+            return nil, err
         }
 
         status, err := pod.GetPodStatus()
@@ -92,19 +106,37 @@ func (c *Container) canStopServiceContainer() (bool, error) {
             if errors.Is(err, define.ErrNoSuchPod) {
                 continue
             }
-            return false, err
+            return nil, err
         }
 
-        // We can only stop the service if all pods are done.
         switch status {
         case define.PodStateStopped, define.PodStateExited, define.PodStateErrored:
-            continue
+            podCtrs, err := c.runtime.state.PodContainers(pod)
+            if err != nil {
+                return nil, err
+            }
+            for _, pc := range podCtrs {
+                if pc.IsInfra() {
+                    continue // ignore infra containers
+                }
+                exitCode, err := c.runtime.state.GetContainerExitCode(pc.ID())
+                if err != nil {
+                    return nil, err
+                }
+                if exitCode != 0 {
+                    report.failedContainers++
+                }
+                report.numContainers++
+            }
         default:
-            return false, nil
+            // Service container cannot be stopped, so we can
+            // return early.
+            report.canBeStopped = false
+            return &report, nil
         }
     }
 
-    return true, nil
+    return &report, nil
 }
 
 // Checks whether the service container can be stopped and does so.
@@ -125,21 +157,49 @@ func (p *Pod) maybeStopServiceContainer() error {
     // pod->container->servicePods hierarchy.
     p.runtime.queueWork(func() {
         logrus.Debugf("Pod %s has a service %s: checking if it can be stopped", p.ID(), serviceCtr.ID())
-        canStop, err := serviceCtr.canStopServiceContainerLocked()
+        report, err := serviceCtr.canStopServiceContainerLocked()
         if err != nil {
             logrus.Errorf("Checking whether service of container %s can be stopped: %v", serviceCtr.ID(), err)
             return
         }
-        if !canStop {
+        if !report.canBeStopped {
             return
         }
-        logrus.Debugf("Stopping service container %s", serviceCtr.ID())
-        if err := serviceCtr.Stop(); err != nil && !errors.Is(err, define.ErrCtrStopped) {
-            // Log this in debug mode so that we don't print out an error and confuse the user
-            // when the service container can't be stopped because it is in created state
-            // This can happen when an error happens during kube play and we are trying to
-            // clean up after the error.
-            logrus.Debugf("Error stopping service container %s: %v", serviceCtr.ID(), err)
+
+        // Now either kill or stop the service container, depending on the configured exit policy.
+        stop := func() {
+            // Note that the service container runs catatonit which
+            // will exit gracefully on SIGINT.
+            logrus.Debugf("Stopping service container %s", serviceCtr.ID())
+            if err := serviceCtr.Kill(uint(unix.SIGINT)); err != nil && !errors.Is(err, define.ErrCtrStateInvalid) {
+                logrus.Debugf("Error stopping service container %s: %v", serviceCtr.ID(), err)
+            }
+        }
+
+        kill := func() {
+            logrus.Debugf("Killing service container %s", serviceCtr.ID())
+            if err := serviceCtr.Kill(uint(unix.SIGKILL)); err != nil && !errors.Is(err, define.ErrCtrStateInvalid) {
+                logrus.Debugf("Error killing service container %s: %v", serviceCtr.ID(), err)
+            }
+        }
+
+        switch serviceCtr.config.KubeExitCodePropagation {
+        case define.KubeExitCodePropagationNone:
+            stop()
+        case define.KubeExitCodePropagationAny:
+            if report.failedContainers > 0 {
+                kill()
+            } else {
+                stop()
+            }
+        case define.KubeExitCodePropagationAll:
+            if report.failedContainers == report.numContainers {
+                kill()
+            } else {
+                stop()
+            }
+        default:
+            logrus.Errorf("Internal error: cannot stop service container %s: unknown exit policy %q", serviceCtr.ID(), serviceCtr.config.KubeExitCodePropagation.String())
         }
     })
     return nil
@@ -240,9 +300,8 @@ func (p *Pod) maybeRemoveServiceContainer() error {
         if !canRemove {
             return
         }
-        timeout := uint(0)
         logrus.Debugf("Removing service container %s", serviceCtr.ID())
-        if err := p.runtime.RemoveContainer(context.Background(), serviceCtr, true, false, &timeout); err != nil {
+        if err := p.runtime.RemoveContainer(context.Background(), serviceCtr, true, false, nil); err != nil {
             if !errors.Is(err, define.ErrNoSuchCtr) {
                 logrus.Errorf("Removing service container %s: %v", serviceCtr.ID(), err)
             }
diff --git a/pkg/domain/entities/play.go b/pkg/domain/entities/play.go
index 3989f96a68..184062278e 100644
--- a/pkg/domain/entities/play.go
+++ b/pkg/domain/entities/play.go
@@ -21,6 +21,9 @@ type PlayKubeOptions struct {
     // Down indicates whether to bring contents of a yaml file "down"
     // as in stop
     Down bool
+    // ExitCodePropagation decides how the main PID of the Kube service
+    // should exit depending on the containers' exit codes.
+    ExitCodePropagation string
     // Replace indicates whether to delete and recreate a yaml file
     Replace bool
     // Do not create /etc/hosts within the pod's containers,
@@ -100,6 +103,8 @@ type PlayKubeReport struct {
     Secrets []PlaySecret
     // ServiceContainerID - ID of the service container if one is created
     ServiceContainerID string
+    // If set, exit with the specified exit code.
+    ExitCode *int32
 }
 
 type KubePlayReport = PlayKubeReport
diff --git a/pkg/domain/infra/abi/play.go b/pkg/domain/infra/abi/play.go
index 420c9ebd30..c19178b68e 100644
--- a/pkg/domain/infra/abi/play.go
+++ b/pkg/domain/infra/abi/play.go
@@ -86,7 +86,12 @@ func (ic *ContainerEngine) createServiceContainer(ctx context.Context, name stri
     if err != nil {
         return nil, fmt.Errorf("creating runtime spec for service container: %w", err)
     }
-    opts = append(opts, libpod.WithIsService())
+
+    ecp, err := define.ParseKubeExitCodePropagation(options.ExitCodePropagation)
+    if err != nil {
+        return nil, err
+    }
+    opts = append(opts, libpod.WithIsService(ecp))
 
     // Set the sd-notify mode to "ignore". Podman is responsible for
     // sending the notify messages when all containers are ready.
@@ -348,9 +353,11 @@ func (ic *ContainerEngine) PlayKube(ctx context.Context, body io.Reader, options
         if err := notifyproxy.SendMessage("", message); err != nil {
             return nil, err
         }
-        if _, err := serviceContainer.Wait(ctx); err != nil {
+        exitCode, err := serviceContainer.Wait(ctx)
+        if err != nil {
             return nil, fmt.Errorf("waiting for service container: %w", err)
         }
+        report.ExitCode = &exitCode
     }
 
     report.ServiceContainerID = serviceContainer.ID()
diff --git a/test/system/250-systemd.bats b/test/system/250-systemd.bats
index f051b2146d..8e24440228 100644
--- a/test/system/250-systemd.bats
+++ b/test/system/250-systemd.bats
@@ -459,8 +459,7 @@ $name stderr" "logs work with passthrough"
         fi
         sleep 0.5
     done
-    # The service is marked as failed as the service container exits non-zero.
-    is "$output" "failed" "systemd service transitioned to 'inactive' state: $service_name"
+    is "$output" "inactive" "systemd service transitioned to 'inactive' state: $service_name"
 
     # Now stop and start the service again.
     systemctl stop $service_name
diff --git a/test/system/252-quadlet.bats b/test/system/252-quadlet.bats
index 96b8f7b08f..e991c4480f 100644
--- a/test/system/252-quadlet.bats
+++ b/test/system/252-quadlet.bats
@@ -439,8 +439,7 @@ EOF
     run_podman container inspect --format "{{.State.Status}}" test_pod-test
     is "$output" "running" "container should be started by systemd and hence be running"
 
-    # The service is marked as failed as the service container exits non-zero.
-    service_cleanup $QUADLET_SERVICE_NAME failed
+    service_cleanup $QUADLET_SERVICE_NAME inactive
 
     run_podman rmi $(pause_image)
 }
diff --git a/test/system/260-sdnotify.bats b/test/system/260-sdnotify.bats
index fb2dc432a9..1452e68898 100644
--- a/test/system/260-sdnotify.bats
+++ b/test/system/260-sdnotify.bats
@@ -371,4 +371,80 @@ READY=1" "sdnotify sent MAINPID and READY"
     run_podman rmi $(pause_image)
 }
 
+function generate_exit_code_yaml {
+    local fname=$1
+    local cmd1=$2
+    local cmd2=$3
+    local sdnotify_policy=$4
+    echo "
+apiVersion: v1
+kind: Pod
+metadata:
+  labels:
+    app: test
+  name: test_pod
+  annotations:
+    io.containers.sdnotify: "$sdnotify_policy"
+spec:
+  restartPolicy: Never
+  containers:
+  - name: ctr1
+    image: $IMAGE
+    command:
+    - $cmd1
+  - name: ctr2
+    image: $IMAGE
+    command:
+    - $cmd2
+" > $fname
+}
+
+@test "podman kube play - exit-code propagation" {
+    fname=$PODMAN_TMPDIR/$(random_string).yaml
+
+    # Create a test matrix with the following arguments:
+    # exit-code propagation | ctr1 command | ctr2 command | service-container exit code
+    exit_tests="
+all  | true  | true  | 0
+all  | true  | false | 0
+all  | false | false | 137
+any  | true  | true  | 0
+any  | false | true  | 137
+any  | false | false | 137
+none | true  | true  | 0
+none | true  | false | 0
+none | false | false | 0
+"
+
+    # I am sorry, this is a long test as we need to run the matrix above
+    # twice.  The first run uses the default sdnotify policy of "ignore";
+    # in that case, the service container serves as the main PID of the
+    # service to have a minimal resource footprint.  The second run uses
+    # the "conmon" sdnotify policy, in which case Podman needs to serve as
+    # the main PID to act as an sdnotify proxy; there Podman will wait for
+    # the service container to exit and reflect its exit code.
+    while read exit_code_prop cmd1 cmd2 exit_code; do
+        for sdnotify_policy in ignore conmon; do
+            generate_exit_code_yaml $fname $cmd1 $cmd2 $sdnotify_policy
+            yaml_sha=$(sha256sum $fname)
+            service_container="${yaml_sha:0:12}-service"
+            podman_exit=$exit_code
+            if [[ $sdnotify_policy == "ignore" ]]; then
+                podman_exit=0
+            fi
+            run_podman $podman_exit kube play --service-exit-code-propagation="$exit_code_prop" --service-container $fname
+            run_podman container inspect --format '{{.KubeExitCodePropagation}}' $service_container
+            is "$output" "$exit_code_prop" "service container has the expected exit-code propagation set"
+            run_podman wait $service_container
+            is "$output" "$exit_code" "service container reflects expected exit code $exit_code (policy: $exit_code_prop, cmd1: $cmd1, cmd2: $cmd2)"
+            run_podman kube down $fname
+        done
+    done < <(parse_table "$exit_tests")
+
+    # A final smoke test to make sure bogus policies lead to an error
+    run_podman 125 kube play --service-exit-code-propagation=bogus --service-container $fname
+    is "$output" "Error: unsupported exit-code propagation \"bogus\"" "error on unsupported exit-code propagation"
+
+    run_podman rmi $(pause_image)
+}
 # vim: filetype=sh
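For illustration, a minimal end-to-end sketch of the propagation behavior this patch implements (the file name and image tag are made up; the expected exit codes follow the test matrix above, with 137 = 128 + SIGKILL):

```bash
# A pod with one failing container; restartPolicy Never so it exits once
# and stays exited.
cat > /tmp/fail.yaml <<EOF
apiVersion: v1
kind: Pod
metadata:
  name: demo
spec:
  restartPolicy: Never
  containers:
  - name: ctr
    image: quay.io/libpod/testimage:20221018
    command: ["false"]
EOF

# With the "any" policy, the service container is killed with SIGKILL once a
# failed container is detected, so waiting on it yields 137.  The service
# container is named after the first 12 characters of the YAML's SHA-256.
podman kube play --service-container \
    --service-exit-code-propagation=any /tmp/fail.yaml
podman wait "$(sha256sum /tmp/fail.yaml | cut -c1-12)-service"   # prints 137
podman kube down /tmp/fail.yaml
```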