podman/libpod/service.go

312 lines
8.9 KiB
Go

package libpod
import (
"context"
"errors"
"fmt"
"github.com/containers/podman/v4/libpod/define"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
// A service consists of one or more pods. The service container is started
// before all pods and is stopped when the last pod stops. The service
// container allows for tracking and managing the entire life cycle of service
// which may be started via `podman-play-kube`.
type Service struct {
// Pods running as part of the service.
Pods []string `json:"servicePods"`
}
// Indicates whether the pod is associated with a service container.
// The pod is expected to be updated and locked.
func (p *Pod) hasServiceContainer() bool {
return p.config.ServiceContainerID != ""
}
// Returns the pod's service container.
// The pod is expected to be updated and locked.
func (p *Pod) serviceContainer() (*Container, error) {
id := p.config.ServiceContainerID
if id == "" {
return nil, fmt.Errorf("pod has no service container: %w", define.ErrNoSuchCtr)
}
return p.runtime.state.Container(id)
}
// ServiceContainer returns the service container.
func (p *Pod) ServiceContainer() (*Container, error) {
p.lock.Lock()
defer p.lock.Unlock()
if err := p.updatePod(); err != nil {
return nil, err
}
return p.serviceContainer()
}
func (c *Container) addServicePodLocked(id string) error {
c.lock.Lock()
defer c.lock.Unlock()
if err := c.syncContainer(); err != nil {
return err
}
c.state.Service.Pods = append(c.state.Service.Pods, id)
return c.save()
}
// IsService returns true when the container is a "service container".
func (c *Container) IsService() bool {
return c.config.IsService
}
// serviceContainerReport bundles information when checking whether a service
// container can be stopped.
type serviceContainerReport struct {
// Indicates whether the service container can be stopped or not.
canBeStopped bool
// Number of all known containers below the service container.
numContainers int
// Number of containers below the service containers that exited
// non-zero.
failedContainers int
}
// canStopServiceContainerLocked returns true if all pods of the service are stopped.
// Note that the method acquires the container lock.
func (c *Container) canStopServiceContainerLocked() (*serviceContainerReport, error) {
c.lock.Lock()
defer c.lock.Unlock()
if err := c.syncContainer(); err != nil {
return nil, err
}
if !c.IsService() {
return nil, fmt.Errorf("internal error: checking service: container %s is not a service container", c.ID())
}
return c.canStopServiceContainer()
}
// canStopServiceContainer returns true if all pods of the service are stopped.
// Note that the method expects the container to be locked.
func (c *Container) canStopServiceContainer() (*serviceContainerReport, error) {
report := serviceContainerReport{canBeStopped: true}
for _, id := range c.state.Service.Pods {
pod, err := c.runtime.LookupPod(id)
if err != nil {
if errors.Is(err, define.ErrNoSuchPod) {
continue
}
return nil, err
}
status, err := pod.GetPodStatus()
if err != nil {
if errors.Is(err, define.ErrNoSuchPod) {
continue
}
return nil, err
}
switch status {
case define.PodStateStopped, define.PodStateExited, define.PodStateErrored:
podCtrs, err := c.runtime.state.PodContainers(pod)
if err != nil {
return nil, err
}
for _, pc := range podCtrs {
if pc.IsInfra() {
continue // ignore infra containers
}
exitCode, err := c.runtime.state.GetContainerExitCode(pc.ID())
if err != nil {
return nil, err
}
if exitCode != 0 {
report.failedContainers++
}
report.numContainers++
}
default:
// Service container cannot be stopped, so we can
// return early.
report.canBeStopped = false
return &report, nil
}
}
return &report, nil
}
// Checks whether the service container can be stopped and does so.
func (p *Pod) maybeStopServiceContainer() error {
if !p.hasServiceContainer() {
return nil
}
serviceCtr, err := p.serviceContainer()
if err != nil {
if errors.Is(err, define.ErrNoSuchCtr) {
return nil
}
return fmt.Errorf("getting pod's service container: %w", err)
}
// Checking whether the service can be stopped must be done in
// the runtime's work queue to resolve ABBA dead locks in the
// pod->container->servicePods hierarchy.
p.runtime.queueWork(func() {
logrus.Debugf("Pod %s has a service %s: checking if it can be stopped", p.ID(), serviceCtr.ID())
report, err := serviceCtr.canStopServiceContainerLocked()
if err != nil {
logrus.Errorf("Checking whether service of container %s can be stopped: %v", serviceCtr.ID(), err)
return
}
if !report.canBeStopped {
return
}
// Now either kill or stop the service container, depending on the configured exit policy.
stop := func() {
// Note that the service container runs catatonit which
// will exit gracefully on SIGINT.
logrus.Debugf("Stopping service container %s", serviceCtr.ID())
if err := serviceCtr.Kill(uint(unix.SIGINT)); err != nil && !errors.Is(err, define.ErrCtrStateInvalid) {
logrus.Debugf("Error stopping service container %s: %v", serviceCtr.ID(), err)
}
}
kill := func() {
logrus.Debugf("Killing service container %s", serviceCtr.ID())
if err := serviceCtr.Kill(uint(unix.SIGKILL)); err != nil && !errors.Is(err, define.ErrCtrStateInvalid) {
logrus.Debugf("Error killing service container %s: %v", serviceCtr.ID(), err)
}
}
switch serviceCtr.config.KubeExitCodePropagation {
case define.KubeExitCodePropagationNone:
stop()
case define.KubeExitCodePropagationAny:
if report.failedContainers > 0 {
kill()
} else {
stop()
}
case define.KubeExitCodePropagationAll:
if report.failedContainers == report.numContainers {
kill()
} else {
stop()
}
default:
logrus.Errorf("Internal error: cannot stop service container %s: unknown exit policy %q", serviceCtr.ID(), serviceCtr.config.KubeExitCodePropagation.String())
}
})
return nil
}
// Starts the pod's service container if it's not already running.
func (p *Pod) maybeStartServiceContainer(ctx context.Context) error {
if !p.hasServiceContainer() {
return nil
}
serviceCtr, err := p.serviceContainer()
if err != nil {
return fmt.Errorf("getting pod's service container: %w", err)
}
serviceCtr.lock.Lock()
defer serviceCtr.lock.Unlock()
if err := serviceCtr.syncContainer(); err != nil {
return err
}
if serviceCtr.state.State == define.ContainerStateRunning {
return nil
}
// Restart will reinit among other things.
return serviceCtr.restartWithTimeout(ctx, 0)
}
// canRemoveServiceContainer returns true if all pods of the service are removed.
// Note that the method acquires the container lock.
func (c *Container) canRemoveServiceContainer() (bool, error) {
if !c.IsService() {
return false, fmt.Errorf("internal error: checking service: container %s is not a service container", c.ID())
}
for _, id := range c.state.Service.Pods {
if _, err := c.runtime.LookupPod(id); err != nil {
if errors.Is(err, define.ErrNoSuchPod) {
continue
}
return false, err
}
return false, nil
}
return true, nil
}
// Checks whether the service container can be removed and does so.
// It also unlinks the pod from the service container.
func (p *Pod) maybeRemoveServiceContainer() error {
if !p.hasServiceContainer() {
return nil
}
serviceCtr, err := p.serviceContainer()
if err != nil {
if errors.Is(err, define.ErrNoSuchCtr) {
return nil
}
return fmt.Errorf("getting pod's service container: %w", err)
}
// Checking whether the service can be stopped must be done in
// the runtime's work queue to resolve ABBA dead locks in the
// pod->container->servicePods hierarchy.
p.runtime.queueWork(func() {
logrus.Debugf("Pod %s has a service %s: checking if it can be removed", p.ID(), serviceCtr.ID())
canRemove, err := func() (bool, error) { // Anonymous func for easy locking
serviceCtr.lock.Lock()
defer serviceCtr.lock.Unlock()
if err := serviceCtr.syncContainer(); err != nil {
return false, err
}
// Unlink the pod from the service container.
servicePods := make([]string, 0, len(serviceCtr.state.Service.Pods)-1)
for _, id := range serviceCtr.state.Service.Pods {
if id != p.ID() {
servicePods = append(servicePods, id)
}
}
serviceCtr.state.Service.Pods = servicePods
if err := serviceCtr.save(); err != nil {
return false, err
}
return serviceCtr.canRemoveServiceContainer()
}()
if err != nil {
if !errors.Is(err, define.ErrNoSuchCtr) {
logrus.Errorf("Checking whether service container %s can be removed: %v", serviceCtr.ID(), err)
}
return
}
if !canRemove {
return
}
logrus.Debugf("Removing service container %s", serviceCtr.ID())
if err := p.runtime.RemoveContainer(context.Background(), serviceCtr, true, false, nil); err != nil {
if !errors.Is(err, define.ErrNoSuchCtr) {
logrus.Errorf("Removing service container %s: %v", serviceCtr.ID(), err)
}
}
})
return nil
}