mirror of
				https://github.com/k3s-io/kubernetes.git
				synced 2025-10-25 01:20:18 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			1052 lines
		
	
	
		
			41 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			1052 lines
		
	
	
		
			41 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| /*
 | |
| Copyright 2016 The Kubernetes Authors.
 | |
| 
 | |
| Licensed under the Apache License, Version 2.0 (the "License");
 | |
| you may not use this file except in compliance with the License.
 | |
| You may obtain a copy of the License at
 | |
| 
 | |
|     http://www.apache.org/licenses/LICENSE-2.0
 | |
| 
 | |
| Unless required by applicable law or agreed to in writing, software
 | |
| distributed under the License is distributed on an "AS IS" BASIS,
 | |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| See the License for the specific language governing permissions and
 | |
| limitations under the License.
 | |
| */
 | |
| 
 | |
| package kuberuntime
 | |
| 
 | |
| import (
 | |
| 	"errors"
 | |
| 	"fmt"
 | |
| 	"os"
 | |
| 	goruntime "runtime"
 | |
| 	"time"
 | |
| 
 | |
| 	cadvisorapi "github.com/google/cadvisor/info/v1"
 | |
| 	"k8s.io/klog/v2"
 | |
| 
 | |
| 	v1 "k8s.io/api/core/v1"
 | |
| 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 | |
| 	kubetypes "k8s.io/apimachinery/pkg/types"
 | |
| 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
 | |
| 	utilversion "k8s.io/apimachinery/pkg/util/version"
 | |
| 	utilfeature "k8s.io/apiserver/pkg/util/feature"
 | |
| 	"k8s.io/client-go/tools/record"
 | |
| 	ref "k8s.io/client-go/tools/reference"
 | |
| 	"k8s.io/client-go/util/flowcontrol"
 | |
| 	"k8s.io/component-base/logs/logreduction"
 | |
| 	internalapi "k8s.io/cri-api/pkg/apis"
 | |
| 	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
 | |
| 	"k8s.io/kubernetes/pkg/api/legacyscheme"
 | |
| 	"k8s.io/kubernetes/pkg/credentialprovider"
 | |
| 	"k8s.io/kubernetes/pkg/credentialprovider/plugin"
 | |
| 	"k8s.io/kubernetes/pkg/features"
 | |
| 	"k8s.io/kubernetes/pkg/kubelet/cm"
 | |
| 	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
 | |
| 	"k8s.io/kubernetes/pkg/kubelet/events"
 | |
| 	"k8s.io/kubernetes/pkg/kubelet/images"
 | |
| 	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
 | |
| 	"k8s.io/kubernetes/pkg/kubelet/logs"
 | |
| 	proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results"
 | |
| 	"k8s.io/kubernetes/pkg/kubelet/runtimeclass"
 | |
| 	"k8s.io/kubernetes/pkg/kubelet/types"
 | |
| 	"k8s.io/kubernetes/pkg/kubelet/util/cache"
 | |
| 	"k8s.io/kubernetes/pkg/kubelet/util/format"
 | |
| )
 | |
| 
 | |
const (
	// kubeRuntimeAPIVersion is the api version of the kubelet runtime api. It
	// is passed to the runtime service when querying its version and compared
	// against the version the runtime reports.
	kubeRuntimeAPIVersion = "0.1.0"

	// podLogsRootDirectory is the root directory for pod logs.
	podLogsRootDirectory = "/var/log/pods"

	// minimumGracePeriodInSeconds is a minimal shutdown window for avoiding
	// unnecessary SIGKILLs.
	minimumGracePeriodInSeconds = 2

	// versionCacheTTL is the expiration time of the version cache.
	versionCacheTTL = time.Minute

	// identicalErrorDelay is how frequently identical errors are reported.
	identicalErrorDelay = time.Minute
)
 | |
| 
 | |
// ErrVersionNotSupported is returned when the api version of runtime interface is not supported
var ErrVersionNotSupported = errors.New("runtime api version is not supported")
 | |
| 
 | |
| // podStateProvider can determine if a pod is deleted ir terminated
 | |
| type podStateProvider interface {
 | |
| 	IsPodDeleted(kubetypes.UID) bool
 | |
| 	IsPodTerminated(kubetypes.UID) bool
 | |
| }
 | |
| 
 | |
| type kubeGenericRuntimeManager struct {
 | |
| 	runtimeName string
 | |
| 	recorder    record.EventRecorder
 | |
| 	osInterface kubecontainer.OSInterface
 | |
| 
 | |
| 	// machineInfo contains the machine information.
 | |
| 	machineInfo *cadvisorapi.MachineInfo
 | |
| 
 | |
| 	// Container GC manager
 | |
| 	containerGC *containerGC
 | |
| 
 | |
| 	// Keyring for pulling images
 | |
| 	keyring credentialprovider.DockerKeyring
 | |
| 
 | |
| 	// Runner of lifecycle events.
 | |
| 	runner kubecontainer.HandlerRunner
 | |
| 
 | |
| 	// RuntimeHelper that wraps kubelet to generate runtime container options.
 | |
| 	runtimeHelper kubecontainer.RuntimeHelper
 | |
| 
 | |
| 	// Health check results.
 | |
| 	livenessManager  proberesults.Manager
 | |
| 	readinessManager proberesults.Manager
 | |
| 	startupManager   proberesults.Manager
 | |
| 
 | |
| 	// If true, enforce container cpu limits with CFS quota support
 | |
| 	cpuCFSQuota bool
 | |
| 
 | |
| 	// CPUCFSQuotaPeriod sets the CPU CFS quota period value, cpu.cfs_period_us, defaults to 100ms
 | |
| 	cpuCFSQuotaPeriod metav1.Duration
 | |
| 
 | |
| 	// wrapped image puller.
 | |
| 	imagePuller images.ImageManager
 | |
| 
 | |
| 	// gRPC service clients
 | |
| 	runtimeService internalapi.RuntimeService
 | |
| 	imageService   internalapi.ImageManagerService
 | |
| 
 | |
| 	// The version cache of runtime daemon.
 | |
| 	versionCache *cache.ObjectCache
 | |
| 
 | |
| 	// The directory path for seccomp profiles.
 | |
| 	seccompProfileRoot string
 | |
| 
 | |
| 	// Internal lifecycle event handlers for container resource management.
 | |
| 	internalLifecycle cm.InternalContainerLifecycle
 | |
| 
 | |
| 	// A shim to legacy functions for backward compatibility.
 | |
| 	legacyLogProvider LegacyLogProvider
 | |
| 
 | |
| 	// Manage container logs.
 | |
| 	logManager logs.ContainerLogManager
 | |
| 
 | |
| 	// Manage RuntimeClass resources.
 | |
| 	runtimeClassManager *runtimeclass.Manager
 | |
| 
 | |
| 	// Cache last per-container error message to reduce log spam
 | |
| 	logReduction *logreduction.LogReduction
 | |
| 
 | |
| 	// PodState provider instance
 | |
| 	podStateProvider podStateProvider
 | |
| }
 | |
| 
 | |
| // KubeGenericRuntime is a interface contains interfaces for container runtime and command.
 | |
| type KubeGenericRuntime interface {
 | |
| 	kubecontainer.Runtime
 | |
| 	kubecontainer.StreamingRuntime
 | |
| 	kubecontainer.CommandRunner
 | |
| }
 | |
| 
 | |
| // LegacyLogProvider gives the ability to use unsupported docker log drivers (e.g. journald)
 | |
| type LegacyLogProvider interface {
 | |
| 	// Get the last few lines of the logs for a specific container.
 | |
| 	GetContainerLogTail(uid kubetypes.UID, name, namespace string, containerID kubecontainer.ContainerID) (string, error)
 | |
| }
 | |
| 
 | |
| // NewKubeGenericRuntimeManager creates a new kubeGenericRuntimeManager
 | |
| func NewKubeGenericRuntimeManager(
 | |
| 	recorder record.EventRecorder,
 | |
| 	livenessManager proberesults.Manager,
 | |
| 	readinessManager proberesults.Manager,
 | |
| 	startupManager proberesults.Manager,
 | |
| 	seccompProfileRoot string,
 | |
| 	machineInfo *cadvisorapi.MachineInfo,
 | |
| 	podStateProvider podStateProvider,
 | |
| 	osInterface kubecontainer.OSInterface,
 | |
| 	runtimeHelper kubecontainer.RuntimeHelper,
 | |
| 	httpClient types.HTTPGetter,
 | |
| 	imageBackOff *flowcontrol.Backoff,
 | |
| 	serializeImagePulls bool,
 | |
| 	imagePullQPS float32,
 | |
| 	imagePullBurst int,
 | |
| 	imageCredentialProviderConfigFile string,
 | |
| 	imageCredentialProviderBinDir string,
 | |
| 	cpuCFSQuota bool,
 | |
| 	cpuCFSQuotaPeriod metav1.Duration,
 | |
| 	runtimeService internalapi.RuntimeService,
 | |
| 	imageService internalapi.ImageManagerService,
 | |
| 	internalLifecycle cm.InternalContainerLifecycle,
 | |
| 	legacyLogProvider LegacyLogProvider,
 | |
| 	logManager logs.ContainerLogManager,
 | |
| 	runtimeClassManager *runtimeclass.Manager,
 | |
| ) (KubeGenericRuntime, error) {
 | |
| 	kubeRuntimeManager := &kubeGenericRuntimeManager{
 | |
| 		recorder:            recorder,
 | |
| 		cpuCFSQuota:         cpuCFSQuota,
 | |
| 		cpuCFSQuotaPeriod:   cpuCFSQuotaPeriod,
 | |
| 		seccompProfileRoot:  seccompProfileRoot,
 | |
| 		livenessManager:     livenessManager,
 | |
| 		readinessManager:    readinessManager,
 | |
| 		startupManager:      startupManager,
 | |
| 		machineInfo:         machineInfo,
 | |
| 		osInterface:         osInterface,
 | |
| 		runtimeHelper:       runtimeHelper,
 | |
| 		runtimeService:      newInstrumentedRuntimeService(runtimeService),
 | |
| 		imageService:        newInstrumentedImageManagerService(imageService),
 | |
| 		internalLifecycle:   internalLifecycle,
 | |
| 		legacyLogProvider:   legacyLogProvider,
 | |
| 		logManager:          logManager,
 | |
| 		runtimeClassManager: runtimeClassManager,
 | |
| 		logReduction:        logreduction.NewLogReduction(identicalErrorDelay),
 | |
| 	}
 | |
| 
 | |
| 	typedVersion, err := kubeRuntimeManager.getTypedVersion()
 | |
| 	if err != nil {
 | |
| 		klog.ErrorS(err, "Get runtime version failed")
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	// Only matching kubeRuntimeAPIVersion is supported now
 | |
| 	// TODO: Runtime API machinery is under discussion at https://github.com/kubernetes/kubernetes/issues/28642
 | |
| 	if typedVersion.Version != kubeRuntimeAPIVersion {
 | |
| 		klog.ErrorS(err, "This runtime api version is not supported",
 | |
| 			"apiVersion", typedVersion.Version,
 | |
| 			"supportedAPIVersion", kubeRuntimeAPIVersion)
 | |
| 		return nil, ErrVersionNotSupported
 | |
| 	}
 | |
| 
 | |
| 	kubeRuntimeManager.runtimeName = typedVersion.RuntimeName
 | |
| 	klog.InfoS("Container runtime initialized",
 | |
| 		"containerRuntime", typedVersion.RuntimeName,
 | |
| 		"version", typedVersion.RuntimeVersion,
 | |
| 		"apiVersion", typedVersion.RuntimeApiVersion)
 | |
| 
 | |
| 	// If the container logs directory does not exist, create it.
 | |
| 	// TODO: create podLogsRootDirectory at kubelet.go when kubelet is refactored to
 | |
| 	// new runtime interface
 | |
| 	if _, err := osInterface.Stat(podLogsRootDirectory); os.IsNotExist(err) {
 | |
| 		if err := osInterface.MkdirAll(podLogsRootDirectory, 0755); err != nil {
 | |
| 			klog.ErrorS(err, "Failed to create pod log directory", "path", podLogsRootDirectory)
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if !utilfeature.DefaultFeatureGate.Enabled(features.KubeletCredentialProviders) && (imageCredentialProviderConfigFile != "" || imageCredentialProviderBinDir != "") {
 | |
| 		klog.InfoS("Flags --image-credential-provider-config or --image-credential-provider-bin-dir were set but the feature gate was disabled, these flags will be ignored",
 | |
| 			"featureGate", features.KubeletCredentialProviders)
 | |
| 	}
 | |
| 
 | |
| 	if utilfeature.DefaultFeatureGate.Enabled(features.KubeletCredentialProviders) && (imageCredentialProviderConfigFile != "" || imageCredentialProviderBinDir != "") {
 | |
| 		if err := plugin.RegisterCredentialProviderPlugins(imageCredentialProviderConfigFile, imageCredentialProviderBinDir); err != nil {
 | |
| 			klog.ErrorS(err, "Failed to register CRI auth plugins")
 | |
| 			os.Exit(1)
 | |
| 		}
 | |
| 	}
 | |
| 	kubeRuntimeManager.keyring = credentialprovider.NewDockerKeyring()
 | |
| 
 | |
| 	kubeRuntimeManager.imagePuller = images.NewImageManager(
 | |
| 		kubecontainer.FilterEventRecorder(recorder),
 | |
| 		kubeRuntimeManager,
 | |
| 		imageBackOff,
 | |
| 		serializeImagePulls,
 | |
| 		imagePullQPS,
 | |
| 		imagePullBurst)
 | |
| 	kubeRuntimeManager.runner = lifecycle.NewHandlerRunner(httpClient, kubeRuntimeManager, kubeRuntimeManager)
 | |
| 	kubeRuntimeManager.containerGC = newContainerGC(runtimeService, podStateProvider, kubeRuntimeManager)
 | |
| 	kubeRuntimeManager.podStateProvider = podStateProvider
 | |
| 
 | |
| 	kubeRuntimeManager.versionCache = cache.NewObjectCache(
 | |
| 		func() (interface{}, error) {
 | |
| 			return kubeRuntimeManager.getTypedVersion()
 | |
| 		},
 | |
| 		versionCacheTTL,
 | |
| 	)
 | |
| 
 | |
| 	return kubeRuntimeManager, nil
 | |
| }
 | |
| 
 | |
| // Type returns the type of the container runtime.
 | |
| func (m *kubeGenericRuntimeManager) Type() string {
 | |
| 	return m.runtimeName
 | |
| }
 | |
| 
 | |
| // SupportsSingleFileMapping returns whether the container runtime supports single file mappings or not.
 | |
| // It is supported on Windows only if the container runtime is containerd.
 | |
| func (m *kubeGenericRuntimeManager) SupportsSingleFileMapping() bool {
 | |
| 	switch goruntime.GOOS {
 | |
| 	case "windows":
 | |
| 		return m.Type() != types.DockerContainerRuntime
 | |
| 	default:
 | |
| 		return true
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func newRuntimeVersion(version string) (*utilversion.Version, error) {
 | |
| 	if ver, err := utilversion.ParseSemantic(version); err == nil {
 | |
| 		return ver, err
 | |
| 	}
 | |
| 	return utilversion.ParseGeneric(version)
 | |
| }
 | |
| 
 | |
| func (m *kubeGenericRuntimeManager) getTypedVersion() (*runtimeapi.VersionResponse, error) {
 | |
| 	typedVersion, err := m.runtimeService.Version(kubeRuntimeAPIVersion)
 | |
| 	if err != nil {
 | |
| 		return nil, fmt.Errorf("get remote runtime typed version failed: %v", err)
 | |
| 	}
 | |
| 	return typedVersion, nil
 | |
| }
 | |
| 
 | |
| // Version returns the version information of the container runtime.
 | |
| func (m *kubeGenericRuntimeManager) Version() (kubecontainer.Version, error) {
 | |
| 	typedVersion, err := m.getTypedVersion()
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	return newRuntimeVersion(typedVersion.RuntimeVersion)
 | |
| }
 | |
| 
 | |
| // APIVersion returns the cached API version information of the container
 | |
| // runtime. Implementation is expected to update this cache periodically.
 | |
| // This may be different from the runtime engine's version.
 | |
| func (m *kubeGenericRuntimeManager) APIVersion() (kubecontainer.Version, error) {
 | |
| 	versionObject, err := m.versionCache.Get(m.machineInfo.MachineID)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 	typedVersion := versionObject.(*runtimeapi.VersionResponse)
 | |
| 
 | |
| 	return newRuntimeVersion(typedVersion.RuntimeApiVersion)
 | |
| }
 | |
| 
 | |
| // Status returns the status of the runtime. An error is returned if the Status
 | |
| // function itself fails, nil otherwise.
 | |
| func (m *kubeGenericRuntimeManager) Status() (*kubecontainer.RuntimeStatus, error) {
 | |
| 	status, err := m.runtimeService.Status()
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 	return toKubeRuntimeStatus(status), nil
 | |
| }
 | |
| 
 | |
| // GetPods returns a list of containers grouped by pods. The boolean parameter
 | |
| // specifies whether the runtime returns all containers including those already
 | |
| // exited and dead containers (used for garbage collection).
 | |
| func (m *kubeGenericRuntimeManager) GetPods(all bool) ([]*kubecontainer.Pod, error) {
 | |
| 	pods := make(map[kubetypes.UID]*kubecontainer.Pod)
 | |
| 	sandboxes, err := m.getKubeletSandboxes(all)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 	for i := range sandboxes {
 | |
| 		s := sandboxes[i]
 | |
| 		if s.Metadata == nil {
 | |
| 			klog.V(4).InfoS("Sandbox does not have metadata", "sandbox", s)
 | |
| 			continue
 | |
| 		}
 | |
| 		podUID := kubetypes.UID(s.Metadata.Uid)
 | |
| 		if _, ok := pods[podUID]; !ok {
 | |
| 			pods[podUID] = &kubecontainer.Pod{
 | |
| 				ID:        podUID,
 | |
| 				Name:      s.Metadata.Name,
 | |
| 				Namespace: s.Metadata.Namespace,
 | |
| 			}
 | |
| 		}
 | |
| 		p := pods[podUID]
 | |
| 		converted, err := m.sandboxToKubeContainer(s)
 | |
| 		if err != nil {
 | |
| 			klog.V(4).InfoS("Convert sandbox of pod failed", "runtimeName", m.runtimeName, "sandbox", s, "podUID", podUID, "err", err)
 | |
| 			continue
 | |
| 		}
 | |
| 		p.Sandboxes = append(p.Sandboxes, converted)
 | |
| 	}
 | |
| 
 | |
| 	containers, err := m.getKubeletContainers(all)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 	for i := range containers {
 | |
| 		c := containers[i]
 | |
| 		if c.Metadata == nil {
 | |
| 			klog.V(4).InfoS("Container does not have metadata", "container", c)
 | |
| 			continue
 | |
| 		}
 | |
| 
 | |
| 		labelledInfo := getContainerInfoFromLabels(c.Labels)
 | |
| 		pod, found := pods[labelledInfo.PodUID]
 | |
| 		if !found {
 | |
| 			pod = &kubecontainer.Pod{
 | |
| 				ID:        labelledInfo.PodUID,
 | |
| 				Name:      labelledInfo.PodName,
 | |
| 				Namespace: labelledInfo.PodNamespace,
 | |
| 			}
 | |
| 			pods[labelledInfo.PodUID] = pod
 | |
| 		}
 | |
| 
 | |
| 		converted, err := m.toKubeContainer(c)
 | |
| 		if err != nil {
 | |
| 			klog.V(4).InfoS("Convert container of pod failed", "runtimeName", m.runtimeName, "container", c, "podUID", labelledInfo.PodUID, "err", err)
 | |
| 			continue
 | |
| 		}
 | |
| 
 | |
| 		pod.Containers = append(pod.Containers, converted)
 | |
| 	}
 | |
| 
 | |
| 	// Convert map to list.
 | |
| 	var result []*kubecontainer.Pod
 | |
| 	for _, pod := range pods {
 | |
| 		result = append(result, pod)
 | |
| 	}
 | |
| 
 | |
| 	return result, nil
 | |
| }
 | |
| 
 | |
// containerKillReason explains what killed a given container
type containerKillReason string

// Known reasons for killing a container.
const (
	// reasonStartupProbe marks a kill caused by a failed startup probe.
	reasonStartupProbe containerKillReason = "StartupProbe"
	// reasonLivenessProbe marks a kill caused by a failed liveness probe.
	reasonLivenessProbe containerKillReason = "LivenessProbe"
	// reasonFailedPostStartHook marks a kill caused by a failed post-start hook.
	reasonFailedPostStartHook containerKillReason = "FailedPostStartHook"
	// reasonUnknown marks a kill of a container whose state is unknown.
	reasonUnknown containerKillReason = "Unknown"
)
 | |
| 
 | |
| // containerToKillInfo contains necessary information to kill a container.
 | |
| type containerToKillInfo struct {
 | |
| 	// The spec of the container.
 | |
| 	container *v1.Container
 | |
| 	// The name of the container.
 | |
| 	name string
 | |
| 	// The message indicates why the container will be killed.
 | |
| 	message string
 | |
| 	// The reason is a clearer source of info on why a container will be killed
 | |
| 	// TODO: replace message with reason?
 | |
| 	reason containerKillReason
 | |
| }
 | |
| 
 | |
| // podActions keeps information what to do for a pod.
 | |
| type podActions struct {
 | |
| 	// Stop all running (regular, init and ephemeral) containers and the sandbox for the pod.
 | |
| 	KillPod bool
 | |
| 	// Whether need to create a new sandbox. If needed to kill pod and create
 | |
| 	// a new pod sandbox, all init containers need to be purged (i.e., removed).
 | |
| 	CreateSandbox bool
 | |
| 	// The id of existing sandbox. It is used for starting containers in ContainersToStart.
 | |
| 	SandboxID string
 | |
| 	// The attempt number of creating sandboxes for the pod.
 | |
| 	Attempt uint32
 | |
| 
 | |
| 	// The next init container to start.
 | |
| 	NextInitContainerToStart *v1.Container
 | |
| 	// ContainersToStart keeps a list of indexes for the containers to start,
 | |
| 	// where the index is the index of the specific container in the pod spec (
 | |
| 	// pod.Spec.Containers.
 | |
| 	ContainersToStart []int
 | |
| 	// ContainersToKill keeps a map of containers that need to be killed, note that
 | |
| 	// the key is the container ID of the container, while
 | |
| 	// the value contains necessary information to kill a container.
 | |
| 	ContainersToKill map[kubecontainer.ContainerID]containerToKillInfo
 | |
| 	// EphemeralContainersToStart is a list of indexes for the ephemeral containers to start,
 | |
| 	// where the index is the index of the specific container in pod.Spec.EphemeralContainers.
 | |
| 	EphemeralContainersToStart []int
 | |
| }
 | |
| 
 | |
| // podSandboxChanged checks whether the spec of the pod is changed and returns
 | |
| // (changed, new attempt, original sandboxID if exist).
 | |
| func (m *kubeGenericRuntimeManager) podSandboxChanged(pod *v1.Pod, podStatus *kubecontainer.PodStatus) (bool, uint32, string) {
 | |
| 	if len(podStatus.SandboxStatuses) == 0 {
 | |
| 		klog.V(2).InfoS("No sandbox for pod can be found. Need to start a new one", "pod", klog.KObj(pod))
 | |
| 		return true, 0, ""
 | |
| 	}
 | |
| 
 | |
| 	readySandboxCount := 0
 | |
| 	for _, s := range podStatus.SandboxStatuses {
 | |
| 		if s.State == runtimeapi.PodSandboxState_SANDBOX_READY {
 | |
| 			readySandboxCount++
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// Needs to create a new sandbox when readySandboxCount > 1 or the ready sandbox is not the latest one.
 | |
| 	sandboxStatus := podStatus.SandboxStatuses[0]
 | |
| 	if readySandboxCount > 1 {
 | |
| 		klog.V(2).InfoS("Multiple sandboxes are ready for Pod. Need to reconcile them", "pod", klog.KObj(pod))
 | |
| 
 | |
| 		return true, sandboxStatus.Metadata.Attempt + 1, sandboxStatus.Id
 | |
| 	}
 | |
| 	if sandboxStatus.State != runtimeapi.PodSandboxState_SANDBOX_READY {
 | |
| 		klog.V(2).InfoS("No ready sandbox for pod can be found. Need to start a new one", "pod", klog.KObj(pod))
 | |
| 		return true, sandboxStatus.Metadata.Attempt + 1, sandboxStatus.Id
 | |
| 	}
 | |
| 
 | |
| 	// Needs to create a new sandbox when network namespace changed.
 | |
| 	if sandboxStatus.GetLinux().GetNamespaces().GetOptions().GetNetwork() != networkNamespaceForPod(pod) {
 | |
| 		klog.V(2).InfoS("Sandbox for pod has changed. Need to start a new one", "pod", klog.KObj(pod))
 | |
| 		return true, sandboxStatus.Metadata.Attempt + 1, ""
 | |
| 	}
 | |
| 
 | |
| 	// Needs to create a new sandbox when the sandbox does not have an IP address.
 | |
| 	if !kubecontainer.IsHostNetworkPod(pod) && sandboxStatus.Network.Ip == "" {
 | |
| 		klog.V(2).InfoS("Sandbox for pod has no IP address. Need to start a new one", "pod", klog.KObj(pod))
 | |
| 		return true, sandboxStatus.Metadata.Attempt + 1, sandboxStatus.Id
 | |
| 	}
 | |
| 
 | |
| 	return false, sandboxStatus.Metadata.Attempt, sandboxStatus.Id
 | |
| }
 | |
| 
 | |
| func containerChanged(container *v1.Container, containerStatus *kubecontainer.Status) (uint64, uint64, bool) {
 | |
| 	expectedHash := kubecontainer.HashContainer(container)
 | |
| 	return expectedHash, containerStatus.Hash, containerStatus.Hash != expectedHash
 | |
| }
 | |
| 
 | |
| func shouldRestartOnFailure(pod *v1.Pod) bool {
 | |
| 	return pod.Spec.RestartPolicy != v1.RestartPolicyNever
 | |
| }
 | |
| 
 | |
| func containerSucceeded(c *v1.Container, podStatus *kubecontainer.PodStatus) bool {
 | |
| 	cStatus := podStatus.FindContainerStatusByName(c.Name)
 | |
| 	if cStatus == nil || cStatus.State == kubecontainer.ContainerStateRunning {
 | |
| 		return false
 | |
| 	}
 | |
| 	return cStatus.ExitCode == 0
 | |
| }
 | |
| 
 | |
| // computePodActions checks whether the pod spec has changed and returns the changes if true.
 | |
| func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *kubecontainer.PodStatus) podActions {
 | |
| 	klog.V(5).InfoS("Syncing Pod", "pod", klog.KObj(pod))
 | |
| 
 | |
| 	createPodSandbox, attempt, sandboxID := m.podSandboxChanged(pod, podStatus)
 | |
| 	changes := podActions{
 | |
| 		KillPod:           createPodSandbox,
 | |
| 		CreateSandbox:     createPodSandbox,
 | |
| 		SandboxID:         sandboxID,
 | |
| 		Attempt:           attempt,
 | |
| 		ContainersToStart: []int{},
 | |
| 		ContainersToKill:  make(map[kubecontainer.ContainerID]containerToKillInfo),
 | |
| 	}
 | |
| 
 | |
| 	// If we need to (re-)create the pod sandbox, everything will need to be
 | |
| 	// killed and recreated, and init containers should be purged.
 | |
| 	if createPodSandbox {
 | |
| 		if !shouldRestartOnFailure(pod) && attempt != 0 && len(podStatus.ContainerStatuses) != 0 {
 | |
| 			// Should not restart the pod, just return.
 | |
| 			// we should not create a sandbox for a pod if it is already done.
 | |
| 			// if all containers are done and should not be started, there is no need to create a new sandbox.
 | |
| 			// this stops confusing logs on pods whose containers all have exit codes, but we recreate a sandbox before terminating it.
 | |
| 			//
 | |
| 			// If ContainerStatuses is empty, we assume that we've never
 | |
| 			// successfully created any containers. In this case, we should
 | |
| 			// retry creating the sandbox.
 | |
| 			changes.CreateSandbox = false
 | |
| 			return changes
 | |
| 		}
 | |
| 
 | |
| 		// Get the containers to start, excluding the ones that succeeded if RestartPolicy is OnFailure.
 | |
| 		var containersToStart []int
 | |
| 		for idx, c := range pod.Spec.Containers {
 | |
| 			if pod.Spec.RestartPolicy == v1.RestartPolicyOnFailure && containerSucceeded(&c, podStatus) {
 | |
| 				continue
 | |
| 			}
 | |
| 			containersToStart = append(containersToStart, idx)
 | |
| 		}
 | |
| 		// We should not create a sandbox for a Pod if initialization is done and there is no container to start.
 | |
| 		if len(containersToStart) == 0 {
 | |
| 			_, _, done := findNextInitContainerToRun(pod, podStatus)
 | |
| 			if done {
 | |
| 				changes.CreateSandbox = false
 | |
| 				return changes
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		if len(pod.Spec.InitContainers) != 0 {
 | |
| 			// Pod has init containers, return the first one.
 | |
| 			changes.NextInitContainerToStart = &pod.Spec.InitContainers[0]
 | |
| 			return changes
 | |
| 		}
 | |
| 		changes.ContainersToStart = containersToStart
 | |
| 		return changes
 | |
| 	}
 | |
| 
 | |
| 	// Ephemeral containers may be started even if initialization is not yet complete.
 | |
| 	if utilfeature.DefaultFeatureGate.Enabled(features.EphemeralContainers) {
 | |
| 		for i := range pod.Spec.EphemeralContainers {
 | |
| 			c := (*v1.Container)(&pod.Spec.EphemeralContainers[i].EphemeralContainerCommon)
 | |
| 
 | |
| 			// Ephemeral Containers are never restarted
 | |
| 			if podStatus.FindContainerStatusByName(c.Name) == nil {
 | |
| 				changes.EphemeralContainersToStart = append(changes.EphemeralContainersToStart, i)
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// Check initialization progress.
 | |
| 	initLastStatus, next, done := findNextInitContainerToRun(pod, podStatus)
 | |
| 	if !done {
 | |
| 		if next != nil {
 | |
| 			initFailed := initLastStatus != nil && isInitContainerFailed(initLastStatus)
 | |
| 			if initFailed && !shouldRestartOnFailure(pod) {
 | |
| 				changes.KillPod = true
 | |
| 			} else {
 | |
| 				// Always try to stop containers in unknown state first.
 | |
| 				if initLastStatus != nil && initLastStatus.State == kubecontainer.ContainerStateUnknown {
 | |
| 					changes.ContainersToKill[initLastStatus.ID] = containerToKillInfo{
 | |
| 						name:      next.Name,
 | |
| 						container: next,
 | |
| 						message: fmt.Sprintf("Init container is in %q state, try killing it before restart",
 | |
| 							initLastStatus.State),
 | |
| 						reason: reasonUnknown,
 | |
| 					}
 | |
| 				}
 | |
| 				changes.NextInitContainerToStart = next
 | |
| 			}
 | |
| 		}
 | |
| 		// Initialization failed or still in progress. Skip inspecting non-init
 | |
| 		// containers.
 | |
| 		return changes
 | |
| 	}
 | |
| 
 | |
| 	// Number of running containers to keep.
 | |
| 	keepCount := 0
 | |
| 	// check the status of containers.
 | |
| 	for idx, container := range pod.Spec.Containers {
 | |
| 		containerStatus := podStatus.FindContainerStatusByName(container.Name)
 | |
| 
 | |
| 		// Call internal container post-stop lifecycle hook for any non-running container so that any
 | |
| 		// allocated cpus are released immediately. If the container is restarted, cpus will be re-allocated
 | |
| 		// to it.
 | |
| 		if containerStatus != nil && containerStatus.State != kubecontainer.ContainerStateRunning {
 | |
| 			if err := m.internalLifecycle.PostStopContainer(containerStatus.ID.ID); err != nil {
 | |
| 				klog.ErrorS(err, "Internal container post-stop lifecycle hook failed for container in pod with error",
 | |
| 					"containerName", container.Name, "pod", klog.KObj(pod))
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		// If container does not exist, or is not running, check whether we
 | |
| 		// need to restart it.
 | |
| 		if containerStatus == nil || containerStatus.State != kubecontainer.ContainerStateRunning {
 | |
| 			if kubecontainer.ShouldContainerBeRestarted(&container, pod, podStatus) {
 | |
| 				klog.V(3).InfoS("Container of pod is not in the desired state and shall be started", "containerName", container.Name, "pod", klog.KObj(pod))
 | |
| 				changes.ContainersToStart = append(changes.ContainersToStart, idx)
 | |
| 				if containerStatus != nil && containerStatus.State == kubecontainer.ContainerStateUnknown {
 | |
| 					// If container is in unknown state, we don't know whether it
 | |
| 					// is actually running or not, always try killing it before
 | |
| 					// restart to avoid having 2 running instances of the same container.
 | |
| 					changes.ContainersToKill[containerStatus.ID] = containerToKillInfo{
 | |
| 						name:      containerStatus.Name,
 | |
| 						container: &pod.Spec.Containers[idx],
 | |
| 						message: fmt.Sprintf("Container is in %q state, try killing it before restart",
 | |
| 							containerStatus.State),
 | |
| 						reason: reasonUnknown,
 | |
| 					}
 | |
| 				}
 | |
| 			}
 | |
| 			continue
 | |
| 		}
 | |
| 		// The container is running, but kill the container if any of the following condition is met.
 | |
| 		var message string
 | |
| 		var reason containerKillReason
 | |
| 		restart := shouldRestartOnFailure(pod)
 | |
| 		if _, _, changed := containerChanged(&container, containerStatus); changed {
 | |
| 			message = fmt.Sprintf("Container %s definition changed", container.Name)
 | |
| 			// Restart regardless of the restart policy because the container
 | |
| 			// spec changed.
 | |
| 			restart = true
 | |
| 		} else if liveness, found := m.livenessManager.Get(containerStatus.ID); found && liveness == proberesults.Failure {
 | |
| 			// If the container failed the liveness probe, we should kill it.
 | |
| 			message = fmt.Sprintf("Container %s failed liveness probe", container.Name)
 | |
| 			reason = reasonLivenessProbe
 | |
| 		} else if startup, found := m.startupManager.Get(containerStatus.ID); found && startup == proberesults.Failure {
 | |
| 			// If the container failed the startup probe, we should kill it.
 | |
| 			message = fmt.Sprintf("Container %s failed startup probe", container.Name)
 | |
| 			reason = reasonStartupProbe
 | |
| 		} else {
 | |
| 			// Keep the container.
 | |
| 			keepCount++
 | |
| 			continue
 | |
| 		}
 | |
| 
 | |
| 		// We need to kill the container, but if we also want to restart the
 | |
| 		// container afterwards, make the intent clear in the message. Also do
 | |
| 		// not kill the entire pod since we expect container to be running eventually.
 | |
| 		if restart {
 | |
| 			message = fmt.Sprintf("%s, will be restarted", message)
 | |
| 			changes.ContainersToStart = append(changes.ContainersToStart, idx)
 | |
| 		}
 | |
| 
 | |
| 		changes.ContainersToKill[containerStatus.ID] = containerToKillInfo{
 | |
| 			name:      containerStatus.Name,
 | |
| 			container: &pod.Spec.Containers[idx],
 | |
| 			message:   message,
 | |
| 			reason:    reason,
 | |
| 		}
 | |
| 		klog.V(2).InfoS("Message for Container of pod", "containerName", container.Name, "containerStatusID", containerStatus.ID, "pod", klog.KObj(pod), "containerMessage", message)
 | |
| 	}
 | |
| 
 | |
| 	if keepCount == 0 && len(changes.ContainersToStart) == 0 {
 | |
| 		changes.KillPod = true
 | |
| 	}
 | |
| 
 | |
| 	return changes
 | |
| }
 | |
| 
 | |
| // SyncPod syncs the running pod into the desired pod by executing following steps:
 | |
| //
 | |
| //  1. Compute sandbox and container changes.
 | |
| //  2. Kill pod sandbox if necessary.
 | |
| //  3. Kill any containers that should not be running.
 | |
| //  4. Create sandbox if necessary.
 | |
| //  5. Create ephemeral containers.
 | |
| //  6. Create init containers.
 | |
| //  7. Create normal containers.
 | |
| func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontainer.PodStatus, pullSecrets []v1.Secret, backOff *flowcontrol.Backoff) (result kubecontainer.PodSyncResult) {
 | |
| 	// Step 1: Compute sandbox and container changes.
 | |
| 	podContainerChanges := m.computePodActions(pod, podStatus)
 | |
| 	klog.V(3).InfoS("computePodActions got for pod", "podActions", podContainerChanges, "pod", klog.KObj(pod))
 | |
| 	if podContainerChanges.CreateSandbox {
 | |
| 		ref, err := ref.GetReference(legacyscheme.Scheme, pod)
 | |
| 		if err != nil {
 | |
| 			klog.ErrorS(err, "Couldn't make a ref to pod", "pod", klog.KObj(pod))
 | |
| 		}
 | |
| 		if podContainerChanges.SandboxID != "" {
 | |
| 			m.recorder.Eventf(ref, v1.EventTypeNormal, events.SandboxChanged, "Pod sandbox changed, it will be killed and re-created.")
 | |
| 		} else {
 | |
| 			klog.V(4).InfoS("SyncPod received new pod, will create a sandbox for it", "pod", klog.KObj(pod))
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// Step 2: Kill the pod if the sandbox has changed.
 | |
| 	if podContainerChanges.KillPod {
 | |
| 		if podContainerChanges.CreateSandbox {
 | |
| 			klog.V(4).InfoS("Stopping PodSandbox for pod, will start new one", "pod", klog.KObj(pod))
 | |
| 		} else {
 | |
| 			klog.V(4).InfoS("Stopping PodSandbox for pod, because all other containers are dead", "pod", klog.KObj(pod))
 | |
| 		}
 | |
| 
 | |
| 		killResult := m.killPodWithSyncResult(pod, kubecontainer.ConvertPodStatusToRunningPod(m.runtimeName, podStatus), nil)
 | |
| 		result.AddPodSyncResult(killResult)
 | |
| 		if killResult.Error() != nil {
 | |
| 			klog.ErrorS(killResult.Error(), "killPodWithSyncResult failed")
 | |
| 			return
 | |
| 		}
 | |
| 
 | |
| 		if podContainerChanges.CreateSandbox {
 | |
| 			m.purgeInitContainers(pod, podStatus)
 | |
| 		}
 | |
| 	} else {
 | |
| 		// Step 3: kill any running containers in this pod which are not to keep.
 | |
| 		for containerID, containerInfo := range podContainerChanges.ContainersToKill {
 | |
| 			klog.V(3).InfoS("Killing unwanted container for pod", "containerName", containerInfo.name, "containerID", containerID, "pod", klog.KObj(pod))
 | |
| 			killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, containerInfo.name)
 | |
| 			result.AddSyncResult(killContainerResult)
 | |
| 			if err := m.killContainer(pod, containerID, containerInfo.name, containerInfo.message, containerInfo.reason, nil); err != nil {
 | |
| 				killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error())
 | |
| 				klog.ErrorS(err, "killContainer for pod failed", "containerName", containerInfo.name, "containerID", containerID, "pod", klog.KObj(pod))
 | |
| 				return
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// Keep terminated init containers fairly aggressively controlled
 | |
| 	// This is an optimization because container removals are typically handled
 | |
| 	// by container garbage collector.
 | |
| 	m.pruneInitContainersBeforeStart(pod, podStatus)
 | |
| 
 | |
| 	// We pass the value of the PRIMARY podIP and list of podIPs down to
 | |
| 	// generatePodSandboxConfig and generateContainerConfig, which in turn
 | |
| 	// passes it to various other functions, in order to facilitate functionality
 | |
| 	// that requires this value (hosts file and downward API) and avoid races determining
 | |
| 	// the pod IP in cases where a container requires restart but the
 | |
| 	// podIP isn't in the status manager yet. The list of podIPs is used to
 | |
| 	// generate the hosts file.
 | |
| 	//
 | |
| 	// We default to the IPs in the passed-in pod status, and overwrite them if the
 | |
| 	// sandbox needs to be (re)started.
 | |
| 	var podIPs []string
 | |
| 	if podStatus != nil {
 | |
| 		podIPs = podStatus.IPs
 | |
| 	}
 | |
| 
 | |
| 	// Step 4: Create a sandbox for the pod if necessary.
 | |
| 	podSandboxID := podContainerChanges.SandboxID
 | |
| 	if podContainerChanges.CreateSandbox {
 | |
| 		var msg string
 | |
| 		var err error
 | |
| 
 | |
| 		klog.V(4).InfoS("Creating PodSandbox for pod", "pod", klog.KObj(pod))
 | |
| 		createSandboxResult := kubecontainer.NewSyncResult(kubecontainer.CreatePodSandbox, format.Pod(pod))
 | |
| 		result.AddSyncResult(createSandboxResult)
 | |
| 		podSandboxID, msg, err = m.createPodSandbox(pod, podContainerChanges.Attempt)
 | |
| 		if err != nil {
 | |
| 			// createPodSandbox can return an error from CNI, CSI,
 | |
| 			// or CRI if the Pod has been deleted while the POD is
 | |
| 			// being created. If the pod has been deleted then it's
 | |
| 			// not a real error.
 | |
| 			if m.podStateProvider.IsPodDeleted(pod.UID) {
 | |
| 				klog.V(4).InfoS("Pod was deleted and sandbox failed to be created", "pod", klog.KObj(pod), "podUID", pod.UID)
 | |
| 				return
 | |
| 			}
 | |
| 			createSandboxResult.Fail(kubecontainer.ErrCreatePodSandbox, msg)
 | |
| 			klog.ErrorS(err, "CreatePodSandbox for pod failed", "pod", klog.KObj(pod))
 | |
| 			ref, referr := ref.GetReference(legacyscheme.Scheme, pod)
 | |
| 			if referr != nil {
 | |
| 				klog.ErrorS(referr, "Couldn't make a ref to pod", "pod", klog.KObj(pod))
 | |
| 			}
 | |
| 			m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedCreatePodSandBox, "Failed to create pod sandbox: %v", err)
 | |
| 			return
 | |
| 		}
 | |
| 		klog.V(4).InfoS("Created PodSandbox for pod", "podSandboxID", podSandboxID, "pod", klog.KObj(pod))
 | |
| 
 | |
| 		podSandboxStatus, err := m.runtimeService.PodSandboxStatus(podSandboxID)
 | |
| 		if err != nil {
 | |
| 			ref, referr := ref.GetReference(legacyscheme.Scheme, pod)
 | |
| 			if referr != nil {
 | |
| 				klog.ErrorS(referr, "Couldn't make a ref to pod", "pod", klog.KObj(pod))
 | |
| 			}
 | |
| 			m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedStatusPodSandBox, "Unable to get pod sandbox status: %v", err)
 | |
| 			klog.ErrorS(err, "Failed to get pod sandbox status; Skipping pod", "pod", klog.KObj(pod))
 | |
| 			result.Fail(err)
 | |
| 			return
 | |
| 		}
 | |
| 
 | |
| 		// If we ever allow updating a pod from non-host-network to
 | |
| 		// host-network, we may use a stale IP.
 | |
| 		if !kubecontainer.IsHostNetworkPod(pod) {
 | |
| 			// Overwrite the podIPs passed in the pod status, since we just started the pod sandbox.
 | |
| 			podIPs = m.determinePodSandboxIPs(pod.Namespace, pod.Name, podSandboxStatus)
 | |
| 			klog.V(4).InfoS("Determined the ip for pod after sandbox changed", "IPs", podIPs, "pod", klog.KObj(pod))
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// the start containers routines depend on pod ip(as in primary pod ip)
 | |
| 	// instead of trying to figure out if we have 0 < len(podIPs)
 | |
| 	// everytime, we short circuit it here
 | |
| 	podIP := ""
 | |
| 	if len(podIPs) != 0 {
 | |
| 		podIP = podIPs[0]
 | |
| 	}
 | |
| 
 | |
| 	// Get podSandboxConfig for containers to start.
 | |
| 	configPodSandboxResult := kubecontainer.NewSyncResult(kubecontainer.ConfigPodSandbox, podSandboxID)
 | |
| 	result.AddSyncResult(configPodSandboxResult)
 | |
| 	podSandboxConfig, err := m.generatePodSandboxConfig(pod, podContainerChanges.Attempt)
 | |
| 	if err != nil {
 | |
| 		message := fmt.Sprintf("GeneratePodSandboxConfig for pod %q failed: %v", format.Pod(pod), err)
 | |
| 		klog.ErrorS(err, "GeneratePodSandboxConfig for pod failed", "pod", klog.KObj(pod))
 | |
| 		configPodSandboxResult.Fail(kubecontainer.ErrConfigPodSandbox, message)
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	// Helper containing boilerplate common to starting all types of containers.
 | |
| 	// typeName is a label used to describe this type of container in log messages,
 | |
| 	// currently: "container", "init container" or "ephemeral container"
 | |
| 	start := func(typeName string, spec *startSpec) error {
 | |
| 		startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, spec.container.Name)
 | |
| 		result.AddSyncResult(startContainerResult)
 | |
| 
 | |
| 		isInBackOff, msg, err := m.doBackOff(pod, spec.container, podStatus, backOff)
 | |
| 		if isInBackOff {
 | |
| 			startContainerResult.Fail(err, msg)
 | |
| 			klog.V(4).InfoS("Backing Off restarting container in pod", "containerType", typeName, "container", spec.container, "pod", klog.KObj(pod))
 | |
| 			return err
 | |
| 		}
 | |
| 
 | |
| 		klog.V(4).InfoS("Creating container in pod", "containerType", typeName, "container", spec.container, "pod", klog.KObj(pod))
 | |
| 		// NOTE (aramase) podIPs are populated for single stack and dual stack clusters. Send only podIPs.
 | |
| 		if msg, err := m.startContainer(podSandboxID, podSandboxConfig, spec, pod, podStatus, pullSecrets, podIP, podIPs); err != nil {
 | |
| 			startContainerResult.Fail(err, msg)
 | |
| 			// known errors that are logged in other places are logged at higher levels here to avoid
 | |
| 			// repetitive log spam
 | |
| 			switch {
 | |
| 			case err == images.ErrImagePullBackOff:
 | |
| 				klog.V(3).InfoS("Container start failed in pod", "containerType", typeName, "container", spec.container, "pod", klog.KObj(pod), "containerMessage", msg, "err", err)
 | |
| 			default:
 | |
| 				utilruntime.HandleError(fmt.Errorf("%v %+v start failed in pod %v: %v: %s", typeName, spec.container, format.Pod(pod), err, msg))
 | |
| 			}
 | |
| 			return err
 | |
| 		}
 | |
| 
 | |
| 		return nil
 | |
| 	}
 | |
| 
 | |
| 	// Step 5: start ephemeral containers
 | |
| 	// These are started "prior" to init containers to allow running ephemeral containers even when there
 | |
| 	// are errors starting an init container. In practice init containers will start first since ephemeral
 | |
| 	// containers cannot be specified on pod creation.
 | |
| 	if utilfeature.DefaultFeatureGate.Enabled(features.EphemeralContainers) {
 | |
| 		for _, idx := range podContainerChanges.EphemeralContainersToStart {
 | |
| 			start("ephemeral container", ephemeralContainerStartSpec(&pod.Spec.EphemeralContainers[idx]))
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// Step 6: start the init container.
 | |
| 	if container := podContainerChanges.NextInitContainerToStart; container != nil {
 | |
| 		// Start the next init container.
 | |
| 		if err := start("init container", containerStartSpec(container)); err != nil {
 | |
| 			return
 | |
| 		}
 | |
| 
 | |
| 		// Successfully started the container; clear the entry in the failure
 | |
| 		klog.V(4).InfoS("Completed init container for pod", "containerName", container.Name, "pod", klog.KObj(pod))
 | |
| 	}
 | |
| 
 | |
| 	// Step 7: start containers in podContainerChanges.ContainersToStart.
 | |
| 	for _, idx := range podContainerChanges.ContainersToStart {
 | |
| 		start("container", containerStartSpec(&pod.Spec.Containers[idx]))
 | |
| 	}
 | |
| 
 | |
| 	return
 | |
| }
 | |
| 
 | |
| // If a container is still in backoff, the function will return a brief backoff error and
 | |
| // a detailed error message.
 | |
| func (m *kubeGenericRuntimeManager) doBackOff(pod *v1.Pod, container *v1.Container, podStatus *kubecontainer.PodStatus, backOff *flowcontrol.Backoff) (bool, string, error) {
 | |
| 	var cStatus *kubecontainer.Status
 | |
| 	for _, c := range podStatus.ContainerStatuses {
 | |
| 		if c.Name == container.Name && c.State == kubecontainer.ContainerStateExited {
 | |
| 			cStatus = c
 | |
| 			break
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if cStatus == nil {
 | |
| 		return false, "", nil
 | |
| 	}
 | |
| 
 | |
| 	klog.V(3).InfoS("Checking backoff for container in pod", "containerName", container.Name, "pod", klog.KObj(pod))
 | |
| 	// Use the finished time of the latest exited container as the start point to calculate whether to do back-off.
 | |
| 	ts := cStatus.FinishedAt
 | |
| 	// backOff requires a unique key to identify the container.
 | |
| 	key := getStableKey(pod, container)
 | |
| 	if backOff.IsInBackOffSince(key, ts) {
 | |
| 		if containerRef, err := kubecontainer.GenerateContainerRef(pod, container); err == nil {
 | |
| 			m.recorder.Eventf(containerRef, v1.EventTypeWarning, events.BackOffStartContainer, "Back-off restarting failed container")
 | |
| 		}
 | |
| 		err := fmt.Errorf("back-off %s restarting failed container=%s pod=%s", backOff.Get(key), container.Name, format.Pod(pod))
 | |
| 		klog.V(3).InfoS("Back-off restarting failed container", "err", err.Error())
 | |
| 		return true, err.Error(), kubecontainer.ErrCrashLoopBackOff
 | |
| 	}
 | |
| 
 | |
| 	backOff.Next(key, ts)
 | |
| 	return false, "", nil
 | |
| }
 | |
| 
 | |
| // KillPod kills all the containers of a pod. Pod may be nil, running pod must not be.
 | |
| // gracePeriodOverride if specified allows the caller to override the pod default grace period.
 | |
| // only hard kill paths are allowed to specify a gracePeriodOverride in the kubelet in order to not corrupt user data.
 | |
| // it is useful when doing SIGKILL for hard eviction scenarios, or max grace period during soft eviction scenarios.
 | |
| func (m *kubeGenericRuntimeManager) KillPod(pod *v1.Pod, runningPod kubecontainer.Pod, gracePeriodOverride *int64) error {
 | |
| 	err := m.killPodWithSyncResult(pod, runningPod, gracePeriodOverride)
 | |
| 	return err.Error()
 | |
| }
 | |
| 
 | |
| // killPodWithSyncResult kills a runningPod and returns SyncResult.
 | |
| // Note: The pod passed in could be *nil* when kubelet restarted.
 | |
| func (m *kubeGenericRuntimeManager) killPodWithSyncResult(pod *v1.Pod, runningPod kubecontainer.Pod, gracePeriodOverride *int64) (result kubecontainer.PodSyncResult) {
 | |
| 	killContainerResults := m.killContainersWithSyncResult(pod, runningPod, gracePeriodOverride)
 | |
| 	for _, containerResult := range killContainerResults {
 | |
| 		result.AddSyncResult(containerResult)
 | |
| 	}
 | |
| 
 | |
| 	// stop sandbox, the sandbox will be removed in GarbageCollect
 | |
| 	killSandboxResult := kubecontainer.NewSyncResult(kubecontainer.KillPodSandbox, runningPod.ID)
 | |
| 	result.AddSyncResult(killSandboxResult)
 | |
| 	// Stop all sandboxes belongs to same pod
 | |
| 	for _, podSandbox := range runningPod.Sandboxes {
 | |
| 		if err := m.runtimeService.StopPodSandbox(podSandbox.ID.ID); err != nil {
 | |
| 			killSandboxResult.Fail(kubecontainer.ErrKillPodSandbox, err.Error())
 | |
| 			klog.ErrorS(nil, "Failed to stop sandbox", "podSandboxID", podSandbox.ID)
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return
 | |
| }
 | |
| 
 | |
| // GetPodStatus retrieves the status of the pod, including the
 | |
| // information of all containers in the pod that are visible in Runtime.
 | |
| func (m *kubeGenericRuntimeManager) GetPodStatus(uid kubetypes.UID, name, namespace string) (*kubecontainer.PodStatus, error) {
 | |
| 	// Now we retain restart count of container as a container label. Each time a container
 | |
| 	// restarts, pod will read the restart count from the registered dead container, increment
 | |
| 	// it to get the new restart count, and then add a label with the new restart count on
 | |
| 	// the newly started container.
 | |
| 	// However, there are some limitations of this method:
 | |
| 	//	1. When all dead containers were garbage collected, the container status could
 | |
| 	//	not get the historical value and would be *inaccurate*. Fortunately, the chance
 | |
| 	//	is really slim.
 | |
| 	//	2. When working with old version containers which have no restart count label,
 | |
| 	//	we can only assume their restart count is 0.
 | |
| 	// Anyhow, we only promised "best-effort" restart count reporting, we can just ignore
 | |
| 	// these limitations now.
 | |
| 	// TODO: move this comment to SyncPod.
 | |
| 	podSandboxIDs, err := m.getSandboxIDByPodUID(uid, nil)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	pod := &v1.Pod{
 | |
| 		ObjectMeta: metav1.ObjectMeta{
 | |
| 			Name:      name,
 | |
| 			Namespace: namespace,
 | |
| 			UID:       uid,
 | |
| 		},
 | |
| 	}
 | |
| 
 | |
| 	podFullName := format.Pod(pod)
 | |
| 
 | |
| 	klog.V(4).InfoS("getSandboxIDByPodUID got sandbox IDs for pod", "podSandboxID", podSandboxIDs, "pod", klog.KObj(pod))
 | |
| 
 | |
| 	sandboxStatuses := make([]*runtimeapi.PodSandboxStatus, len(podSandboxIDs))
 | |
| 	podIPs := []string{}
 | |
| 	for idx, podSandboxID := range podSandboxIDs {
 | |
| 		podSandboxStatus, err := m.runtimeService.PodSandboxStatus(podSandboxID)
 | |
| 		if err != nil {
 | |
| 			klog.ErrorS(err, "PodSandboxStatus of sandbox for pod", "podSandboxID", podSandboxID, "pod", klog.KObj(pod))
 | |
| 			return nil, err
 | |
| 		}
 | |
| 		sandboxStatuses[idx] = podSandboxStatus
 | |
| 
 | |
| 		// Only get pod IP from latest sandbox
 | |
| 		if idx == 0 && podSandboxStatus.State == runtimeapi.PodSandboxState_SANDBOX_READY {
 | |
| 			podIPs = m.determinePodSandboxIPs(namespace, name, podSandboxStatus)
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// Get statuses of all containers visible in the pod.
 | |
| 	containerStatuses, err := m.getPodContainerStatuses(uid, name, namespace)
 | |
| 	if err != nil {
 | |
| 		if m.logReduction.ShouldMessageBePrinted(err.Error(), podFullName) {
 | |
| 			klog.ErrorS(err, "getPodContainerStatuses for pod failed", "pod", klog.KObj(pod))
 | |
| 		}
 | |
| 		return nil, err
 | |
| 	}
 | |
| 	m.logReduction.ClearID(podFullName)
 | |
| 
 | |
| 	return &kubecontainer.PodStatus{
 | |
| 		ID:                uid,
 | |
| 		Name:              name,
 | |
| 		Namespace:         namespace,
 | |
| 		IPs:               podIPs,
 | |
| 		SandboxStatuses:   sandboxStatuses,
 | |
| 		ContainerStatuses: containerStatuses,
 | |
| 	}, nil
 | |
| }
 | |
| 
 | |
| // GarbageCollect removes dead containers using the specified container gc policy.
 | |
| func (m *kubeGenericRuntimeManager) GarbageCollect(gcPolicy kubecontainer.GCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error {
 | |
| 	return m.containerGC.GarbageCollect(gcPolicy, allSourcesReady, evictNonDeletedPods)
 | |
| }
 | |
| 
 | |
| // UpdatePodCIDR is just a passthrough method to update the runtimeConfig of the shim
 | |
| // with the podCIDR supplied by the kubelet.
 | |
| func (m *kubeGenericRuntimeManager) UpdatePodCIDR(podCIDR string) error {
 | |
| 	// TODO(#35531): do we really want to write a method on this manager for each
 | |
| 	// field of the config?
 | |
| 	klog.InfoS("Updating runtime config through cri with podcidr", "CIDR", podCIDR)
 | |
| 	return m.runtimeService.UpdateRuntimeConfig(
 | |
| 		&runtimeapi.RuntimeConfig{
 | |
| 			NetworkConfig: &runtimeapi.NetworkConfig{
 | |
| 				PodCidr: podCIDR,
 | |
| 			},
 | |
| 		})
 | |
| }
 |