/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package kubelet

import (
	"context"
	"crypto/tls"
	"fmt"
	"math"
	"net"
	"net/http"
	"net/url"
	"os"
	"path"
	"sort"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	cadvisorapi "github.com/google/cadvisor/info/v1"
	utilexec "k8s.io/utils/exec"
	"k8s.io/utils/integer"
	"k8s.io/utils/mount"
	utilnet "k8s.io/utils/net"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/fields"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/clock"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/apimachinery/pkg/util/wait"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	clientset "k8s.io/client-go/kubernetes"
	v1core "k8s.io/client-go/kubernetes/typed/core/v1"
	corelisters "k8s.io/client-go/listers/core/v1"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/record"
	"k8s.io/client-go/util/certificate"
	"k8s.io/client-go/util/flowcontrol"
	cloudprovider "k8s.io/cloud-provider"
	internalapi "k8s.io/cri-api/pkg/apis"
	"k8s.io/klog"
	pluginwatcherapi "k8s.io/kubelet/pkg/apis/pluginregistration/v1"
	api "k8s.io/kubernetes/pkg/apis/core"
	"k8s.io/kubernetes/pkg/features"
	kubeletconfiginternal "k8s.io/kubernetes/pkg/kubelet/apis/config"
	"k8s.io/kubernetes/pkg/kubelet/apis/podresources"
	"k8s.io/kubernetes/pkg/kubelet/cadvisor"
	kubeletcertificate "k8s.io/kubernetes/pkg/kubelet/certificate"
	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
	"k8s.io/kubernetes/pkg/kubelet/cloudresource"
	"k8s.io/kubernetes/pkg/kubelet/cm"
	"k8s.io/kubernetes/pkg/kubelet/config"
	"k8s.io/kubernetes/pkg/kubelet/configmap"
	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
	"k8s.io/kubernetes/pkg/kubelet/dockershim"
	dockerremote "k8s.io/kubernetes/pkg/kubelet/dockershim/remote"
	"k8s.io/kubernetes/pkg/kubelet/events"
	"k8s.io/kubernetes/pkg/kubelet/eviction"
	"k8s.io/kubernetes/pkg/kubelet/images"
	"k8s.io/kubernetes/pkg/kubelet/kubeletconfig"
	"k8s.io/kubernetes/pkg/kubelet/kuberuntime"
	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
	"k8s.io/kubernetes/pkg/kubelet/logs"
	"k8s.io/kubernetes/pkg/kubelet/metrics"
	"k8s.io/kubernetes/pkg/kubelet/metrics/collectors"
	"k8s.io/kubernetes/pkg/kubelet/network/dns"
	"k8s.io/kubernetes/pkg/kubelet/nodelease"
	oomwatcher "k8s.io/kubernetes/pkg/kubelet/oom"
	"k8s.io/kubernetes/pkg/kubelet/pleg"
	"k8s.io/kubernetes/pkg/kubelet/pluginmanager"
	plugincache "k8s.io/kubernetes/pkg/kubelet/pluginmanager/cache"
	kubepod "k8s.io/kubernetes/pkg/kubelet/pod"
	"k8s.io/kubernetes/pkg/kubelet/preemption"
	"k8s.io/kubernetes/pkg/kubelet/prober"
	proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results"
	"k8s.io/kubernetes/pkg/kubelet/remote"
	"k8s.io/kubernetes/pkg/kubelet/runtimeclass"
	"k8s.io/kubernetes/pkg/kubelet/secret"
	"k8s.io/kubernetes/pkg/kubelet/server"
	servermetrics "k8s.io/kubernetes/pkg/kubelet/server/metrics"
	serverstats "k8s.io/kubernetes/pkg/kubelet/server/stats"
	"k8s.io/kubernetes/pkg/kubelet/server/streaming"
	"k8s.io/kubernetes/pkg/kubelet/stats"
	"k8s.io/kubernetes/pkg/kubelet/status"
	"k8s.io/kubernetes/pkg/kubelet/sysctl"
	"k8s.io/kubernetes/pkg/kubelet/token"
	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
	"k8s.io/kubernetes/pkg/kubelet/util"
	"k8s.io/kubernetes/pkg/kubelet/util/format"
	"k8s.io/kubernetes/pkg/kubelet/util/manager"
	"k8s.io/kubernetes/pkg/kubelet/util/queue"
	"k8s.io/kubernetes/pkg/kubelet/util/sliceutils"
	"k8s.io/kubernetes/pkg/kubelet/volumemanager"
	"k8s.io/kubernetes/pkg/security/apparmor"
	sysctlwhitelist "k8s.io/kubernetes/pkg/security/podsecuritypolicy/sysctl"
	utilipt "k8s.io/kubernetes/pkg/util/iptables"
	nodeutil "k8s.io/kubernetes/pkg/util/node"
	"k8s.io/kubernetes/pkg/util/oom"
	"k8s.io/kubernetes/pkg/util/selinux"
	"k8s.io/kubernetes/pkg/volume"
	"k8s.io/kubernetes/pkg/volume/csi"
	"k8s.io/kubernetes/pkg/volume/util/hostutil"
	"k8s.io/kubernetes/pkg/volume/util/subpath"
	"k8s.io/kubernetes/pkg/volume/util/volumepathhandler"
)

const (
	// Max amount of time to wait for the container runtime to come up.
	maxWaitForContainerRuntime = 30 * time.Second

	// nodeStatusUpdateRetry specifies how many times kubelet retries when posting the node status fails.
	nodeStatusUpdateRetry = 5

	// ContainerLogsDir is the location of container logs.
	ContainerLogsDir = "/var/log/containers"

	// MaxContainerBackOff is the max backoff period, exported for the e2e test
	MaxContainerBackOff = 300 * time.Second

	// Capacity of the channel for storing pods to kill. A small number should
	// suffice because a goroutine is dedicated to check the channel and does
	// not block on anything else.
	podKillingChannelCapacity = 50

	// Period for performing global cleanup tasks.
	housekeepingPeriod = time.Second * 2

	// Period for performing eviction monitoring.
	// TODO ensure this is in sync with internal cadvisor housekeeping.
	evictionMonitoringPeriod = time.Second * 10

	// The path in containers' filesystems where the hosts file is mounted.
	etcHostsPath = "/etc/hosts"

	// Capacity of the channel for receiving pod lifecycle events. This number
	// is a bit arbitrary and may be adjusted in the future.
	plegChannelCapacity = 1000

	// Generic PLEG relies on relisting for discovering container events.
	// A longer period means that kubelet will take longer to detect container
	// changes and to update pod status. On the other hand, a shorter period
	// will cause more frequent relisting (e.g., container runtime operations),
	// leading to higher cpu usage.
	// Note that even though we set the period to 1s, the relisting itself can
	// take more than 1s to finish if the container runtime responds slowly
	// and/or when there are many container changes in one cycle.
	plegRelistPeriod = time.Second * 1

	// backOffPeriod is the period to back off when pod syncing results in an
	// error. It is also used as the base period for the exponential backoff
	// of container restarts and image pulls.
	backOffPeriod = time.Second * 10

	// ContainerGCPeriod is the period for performing container garbage collection.
	ContainerGCPeriod = time.Minute
	// ImageGCPeriod is the period for performing image garbage collection.
	ImageGCPeriod = 5 * time.Minute

	// Minimum number of dead containers to keep in a pod.
	minDeadContainerInPod = 1
)

// SyncHandler is an interface implemented by Kubelet, for testability
type SyncHandler interface {
	HandlePodAdditions(pods []*v1.Pod)
	HandlePodUpdates(pods []*v1.Pod)
	HandlePodRemoves(pods []*v1.Pod)
	HandlePodReconcile(pods []*v1.Pod)
	HandlePodSyncs(pods []*v1.Pod)
	HandlePodCleanups() error
}

// Option is a functional option type for Kubelet
type Option func(*Kubelet)

// Bootstrap is a bootstrapping interface for kubelet, targets the initialization protocol
type Bootstrap interface {
	GetConfiguration() kubeletconfiginternal.KubeletConfiguration
	BirthCry()
	StartGarbageCollection()
	ListenAndServe(address net.IP, port uint, tlsOptions *server.TLSOptions, auth server.AuthInterface, enableCAdvisorJSONEndpoints, enableDebuggingHandlers, enableContentionProfiling bool)
	ListenAndServeReadOnly(address net.IP, port uint, enableCAdvisorJSONEndpoints bool)
	ListenAndServePodResources()
	Run(<-chan kubetypes.PodUpdate)
	RunOnce(<-chan kubetypes.PodUpdate) ([]RunPodResult, error)
}

// Builder creates and initializes a Kubelet instance
type Builder func(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
	kubeDeps *Dependencies,
	crOptions *config.ContainerRuntimeOptions,
	containerRuntime string,
	runtimeCgroups string,
	hostnameOverride string,
	nodeIP string,
	providerID string,
	cloudProvider string,
	certDirectory string,
	rootDirectory string,
	registerNode bool,
	registerWithTaints []api.Taint,
	allowedUnsafeSysctls []string,
	remoteRuntimeEndpoint string,
	remoteImageEndpoint string,
	experimentalMounterPath string,
	experimentalKernelMemcgNotification bool,
	experimentalCheckNodeCapabilitiesBeforeMount bool,
	experimentalNodeAllocatableIgnoreEvictionThreshold bool,
	minimumGCAge metav1.Duration,
	maxPerPodContainerCount int32,
	maxContainerCount int32,
	masterServiceNamespace string,
	registerSchedulable bool,
	nonMasqueradeCIDR string,
	keepTerminatedPodVolumes bool,
	nodeLabels map[string]string,
	seccompProfileRoot string,
	bootstrapCheckpointPath string,
	nodeStatusMaxImages int32) (Bootstrap, error)

// Dependencies is a bin for things we might consider "injected dependencies" -- objects constructed
// at runtime that are necessary for running the Kubelet. This is a temporary solution for grouping
// these objects while we figure out a more comprehensive dependency injection story for the Kubelet.
type Dependencies struct {
	Options []Option

	// Injected Dependencies
	Auth                    server.AuthInterface
	CAdvisorInterface       cadvisor.Interface
	Cloud                   cloudprovider.Interface
	ContainerManager        cm.ContainerManager
	DockerClientConfig      *dockershim.ClientConfig
	EventClient             v1core.EventsGetter
	HeartbeatClient         clientset.Interface
	OnHeartbeatFailure      func()
	KubeClient              clientset.Interface
	Mounter                 mount.Interface
	HostUtil                hostutil.HostUtils
	OOMAdjuster             *oom.OOMAdjuster
	OSInterface             kubecontainer.OSInterface
	PodConfig               *config.PodConfig
	Recorder                record.EventRecorder
	Subpather               subpath.Interface
	VolumePlugins           []volume.VolumePlugin
	DynamicPluginProber     volume.DynamicPluginProber
	TLSOptions              *server.TLSOptions
	KubeletConfigController *kubeletconfig.Controller
	RemoteRuntimeService    internalapi.RuntimeService
	RemoteImageService      internalapi.ImageManagerService
	criHandler              http.Handler
	dockerLegacyService     dockershim.DockerLegacyService
	// Remove this once cadvisor.UsingLegacyCadvisorStats is dropped.
	useLegacyCadvisorStats bool
}

// makePodSourceConfig creates a config.PodConfig from the given
// KubeletConfiguration or returns an error.
func makePodSourceConfig(kubeCfg *kubeletconfiginternal.KubeletConfiguration, kubeDeps *Dependencies, nodeName types.NodeName, bootstrapCheckpointPath string) (*config.PodConfig, error) {
	manifestURLHeader := make(http.Header)
	if len(kubeCfg.StaticPodURLHeader) > 0 {
		for k, v := range kubeCfg.StaticPodURLHeader {
			for i := range v {
				manifestURLHeader.Add(k, v[i])
			}
		}
	}

	// source of all configuration
	cfg := config.NewPodConfig(config.PodConfigNotificationIncremental, kubeDeps.Recorder)

	// define file config source
	if kubeCfg.StaticPodPath != "" {
		klog.Infof("Adding pod path: %v", kubeCfg.StaticPodPath)
		config.NewSourceFile(kubeCfg.StaticPodPath, nodeName, kubeCfg.FileCheckFrequency.Duration, cfg.Channel(kubetypes.FileSource))
	}

	// define url config source
	if kubeCfg.StaticPodURL != "" {
		klog.Infof("Adding pod url %q with HTTP header %v", kubeCfg.StaticPodURL, manifestURLHeader)
		config.NewSourceURL(kubeCfg.StaticPodURL, manifestURLHeader, nodeName, kubeCfg.HTTPCheckFrequency.Duration, cfg.Channel(kubetypes.HTTPSource))
	}

	// Restore from the checkpoint path
	// NOTE: This MUST happen before creating the apiserver source
	// below, or the checkpoint would override the source of truth.

	var updatechannel chan<- interface{}
	if bootstrapCheckpointPath != "" {
		klog.Infof("Adding checkpoint path: %v", bootstrapCheckpointPath)
		updatechannel = cfg.Channel(kubetypes.ApiserverSource)
		err := cfg.Restore(bootstrapCheckpointPath, updatechannel)
		if err != nil {
			return nil, err
		}
	}

	if kubeDeps.KubeClient != nil {
		klog.Infof("Watching apiserver")
		if updatechannel == nil {
			updatechannel = cfg.Channel(kubetypes.ApiserverSource)
		}
		config.NewSourceApiserver(kubeDeps.KubeClient, nodeName, updatechannel)
	}
	return cfg, nil
}

// PreInitRuntimeService will init runtime service before RunKubelet.
func PreInitRuntimeService(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
	kubeDeps *Dependencies,
	crOptions *config.ContainerRuntimeOptions,
	containerRuntime string,
	runtimeCgroups string,
	remoteRuntimeEndpoint string,
	remoteImageEndpoint string,
	nonMasqueradeCIDR string) error {
	if remoteRuntimeEndpoint != "" {
		// remoteImageEndpoint is same as remoteRuntimeEndpoint if not explicitly specified
		if remoteImageEndpoint == "" {
			remoteImageEndpoint = remoteRuntimeEndpoint
		}
	}

	switch containerRuntime {
	case kubetypes.DockerContainerRuntime:
		// TODO: These need to become arguments to a standalone docker shim.
		pluginSettings := dockershim.NetworkPluginSettings{
			HairpinMode:        kubeletconfiginternal.HairpinMode(kubeCfg.HairpinMode),
			NonMasqueradeCIDR:  nonMasqueradeCIDR,
			PluginName:         crOptions.NetworkPluginName,
			PluginConfDir:      crOptions.CNIConfDir,
			PluginBinDirString: crOptions.CNIBinDir,
			PluginCacheDir:     crOptions.CNICacheDir,
			MTU:                int(crOptions.NetworkPluginMTU),
		}

		// Create and start the CRI shim running as a grpc server.
		streamingConfig := getStreamingConfig(kubeCfg, kubeDeps, crOptions)
		ds, err := dockershim.NewDockerService(kubeDeps.DockerClientConfig, crOptions.PodSandboxImage, streamingConfig,
			&pluginSettings, runtimeCgroups, kubeCfg.CgroupDriver, crOptions.DockershimRootDirectory, !crOptions.RedirectContainerStreaming)
		if err != nil {
			return err
		}
		if crOptions.RedirectContainerStreaming {
			kubeDeps.criHandler = ds
		}

		// The unix socket is used for kubelet <-> dockershim communication; dockershim must be
		// started before the runtime service is initialized.
		klog.V(5).Infof("RemoteRuntimeEndpoint: %q, RemoteImageEndpoint: %q",
			remoteRuntimeEndpoint,
			remoteImageEndpoint)
		klog.V(2).Infof("Starting the GRPC server for the docker CRI shim.")
		dockerServer := dockerremote.NewDockerServer(remoteRuntimeEndpoint, ds)
		if err := dockerServer.Start(); err != nil {
			return err
		}

		// Create dockerLegacyService when the logging driver is not supported.
		supported, err := ds.IsCRISupportedLogDriver()
		if err != nil {
			return err
		}
		if !supported {
			kubeDeps.dockerLegacyService = ds
		}
	case kubetypes.RemoteContainerRuntime:
		// No-op.
		break
	default:
		return fmt.Errorf("unsupported CRI runtime: %q", containerRuntime)
	}

	var err error
	if kubeDeps.RemoteRuntimeService, err = remote.NewRemoteRuntimeService(remoteRuntimeEndpoint, kubeCfg.RuntimeRequestTimeout.Duration); err != nil {
		return err
	}
	if kubeDeps.RemoteImageService, err = remote.NewRemoteImageService(remoteImageEndpoint, kubeCfg.RuntimeRequestTimeout.Duration); err != nil {
		return err
	}

	kubeDeps.useLegacyCadvisorStats = cadvisor.UsingLegacyCadvisorStats(containerRuntime, remoteRuntimeEndpoint)

	return nil
}

// NewMainKubelet instantiates a new Kubelet object along with all the required internal modules.
// No initialization of Kubelet and its modules should happen here.
func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
	kubeDeps *Dependencies,
	crOptions *config.ContainerRuntimeOptions,
	containerRuntime string,
	hostnameOverride string,
	nodeIP string,
	providerID string,
	cloudProvider string,
	certDirectory string,
	rootDirectory string,
	registerNode bool,
	registerWithTaints []api.Taint,
	allowedUnsafeSysctls []string,
	experimentalMounterPath string,
	experimentalKernelMemcgNotification bool,
	experimentalCheckNodeCapabilitiesBeforeMount bool,
	experimentalNodeAllocatableIgnoreEvictionThreshold bool,
	minimumGCAge metav1.Duration,
	maxPerPodContainerCount int32,
	maxContainerCount int32,
	masterServiceNamespace string,
	registerSchedulable bool,
	keepTerminatedPodVolumes bool,
	nodeLabels map[string]string,
	seccompProfileRoot string,
	bootstrapCheckpointPath string,
	nodeStatusMaxImages int32) (*Kubelet, error) {
	if rootDirectory == "" {
		return nil, fmt.Errorf("invalid root directory %q", rootDirectory)
	}
	if kubeCfg.SyncFrequency.Duration <= 0 {
		return nil, fmt.Errorf("invalid sync frequency %d", kubeCfg.SyncFrequency.Duration)
	}

	if kubeCfg.MakeIPTablesUtilChains {
		if kubeCfg.IPTablesMasqueradeBit > 31 || kubeCfg.IPTablesMasqueradeBit < 0 {
			return nil, fmt.Errorf("iptables-masquerade-bit is not valid. Must be within [0, 31]")
		}
		if kubeCfg.IPTablesDropBit > 31 || kubeCfg.IPTablesDropBit < 0 {
			return nil, fmt.Errorf("iptables-drop-bit is not valid. Must be within [0, 31]")
		}
		if kubeCfg.IPTablesDropBit == kubeCfg.IPTablesMasqueradeBit {
			return nil, fmt.Errorf("iptables-masquerade-bit and iptables-drop-bit must be different")
		}
	}

	hostname, err := nodeutil.GetHostname(hostnameOverride)
	if err != nil {
		return nil, err
	}
	// Query the cloud provider for our node name, default to hostname
	nodeName := types.NodeName(hostname)
	if kubeDeps.Cloud != nil {
		var err error
		instances, ok := kubeDeps.Cloud.Instances()
		if !ok {
			return nil, fmt.Errorf("failed to get instances from cloud provider")
		}

		nodeName, err = instances.CurrentNodeName(context.TODO(), hostname)
		if err != nil {
			return nil, fmt.Errorf("error fetching current instance name from cloud provider: %v", err)
		}

		klog.V(2).Infof("cloud provider determined current node name to be %s", nodeName)
	}

	if kubeDeps.PodConfig == nil {
		var err error
		kubeDeps.PodConfig, err = makePodSourceConfig(kubeCfg, kubeDeps, nodeName, bootstrapCheckpointPath)
		if err != nil {
			return nil, err
		}
	}

	containerGCPolicy := kubecontainer.ContainerGCPolicy{
		MinAge:             minimumGCAge.Duration,
		MaxPerPodContainer: int(maxPerPodContainerCount),
		MaxContainers:      int(maxContainerCount),
	}

	daemonEndpoints := &v1.NodeDaemonEndpoints{
		KubeletEndpoint: v1.DaemonEndpoint{Port: kubeCfg.Port},
	}

	imageGCPolicy := images.ImageGCPolicy{
		MinAge:               kubeCfg.ImageMinimumGCAge.Duration,
		HighThresholdPercent: int(kubeCfg.ImageGCHighThresholdPercent),
		LowThresholdPercent:  int(kubeCfg.ImageGCLowThresholdPercent),
	}

	enforceNodeAllocatable := kubeCfg.EnforceNodeAllocatable
	if experimentalNodeAllocatableIgnoreEvictionThreshold {
		// Do not provide kubeCfg.EnforceNodeAllocatable to eviction threshold parsing if we are not enforcing Evictions
		enforceNodeAllocatable = []string{}
	}
	thresholds, err := eviction.ParseThresholdConfig(enforceNodeAllocatable, kubeCfg.EvictionHard, kubeCfg.EvictionSoft, kubeCfg.EvictionSoftGracePeriod, kubeCfg.EvictionMinimumReclaim)
	if err != nil {
		return nil, err
	}
	evictionConfig := eviction.Config{
		PressureTransitionPeriod: kubeCfg.EvictionPressureTransitionPeriod.Duration,
		MaxPodGracePeriodSeconds: int64(kubeCfg.EvictionMaxPodGracePeriod),
		Thresholds:               thresholds,
		KernelMemcgNotification:  experimentalKernelMemcgNotification,
		PodCgroupRoot:            kubeDeps.ContainerManager.GetPodCgroupRoot(),
	}

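	// Build local, reflector-backed caches of Services and of this kubelet's own Node object;
	// they back the serviceLister and nodeLister consumed later in this constructor.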
	serviceIndexer := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc})
	if kubeDeps.KubeClient != nil {
		serviceLW := cache.NewListWatchFromClient(kubeDeps.KubeClient.CoreV1().RESTClient(), "services", metav1.NamespaceAll, fields.Everything())
		r := cache.NewReflector(serviceLW, &v1.Service{}, serviceIndexer, 0)
		go r.Run(wait.NeverStop)
	}
	serviceLister := corelisters.NewServiceLister(serviceIndexer)

	nodeIndexer := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{})
	if kubeDeps.KubeClient != nil {
		fieldSelector := fields.Set{api.ObjectNameField: string(nodeName)}.AsSelector()
		nodeLW := cache.NewListWatchFromClient(kubeDeps.KubeClient.CoreV1().RESTClient(), "nodes", metav1.NamespaceAll, fieldSelector)
		r := cache.NewReflector(nodeLW, &v1.Node{}, nodeIndexer, 0)
		go r.Run(wait.NeverStop)
	}
	nodeLister := corelisters.NewNodeLister(nodeIndexer)

	// TODO: get the real node object of ourself,
	// and use the real node name and UID.
	// TODO: what is namespace for node?
	nodeRef := &v1.ObjectReference{
		Kind:      "Node",
		Name:      string(nodeName),
		UID:       types.UID(nodeName),
		Namespace: "",
	}

	containerRefManager := kubecontainer.NewRefManager()

	oomWatcher, err := oomwatcher.NewWatcher(kubeDeps.Recorder)
	if err != nil {
		return nil, err
	}

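	// Parse the configured cluster DNS servers; entries that are not valid IP addresses are logged and skipped.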
	clusterDNS := make([]net.IP, 0, len(kubeCfg.ClusterDNS))
	for _, ipEntry := range kubeCfg.ClusterDNS {
		ip := net.ParseIP(ipEntry)
		if ip == nil {
			klog.Warningf("Invalid clusterDNS ip '%q'", ipEntry)
		} else {
			clusterDNS = append(clusterDNS, ip)
		}
	}
	httpClient := &http.Client{}
	parsedNodeIP := net.ParseIP(nodeIP)
	protocol := utilipt.ProtocolIpv4
	if utilnet.IsIPv6(parsedNodeIP) {
		klog.V(0).Infof("IPv6 node IP (%s), assume IPv6 operation", nodeIP)
		protocol = utilipt.ProtocolIpv6
	}

	klet := &Kubelet{
		hostname:                                hostname,
		hostnameOverridden:                      len(hostnameOverride) > 0,
		nodeName:                                nodeName,
		kubeClient:                              kubeDeps.KubeClient,
		heartbeatClient:                         kubeDeps.HeartbeatClient,
		onRepeatedHeartbeatFailure:              kubeDeps.OnHeartbeatFailure,
		rootDirectory:                           rootDirectory,
		resyncInterval:                          kubeCfg.SyncFrequency.Duration,
		sourcesReady:                            config.NewSourcesReady(kubeDeps.PodConfig.SeenAllSources),
		registerNode:                            registerNode,
		registerWithTaints:                      registerWithTaints,
		registerSchedulable:                     registerSchedulable,
		dnsConfigurer:                           dns.NewConfigurer(kubeDeps.Recorder, nodeRef, parsedNodeIP, clusterDNS, kubeCfg.ClusterDomain, kubeCfg.ResolverConfig),
		serviceLister:                           serviceLister,
		nodeLister:                              nodeLister,
		masterServiceNamespace:                  masterServiceNamespace,
		streamingConnectionIdleTimeout:          kubeCfg.StreamingConnectionIdleTimeout.Duration,
		recorder:                                kubeDeps.Recorder,
		cadvisor:                                kubeDeps.CAdvisorInterface,
		cloud:                                   kubeDeps.Cloud,
		externalCloudProvider:                   cloudprovider.IsExternal(cloudProvider),
		providerID:                              providerID,
		nodeRef:                                 nodeRef,
		nodeLabels:                              nodeLabels,
		nodeStatusUpdateFrequency:               kubeCfg.NodeStatusUpdateFrequency.Duration,
		nodeStatusReportFrequency:               kubeCfg.NodeStatusReportFrequency.Duration,
		os:                                      kubeDeps.OSInterface,
		oomWatcher:                              oomWatcher,
		cgroupsPerQOS:                           kubeCfg.CgroupsPerQOS,
		cgroupRoot:                              kubeCfg.CgroupRoot,
		mounter:                                 kubeDeps.Mounter,
		hostutil:                                kubeDeps.HostUtil,
		subpather:                               kubeDeps.Subpather,
		maxPods:                                 int(kubeCfg.MaxPods),
		podsPerCore:                             int(kubeCfg.PodsPerCore),
		syncLoopMonitor:                         atomic.Value{},
		daemonEndpoints:                         daemonEndpoints,
		containerManager:                        kubeDeps.ContainerManager,
		containerRuntimeName:                    containerRuntime,
		redirectContainerStreaming:              crOptions.RedirectContainerStreaming,
		nodeIP:                                  parsedNodeIP,
		nodeIPValidator:                         validateNodeIP,
		clock:                                   clock.RealClock{},
		enableControllerAttachDetach:            kubeCfg.EnableControllerAttachDetach,
		iptClient:                               utilipt.New(utilexec.New(), protocol),
		makeIPTablesUtilChains:                  kubeCfg.MakeIPTablesUtilChains,
		iptablesMasqueradeBit:                   int(kubeCfg.IPTablesMasqueradeBit),
		iptablesDropBit:                         int(kubeCfg.IPTablesDropBit),
		experimentalHostUserNamespaceDefaulting: utilfeature.DefaultFeatureGate.Enabled(features.ExperimentalHostUserNamespaceDefaultingGate),
		keepTerminatedPodVolumes:                keepTerminatedPodVolumes,
		nodeStatusMaxImages:                     nodeStatusMaxImages,
	}

	if klet.cloud != nil {
		klet.cloudResourceSyncManager = cloudresource.NewSyncManager(klet.cloud, nodeName, klet.nodeStatusUpdateFrequency)
	}

	var secretManager secret.Manager
	var configMapManager configmap.Manager
	switch kubeCfg.ConfigMapAndSecretChangeDetectionStrategy {
	case kubeletconfiginternal.WatchChangeDetectionStrategy:
		secretManager = secret.NewWatchingSecretManager(kubeDeps.KubeClient)
		configMapManager = configmap.NewWatchingConfigMapManager(kubeDeps.KubeClient)
	case kubeletconfiginternal.TTLCacheChangeDetectionStrategy:
		secretManager = secret.NewCachingSecretManager(
			kubeDeps.KubeClient, manager.GetObjectTTLFromNodeFunc(klet.GetNode))
		configMapManager = configmap.NewCachingConfigMapManager(
			kubeDeps.KubeClient, manager.GetObjectTTLFromNodeFunc(klet.GetNode))
	case kubeletconfiginternal.GetChangeDetectionStrategy:
		secretManager = secret.NewSimpleSecretManager(kubeDeps.KubeClient)
		configMapManager = configmap.NewSimpleConfigMapManager(kubeDeps.KubeClient)
	default:
		return nil, fmt.Errorf("unknown configmap and secret manager mode: %v", kubeCfg.ConfigMapAndSecretChangeDetectionStrategy)
	}

	klet.secretManager = secretManager
	klet.configMapManager = configMapManager

	if klet.experimentalHostUserNamespaceDefaulting {
		klog.Infof("Experimental host user namespace defaulting is enabled.")
	}

	machineInfo, err := klet.cadvisor.MachineInfo()
	if err != nil {
		return nil, err
	}
	klet.machineInfo = machineInfo

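	// Shared exponential backoff used to throttle image pulls in the runtime manager created below.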
	imageBackOff := flowcontrol.NewBackOff(backOffPeriod, MaxContainerBackOff)

	klet.livenessManager = proberesults.NewManager()
	klet.startupManager = proberesults.NewManager()

	klet.podCache = kubecontainer.NewCache()
	var checkpointManager checkpointmanager.CheckpointManager
	if bootstrapCheckpointPath != "" {
		checkpointManager, err = checkpointmanager.NewCheckpointManager(bootstrapCheckpointPath)
		if err != nil {
			return nil, fmt.Errorf("failed to initialize checkpoint manager: %+v", err)
		}
	}
	// podManager is also responsible for keeping secretManager and configMapManager contents up-to-date.
	mirrorPodClient := kubepod.NewBasicMirrorClient(klet.kubeClient, string(nodeName), nodeLister)
	klet.podManager = kubepod.NewBasicPodManager(mirrorPodClient, secretManager, configMapManager, checkpointManager)

	klet.statusManager = status.NewManager(klet.kubeClient, klet.podManager, klet)

	klet.resourceAnalyzer = serverstats.NewResourceAnalyzer(klet, kubeCfg.VolumeStatsAggPeriod.Duration)

	klet.dockerLegacyService = kubeDeps.dockerLegacyService
	klet.criHandler = kubeDeps.criHandler
	klet.runtimeService = kubeDeps.RemoteRuntimeService

	if utilfeature.DefaultFeatureGate.Enabled(features.RuntimeClass) && kubeDeps.KubeClient != nil {
		klet.runtimeClassManager = runtimeclass.NewManager(kubeDeps.KubeClient)
	}

	runtime, err := kuberuntime.NewKubeGenericRuntimeManager(
		kubecontainer.FilterEventRecorder(kubeDeps.Recorder),
		klet.livenessManager,
		klet.startupManager,
		seccompProfileRoot,
		containerRefManager,
		machineInfo,
		klet,
		kubeDeps.OSInterface,
		klet,
		httpClient,
		imageBackOff,
		kubeCfg.SerializeImagePulls,
		float32(kubeCfg.RegistryPullQPS),
		int(kubeCfg.RegistryBurst),
		kubeCfg.CPUCFSQuota,
		kubeCfg.CPUCFSQuotaPeriod,
		kubeDeps.RemoteRuntimeService,
		kubeDeps.RemoteImageService,
		kubeDeps.ContainerManager.InternalContainerLifecycle(),
		kubeDeps.dockerLegacyService,
		klet.runtimeClassManager,
	)
	if err != nil {
		return nil, err
	}
	klet.containerRuntime = runtime
	klet.streamingRuntime = runtime
	klet.runner = runtime

	runtimeCache, err := kubecontainer.NewRuntimeCache(klet.containerRuntime)
	if err != nil {
		return nil, err
	}
	klet.runtimeCache = runtimeCache

	if kubeDeps.useLegacyCadvisorStats {
		klet.StatsProvider = stats.NewCadvisorStatsProvider(
			klet.cadvisor,
			klet.resourceAnalyzer,
			klet.podManager,
			klet.runtimeCache,
			klet.containerRuntime,
			klet.statusManager)
	} else {
		klet.StatsProvider = stats.NewCRIStatsProvider(
			klet.cadvisor,
			klet.resourceAnalyzer,
			klet.podManager,
			klet.runtimeCache,
			kubeDeps.RemoteRuntimeService,
			kubeDeps.RemoteImageService,
			stats.NewLogMetricsService(),
			kubecontainer.RealOS{})
	}

	klet.pleg = pleg.NewGenericPLEG(klet.containerRuntime, plegChannelCapacity, plegRelistPeriod, klet.podCache, clock.RealClock{})
	klet.runtimeState = newRuntimeState(maxWaitForContainerRuntime)
	klet.runtimeState.addHealthCheck("PLEG", klet.pleg.Healthy)
	if _, err := klet.updatePodCIDR(kubeCfg.PodCIDR); err != nil {
		klog.Errorf("Pod CIDR update failed %v", err)
	}

	// setup containerGC
	containerGC, err := kubecontainer.NewContainerGC(klet.containerRuntime, containerGCPolicy, klet.sourcesReady)
	if err != nil {
		return nil, err
	}
	klet.containerGC = containerGC
	klet.containerDeletor = newPodContainerDeletor(klet.containerRuntime, integer.IntMax(containerGCPolicy.MaxPerPodContainer, minDeadContainerInPod))

	// setup imageManager
	imageManager, err := images.NewImageGCManager(klet.containerRuntime, klet.StatsProvider, kubeDeps.Recorder, nodeRef, imageGCPolicy, crOptions.PodSandboxImage)
	if err != nil {
		return nil, fmt.Errorf("failed to initialize image manager: %v", err)
	}
	klet.imageManager = imageManager

	if containerRuntime == kubetypes.RemoteContainerRuntime && utilfeature.DefaultFeatureGate.Enabled(features.CRIContainerLogRotation) {
		// setup containerLogManager for CRI container runtime
		containerLogManager, err := logs.NewContainerLogManager(
			klet.runtimeService,
			kubeCfg.ContainerLogMaxSize,
			int(kubeCfg.ContainerLogMaxFiles),
		)
		if err != nil {
			return nil, fmt.Errorf("failed to initialize container log manager: %v", err)
		}
		klet.containerLogManager = containerLogManager
	} else {
		klet.containerLogManager = logs.NewStubContainerLogManager()
	}

	if kubeCfg.ServerTLSBootstrap && kubeDeps.TLSOptions != nil && utilfeature.DefaultFeatureGate.Enabled(features.RotateKubeletServerCertificate) {
		klet.serverCertificateManager, err = kubeletcertificate.NewKubeletServerCertificateManager(klet.kubeClient, kubeCfg, klet.nodeName, klet.getLastObservedNodeAddresses, certDirectory)
		if err != nil {
			return nil, fmt.Errorf("failed to initialize certificate manager: %v", err)
		}
		kubeDeps.TLSOptions.Config.GetCertificate = func(*tls.ClientHelloInfo) (*tls.Certificate, error) {
			cert := klet.serverCertificateManager.Current()
			if cert == nil {
				return nil, fmt.Errorf("no serving certificate available for the kubelet")
			}
			return cert, nil
		}
	}

	klet.probeManager = prober.NewManager(
		klet.statusManager,
		klet.livenessManager,
		klet.startupManager,
		klet.runner,
		containerRefManager,
		kubeDeps.Recorder)

	tokenManager := token.NewManager(kubeDeps.KubeClient)

	// NewInitializedVolumePluginMgr initializes some storageErrors on the Kubelet runtimeState (in csi_plugin.go init)
	// which affects node ready status. This function must be called before Kubelet is initialized so that the Node
	// ReadyState is accurate with the storage state.
	klet.volumePluginMgr, err =
		NewInitializedVolumePluginMgr(klet, secretManager, configMapManager, tokenManager, kubeDeps.VolumePlugins, kubeDeps.DynamicPluginProber)
	if err != nil {
		return nil, err
	}
	klet.pluginManager = pluginmanager.NewPluginManager(
		klet.getPluginsRegistrationDir(), /* sockDir */
		kubeDeps.Recorder,
	)

	// If the experimentalMounterPathFlag is set, we do not want to
	// check node capabilities since the mount path is not the default
	if len(experimentalMounterPath) != 0 {
		experimentalCheckNodeCapabilitiesBeforeMount = false
		// Replace the nameserver in containerized-mounter's rootfs/etc/resolv.conf with kubelet.ClusterDNS
		// so that service names can be resolved
		klet.dnsConfigurer.SetupDNSinContainerizedMounter(experimentalMounterPath)
	}

	// setup volumeManager
	klet.volumeManager = volumemanager.NewVolumeManager(
		kubeCfg.EnableControllerAttachDetach,
		nodeName,
		klet.podManager,
		klet.statusManager,
		klet.kubeClient,
		klet.volumePluginMgr,
		klet.containerRuntime,
		kubeDeps.Mounter,
		kubeDeps.HostUtil,
		klet.getPodsDir(),
		kubeDeps.Recorder,
		experimentalCheckNodeCapabilitiesBeforeMount,
		keepTerminatedPodVolumes,
		volumepathhandler.NewBlockVolumePathHandler())

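	// Pod workers consume per-pod sync work from the work queue; backOffPeriod is the base delay for retrying failed syncs.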
	klet.reasonCache = NewReasonCache()
	klet.workQueue = queue.NewBasicWorkQueue(klet.clock)
	klet.podWorkers = newPodWorkers(klet.syncPod, kubeDeps.Recorder, klet.workQueue, klet.resyncInterval, backOffPeriod, klet.podCache)

	klet.backOff = flowcontrol.NewBackOff(backOffPeriod, MaxContainerBackOff)
	klet.podKillingCh = make(chan *kubecontainer.PodPair, podKillingChannelCapacity)

	// setup eviction manager
	evictionManager, evictionAdmitHandler := eviction.NewManager(klet.resourceAnalyzer, evictionConfig, killPodNow(klet.podWorkers, kubeDeps.Recorder), klet.podManager.GetMirrorPodByPod, klet.imageManager, klet.containerGC, kubeDeps.Recorder, nodeRef, klet.clock)

	klet.evictionManager = evictionManager
	klet.admitHandlers.AddPodAdmitHandler(evictionAdmitHandler)

	if utilfeature.DefaultFeatureGate.Enabled(features.Sysctls) {
		// Safe, whitelisted sysctls can always be used as unsafe sysctls in the spec.
		// Hence, we concatenate those two lists.
		safeAndUnsafeSysctls := append(sysctlwhitelist.SafeSysctlWhitelist(), allowedUnsafeSysctls...)
		sysctlsWhitelist, err := sysctl.NewWhitelist(safeAndUnsafeSysctls)
		if err != nil {
			return nil, err
		}
		klet.admitHandlers.AddPodAdmitHandler(sysctlsWhitelist)
	}

	// enable active deadline handler
	activeDeadlineHandler, err := newActiveDeadlineHandler(klet.statusManager, kubeDeps.Recorder, klet.clock)
	if err != nil {
		return nil, err
	}
	klet.AddPodSyncLoopHandler(activeDeadlineHandler)
	klet.AddPodSyncHandler(activeDeadlineHandler)

	klet.admitHandlers.AddPodAdmitHandler(klet.containerManager.GetAllocateResourcesPodAdmitHandler())

	criticalPodAdmissionHandler := preemption.NewCriticalPodAdmissionHandler(klet.GetActivePods, killPodNow(klet.podWorkers, kubeDeps.Recorder), kubeDeps.Recorder)
	klet.admitHandlers.AddPodAdmitHandler(lifecycle.NewPredicateAdmitHandler(klet.getNodeAnyWay, criticalPodAdmissionHandler, klet.containerManager.UpdatePluginResources))
	// apply functional Options
	for _, opt := range kubeDeps.Options {
		opt(klet)
	}

	klet.appArmorValidator = apparmor.NewValidator(containerRuntime)
	klet.softAdmitHandlers.AddPodAdmitHandler(lifecycle.NewAppArmorAdmitHandler(klet.appArmorValidator))
	klet.softAdmitHandlers.AddPodAdmitHandler(lifecycle.NewNoNewPrivsAdmitHandler(klet.containerRuntime))
	klet.softAdmitHandlers.AddPodAdmitHandler(lifecycle.NewProcMountAdmitHandler(klet.containerRuntime))

	klet.nodeLeaseController = nodelease.NewController(klet.clock, klet.heartbeatClient, string(klet.nodeName), kubeCfg.NodeLeaseDurationSeconds, klet.onRepeatedHeartbeatFailure)

	// Finally, put the most recent version of the config on the Kubelet, so
	// people can see how it was configured.
	klet.kubeletConfiguration = *kubeCfg

	// Generating the status funcs should be the last thing we do,
	// since this relies on the rest of the Kubelet having been constructed.
	klet.setNodeStatusFuncs = klet.defaultNodeStatusFuncs()

	return klet, nil
}

| type serviceLister interface {
 | |
| 	List(labels.Selector) ([]*v1.Service, error)
 | |
| }
 | |
| 
 | |
| // Kubelet is the main kubelet implementation.
 | |
| type Kubelet struct {
 | |
| 	kubeletConfiguration kubeletconfiginternal.KubeletConfiguration
 | |
| 
 | |
| 	// hostname is the hostname the kubelet detected or was given via flag/config
 | |
| 	hostname string
 | |
| 	// hostnameOverridden indicates the hostname was overridden via flag/config
 | |
| 	hostnameOverridden bool
 | |
| 
 | |
| 	nodeName        types.NodeName
 | |
| 	runtimeCache    kubecontainer.RuntimeCache
 | |
| 	kubeClient      clientset.Interface
 | |
| 	heartbeatClient clientset.Interface
 | |
| 	iptClient       utilipt.Interface
 | |
| 	rootDirectory   string
 | |
| 
 | |
| 	lastObservedNodeAddressesMux sync.RWMutex
 | |
| 	lastObservedNodeAddresses    []v1.NodeAddress
 | |
| 
 | |
| 	// onRepeatedHeartbeatFailure is called when a heartbeat operation fails more than once. optional.
 | |
| 	onRepeatedHeartbeatFailure func()
 | |
| 
 | |
| 	// podWorkers handle syncing Pods in response to events.
 | |
| 	podWorkers PodWorkers
 | |
| 
 | |
| 	// resyncInterval is the interval between periodic full reconciliations of
 | |
| 	// pods on this node.
 | |
| 	resyncInterval time.Duration
 | |
| 
 | |
| 	// sourcesReady records the sources seen by the kubelet, it is thread-safe.
 | |
| 	sourcesReady config.SourcesReady
 | |
| 
 | |
| 	// podManager is a facade that abstracts away the various sources of pods
 | |
| 	// this Kubelet services.
 | |
| 	podManager kubepod.Manager
 | |
| 
 | |
| 	// Needed to observe and respond to situations that could impact node stability
 | |
| 	evictionManager eviction.Manager
 | |
| 
 | |
| 	// Optional, defaults to /logs/ from /var/log
 | |
| 	logServer http.Handler
 | |
| 	// Optional, defaults to simple Docker implementation
 | |
| 	runner kubecontainer.ContainerCommandRunner
 | |
| 
 | |
| 	// cAdvisor used for container information.
 | |
| 	cadvisor cadvisor.Interface
 | |
| 
 | |
| 	// Set to true to have the node register itself with the apiserver.
 | |
| 	registerNode bool
 | |
| 	// List of taints to add to a node object when the kubelet registers itself.
 | |
| 	registerWithTaints []api.Taint
 | |
| 	// Set to true to have the node register itself as schedulable.
 | |
| 	registerSchedulable bool
 | |
| 	// for internal book keeping; access only from within registerWithApiserver
 | |
| 	registrationCompleted bool
 | |
| 
 | |
| 	// dnsConfigurer is used for setting up DNS resolver configuration when launching pods.
 | |
| 	dnsConfigurer *dns.Configurer
 | |
| 
 | |
| 	// masterServiceNamespace is the namespace that the master service is exposed in.
 | |
| 	masterServiceNamespace string
 | |
| 	// serviceLister knows how to list services
 | |
| 	serviceLister serviceLister
 | |
| 	// nodeLister knows how to list nodes
 | |
| 	nodeLister corelisters.NodeLister
 | |
| 
 | |
| 	// a list of node labels to register
 | |
| 	nodeLabels map[string]string
 | |
| 
 | |
| 	// Last timestamp when runtime responded on ping.
 | |
| 	// Mutex is used to protect this value.
 | |
| 	runtimeState *runtimeState
 | |
| 
 | |
| 	// Volume plugins.
 | |
| 	volumePluginMgr *volume.VolumePluginMgr
 | |
| 
 | |
| 	// Handles container probing.
 | |
| 	probeManager prober.Manager
 | |
| 	// Manages container health check results.
 | |
| 	livenessManager proberesults.Manager
 | |
| 	startupManager  proberesults.Manager
 | |
| 
 | |
| 	// How long to keep idle streaming command execution/port forwarding
 | |
| 	// connections open before terminating them
 | |
| 	streamingConnectionIdleTimeout time.Duration
 | |
| 
 | |
| 	// The EventRecorder to use
 | |
| 	recorder record.EventRecorder
 | |
| 
 | |
| 	// Policy for handling garbage collection of dead containers.
 | |
| 	containerGC kubecontainer.ContainerGC
 | |
| 
 | |
| 	// Manager for image garbage collection.
 | |
| 	imageManager images.ImageGCManager
 | |
| 
 | |
| 	// Manager for container logs.
 | |
| 	containerLogManager logs.ContainerLogManager
 | |
| 
 | |
| 	// Secret manager.
 | |
| 	secretManager secret.Manager
 | |
| 
 | |
| 	// ConfigMap manager.
 | |
| 	configMapManager configmap.Manager
 | |
| 
 | |
| 	// Cached MachineInfo returned by cadvisor.
 | |
| 	machineInfo *cadvisorapi.MachineInfo
 | |
| 
 | |
| 	// Handles certificate rotations.
 | |
| 	serverCertificateManager certificate.Manager
 | |
| 
 | |
| 	// Syncs pods statuses with apiserver; also used as a cache of statuses.
 | |
| 	statusManager status.Manager
 | |
| 
 | |
| 	// VolumeManager runs a set of asynchronous loops that figure out which
 | |
| 	// volumes need to be attached/mounted/unmounted/detached based on the pods
 | |
| 	// scheduled on this node and makes it so.
 | |
| 	volumeManager volumemanager.VolumeManager
 | |
| 
 | |
| 	// Cloud provider interface.
 | |
| 	cloud cloudprovider.Interface
 | |
| 	// Handles requests to cloud provider with timeout
 | |
| 	cloudResourceSyncManager cloudresource.SyncManager
 | |
| 
 | |
| 	// Indicates that the node initialization happens in an external cloud controller
 | |
| 	externalCloudProvider bool
 | |
| 	// Reference to this node.
 | |
| 	nodeRef *v1.ObjectReference
 | |
| 
 | |
| 	// The name of the container runtime
 | |
| 	containerRuntimeName string
 | |
| 
 | |
| 	// redirectContainerStreaming enables container streaming redirect.
 | |
| 	redirectContainerStreaming bool
 | |
| 
 | |
| 	// Container runtime.
 | |
| 	containerRuntime kubecontainer.Runtime
 | |
| 
 | |
| 	// Streaming runtime handles container streaming.
 | |
| 	streamingRuntime kubecontainer.StreamingRuntime
 | |
| 
 | |
| 	// Container runtime service (needed by container runtime Start()).
 | |
| 	// TODO(CD): try to make this available without holding a reference in this
 | |
| 	//           struct. For example, by adding a getter to generic runtime.
 | |
| 	runtimeService internalapi.RuntimeService
 | |
| 
 | |
| 	// reasonCache caches the failure reason of the last creation of all containers, which is
 | |
| 	// used for generating ContainerStatus.
 | |
| 	reasonCache *ReasonCache
 | |
| 
 | |
| 	// nodeStatusUpdateFrequency specifies how often kubelet computes node status. If node lease
 | |
| 	// feature is not enabled, it is also the frequency that kubelet posts node status to master.
 | |
| 	// In that case, be cautious when changing the constant, it must work with nodeMonitorGracePeriod
 | |
| 	// in nodecontroller. There are several constraints:
 | |
| 	// 1. nodeMonitorGracePeriod must be N times more than nodeStatusUpdateFrequency, where
 | |
| 	//    N means number of retries allowed for kubelet to post node status. It is pointless
 | |
| 	//    to make nodeMonitorGracePeriod be less than nodeStatusUpdateFrequency, since there
 | |
| 	//    will only be fresh values from Kubelet at an interval of nodeStatusUpdateFrequency.
 | |
| 	//    The constant must be less than podEvictionTimeout.
 | |
| 	// 2. nodeStatusUpdateFrequency needs to be large enough for kubelet to generate node
 | |
| 	//    status. Kubelet may fail to update node status reliably if the value is too small,
 | |
| 	//    as it takes time to gather all necessary node information.
 | |
| 	nodeStatusUpdateFrequency time.Duration
 | |
| 
 | |
| 	// nodeStatusReportFrequency is the frequency that kubelet posts node
 | |
| 	// status to master. It is only used when node lease feature is enabled.
 | |
| 	nodeStatusReportFrequency time.Duration
 | |
| 
 | |
| 	// lastStatusReportTime is the time when node status was last reported.
 | |
| 	lastStatusReportTime time.Time
 | |
| 
 | |
| 	// syncNodeStatusMux is a lock on updating the node status, because this path is not thread-safe.
 | |
| 	// This lock is used by Kubelet.syncNodeStatus function and shouldn't be used anywhere else.
 | |
| 	syncNodeStatusMux sync.Mutex
 | |
| 
 | |
| 	// updatePodCIDRMux is a lock on updating pod CIDR, because this path is not thread-safe.
 | |
| 	// This lock is used by Kubelet.syncNodeStatus function and shouldn't be used anywhere else.
 | |
| 	updatePodCIDRMux sync.Mutex
 | |
| 
 | |
| 	// updateRuntimeMux is a lock on updating runtime, because this path is not thread-safe.
 | |
| 	// This lock is used by Kubelet.updateRuntimeUp function and shouldn't be used anywhere else.
 | |
| 	updateRuntimeMux sync.Mutex
 | |
| 
 | |
| 	// nodeLeaseController claims and renews the node lease for this Kubelet
 | |
| 	nodeLeaseController nodelease.Controller
 | |
| 
 | |
| 	// Generates pod events.
 | |
| 	pleg pleg.PodLifecycleEventGenerator
 | |
| 
 | |
| 	// Store kubecontainer.PodStatus for all pods.
 | |
| 	podCache kubecontainer.Cache
 | |
| 
 | |
| 	// os is a facade for various syscalls that need to be mocked during testing.
 | |
| 	os kubecontainer.OSInterface
 | |
| 
 | |
| 	// Watcher of out of memory events.
 | |
| 	oomWatcher oomwatcher.Watcher
 | |
| 
 | |
| 	// Monitor resource usage
 | |
| 	resourceAnalyzer serverstats.ResourceAnalyzer
 | |
| 
 | |
| 	// Whether or not we should have the QOS cgroup hierarchy for resource management
 | |
| 	cgroupsPerQOS bool
 | |
| 
 | |
| 	// If non-empty, pass this to the container runtime as the root cgroup.
 | |
| 	cgroupRoot string
 | |
| 
 | |
| 	// Mounter to use for volumes.
 | |
| 	mounter mount.Interface
 | |
| 
 | |
| 	// hostutil to interact with filesystems
 | |
| 	hostutil hostutil.HostUtils
 | |
| 
 | |
| 	// subpather to execute subpath actions
 | |
| 	subpather subpath.Interface
 | |
| 
 | |
| 	// Manager of non-Runtime containers.
 | |
| 	containerManager cm.ContainerManager
 | |
| 
 | |
| 	// Maximum Number of Pods which can be run by this Kubelet
 | |
| 	maxPods int
 | |
| 
 | |
| 	// Monitor Kubelet's sync loop
 | |
| 	syncLoopMonitor atomic.Value
 | |
| 
 | |
| 	// Container restart Backoff
 | |
| 	backOff *flowcontrol.Backoff
 | |
| 
 | |
| 	// Channel for sending pods to kill.
 | |
| 	podKillingCh chan *kubecontainer.PodPair
 | |
| 
 | |
| 	// Information about the ports which are opened by daemons on Node running this Kubelet server.
 | |
| 	daemonEndpoints *v1.NodeDaemonEndpoints
 | |
| 
 | |
| 	// A queue used to trigger pod workers.
 | |
| 	workQueue queue.WorkQueue
 | |
| 
 | |
| 	// oneTimeInitializer is used to initialize modules that are dependent on the runtime to be up.
 | |
| 	oneTimeInitializer sync.Once
 | |
| 
 | |
| 	// If non-nil, use this IP address for the node
 | |
| 	nodeIP net.IP
 | |
| 
 | |
| 	// use this function to validate the kubelet nodeIP
 | |
| 	nodeIPValidator func(net.IP) error
 | |
| 
 | |
| 	// If non-nil, this is a unique identifier for the node in an external database, eg. cloudprovider
 | |
| 	providerID string
 | |
| 
 | |
| 	// clock is an interface that provides time related functionality in a way that makes it
 | |
| 	// easy to test the code.
 | |
| 	clock clock.Clock
 | |
| 
 | |
| 	// handlers called during the tryUpdateNodeStatus cycle
 | |
| 	setNodeStatusFuncs []func(*v1.Node) error
 | |
| 
 | |
| 	lastNodeUnschedulableLock sync.Mutex
 | |
| 	// maintains Node.Spec.Unschedulable value from previous run of tryUpdateNodeStatus()
 | |
| 	lastNodeUnschedulable bool
 | |
| 
 | |
| 	// TODO: think about moving this to be centralized in PodWorkers in follow-on.
 | |
| 	// the list of handlers to call during pod admission.
 | |
| 	admitHandlers lifecycle.PodAdmitHandlers
 | |
| 
 | |
| 	// softAdmithandlers are applied to the pod after it is admitted by the Kubelet, but before it is
 | |
| 	// run. A pod rejected by a softAdmitHandler will be left in a Pending state indefinitely. If a
 | |
| 	// rejected pod should not be recreated, or the scheduler is not aware of the rejection rule, the
 | |
| 	// admission rule should be applied by a softAdmitHandler.
 | |
| 	softAdmitHandlers lifecycle.PodAdmitHandlers
 | |
| 
 | |
| 	// the list of handlers to call during pod sync loop.
 | |
| 	lifecycle.PodSyncLoopHandlers
 | |
| 
 | |
| 	// the list of handlers to call during pod sync.
 | |
| 	lifecycle.PodSyncHandlers
 | |
| 
 | |
| 	// the number of allowed pods per core
 | |
| 	podsPerCore int
 | |
| 
 | |
| 	// enableControllerAttachDetach indicates the Attach/Detach controller
 | |
| 	// should manage attachment/detachment of volumes scheduled to this node,
 | |
| 	// and disable kubelet from executing any attach/detach operations
 | |
| 	enableControllerAttachDetach bool
 | |
| 
 | |
| 	// trigger deleting containers in a pod
 | |
| 	containerDeletor *podContainerDeletor
 | |
| 
 | |
| 	// config iptables util rules
 | |
| 	makeIPTablesUtilChains bool
 | |
| 
 | |
| 	// The bit of the fwmark space to mark packets for SNAT.
 | |
| 	iptablesMasqueradeBit int
 | |
| 
 | |
| 	// The bit of the fwmark space to mark packets for dropping.
 | |
| 	iptablesDropBit int
 | |
| 
 | |
| 	// The AppArmor validator for checking whether AppArmor is supported.
 | |
| 	appArmorValidator apparmor.Validator
 | |
| 
 | |
| 	// The handler serving CRI streaming calls (exec/attach/port-forward).
 | |
| 	criHandler http.Handler
 | |
| 
 | |
| 	// experimentalHostUserNamespaceDefaulting sets userns=true when users request host namespaces (pid, ipc, net),
 | |
| 	// are using non-namespaced capabilities (mknod, sys_time, sys_module), the pod contains a privileged container,
 | |
| 	// or using host path volumes.
 | |
| 	// This should only be enabled when the container runtime is performing user remapping AND if the
 | |
| 	// experimental behavior is desired.
 | |
| 	experimentalHostUserNamespaceDefaulting bool
 | |
| 
 | |
| 	// dockerLegacyService contains some legacy methods for backward compatibility.
 | |
| 	// It should be set only when docker is using non json-file logging driver.
 | |
| 	dockerLegacyService dockershim.DockerLegacyService
 | |
| 
 | |
| 	// StatsProvider provides the node and the container stats.
 | |
| 	*stats.StatsProvider
 | |
| 
 | |
| 	// This flag, if set, instructs the kubelet to keep volumes from terminated pods mounted to the node.
 | |
| 	// This can be useful for debugging volume related issues.
 | |
| 	keepTerminatedPodVolumes bool // DEPRECATED
 | |
| 
 | |
| 	// pluginmanager runs a set of asynchronous loops that figure out which
 | |
| 	// plugins need to be registered/unregistered based on this node and makes it so.
 | |
| 	pluginManager pluginmanager.PluginManager
 | |
| 
 | |
| 	// This flag sets a maximum number of images to report in the node status.
 | |
| 	nodeStatusMaxImages int32
 | |
| 
 | |
| 	// Handles RuntimeClass objects for the Kubelet.
 | |
| 	runtimeClassManager *runtimeclass.Manager
 | |
| }
 | |
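| 
 | |
| // The clock field above exists so that time-dependent behavior can be tested.
 | |
| // A minimal sketch (illustrative only, not kubelet wiring) of swapping in a fake
 | |
| // clock from k8s.io/apimachinery/pkg/util/clock:
 | |
| //
 | |
| //	var c clock.Clock = clock.RealClock{}  // production
 | |
| //	fake := clock.NewFakeClock(time.Now()) // tests
 | |
| //	fake.Step(time.Minute)                 // advance fake time deterministically
 | |
| //	c = fake
 | |
| //	_ = c.Now()
 | |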
| 
 | |
| // setupDataDirs creates:
 | |
| // 1.  the root directory
 | |
| // 2.  the pods directory
 | |
| // 3.  the plugins directory
 | |
| // 4.  the plugins registry directory
 | |
| // 5.  the pod-resources directory
 | |
| func (kl *Kubelet) setupDataDirs() error {
 | |
| 	kl.rootDirectory = path.Clean(kl.rootDirectory)
 | |
| 	pluginRegistrationDir := kl.getPluginsRegistrationDir()
 | |
| 	pluginsDir := kl.getPluginsDir()
 | |
| 	if err := os.MkdirAll(kl.getRootDir(), 0750); err != nil {
 | |
| 		return fmt.Errorf("error creating root directory: %v", err)
 | |
| 	}
 | |
| 	if err := kl.hostutil.MakeRShared(kl.getRootDir()); err != nil {
 | |
| 		return fmt.Errorf("error configuring root directory: %v", err)
 | |
| 	}
 | |
| 	if err := os.MkdirAll(kl.getPodsDir(), 0750); err != nil {
 | |
| 		return fmt.Errorf("error creating pods directory: %v", err)
 | |
| 	}
 | |
| 	if err := os.MkdirAll(kl.getPluginsDir(), 0750); err != nil {
 | |
| 		return fmt.Errorf("error creating plugins directory: %v", err)
 | |
| 	}
 | |
| 	if err := os.MkdirAll(kl.getPluginsRegistrationDir(), 0750); err != nil {
 | |
| 		return fmt.Errorf("error creating plugins registry directory: %v", err)
 | |
| 	}
 | |
| 	if err := os.MkdirAll(kl.getPodResourcesDir(), 0750); err != nil {
 | |
| 		return fmt.Errorf("error creating podresources directory: %v", err)
 | |
| 	}
 | |
| 	if selinux.SELinuxEnabled() {
 | |
| 		err := selinux.SetFileLabel(pluginRegistrationDir, config.KubeletPluginsDirSELinuxLabel)
 | |
| 		if err != nil {
 | |
| 			klog.Warningf("Unprivileged containerized plugins might not work. Could not set selinux context on %s: %v", pluginRegistrationDir, err)
 | |
| 		}
 | |
| 		err = selinux.SetFileLabel(pluginsDir, config.KubeletPluginsDirSELinuxLabel)
 | |
| 		if err != nil {
 | |
| 			klog.Warningf("Unprivileged containerized plugins might not work. Could not set selinux context on %s: %v", pluginsDir, err)
 | |
| 		}
 | |
| 	}
 | |
| 	return nil
 | |
| }
 | |
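| 
 | |
| // For illustration only: assuming the default root directory /var/lib/kubelet,
 | |
| // the directories created above typically end up looking like the sketch below
 | |
| // (the exact names come from the getter methods used in setupDataDirs):
 | |
| //
 | |
| //	/var/lib/kubelet/                  <- getRootDir(), made rshared
 | |
| //	/var/lib/kubelet/pods/             <- getPodsDir()
 | |
| //	/var/lib/kubelet/plugins/          <- getPluginsDir()
 | |
| //	/var/lib/kubelet/plugins_registry/ <- getPluginsRegistrationDir()
 | |
| //	/var/lib/kubelet/pod-resources/    <- getPodResourcesDir()
 | |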
| 
 | |
| // StartGarbageCollection starts garbage collection threads.
 | |
| func (kl *Kubelet) StartGarbageCollection() {
 | |
| 	loggedContainerGCFailure := false
 | |
| 	go wait.Until(func() {
 | |
| 		if err := kl.containerGC.GarbageCollect(); err != nil {
 | |
| 			klog.Errorf("Container garbage collection failed: %v", err)
 | |
| 			kl.recorder.Eventf(kl.nodeRef, v1.EventTypeWarning, events.ContainerGCFailed, err.Error())
 | |
| 			loggedContainerGCFailure = true
 | |
| 		} else {
 | |
| 			var vLevel klog.Level = 4
 | |
| 			if loggedContainerGCFailure {
 | |
| 				vLevel = 1
 | |
| 				loggedContainerGCFailure = false
 | |
| 			}
 | |
| 
 | |
| 			klog.V(vLevel).Infof("Container garbage collection succeeded")
 | |
| 		}
 | |
| 	}, ContainerGCPeriod, wait.NeverStop)
 | |
| 
 | |
| 	// when the high threshold is set to 100, image GC is effectively disabled, so skip starting it
 | |
| 	if kl.kubeletConfiguration.ImageGCHighThresholdPercent == 100 {
 | |
| 		klog.V(2).Infof("ImageGCHighThresholdPercent is set to 100, disabling image GC")
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	prevImageGCFailed := false
 | |
| 	go wait.Until(func() {
 | |
| 		if err := kl.imageManager.GarbageCollect(); err != nil {
 | |
| 			if prevImageGCFailed {
 | |
| 				klog.Errorf("Image garbage collection failed multiple times in a row: %v", err)
 | |
| 				// Only create an event for repeated failures
 | |
| 				kl.recorder.Eventf(kl.nodeRef, v1.EventTypeWarning, events.ImageGCFailed, err.Error())
 | |
| 			} else {
 | |
| 				klog.Errorf("Image garbage collection failed once. Stats initialization may not have completed yet: %v", err)
 | |
| 			}
 | |
| 			prevImageGCFailed = true
 | |
| 		} else {
 | |
| 			var vLevel klog.Level = 4
 | |
| 			if prevImageGCFailed {
 | |
| 				vLevel = 1
 | |
| 				prevImageGCFailed = false
 | |
| 			}
 | |
| 
 | |
| 			klog.V(vLevel).Infof("Image garbage collection succeeded")
 | |
| 		}
 | |
| 	}, ImageGCPeriod, wait.NeverStop)
 | |
| }
 | |
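| 
 | |
| // The loops above share a small pattern: wait.Until runs the collection on a fixed
 | |
| // period, and the log verbosity is raised for the first success after a failure.
 | |
| // A standalone sketch of that pattern (doGC is a placeholder, not a kubelet function):
 | |
| //
 | |
| //	failed := false
 | |
| //	go wait.Until(func() {
 | |
| //		if err := doGC(); err != nil {
 | |
| //			klog.Errorf("GC failed: %v", err)
 | |
| //			failed = true
 | |
| //			return
 | |
| //		}
 | |
| //		v := klog.Level(4)
 | |
| //		if failed {
 | |
| //			v, failed = 1, false // surface the first success after a failure
 | |
| //		}
 | |
| //		klog.V(v).Info("GC succeeded")
 | |
| //	}, time.Minute, wait.NeverStop)
 | |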
| 
 | |
| // initializeModules will initialize internal modules that do not require the container runtime to be up.
 | |
| // Note that the modules here must not depend on modules that are not initialized here.
 | |
| func (kl *Kubelet) initializeModules() error {
 | |
| 	// Prometheus metrics.
 | |
| 	metrics.Register(
 | |
| 		kl.runtimeCache,
 | |
| 		collectors.NewVolumeStatsCollector(kl),
 | |
| 		collectors.NewLogMetricsCollector(kl.StatsProvider.ListPodStats),
 | |
| 	)
 | |
| 	metrics.SetNodeName(kl.nodeName)
 | |
| 	servermetrics.Register()
 | |
| 
 | |
| 	// Setup filesystem directories.
 | |
| 	if err := kl.setupDataDirs(); err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 
 | |
| 	// If the container logs directory does not exist, create it.
 | |
| 	if _, err := os.Stat(ContainerLogsDir); err != nil {
 | |
| 		if err := kl.os.MkdirAll(ContainerLogsDir, 0755); err != nil {
 | |
| 			klog.Errorf("Failed to create directory %q: %v", ContainerLogsDir, err)
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// Start the image manager.
 | |
| 	kl.imageManager.Start()
 | |
| 
 | |
| 	// Start the certificate manager if it was enabled.
 | |
| 	if kl.serverCertificateManager != nil {
 | |
| 		kl.serverCertificateManager.Start()
 | |
| 	}
 | |
| 
 | |
| 	// Start out of memory watcher.
 | |
| 	if err := kl.oomWatcher.Start(kl.nodeRef); err != nil {
 | |
| 		return fmt.Errorf("failed to start OOM watcher %v", err)
 | |
| 	}
 | |
| 
 | |
| 	// Start resource analyzer
 | |
| 	kl.resourceAnalyzer.Start()
 | |
| 
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // initializeRuntimeDependentModules will initialize internal modules that require the container runtime to be up.
 | |
| func (kl *Kubelet) initializeRuntimeDependentModules() {
 | |
| 	if err := kl.cadvisor.Start(); err != nil {
 | |
| 		// Fail kubelet and rely on the babysitter to retry starting kubelet.
 | |
| 		// TODO(random-liu): Add backoff logic in the babysitter
 | |
| 		klog.Fatalf("Failed to start cAdvisor %v", err)
 | |
| 	}
 | |
| 
 | |
| 	// trigger on-demand stats collection once so that we have capacity information for ephemeral storage.
 | |
| 	// ignore any errors, since if stats collection is not successful, the container manager will fail to start below.
 | |
| 	kl.StatsProvider.GetCgroupStats("/", true)
 | |
| 	// Start container manager.
 | |
| 	node, err := kl.getNodeAnyWay()
 | |
| 	if err != nil {
 | |
| 		// Fail kubelet and rely on the babysitter to retry starting kubelet.
 | |
| 		klog.Fatalf("Kubelet failed to get node info: %v", err)
 | |
| 	}
 | |
| 	// containerManager must start after cAdvisor because it needs filesystem capacity information
 | |
| 	if err := kl.containerManager.Start(node, kl.GetActivePods, kl.sourcesReady, kl.statusManager, kl.runtimeService); err != nil {
 | |
| 		// Fail kubelet and rely on the babysitter to retry starting kubelet.
 | |
| 		klog.Fatalf("Failed to start ContainerManager %v", err)
 | |
| 	}
 | |
| 	// eviction manager must start after cadvisor because it needs to know if the container runtime has a dedicated imagefs
 | |
| 	kl.evictionManager.Start(kl.StatsProvider, kl.GetActivePods, kl.podResourcesAreReclaimed, evictionMonitoringPeriod)
 | |
| 
 | |
| 	// container log manager must start after container runtime is up to retrieve information from container runtime
 | |
| 	// and inform container to reopen log file after log rotation.
 | |
| 	kl.containerLogManager.Start()
 | |
| 	// Adding Registration Callback function for CSI Driver
 | |
| 	kl.pluginManager.AddHandler(pluginwatcherapi.CSIPlugin, plugincache.PluginHandler(csi.PluginHandler))
 | |
| 	// Adding Registration Callback function for Device Manager
 | |
| 	kl.pluginManager.AddHandler(pluginwatcherapi.DevicePlugin, kl.containerManager.GetPluginRegistrationHandler())
 | |
| 	// Start the plugin manager
 | |
| 	klog.V(4).Infof("starting plugin manager")
 | |
| 	go kl.pluginManager.Run(kl.sourcesReady, wait.NeverStop)
 | |
| }
 | |
| 
 | |
| // Run starts the kubelet reacting to config updates
 | |
| func (kl *Kubelet) Run(updates <-chan kubetypes.PodUpdate) {
 | |
| 	if kl.logServer == nil {
 | |
| 		kl.logServer = http.StripPrefix("/logs/", http.FileServer(http.Dir("/var/log/")))
 | |
| 	}
 | |
| 	if kl.kubeClient == nil {
 | |
| 		klog.Warning("No api server defined - no node status update will be sent.")
 | |
| 	}
 | |
| 
 | |
| 	// Start the cloud provider sync manager
 | |
| 	if kl.cloudResourceSyncManager != nil {
 | |
| 		go kl.cloudResourceSyncManager.Run(wait.NeverStop)
 | |
| 	}
 | |
| 
 | |
| 	if err := kl.initializeModules(); err != nil {
 | |
| 		kl.recorder.Eventf(kl.nodeRef, v1.EventTypeWarning, events.KubeletSetupFailed, err.Error())
 | |
| 		klog.Fatal(err)
 | |
| 	}
 | |
| 
 | |
| 	// Start volume manager
 | |
| 	go kl.volumeManager.Run(kl.sourcesReady, wait.NeverStop)
 | |
| 
 | |
| 	if kl.kubeClient != nil {
 | |
| 		// Start syncing node status immediately; this may set up things the runtime needs to run.
 | |
| 		go wait.Until(kl.syncNodeStatus, kl.nodeStatusUpdateFrequency, wait.NeverStop)
 | |
| 		go kl.fastStatusUpdateOnce()
 | |
| 
 | |
| 		// start syncing lease
 | |
| 		go kl.nodeLeaseController.Run(wait.NeverStop)
 | |
| 	}
 | |
| 	go wait.Until(kl.updateRuntimeUp, 5*time.Second, wait.NeverStop)
 | |
| 
 | |
| 	// Set up iptables util rules
 | |
| 	if kl.makeIPTablesUtilChains {
 | |
| 		kl.initNetworkUtil()
 | |
| 	}
 | |
| 
 | |
| 	// Start a goroutine responsible for killing pods (that are not properly
 | |
| 	// handled by pod workers).
 | |
| 	go wait.Until(kl.podKiller, 1*time.Second, wait.NeverStop)
 | |
| 
 | |
| 	// Start component sync loops.
 | |
| 	kl.statusManager.Start()
 | |
| 	kl.probeManager.Start()
 | |
| 
 | |
| 	// Start syncing RuntimeClasses if enabled.
 | |
| 	if kl.runtimeClassManager != nil {
 | |
| 		kl.runtimeClassManager.Start(wait.NeverStop)
 | |
| 	}
 | |
| 
 | |
| 	// Start the pod lifecycle event generator.
 | |
| 	kl.pleg.Start()
 | |
| 	kl.syncLoop(updates, kl)
 | |
| }
 | |
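| 
 | |
| // A hedged sketch of how a caller drives Run; the real kubelet wires the updates
 | |
| // channel from its pod config sources (file, apiserver, http), and kl here stands
 | |
| // for an already-constructed *Kubelet:
 | |
| //
 | |
| //	updates := make(chan kubetypes.PodUpdate)
 | |
| //	go kl.Run(updates) // blocks in syncLoop; PodUpdate events are fed in by the config sources
 | |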
| 
 | |
| // syncPod is the transaction script for the sync of a single pod.
 | |
| //
 | |
| // Arguments:
 | |
| //
 | |
| // o - the SyncPodOptions for this invocation
 | |
| //
 | |
| // The workflow is:
 | |
| // * If the pod is being created, record pod worker start latency
 | |
| // * Call generateAPIPodStatus to prepare a v1.PodStatus for the pod
 | |
| // * If the pod is being seen as running for the first time, record pod
 | |
| //   start latency
 | |
| // * Update the status of the pod in the status manager
 | |
| // * Kill the pod if it should not be running
 | |
| // * Create a mirror pod if the pod is a static pod, and does not
 | |
| //   already have a mirror pod
 | |
| // * Create the data directories for the pod if they do not exist
 | |
| // * Wait for volumes to attach/mount
 | |
| // * Fetch the pull secrets for the pod
 | |
| // * Call the container runtime's SyncPod callback
 | |
| // * Update the traffic shaping for the pod's ingress and egress limits
 | |
| //
 | |
| // If any step of this workflow errors, the error is returned, and is repeated
 | |
| // on the next syncPod call.
 | |
| //
 | |
| // This operation writes all events that are dispatched in order to provide
 | |
| // the most accurate information possible about an error situation to aid debugging.
 | |
| // Callers should not throw an event if this operation returns an error.
 | |
| func (kl *Kubelet) syncPod(o syncPodOptions) error {
 | |
| 	// pull out the required options
 | |
| 	pod := o.pod
 | |
| 	mirrorPod := o.mirrorPod
 | |
| 	podStatus := o.podStatus
 | |
| 	updateType := o.updateType
 | |
| 
 | |
| 	// if we want to kill a pod, do it now!
 | |
| 	if updateType == kubetypes.SyncPodKill {
 | |
| 		killPodOptions := o.killPodOptions
 | |
| 		if killPodOptions == nil || killPodOptions.PodStatusFunc == nil {
 | |
| 			return fmt.Errorf("kill pod options are required if update type is kill")
 | |
| 		}
 | |
| 		apiPodStatus := killPodOptions.PodStatusFunc(pod, podStatus)
 | |
| 		kl.statusManager.SetPodStatus(pod, apiPodStatus)
 | |
| 		// we kill the pod with the specified grace period since this is a termination
 | |
| 		if err := kl.killPod(pod, nil, podStatus, killPodOptions.PodTerminationGracePeriodSecondsOverride); err != nil {
 | |
| 			kl.recorder.Eventf(pod, v1.EventTypeWarning, events.FailedToKillPod, "error killing pod: %v", err)
 | |
| 			// there was an error killing the pod, so we return that error directly
 | |
| 			utilruntime.HandleError(err)
 | |
| 			return err
 | |
| 		}
 | |
| 		return nil
 | |
| 	}
 | |
| 
 | |
| 	// Latency measurements for the main workflow are relative to the
 | |
| 	// first time the pod was seen by the API server.
 | |
| 	var firstSeenTime time.Time
 | |
| 	if firstSeenTimeStr, ok := pod.Annotations[kubetypes.ConfigFirstSeenAnnotationKey]; ok {
 | |
| 		firstSeenTime = kubetypes.ConvertToTimestamp(firstSeenTimeStr).Get()
 | |
| 	}
 | |
| 
 | |
| 	// Record pod worker start latency if being created
 | |
| 	// TODO: make pod workers record their own latencies
 | |
| 	if updateType == kubetypes.SyncPodCreate {
 | |
| 		if !firstSeenTime.IsZero() {
 | |
| 			// This is the first time we are syncing the pod. Record the latency
 | |
| 			// since kubelet first saw the pod if firstSeenTime is set.
 | |
| 			metrics.PodWorkerStartDuration.Observe(metrics.SinceInSeconds(firstSeenTime))
 | |
| 		} else {
 | |
| 			klog.V(3).Infof("First seen time not recorded for pod %q", pod.UID)
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// Generate final API pod status with pod and status manager status
 | |
| 	apiPodStatus := kl.generateAPIPodStatus(pod, podStatus)
 | |
| 	// The pod IP may be changed in generateAPIPodStatus if the pod is using host network. (See #24576)
 | |
| 	// TODO(random-liu): After writing pod spec into container labels, check whether pod is using host network, and
 | |
| 	// set pod IP to hostIP directly in runtime.GetPodStatus
 | |
| 	podStatus.IPs = make([]string, 0, len(apiPodStatus.PodIPs))
 | |
| 	for _, ipInfo := range apiPodStatus.PodIPs {
 | |
| 		podStatus.IPs = append(podStatus.IPs, ipInfo.IP)
 | |
| 	}
 | |
| 
 | |
| 	if len(podStatus.IPs) == 0 && len(apiPodStatus.PodIP) > 0 {
 | |
| 		podStatus.IPs = []string{apiPodStatus.PodIP}
 | |
| 	}
 | |
| 
 | |
| 	// Record the time it takes for the pod to become running.
 | |
| 	existingStatus, ok := kl.statusManager.GetPodStatus(pod.UID)
 | |
| 	if !ok || existingStatus.Phase == v1.PodPending && apiPodStatus.Phase == v1.PodRunning &&
 | |
| 		!firstSeenTime.IsZero() {
 | |
| 		metrics.PodStartDuration.Observe(metrics.SinceInSeconds(firstSeenTime))
 | |
| 	}
 | |
| 
 | |
| 	runnable := kl.canRunPod(pod)
 | |
| 	if !runnable.Admit {
 | |
| 		// Pod is not runnable; update the Pod and Container statuses to why.
 | |
| 		apiPodStatus.Reason = runnable.Reason
 | |
| 		apiPodStatus.Message = runnable.Message
 | |
| 		// Containers in the Waiting state are not being created.
 | |
| 		const waitingReason = "Blocked"
 | |
| 		for _, cs := range apiPodStatus.InitContainerStatuses {
 | |
| 			if cs.State.Waiting != nil {
 | |
| 				cs.State.Waiting.Reason = waitingReason
 | |
| 			}
 | |
| 		}
 | |
| 		for _, cs := range apiPodStatus.ContainerStatuses {
 | |
| 			if cs.State.Waiting != nil {
 | |
| 				cs.State.Waiting.Reason = waitingReason
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// Update status in the status manager
 | |
| 	kl.statusManager.SetPodStatus(pod, apiPodStatus)
 | |
| 
 | |
| 	// Kill pod if it should not be running
 | |
| 	if !runnable.Admit || pod.DeletionTimestamp != nil || apiPodStatus.Phase == v1.PodFailed {
 | |
| 		var syncErr error
 | |
| 		if err := kl.killPod(pod, nil, podStatus, nil); err != nil {
 | |
| 			kl.recorder.Eventf(pod, v1.EventTypeWarning, events.FailedToKillPod, "error killing pod: %v", err)
 | |
| 			syncErr = fmt.Errorf("error killing pod: %v", err)
 | |
| 			utilruntime.HandleError(syncErr)
 | |
| 		} else {
 | |
| 			if !runnable.Admit {
 | |
| 				// There was no error killing the pod, but the pod cannot be run.
 | |
| 				// Return an error to signal that the sync loop should back off.
 | |
| 				syncErr = fmt.Errorf("pod cannot be run: %s", runnable.Message)
 | |
| 			}
 | |
| 		}
 | |
| 		return syncErr
 | |
| 	}
 | |
| 
 | |
| 	// If the network plugin is not ready, only start the pod if it uses the host network
 | |
| 	if err := kl.runtimeState.networkErrors(); err != nil && !kubecontainer.IsHostNetworkPod(pod) {
 | |
| 		kl.recorder.Eventf(pod, v1.EventTypeWarning, events.NetworkNotReady, "%s: %v", NetworkNotReadyErrorMsg, err)
 | |
| 		return fmt.Errorf("%s: %v", NetworkNotReadyErrorMsg, err)
 | |
| 	}
 | |
| 
 | |
| 	// Create Cgroups for the pod and apply resource parameters
 | |
| 	// to them if cgroups-per-qos flag is enabled.
 | |
| 	pcm := kl.containerManager.NewPodContainerManager()
 | |
| 	// If pod has already been terminated then we need not create
 | |
| 	// or update the pod's cgroup
 | |
| 	if !kl.podIsTerminated(pod) {
 | |
| 		// When the kubelet is restarted with the cgroups-per-qos
 | |
| 		// flag enabled, all the pod's running containers
 | |
| 		// should be killed briefly and brought back up
 | |
| 		// under the qos cgroup hierarchy.
 | |
| 		// Check if this is the pod's first sync
 | |
| 		firstSync := true
 | |
| 		for _, containerStatus := range apiPodStatus.ContainerStatuses {
 | |
| 			if containerStatus.State.Running != nil {
 | |
| 				firstSync = false
 | |
| 				break
 | |
| 			}
 | |
| 		}
 | |
| 		// Don't kill containers in pod if pod's cgroups already
 | |
| 		// exists or the pod is running for the first time
 | |
| 		podKilled := false
 | |
| 		if !pcm.Exists(pod) && !firstSync {
 | |
| 			if err := kl.killPod(pod, nil, podStatus, nil); err == nil {
 | |
| 				podKilled = true
 | |
| 			}
 | |
| 		}
 | |
| 		// Create and update the pod's cgroups.
 | |
| 		// Don't create cgroups for a run-once pod if it was killed above.
 | |
| 		// The current policy is not to restart run-once pods when the kubelet
 | |
| 		// is restarted with the cgroups-per-qos flag: run-once pods are expected
 | |
| 		// to run only once, so if the kubelet is restarted they are not
 | |
| 		// expected to run again.
 | |
| 		// Therefore we skip creating and updating the cgroup if it's a run-once pod that was killed above.
 | |
| 		if !(podKilled && pod.Spec.RestartPolicy == v1.RestartPolicyNever) {
 | |
| 			if !pcm.Exists(pod) {
 | |
| 				if err := kl.containerManager.UpdateQOSCgroups(); err != nil {
 | |
| 					klog.V(2).Infof("Failed to update QoS cgroups while syncing pod: %v", err)
 | |
| 				}
 | |
| 				if err := pcm.EnsureExists(pod); err != nil {
 | |
| 					kl.recorder.Eventf(pod, v1.EventTypeWarning, events.FailedToCreatePodContainer, "unable to ensure pod container exists: %v", err)
 | |
| 					return fmt.Errorf("failed to ensure that the pod: %v cgroups exist and are correctly applied: %v", pod.UID, err)
 | |
| 				}
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// Create Mirror Pod for Static Pod if it doesn't already exist
 | |
| 	if kubetypes.IsStaticPod(pod) {
 | |
| 		podFullName := kubecontainer.GetPodFullName(pod)
 | |
| 		deleted := false
 | |
| 		if mirrorPod != nil {
 | |
| 			if mirrorPod.DeletionTimestamp != nil || !kl.podManager.IsMirrorPodOf(mirrorPod, pod) {
 | |
| 				// The mirror pod is semantically different from the static pod. Remove
 | |
| 				// it. The mirror pod will get recreated later.
 | |
| 				klog.Infof("Trying to delete pod %s %v", podFullName, mirrorPod.ObjectMeta.UID)
 | |
| 				var err error
 | |
| 				deleted, err = kl.podManager.DeleteMirrorPod(podFullName, &mirrorPod.ObjectMeta.UID)
 | |
| 				if deleted {
 | |
| 					klog.Warningf("Deleted mirror pod %q because it is outdated", format.Pod(mirrorPod))
 | |
| 				} else if err != nil {
 | |
| 					klog.Errorf("Failed deleting mirror pod %q: %v", format.Pod(mirrorPod), err)
 | |
| 				}
 | |
| 			}
 | |
| 		}
 | |
| 		if mirrorPod == nil || deleted {
 | |
| 			node, err := kl.GetNode()
 | |
| 			if err != nil || node.DeletionTimestamp != nil {
 | |
| 				klog.V(4).Infof("No need to create a mirror pod, since node %q has been removed from the cluster", kl.nodeName)
 | |
| 			} else {
 | |
| 				klog.V(4).Infof("Creating a mirror pod for static pod %q", format.Pod(pod))
 | |
| 				if err := kl.podManager.CreateMirrorPod(pod); err != nil {
 | |
| 					klog.Errorf("Failed creating a mirror pod for %q: %v", format.Pod(pod), err)
 | |
| 				}
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// Make data directories for the pod
 | |
| 	if err := kl.makePodDataDirs(pod); err != nil {
 | |
| 		kl.recorder.Eventf(pod, v1.EventTypeWarning, events.FailedToMakePodDataDirectories, "error making pod data directories: %v", err)
 | |
| 		klog.Errorf("Unable to make pod data directories for pod %q: %v", format.Pod(pod), err)
 | |
| 		return err
 | |
| 	}
 | |
| 
 | |
| 	// Volume manager will not mount volumes for terminated pods
 | |
| 	if !kl.podIsTerminated(pod) {
 | |
| 		// Wait for volumes to attach/mount
 | |
| 		if err := kl.volumeManager.WaitForAttachAndMount(pod); err != nil {
 | |
| 			kl.recorder.Eventf(pod, v1.EventTypeWarning, events.FailedMountVolume, "Unable to attach or mount volumes: %v", err)
 | |
| 			klog.Errorf("Unable to attach or mount volumes for pod %q: %v; skipping pod", format.Pod(pod), err)
 | |
| 			return err
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// Fetch the pull secrets for the pod
 | |
| 	pullSecrets := kl.getPullSecretsForPod(pod)
 | |
| 
 | |
| 	// Call the container runtime's SyncPod callback
 | |
| 	result := kl.containerRuntime.SyncPod(pod, podStatus, pullSecrets, kl.backOff)
 | |
| 	kl.reasonCache.Update(pod.UID, result)
 | |
| 	if err := result.Error(); err != nil {
 | |
| 		// Do not return error if the only failures were pods in backoff
 | |
| 		for _, r := range result.SyncResults {
 | |
| 			if r.Error != kubecontainer.ErrCrashLoopBackOff && r.Error != images.ErrImagePullBackOff {
 | |
| 				// Do not record an event here, as we keep all event logging for sync pod failures
 | |
| 				// local to container runtime so we get better errors
 | |
| 				return err
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		return nil
 | |
| 	}
 | |
| 
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // Get pods which should be resynchronized. Currently, the following pods should be resynchronized:
 | |
| //   * pods whose work is ready.
 | |
| //   * pods for which an internal module has requested a sync.
 | |
| func (kl *Kubelet) getPodsToSync() []*v1.Pod {
 | |
| 	allPods := kl.podManager.GetPods()
 | |
| 	podUIDs := kl.workQueue.GetWork()
 | |
| 	podUIDSet := sets.NewString()
 | |
| 	for _, podUID := range podUIDs {
 | |
| 		podUIDSet.Insert(string(podUID))
 | |
| 	}
 | |
| 	var podsToSync []*v1.Pod
 | |
| 	for _, pod := range allPods {
 | |
| 		if podUIDSet.Has(string(pod.UID)) {
 | |
| 			// The work of the pod is ready
 | |
| 			podsToSync = append(podsToSync, pod)
 | |
| 			continue
 | |
| 		}
 | |
| 		for _, podSyncLoopHandler := range kl.PodSyncLoopHandlers {
 | |
| 			if podSyncLoopHandler.ShouldSync(pod) {
 | |
| 				podsToSync = append(podsToSync, pod)
 | |
| 				break
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 	return podsToSync
 | |
| }
 | |
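| 
 | |
| // The UID de-duplication above uses sets.String from
 | |
| // k8s.io/apimachinery/pkg/util/sets; a tiny standalone example of that helper:
 | |
| //
 | |
| //	s := sets.NewString("a", "b")
 | |
| //	s.Insert("c")
 | |
| //	_ = s.Has("a") // true
 | |
| //	_ = s.Has("z") // false
 | |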
| 
 | |
| // deletePod deletes the pod from the internal state of the kubelet by:
 | |
| // 1.  stopping the associated pod worker asynchronously
 | |
| // 2.  signaling to kill the pod by sending on the podKillingCh channel
 | |
| //
 | |
| // deletePod returns an error if not all sources are ready or the pod is not
 | |
| // found in the runtime cache.
 | |
| func (kl *Kubelet) deletePod(pod *v1.Pod) error {
 | |
| 	if pod == nil {
 | |
| 		return fmt.Errorf("deletePod does not allow nil pod")
 | |
| 	}
 | |
| 	if !kl.sourcesReady.AllReady() {
 | |
| 		// If the sources aren't ready, skip deletion, as we may accidentally delete pods
 | |
| 		// for sources that haven't reported yet.
 | |
| 		return fmt.Errorf("skipping delete because sources aren't ready yet")
 | |
| 	}
 | |
| 	kl.podWorkers.ForgetWorker(pod.UID)
 | |
| 
 | |
| 	// Runtime cache may not have been updated with the pod, but it's okay
 | |
| 	// because the periodic cleanup routine will attempt to delete again later.
 | |
| 	runningPods, err := kl.runtimeCache.GetPods()
 | |
| 	if err != nil {
 | |
| 		return fmt.Errorf("error listing containers: %v", err)
 | |
| 	}
 | |
| 	runningPod := kubecontainer.Pods(runningPods).FindPod("", pod.UID)
 | |
| 	if runningPod.IsEmpty() {
 | |
| 		return fmt.Errorf("pod not found")
 | |
| 	}
 | |
| 	podPair := kubecontainer.PodPair{APIPod: pod, RunningPod: &runningPod}
 | |
| 
 | |
| 	kl.podKillingCh <- &podPair
 | |
| 	// TODO: delete the mirror pod here?
 | |
| 
 | |
| 	// We leave the volume/directory cleanup to the periodic cleanup routine.
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // rejectPod records an event about the pod with the given reason and message,
 | |
| // and updates the pod to the failed phase in the status manager.
 | |
| func (kl *Kubelet) rejectPod(pod *v1.Pod, reason, message string) {
 | |
| 	kl.recorder.Eventf(pod, v1.EventTypeWarning, reason, message)
 | |
| 	kl.statusManager.SetPodStatus(pod, v1.PodStatus{
 | |
| 		Phase:   v1.PodFailed,
 | |
| 		Reason:  reason,
 | |
| 		Message: "Pod " + message})
 | |
| }
 | |
| 
 | |
| // canAdmitPod determines if a pod can be admitted, and gives a reason if it
 | |
| // cannot. "pod" is new pod, while "pods" are all admitted pods
 | |
| // The function returns a boolean value indicating whether the pod
 | |
| // can be admitted, a brief single-word reason and a message explaining why
 | |
| // the pod cannot be admitted.
 | |
| func (kl *Kubelet) canAdmitPod(pods []*v1.Pod, pod *v1.Pod) (bool, string, string) {
 | |
| 	// the kubelet will invoke each pod admit handler in sequence
 | |
| 	// if any handler rejects, the pod is rejected.
 | |
| 	// TODO: move out of disk check into a pod admitter
 | |
| 	// TODO: out of resource eviction should have a pod admitter call-out
 | |
| 	attrs := &lifecycle.PodAdmitAttributes{Pod: pod, OtherPods: pods}
 | |
| 	for _, podAdmitHandler := range kl.admitHandlers {
 | |
| 		if result := podAdmitHandler.Admit(attrs); !result.Admit {
 | |
| 			return false, result.Reason, result.Message
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return true, "", ""
 | |
| }
 | |
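| 
 | |
| // A minimal sketch of a pod admit handler as consumed by canAdmitPod above.
 | |
| // The handler type is hypothetical; the lifecycle.PodAdmitHandler interface and
 | |
| // the PodAdmitAttributes/PodAdmitResult types are the ones already used in this file:
 | |
| //
 | |
| //	type maxPodsAdmitHandler struct{ max int }
 | |
| //
 | |
| //	func (h *maxPodsAdmitHandler) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult {
 | |
| //		if len(attrs.OtherPods) >= h.max {
 | |
| //			return lifecycle.PodAdmitResult{Admit: false, Reason: "OutOfPods", Message: "node pod limit reached"}
 | |
| //		}
 | |
| //		return lifecycle.PodAdmitResult{Admit: true}
 | |
| //	}
 | |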
| 
 | |
| func (kl *Kubelet) canRunPod(pod *v1.Pod) lifecycle.PodAdmitResult {
 | |
| 	attrs := &lifecycle.PodAdmitAttributes{Pod: pod}
 | |
| 	// Get "OtherPods". Rejected pods are failed, so only include admitted pods that are alive.
 | |
| 	attrs.OtherPods = kl.filterOutTerminatedPods(kl.podManager.GetPods())
 | |
| 
 | |
| 	for _, handler := range kl.softAdmitHandlers {
 | |
| 		if result := handler.Admit(attrs); !result.Admit {
 | |
| 			return result
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return lifecycle.PodAdmitResult{Admit: true}
 | |
| }
 | |
| 
 | |
| // syncLoop is the main loop for processing changes. It watches for changes from
 | |
| // three channels (file, apiserver, and http) and creates a union of them. For
 | |
| // any new change seen, it will run a sync against the desired state and running state. If
 | |
| // no changes are seen to the configuration, it will synchronize the last known desired
 | |
| // state every sync-frequency seconds. Never returns.
 | |
| func (kl *Kubelet) syncLoop(updates <-chan kubetypes.PodUpdate, handler SyncHandler) {
 | |
| 	klog.Info("Starting kubelet main sync loop.")
 | |
| 	// The syncTicker wakes up the kubelet to check if there are any pod workers
 | |
| 	// that need to be synced. A one-second period is sufficient because the
 | |
| 	// sync interval is defaulted to 10s.
 | |
| 	syncTicker := time.NewTicker(time.Second)
 | |
| 	defer syncTicker.Stop()
 | |
| 	housekeepingTicker := time.NewTicker(housekeepingPeriod)
 | |
| 	defer housekeepingTicker.Stop()
 | |
| 	plegCh := kl.pleg.Watch()
 | |
| 	const (
 | |
| 		base   = 100 * time.Millisecond
 | |
| 		max    = 5 * time.Second
 | |
| 		factor = 2
 | |
| 	)
 | |
| 	duration := base
 | |
| 	// Responsible for checking limits in resolv.conf
 | |
| 	// The limits do not have anything to do with individual pods
 | |
| 	// Since this is called in syncLoop, we don't need to call it anywhere else
 | |
| 	if kl.dnsConfigurer != nil && kl.dnsConfigurer.ResolverConfig != "" {
 | |
| 		kl.dnsConfigurer.CheckLimitsForResolvConf()
 | |
| 	}
 | |
| 
 | |
| 	for {
 | |
| 		if err := kl.runtimeState.runtimeErrors(); err != nil {
 | |
| 			klog.Errorf("skipping pod synchronization - %v", err)
 | |
| 			// exponential backoff
 | |
| 			time.Sleep(duration)
 | |
| 			duration = time.Duration(math.Min(float64(max), factor*float64(duration)))
 | |
| 			continue
 | |
| 		}
 | |
| 		// reset backoff if we have a success
 | |
| 		duration = base
 | |
| 
 | |
| 		kl.syncLoopMonitor.Store(kl.clock.Now())
 | |
| 		if !kl.syncLoopIteration(updates, handler, syncTicker.C, housekeepingTicker.C, plegCh) {
 | |
| 			break
 | |
| 		}
 | |
| 		kl.syncLoopMonitor.Store(kl.clock.Now())
 | |
| 	}
 | |
| }
 | |
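| 
 | |
| // Worked example of the backoff above: with base=100ms, factor=2 and max=5s, the
 | |
| // sleep between skipped iterations on repeated runtime errors grows as
 | |
| //
 | |
| //	100ms, 200ms, 400ms, 800ms, 1.6s, 3.2s, 5s, 5s, ...
 | |
| //
 | |
| // and resets to 100ms as soon as runtimeErrors() returns nil.
 | |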
| 
 | |
| // syncLoopIteration reads from various channels and dispatches pods to the
 | |
| // given handler.
 | |
| //
 | |
| // Arguments:
 | |
| // 1.  configCh:       a channel to read config events from
 | |
| // 2.  handler:        the SyncHandler to dispatch pods to
 | |
| // 3.  syncCh:         a channel to read periodic sync events from
 | |
| // 4.  housekeepingCh: a channel to read housekeeping events from
 | |
| // 5.  plegCh:         a channel to read PLEG updates from
 | |
| //
 | |
| // Events are also read from the kubelet liveness manager's update channel.
 | |
| //
 | |
| // The workflow is to read from one of the channels, handle that event, and
 | |
| // update the timestamp in the sync loop monitor.
 | |
| //
 | |
| // Here is an appropriate place to note that despite the syntactical
 | |
| // similarity to the switch statement, the case statements in a select are
 | |
| // evaluated in a pseudorandom order if there are multiple channels ready to
 | |
| // read from when the select is evaluated. In other words, case statements
 | |
| // are evaluated in random order, and you cannot assume that the case
 | |
| // statements evaluate in order if multiple channels have events.
 | |
| //
 | |
| // With that in mind, in truly no particular order, the different channels
 | |
| // are handled as follows:
 | |
| //
 | |
| // * configCh: dispatch the pods for the config change to the appropriate
 | |
| //             handler callback for the event type
 | |
| // * plegCh: update the runtime cache; sync pod
 | |
| // * syncCh: sync all pods waiting for sync
 | |
| // * housekeepingCh: trigger cleanup of pods
 | |
| // * liveness manager: sync pods that have failed or in which one or more
 | |
| //                     containers have failed liveness checks
 | |
| func (kl *Kubelet) syncLoopIteration(configCh <-chan kubetypes.PodUpdate, handler SyncHandler,
 | |
| 	syncCh <-chan time.Time, housekeepingCh <-chan time.Time, plegCh <-chan *pleg.PodLifecycleEvent) bool {
 | |
| 	select {
 | |
| 	case u, open := <-configCh:
 | |
| 		// Update from a config source; dispatch it to the right handler
 | |
| 		// callback.
 | |
| 		if !open {
 | |
| 			klog.Errorf("Update channel is closed. Exiting the sync loop.")
 | |
| 			return false
 | |
| 		}
 | |
| 
 | |
| 		switch u.Op {
 | |
| 		case kubetypes.ADD:
 | |
| 			klog.V(2).Infof("SyncLoop (ADD, %q): %q", u.Source, format.Pods(u.Pods))
 | |
| 			// After restarting, kubelet will get all existing pods through
 | |
| 			// ADD as if they are new pods. These pods will then go through the
 | |
| 			// admission process and *may* be rejected. This can be resolved
 | |
| 			// once we have checkpointing.
 | |
| 			handler.HandlePodAdditions(u.Pods)
 | |
| 		case kubetypes.UPDATE:
 | |
| 			klog.V(2).Infof("SyncLoop (UPDATE, %q): %q", u.Source, format.PodsWithDeletionTimestamps(u.Pods))
 | |
| 			handler.HandlePodUpdates(u.Pods)
 | |
| 		case kubetypes.REMOVE:
 | |
| 			klog.V(2).Infof("SyncLoop (REMOVE, %q): %q", u.Source, format.Pods(u.Pods))
 | |
| 			handler.HandlePodRemoves(u.Pods)
 | |
| 		case kubetypes.RECONCILE:
 | |
| 			klog.V(4).Infof("SyncLoop (RECONCILE, %q): %q", u.Source, format.Pods(u.Pods))
 | |
| 			handler.HandlePodReconcile(u.Pods)
 | |
| 		case kubetypes.DELETE:
 | |
| 			klog.V(2).Infof("SyncLoop (DELETE, %q): %q", u.Source, format.Pods(u.Pods))
 | |
| 			// DELETE is treated as an UPDATE because of graceful deletion.
 | |
| 			handler.HandlePodUpdates(u.Pods)
 | |
| 		case kubetypes.RESTORE:
 | |
| 			klog.V(2).Infof("SyncLoop (RESTORE, %q): %q", u.Source, format.Pods(u.Pods))
 | |
| 			// These are pods restored from the checkpoint. Treat them as new
 | |
| 			// pods.
 | |
| 			handler.HandlePodAdditions(u.Pods)
 | |
| 		case kubetypes.SET:
 | |
| 			// TODO: Do we want to support this?
 | |
| 			klog.Errorf("Kubelet does not support snapshot update")
 | |
| 		}
 | |
| 
 | |
| 		if u.Op != kubetypes.RESTORE {
 | |
| 			// If the update type is RESTORE, it means that the update is from
 | |
| 			// the pod checkpoints and may be incomplete. Do not mark the
 | |
| 			// source as ready.
 | |
| 
 | |
| 			// Mark the source ready after receiving at least one update from the
 | |
| 			// source. Once all the sources are marked ready, various cleanup
 | |
| 			// routines will start reclaiming resources. It is important that this
 | |
| 			// takes place only after kubelet calls the update handler to process
 | |
| 			// the update to ensure the internal pod cache is up-to-date.
 | |
| 			kl.sourcesReady.AddSource(u.Source)
 | |
| 		}
 | |
| 	case e := <-plegCh:
 | |
| 		if isSyncPodWorthy(e) {
 | |
| 			// PLEG event for a pod; sync it.
 | |
| 			if pod, ok := kl.podManager.GetPodByUID(e.ID); ok {
 | |
| 				klog.V(2).Infof("SyncLoop (PLEG): %q, event: %#v", format.Pod(pod), e)
 | |
| 				handler.HandlePodSyncs([]*v1.Pod{pod})
 | |
| 			} else {
 | |
| 				// If the pod no longer exists, ignore the event.
 | |
| 				klog.V(4).Infof("SyncLoop (PLEG): ignore irrelevant event: %#v", e)
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		if e.Type == pleg.ContainerDied {
 | |
| 			if containerID, ok := e.Data.(string); ok {
 | |
| 				kl.cleanUpContainersInPod(e.ID, containerID)
 | |
| 			}
 | |
| 		}
 | |
| 	case <-syncCh:
 | |
| 		// Sync pods waiting for sync
 | |
| 		podsToSync := kl.getPodsToSync()
 | |
| 		if len(podsToSync) == 0 {
 | |
| 			break
 | |
| 		}
 | |
| 		klog.V(4).Infof("SyncLoop (SYNC): %d pods; %s", len(podsToSync), format.Pods(podsToSync))
 | |
| 		handler.HandlePodSyncs(podsToSync)
 | |
| 	case update := <-kl.livenessManager.Updates():
 | |
| 		if update.Result == proberesults.Failure {
 | |
| 			// The liveness manager detected a failure; sync the pod.
 | |
| 
 | |
| 			// We should not use the pod from livenessManager, because it is never updated after
 | |
| 			// initialization.
 | |
| 			pod, ok := kl.podManager.GetPodByUID(update.PodUID)
 | |
| 			if !ok {
 | |
| 				// If the pod no longer exists, ignore the update.
 | |
| 				klog.V(4).Infof("SyncLoop (container unhealthy): ignore irrelevant update: %#v", update)
 | |
| 				break
 | |
| 			}
 | |
| 			klog.V(1).Infof("SyncLoop (container unhealthy): %q", format.Pod(pod))
 | |
| 			handler.HandlePodSyncs([]*v1.Pod{pod})
 | |
| 		}
 | |
| 	case <-housekeepingCh:
 | |
| 		if !kl.sourcesReady.AllReady() {
 | |
| 			// If the sources aren't ready, skip housekeeping, as we may
 | |
| 			// accidentally delete pods from unready sources.
 | |
| 			klog.V(4).Infof("SyncLoop (housekeeping, skipped): sources aren't ready yet.")
 | |
| 		} else {
 | |
| 			klog.V(4).Infof("SyncLoop (housekeeping)")
 | |
| 			if err := handler.HandlePodCleanups(); err != nil {
 | |
| 				klog.Errorf("Failed cleaning pods: %v", err)
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 	return true
 | |
| }
 | |
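| 
 | |
| // The pseudorandom select behavior called out in the comment above can be seen
 | |
| // with a tiny standalone snippet (plain Go, unrelated to kubelet types):
 | |
| //
 | |
| //	a, b := make(chan struct{}, 1), make(chan struct{}, 1)
 | |
| //	a <- struct{}{}
 | |
| //	b <- struct{}{}
 | |
| //	select { // both cases are ready; the runtime picks one at random
 | |
| //	case <-a:
 | |
| //		fmt.Println("a")
 | |
| //	case <-b:
 | |
| //		fmt.Println("b")
 | |
| //	}
 | |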
| 
 | |
| // dispatchWork starts the asynchronous sync of the pod in a pod worker.
 | |
| // If the pod has completed termination, dispatchWork will perform no action.
 | |
| func (kl *Kubelet) dispatchWork(pod *v1.Pod, syncType kubetypes.SyncPodType, mirrorPod *v1.Pod, start time.Time) {
 | |
| 	// check whether we are ready to delete the pod from the API server (all status up to date)
 | |
| 	containersTerminal, podWorkerTerminal := kl.podAndContainersAreTerminal(pod)
 | |
| 	if pod.DeletionTimestamp != nil && containersTerminal {
 | |
| 		klog.V(4).Infof("Pod %q has completed execution and should be deleted from the API server: %s", format.Pod(pod), syncType)
 | |
| 		kl.statusManager.TerminatePod(pod)
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	// optimization: avoid invoking the pod worker if no further changes are possible to the pod definition
 | |
| 	if podWorkerTerminal {
 | |
| 		klog.V(4).Infof("Pod %q has completed, ignoring remaining sync work: %s", format.Pod(pod), syncType)
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	// Run the sync in an async worker.
 | |
| 	kl.podWorkers.UpdatePod(&UpdatePodOptions{
 | |
| 		Pod:        pod,
 | |
| 		MirrorPod:  mirrorPod,
 | |
| 		UpdateType: syncType,
 | |
| 		OnCompleteFunc: func(err error) {
 | |
| 			if err != nil {
 | |
| 				metrics.PodWorkerDuration.WithLabelValues(syncType.String()).Observe(metrics.SinceInSeconds(start))
 | |
| 			}
 | |
| 		},
 | |
| 	})
 | |
| 	// Note the number of containers for new pods.
 | |
| 	if syncType == kubetypes.SyncPodCreate {
 | |
| 		metrics.ContainersPerPodCount.Observe(float64(len(pod.Spec.Containers)))
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // TODO: handle mirror pods in a separate component (issue #17251)
 | |
| func (kl *Kubelet) handleMirrorPod(mirrorPod *v1.Pod, start time.Time) {
 | |
| 	// Mirror pod ADD/UPDATE/DELETE operations are considered an UPDATE to the
 | |
| 	// corresponding static pod. Send update to the pod worker if the static
 | |
| 	// pod exists.
 | |
| 	if pod, ok := kl.podManager.GetPodByMirrorPod(mirrorPod); ok {
 | |
| 		kl.dispatchWork(pod, kubetypes.SyncPodUpdate, mirrorPod, start)
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // HandlePodAdditions is the callback in SyncHandler for pods being added from
 | |
| // a config source.
 | |
| func (kl *Kubelet) HandlePodAdditions(pods []*v1.Pod) {
 | |
| 	start := kl.clock.Now()
 | |
| 	sort.Sort(sliceutils.PodsByCreationTime(pods))
 | |
| 	for _, pod := range pods {
 | |
| 		existingPods := kl.podManager.GetPods()
 | |
| 		// Always add the pod to the pod manager. Kubelet relies on the pod
 | |
| 		// manager as the source of truth for the desired state. If a pod does
 | |
| 		// not exist in the pod manager, it means that it has been deleted in
 | |
| 		// the apiserver and no action (other than cleanup) is required.
 | |
| 		kl.podManager.AddPod(pod)
 | |
| 
 | |
| 		if kubetypes.IsMirrorPod(pod) {
 | |
| 			kl.handleMirrorPod(pod, start)
 | |
| 			continue
 | |
| 		}
 | |
| 
 | |
| 		if !kl.podIsTerminated(pod) {
 | |
| 			// Only go through the admission process if the pod is not
 | |
| 			// terminated.
 | |
| 
 | |
| 			// We failed pods that we rejected, so activePods include all admitted
 | |
| 			// pods that are alive.
 | |
| 			activePods := kl.filterOutTerminatedPods(existingPods)
 | |
| 
 | |
| 			// Check if we can admit the pod; if not, reject it.
 | |
| 			if ok, reason, message := kl.canAdmitPod(activePods, pod); !ok {
 | |
| 				kl.rejectPod(pod, reason, message)
 | |
| 				continue
 | |
| 			}
 | |
| 		}
 | |
| 		mirrorPod, _ := kl.podManager.GetMirrorPodByPod(pod)
 | |
| 		kl.dispatchWork(pod, kubetypes.SyncPodCreate, mirrorPod, start)
 | |
| 		kl.probeManager.AddPod(pod)
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // HandlePodUpdates is the callback in the SyncHandler interface for pods
 | |
| // being updated from a config source.
 | |
| func (kl *Kubelet) HandlePodUpdates(pods []*v1.Pod) {
 | |
| 	start := kl.clock.Now()
 | |
| 	for _, pod := range pods {
 | |
| 		kl.podManager.UpdatePod(pod)
 | |
| 		if kubetypes.IsMirrorPod(pod) {
 | |
| 			kl.handleMirrorPod(pod, start)
 | |
| 			continue
 | |
| 		}
 | |
| 		// TODO: Evaluate if we need to validate and reject updates.
 | |
| 
 | |
| 		mirrorPod, _ := kl.podManager.GetMirrorPodByPod(pod)
 | |
| 		kl.dispatchWork(pod, kubetypes.SyncPodUpdate, mirrorPod, start)
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // HandlePodRemoves is the callback in the SyncHandler interface for pods
 | |
| // being removed from a config source.
 | |
| func (kl *Kubelet) HandlePodRemoves(pods []*v1.Pod) {
 | |
| 	start := kl.clock.Now()
 | |
| 	for _, pod := range pods {
 | |
| 		kl.podManager.DeletePod(pod)
 | |
| 		if kubetypes.IsMirrorPod(pod) {
 | |
| 			kl.handleMirrorPod(pod, start)
 | |
| 			continue
 | |
| 		}
 | |
| 		// Deletion is allowed to fail because the periodic cleanup routine
 | |
| 		// will trigger deletion again.
 | |
| 		if err := kl.deletePod(pod); err != nil {
 | |
| 			klog.V(2).Infof("Failed to delete pod %q, err: %v", format.Pod(pod), err)
 | |
| 		}
 | |
| 		kl.probeManager.RemovePod(pod)
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // HandlePodReconcile is the callback in the SyncHandler interface for pods
 | |
| // that should be reconciled.
 | |
| func (kl *Kubelet) HandlePodReconcile(pods []*v1.Pod) {
 | |
| 	start := kl.clock.Now()
 | |
| 	for _, pod := range pods {
 | |
| 		// Update the pod in the pod manager; the status manager will periodically
 | |
| 		// reconcile the pod status based on the pod manager.
 | |
| 		kl.podManager.UpdatePod(pod)
 | |
| 
 | |
| 		// Reconcile Pod "Ready" condition if necessary. Trigger sync pod for reconciliation.
 | |
| 		if status.NeedToReconcilePodReadiness(pod) {
 | |
| 			mirrorPod, _ := kl.podManager.GetMirrorPodByPod(pod)
 | |
| 			kl.dispatchWork(pod, kubetypes.SyncPodSync, mirrorPod, start)
 | |
| 		}
 | |
| 
 | |
| 		// After an evicted pod is synced, all dead containers in the pod can be removed.
 | |
| 		if eviction.PodIsEvicted(pod.Status) {
 | |
| 			if podStatus, err := kl.podCache.Get(pod.UID); err == nil {
 | |
| 				kl.containerDeletor.deleteContainersInPod("", podStatus, true)
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // HandlePodSyncs is the callback in the syncHandler interface for pods
 | |
| // that should be dispatched to pod workers for sync.
 | |
| func (kl *Kubelet) HandlePodSyncs(pods []*v1.Pod) {
 | |
| 	start := kl.clock.Now()
 | |
| 	for _, pod := range pods {
 | |
| 		mirrorPod, _ := kl.podManager.GetMirrorPodByPod(pod)
 | |
| 		kl.dispatchWork(pod, kubetypes.SyncPodSync, mirrorPod, start)
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // LatestLoopEntryTime returns the last time in the sync loop monitor.
 | |
| func (kl *Kubelet) LatestLoopEntryTime() time.Time {
 | |
| 	val := kl.syncLoopMonitor.Load()
 | |
| 	if val == nil {
 | |
| 		return time.Time{}
 | |
| 	}
 | |
| 	return val.(time.Time)
 | |
| }
 | |
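| 
 | |
| // syncLoopMonitor holds a time.Time in an atomic.Value; the Load/type-assert
 | |
| // pattern above in miniature, using only the standard library:
 | |
| //
 | |
| //	var v atomic.Value
 | |
| //	v.Store(time.Now())
 | |
| //	if val := v.Load(); val != nil {
 | |
| //		_ = val.(time.Time)
 | |
| //	}
 | |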
| 
 | |
| // updateRuntimeUp calls the container runtime status callback, initializing
 | |
| // the runtime dependent modules when the container runtime first comes up,
 | |
| // and returns an error if the status check fails.  If the status check is OK,
 | |
| // update the container runtime uptime in the kubelet runtimeState.
 | |
| func (kl *Kubelet) updateRuntimeUp() {
 | |
| 	kl.updateRuntimeMux.Lock()
 | |
| 	defer kl.updateRuntimeMux.Unlock()
 | |
| 
 | |
| 	s, err := kl.containerRuntime.Status()
 | |
| 	if err != nil {
 | |
| 		klog.Errorf("Container runtime sanity check failed: %v", err)
 | |
| 		return
 | |
| 	}
 | |
| 	if s == nil {
 | |
| 		klog.Errorf("Container runtime status is nil")
 | |
| 		return
 | |
| 	}
 | |
| 	// Periodically log the whole runtime status for debugging.
 | |
| 	// TODO(random-liu): Consider sending a node event when an optional
 | |
| 	// condition is unmet.
 | |
| 	klog.V(4).Infof("Container runtime status: %v", s)
 | |
| 	networkReady := s.GetRuntimeCondition(kubecontainer.NetworkReady)
 | |
| 	if networkReady == nil || !networkReady.Status {
 | |
| 		klog.Errorf("Container runtime network not ready: %v", networkReady)
 | |
| 		kl.runtimeState.setNetworkState(fmt.Errorf("runtime network not ready: %v", networkReady))
 | |
| 	} else {
 | |
| 		// Set nil if the container runtime network is ready.
 | |
| 		kl.runtimeState.setNetworkState(nil)
 | |
| 	}
 | |
| 	// information in RuntimeReady condition will be propagated to NodeReady condition.
 | |
| 	runtimeReady := s.GetRuntimeCondition(kubecontainer.RuntimeReady)
 | |
| 	// If RuntimeReady is not set or is false, report an error.
 | |
| 	if runtimeReady == nil || !runtimeReady.Status {
 | |
| 		err := fmt.Errorf("Container runtime not ready: %v", runtimeReady)
 | |
| 		klog.Error(err)
 | |
| 		kl.runtimeState.setRuntimeState(err)
 | |
| 		return
 | |
| 	}
 | |
| 	kl.runtimeState.setRuntimeState(nil)
 | |
| 	kl.oneTimeInitializer.Do(kl.initializeRuntimeDependentModules)
 | |
| 	kl.runtimeState.setRuntimeSync(kl.clock.Now())
 | |
| }
 | |
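| 
 | |
| // kl.oneTimeInitializer is only ever used through Do above, i.e. it behaves like a
 | |
| // sync.Once: the runtime-dependent modules are initialized exactly once, the first
 | |
| // time the runtime reports ready. The pattern in isolation:
 | |
| //
 | |
| //	var once sync.Once
 | |
| //	once.Do(initialize) // runs initialize
 | |
| //	once.Do(initialize) // no-op on every later call
 | |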
| 
 | |
| // GetConfiguration returns the KubeletConfiguration used to configure the kubelet.
 | |
| func (kl *Kubelet) GetConfiguration() kubeletconfiginternal.KubeletConfiguration {
 | |
| 	return kl.kubeletConfiguration
 | |
| }
 | |
| 
 | |
| // BirthCry sends an event that the kubelet has started up.
 | |
| func (kl *Kubelet) BirthCry() {
 | |
| 	// Make an event that kubelet restarted.
 | |
| 	kl.recorder.Eventf(kl.nodeRef, v1.EventTypeNormal, events.StartingKubelet, "Starting kubelet.")
 | |
| }
 | |
| 
 | |
| // ResyncInterval returns the interval used for periodic syncs.
 | |
| func (kl *Kubelet) ResyncInterval() time.Duration {
 | |
| 	return kl.resyncInterval
 | |
| }
 | |
| 
 | |
| // ListenAndServe runs the kubelet HTTP server.
 | |
| func (kl *Kubelet) ListenAndServe(address net.IP, port uint, tlsOptions *server.TLSOptions, auth server.AuthInterface, enableCAdvisorJSONEndpoints, enableDebuggingHandlers, enableContentionProfiling bool) {
 | |
| 	server.ListenAndServeKubeletServer(kl, kl.resourceAnalyzer, address, port, tlsOptions, auth, enableCAdvisorJSONEndpoints, enableDebuggingHandlers, enableContentionProfiling, kl.redirectContainerStreaming, kl.criHandler)
 | |
| }
 | |
| 
 | |
| // ListenAndServeReadOnly runs the kubelet HTTP server in read-only mode.
 | |
| func (kl *Kubelet) ListenAndServeReadOnly(address net.IP, port uint, enableCAdvisorJSONEndpoints bool) {
 | |
| 	server.ListenAndServeKubeletReadOnlyServer(kl, kl.resourceAnalyzer, address, port, enableCAdvisorJSONEndpoints)
 | |
| }
 | |
| 
 | |
| // ListenAndServePodResources runs the kubelet podresources grpc service
 | |
| func (kl *Kubelet) ListenAndServePodResources() {
 | |
| 	socket, err := util.LocalEndpoint(kl.getPodResourcesDir(), podresources.Socket)
 | |
| 	if err != nil {
 | |
| 		klog.V(2).Infof("Failed to get local endpoint for PodResources endpoint: %v", err)
 | |
| 		return
 | |
| 	}
 | |
| 	server.ListenAndServePodResources(socket, kl.podManager, kl.containerManager)
 | |
| }
 | |
| 
 | |
| // Delete the eligible dead container instances in a pod. Depending on the configuration, the latest dead containers may be kept around.
 | |
| func (kl *Kubelet) cleanUpContainersInPod(podID types.UID, exitedContainerID string) {
 | |
| 	if podStatus, err := kl.podCache.Get(podID); err == nil {
 | |
| 		removeAll := false
 | |
| 		if syncedPod, ok := kl.podManager.GetPodByUID(podID); ok {
 | |
| 			// generate the api status using the cached runtime status to get up-to-date ContainerStatuses
 | |
| 			apiPodStatus := kl.generateAPIPodStatus(syncedPod, podStatus)
 | |
| 			// When an evicted or deleted pod has already synced, all containers can be removed.
 | |
| 			removeAll = eviction.PodIsEvicted(syncedPod.Status) || (syncedPod.DeletionTimestamp != nil && notRunning(apiPodStatus.ContainerStatuses))
 | |
| 		}
 | |
| 		kl.containerDeletor.deleteContainersInPod(exitedContainerID, podStatus, removeAll)
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // fastStatusUpdateOnce starts a loop that checks the internal node indexer cache for when a CIDR
 | |
| // is applied and tries to update the pod CIDR immediately. After the pod CIDR is updated it fires off
 | |
| // a runtime update and a node status update. The function returns after one successful node status update.
 | |
| // It is executed only during kubelet startup, which improves the latency to a ready node by updating
 | |
| // the pod CIDR, runtime status and node status as soon as possible.
 | |
| func (kl *Kubelet) fastStatusUpdateOnce() {
 | |
| 	for {
 | |
| 		time.Sleep(100 * time.Millisecond)
 | |
| 		node, err := kl.GetNode()
 | |
| 		if err != nil {
 | |
| 			klog.Errorf(err.Error())
 | |
| 			continue
 | |
| 		}
 | |
| 		if len(node.Spec.PodCIDRs) != 0 {
 | |
| 			podCIDRs := strings.Join(node.Spec.PodCIDRs, ",")
 | |
| 			if _, err := kl.updatePodCIDR(podCIDRs); err != nil {
 | |
| 				klog.Errorf("Pod CIDR update to %v failed %v", podCIDRs, err)
 | |
| 				continue
 | |
| 			}
 | |
| 			kl.updateRuntimeUp()
 | |
| 			kl.syncNodeStatus()
 | |
| 			return
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // isSyncPodWorthy filters out events that are not worthy of pod syncing
 | |
| func isSyncPodWorthy(event *pleg.PodLifecycleEvent) bool {
 | |
| 	// ContainerRemoved doesn't affect pod state
 | |
| 	return event.Type != pleg.ContainerRemoved
 | |
| }
 | |
| 
 | |
| // Gets the streaming server configuration to use with in-process CRI shims.
 | |
| func getStreamingConfig(kubeCfg *kubeletconfiginternal.KubeletConfiguration, kubeDeps *Dependencies, crOptions *config.ContainerRuntimeOptions) *streaming.Config {
 | |
| 	config := &streaming.Config{
 | |
| 		StreamIdleTimeout:               kubeCfg.StreamingConnectionIdleTimeout.Duration,
 | |
| 		StreamCreationTimeout:           streaming.DefaultConfig.StreamCreationTimeout,
 | |
| 		SupportedRemoteCommandProtocols: streaming.DefaultConfig.SupportedRemoteCommandProtocols,
 | |
| 		SupportedPortForwardProtocols:   streaming.DefaultConfig.SupportedPortForwardProtocols,
 | |
| 	}
 | |
| 	if !crOptions.RedirectContainerStreaming {
 | |
| 		config.Addr = net.JoinHostPort("localhost", "0")
 | |
| 	} else {
 | |
| 		// Use a relative redirect (no scheme or host).
 | |
| 		config.BaseURL = &url.URL{
 | |
| 			Path: "/cri/",
 | |
| 		}
 | |
| 		if kubeDeps.TLSOptions != nil {
 | |
| 			config.TLSConfig = kubeDeps.TLSOptions.Config
 | |
| 		}
 | |
| 	}
 | |
| 	return config
 | |
| }
 |
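| 
 | |
| // A hedged usage sketch of the two modes above; the values shown are the ones the
 | |
| // function assigns, not additional behavior:
 | |
| //
 | |
| //	cfg := getStreamingConfig(kubeCfg, kubeDeps, &config.ContainerRuntimeOptions{RedirectContainerStreaming: false})
 | |
| //	// cfg.Addr == "localhost:0": the kubelet proxies exec/attach/port-forward streams itself.
 | |
| //
 | |
| //	cfg = getStreamingConfig(kubeCfg, kubeDeps, &config.ContainerRuntimeOptions{RedirectContainerStreaming: true})
 | |
| //	// cfg.BaseURL.Path == "/cri/": clients are redirected to the runtime's streaming server.
 | |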