diff --git a/pkg/kubelet/dockershim/docker_service.go b/pkg/kubelet/dockershim/docker_service.go
index b37a714ec03..c85bd615475 100644
--- a/pkg/kubelet/dockershim/docker_service.go
+++ b/pkg/kubelet/dockershim/docker_service.go
@@ -21,10 +21,14 @@ import (
 	"io"
 
 	"k8s.io/kubernetes/pkg/api"
+	"k8s.io/kubernetes/pkg/apis/componentconfig"
 	internalApi "k8s.io/kubernetes/pkg/kubelet/api"
 	runtimeApi "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
 	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
 	"k8s.io/kubernetes/pkg/kubelet/dockertools"
+	"k8s.io/kubernetes/pkg/kubelet/network"
+	"k8s.io/kubernetes/pkg/kubelet/network/cni"
+	"k8s.io/kubernetes/pkg/kubelet/network/kubenet"
 	"k8s.io/kubernetes/pkg/kubelet/server/streaming"
 	"k8s.io/kubernetes/pkg/util/term"
 )
@@ -53,10 +57,40 @@ const (
 	sandboxIDLabelKey           = "io.kubernetes.sandbox.id"
 )
 
+// NetworkPluginSettings is the subset of kubelet runtime args we pass
+// to the container runtime shim so it can probe for network plugins.
+// In the future we will feed these directly to a standalone container
+// runtime process.
+type NetworkPluginSettings struct {
+	// HairpinMode is best described by comments surrounding the kubelet arg
+	HairpinMode componentconfig.HairpinMode
+	// NonMasqueradeCIDR is the range of ips which should *not* be included
+	// in any MASQUERADE rules applied by the plugin
+	NonMasqueradeCIDR string
+	// PluginName is the name of the plugin the runtime shim probes for
+	PluginName string
+	// PluginBinDir is the directory in which the binaries for the plugin with
+	// PluginName are kept. The admin is responsible for provisioning these
+	// binaries before-hand.
+	PluginBinDir string
+	// PluginConfDir is the directory in which the admin places a CNI conf.
+	// Depending on the plugin, this may be an optional field, eg: kubenet
+	// generates its own plugin conf.
+	PluginConfDir string
+	// MTU is the desired MTU for network devices created by the plugin.
+	MTU int
+
+	// RuntimeHost is an interface that serves as a trap-door from plugin back
+	// into the kubelet.
+	// TODO: This shouldn't be required, remove once we move host
+	// ports into CNI.
+	RuntimeHost network.Host
+}
+
 var internalLabelKeys []string = []string{containerTypeLabelKey, containerLogPathLabelKey, sandboxIDLabelKey}
 
 // NOTE: Anything passed to DockerService should be eventually handled in another way when we switch to running the shim as a different process.
-func NewDockerService(client dockertools.DockerInterface, seccompProfileRoot string, podSandboxImage string, streamingConfig *streaming.Config) (DockerService, error) {
+func NewDockerService(client dockertools.DockerInterface, seccompProfileRoot string, podSandboxImage string, streamingConfig *streaming.Config, pluginSettings *NetworkPluginSettings) (DockerService, error) {
 	ds := &dockerService{
 		seccompProfileRoot: seccompProfileRoot,
 		client:             dockertools.NewInstrumentedDockerInterface(client),
@@ -76,6 +110,14 @@ func NewDockerService(client dockertools.DockerInterface, seccompProfileRoot str
 			return nil, err
 		}
 	}
+	// dockershim currently only supports CNI plugins.
+	cniPlugins := cni.ProbeNetworkPlugins(pluginSettings.PluginConfDir, pluginSettings.PluginBinDir)
+	cniPlugins = append(cniPlugins, kubenet.NewPlugin(pluginSettings.PluginBinDir))
+	plug, err := network.InitNetworkPlugin(cniPlugins, pluginSettings.PluginName, pluginSettings.RuntimeHost, pluginSettings.HairpinMode, pluginSettings.NonMasqueradeCIDR, pluginSettings.MTU)
+	if err != nil {
+		return nil, fmt.Errorf("didn't find compatible CNI plugin with given settings %+v: %v", pluginSettings, err)
+	}
+	ds.networkPlugin = plug
 	return ds, nil
 }
 
@@ -105,6 +147,7 @@ type dockerService struct {
 	podSandboxImage    string
 	streamingRuntime   *streamingRuntime
 	streamingServer    streaming.Server
+	networkPlugin      network.NetworkPlugin
 }
 
 // Version returns the runtime name, runtime version and runtime API version
diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go
index 372e8577ddd..647ae4913d4 100644
--- a/pkg/kubelet/kubelet.go
+++ b/pkg/kubelet/kubelet.go
@@ -482,6 +482,20 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
 		}
 	}
 
+	binDir := kubeCfg.CNIBinDir
+	if binDir == "" {
+		binDir = kubeCfg.NetworkPluginDir
+	}
+	pluginSettings := dockershim.NetworkPluginSettings{
+		HairpinMode:       klet.hairpinMode,
+		NonMasqueradeCIDR: klet.nonMasqueradeCIDR,
+		PluginName:        kubeCfg.NetworkPluginName,
+		PluginConfDir:     kubeCfg.CNIConfDir,
+		PluginBinDir:      binDir,
+		MTU:               int(kubeCfg.NetworkPluginMTU),
+		RuntimeHost:       &networkHost{klet},
+	}
+
 	// Initialize the runtime.
 	switch kubeCfg.ContainerRuntime {
 	case "docker":
@@ -489,10 +503,10 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
 	case "cri":
 		// Use the new CRI shim for docker. This is needed for testing the
 		// docker integration through CRI, and may be removed in the future.
-		dockerService, err := dockershim.NewDockerService(klet.dockerClient, kubeCfg.SeccompProfileRoot, kubeCfg.PodInfraContainerImage, nil)
+		dockerService, err := dockershim.NewDockerService(klet.dockerClient, kubeCfg.SeccompProfileRoot, kubeCfg.PodInfraContainerImage, nil, &pluginSettings)
 		if err != nil {
 			return nil, err
 		}
 		runtimeService := dockerService.(internalApi.RuntimeService)
 		imageService := dockerService.(internalApi.ImageManagerService)
 
@@ -520,6 +534,13 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
 				return nil, err
 			}
 		}
+
+		// kubelet defers to the runtime shim to setup networking. Setting
+		// this to nil will prevent it from trying to invoke the plugin.
+		// It's easier to always probe and initialize plugins till cri
+		// becomes the default.
+		klet.networkPlugin = nil
+
 		klet.containerRuntime, err = kuberuntime.NewKubeGenericRuntimeManager(
 			kubecontainer.FilterEventRecorder(kubeDeps.Recorder),
 			klet.livenessManager,
@@ -1202,6 +1223,13 @@ func (kl *Kubelet) Run(updates <-chan kubetypes.PodUpdate) {
 	kl.syncLoop(updates, kl)
 }
 
+// GetKubeClient returns the Kubernetes client.
+// TODO: This is currently only required by network plugins. Replace
+// with more specific methods.
+func (kl *Kubelet) GetKubeClient() clientset.Interface {
+	return kl.kubeClient
+}
+
 // GetClusterDNS returns a list of the DNS servers and a list of the DNS search
 // domains of the cluster.
 func (kl *Kubelet) GetClusterDNS(pod *api.Pod) ([]string, []string, error) {
diff --git a/pkg/kubelet/kubelet_network.go b/pkg/kubelet/kubelet_network.go
index 753630a1017..18c8600c3e0 100644
--- a/pkg/kubelet/kubelet_network.go
+++ b/pkg/kubelet/kubelet_network.go
@@ -191,7 +191,17 @@ func (kl *Kubelet) cleanupBandwidthLimits(allPods []*api.Pod) error {
 
 // syncNetworkStatus updates the network state
 func (kl *Kubelet) syncNetworkStatus() {
-	kl.runtimeState.setNetworkState(kl.networkPlugin.Status())
+	// TODO(#35701): cri shim handles network plugin but we currently
+	// don't have a cri status hook, so network plugin status isn't
+	// reported if --experimental-runtime-integration=cri. This isn't
+	// too bad, because kubenet is the only network plugin that
+	// implements status(), and it just checks for plugin binaries
+	// on the filesystem.
+	if kl.networkPlugin != nil {
+		kl.runtimeState.setNetworkState(kl.networkPlugin.Status())
+	} else {
+		kl.runtimeState.setNetworkState(nil)
+	}
 }
 
 // updatePodCIDR updates the pod CIDR in the runtime state if it is different
@@ -219,6 +229,16 @@ func (kl *Kubelet) shapingEnabled() bool {
 	if kl.networkPlugin != nil && kl.networkPlugin.Capabilities().Has(network.NET_PLUGIN_CAPABILITY_SHAPING) {
 		return false
 	}
+	// This is not strictly true but we need to figure out how to handle
+	// bandwidth shaping anyway. If the kubelet doesn't have a networkPlugin,
+	// it could mean:
+	// a. the kubelet is responsible for bandwidth shaping
+	// b. the kubelet is using cri, and the cri has a network plugin
+	// Today, the only plugin that understands bandwidth shaping is kubenet, and
+	// it doesn't support bandwidth shaping when invoked through cri, so it
+	// effectively boils down to letting the kubelet decide how to handle
+	// shaping annotations. The combination of (cri + network plugin that
+	// handles bandwidth shaping) may not work because of this.
 	return true
 }
 