From ddb1c6a127bf50267a92949a47e36b19f7253691 Mon Sep 17 00:00:00 2001 From: liucimin Date: Thu, 19 Sep 2019 22:56:03 +0800 Subject: [PATCH] fix cni timeout --- pkg/kubelet/dockershim/network/cni/cni.go | 22 ++++++++++++------- .../dockershim/network/cni/cni_test.go | 4 ++-- .../dockershim/network/cni/cni_windows.go | 9 ++++++-- .../network/kubenet/kubenet_linux.go | 13 ++++++++--- .../network/kubenet/kubenet_linux_test.go | 4 ++-- pkg/kubelet/dockershim/network/network.go | 3 +++ 6 files changed, 38 insertions(+), 17 deletions(-) diff --git a/pkg/kubelet/dockershim/network/cni/cni.go b/pkg/kubelet/dockershim/network/cni/cni.go index 48d95b5a8a4..cb353219a06 100644 --- a/pkg/kubelet/dockershim/network/cni/cni.go +++ b/pkg/kubelet/dockershim/network/cni/cni.go @@ -306,14 +306,17 @@ func (plugin *cniNetworkPlugin) SetUpPod(namespace string, name string, id kubec return fmt.Errorf("CNI failed to retrieve network namespace path: %v", err) } + // Todo get the timeout from parent ctx + cniTimeoutCtx, cancelFunc := context.WithTimeout(context.Background(), network.CNITimeoutSec*time.Second) + defer cancelFunc() // Windows doesn't have loNetwork. It comes only with Linux if plugin.loNetwork != nil { - if _, err = plugin.addToNetwork(plugin.loNetwork, name, namespace, id, netnsPath, annotations, options); err != nil { + if _, err = plugin.addToNetwork(cniTimeoutCtx, plugin.loNetwork, name, namespace, id, netnsPath, annotations, options); err != nil { return err } } - _, err = plugin.addToNetwork(plugin.getDefaultNetwork(), name, namespace, id, netnsPath, annotations, options) + _, err = plugin.addToNetwork(cniTimeoutCtx, plugin.getDefaultNetwork(), name, namespace, id, netnsPath, annotations, options) return err } @@ -328,22 +331,25 @@ func (plugin *cniNetworkPlugin) TearDownPod(namespace string, name string, id ku klog.Warningf("CNI failed to retrieve network namespace path: %v", err) } + // Todo get the timeout from parent ctx + cniTimeoutCtx, cancelFunc := context.WithTimeout(context.Background(), network.CNITimeoutSec*time.Second) + defer cancelFunc() // Windows doesn't have loNetwork. It comes only with Linux if plugin.loNetwork != nil { // Loopback network deletion failure should not be fatal on teardown - if err := plugin.deleteFromNetwork(plugin.loNetwork, name, namespace, id, netnsPath, nil); err != nil { + if err := plugin.deleteFromNetwork(cniTimeoutCtx, plugin.loNetwork, name, namespace, id, netnsPath, nil); err != nil { klog.Warningf("CNI failed to delete loopback network: %v", err) } } - return plugin.deleteFromNetwork(plugin.getDefaultNetwork(), name, namespace, id, netnsPath, nil) + return plugin.deleteFromNetwork(cniTimeoutCtx, plugin.getDefaultNetwork(), name, namespace, id, netnsPath, nil) } func podDesc(namespace, name string, id kubecontainer.ContainerID) string { return fmt.Sprintf("%s_%s/%s", namespace, name, id.ID) } -func (plugin *cniNetworkPlugin) addToNetwork(network *cniNetwork, podName string, podNamespace string, podSandboxID kubecontainer.ContainerID, podNetnsPath string, annotations, options map[string]string) (cnitypes.Result, error) { +func (plugin *cniNetworkPlugin) addToNetwork(ctx context.Context, network *cniNetwork, podName string, podNamespace string, podSandboxID kubecontainer.ContainerID, podNetnsPath string, annotations, options map[string]string) (cnitypes.Result, error) { rt, err := plugin.buildCNIRuntimeConf(podName, podNamespace, podSandboxID, podNetnsPath, annotations, options) if err != nil { klog.Errorf("Error adding network when building cni runtime conf: %v", err) @@ -353,7 +359,7 @@ func (plugin *cniNetworkPlugin) addToNetwork(network *cniNetwork, podName string pdesc := podDesc(podNamespace, podName, podSandboxID) netConf, cniNet := network.NetworkConfig, network.CNIConfig klog.V(4).Infof("Adding %s to network %s/%s netns %q", pdesc, netConf.Plugins[0].Network.Type, netConf.Name, podNetnsPath) - res, err := cniNet.AddNetworkList(context.TODO(), netConf, rt) + res, err := cniNet.AddNetworkList(ctx, netConf, rt) if err != nil { klog.Errorf("Error adding %s to network %s/%s: %v", pdesc, netConf.Plugins[0].Network.Type, netConf.Name, err) return nil, err @@ -362,7 +368,7 @@ func (plugin *cniNetworkPlugin) addToNetwork(network *cniNetwork, podName string return res, nil } -func (plugin *cniNetworkPlugin) deleteFromNetwork(network *cniNetwork, podName string, podNamespace string, podSandboxID kubecontainer.ContainerID, podNetnsPath string, annotations map[string]string) error { +func (plugin *cniNetworkPlugin) deleteFromNetwork(ctx context.Context, network *cniNetwork, podName string, podNamespace string, podSandboxID kubecontainer.ContainerID, podNetnsPath string, annotations map[string]string) error { rt, err := plugin.buildCNIRuntimeConf(podName, podNamespace, podSandboxID, podNetnsPath, annotations, nil) if err != nil { klog.Errorf("Error deleting network when building cni runtime conf: %v", err) @@ -372,7 +378,7 @@ func (plugin *cniNetworkPlugin) deleteFromNetwork(network *cniNetwork, podName s pdesc := podDesc(podNamespace, podName, podSandboxID) netConf, cniNet := network.NetworkConfig, network.CNIConfig klog.V(4).Infof("Deleting %s from network %s/%s netns %q", pdesc, netConf.Plugins[0].Network.Type, netConf.Name, podNetnsPath) - err = cniNet.DelNetworkList(context.TODO(), netConf, rt) + err = cniNet.DelNetworkList(ctx, netConf, rt) // The pod may not get deleted successfully at the first time. // Ignore "no such file or directory" error in case the network has already been deleted in previous attempts. if err != nil && !strings.Contains(err.Error(), "no such file or directory") { diff --git a/pkg/kubelet/dockershim/network/cni/cni_test.go b/pkg/kubelet/dockershim/network/cni/cni_test.go index 4ed0dec0d05..d7253e9b734 100644 --- a/pkg/kubelet/dockershim/network/cni/cni_test.go +++ b/pkg/kubelet/dockershim/network/cni/cni_test.go @@ -228,8 +228,8 @@ func TestCNIPlugin(t *testing.T) { cniPlugin.execer = fexec cniPlugin.loNetwork.CNIConfig = mockLoCNI - mockLoCNI.On("AddNetworkList", mock.AnythingOfType("*context.emptyCtx"), cniPlugin.loNetwork.NetworkConfig, mock.AnythingOfType("*libcni.RuntimeConf")).Return(&types020.Result{IP4: &types020.IPConfig{IP: net.IPNet{IP: []byte{127, 0, 0, 1}}}}, nil) - mockLoCNI.On("DelNetworkList", mock.AnythingOfType("*context.emptyCtx"), cniPlugin.loNetwork.NetworkConfig, mock.AnythingOfType("*libcni.RuntimeConf")).Return(nil) + mockLoCNI.On("AddNetworkList", mock.AnythingOfType("*context.timerCtx"), cniPlugin.loNetwork.NetworkConfig, mock.AnythingOfType("*libcni.RuntimeConf")).Return(&types020.Result{IP4: &types020.IPConfig{IP: net.IPNet{IP: []byte{127, 0, 0, 1}}}}, nil) + mockLoCNI.On("DelNetworkList", mock.AnythingOfType("*context.timerCtx"), cniPlugin.loNetwork.NetworkConfig, mock.AnythingOfType("*libcni.RuntimeConf")).Return(nil) // Check that status returns an error if err := cniPlugin.Status(); err == nil { diff --git a/pkg/kubelet/dockershim/network/cni/cni_windows.go b/pkg/kubelet/dockershim/network/cni/cni_windows.go index e39e878e1ce..f3ba62c70de 100644 --- a/pkg/kubelet/dockershim/network/cni/cni_windows.go +++ b/pkg/kubelet/dockershim/network/cni/cni_windows.go @@ -19,7 +19,9 @@ limitations under the License. package cni import ( + "context" "fmt" + "time" cniTypes020 "github.com/containernetworking/cni/pkg/types/020" runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" @@ -43,8 +45,11 @@ func (plugin *cniNetworkPlugin) GetPodNetworkStatus(namespace string, name strin return nil, fmt.Errorf("CNI failed to retrieve network namespace path: %v", err) } - result, err := plugin.addToNetwork(plugin.getDefaultNetwork(), name, namespace, id, netnsPath, nil, nil) - + // Because the default remote runtime request timeout is 4 min,so set slightly less than 240 seconds + // Todo get the timeout from parent ctx + cniTimeoutCtx, cancelFunc := context.WithTimeout(context.Background(), network.CNITimeoutSec*time.Second) + defer cancelFunc() + result, err := plugin.addToNetwork(cniTimeoutCtx, plugin.getDefaultNetwork(), name, namespace, id, netnsPath, nil, nil) klog.V(5).Infof("GetPodNetworkStatus result %+v", result) if err != nil { klog.Errorf("error while adding to cni network: %s", err) diff --git a/pkg/kubelet/dockershim/network/kubenet/kubenet_linux.go b/pkg/kubelet/dockershim/network/kubenet/kubenet_linux.go index df6145a50bb..98e1b8e9084 100644 --- a/pkg/kubelet/dockershim/network/kubenet/kubenet_linux.go +++ b/pkg/kubelet/dockershim/network/kubenet/kubenet_linux.go @@ -690,8 +690,11 @@ func (plugin *kubenetNetworkPlugin) addContainerToNetwork(config *libcni.Network } klog.V(3).Infof("Adding %s/%s to '%s' with CNI '%s' plugin and runtime: %+v", namespace, name, config.Network.Name, config.Network.Type, rt) - - res, err := plugin.cniConfig.AddNetwork(context.TODO(), config, rt) + // Because the default remote runtime request timeout is 4 min,so set slightly less than 240 seconds + // Todo get the timeout from parent ctx + cniTimeoutCtx, cancelFunc := context.WithTimeout(context.Background(), network.CNITimeoutSec*time.Second) + defer cancelFunc() + res, err := plugin.cniConfig.AddNetwork(cniTimeoutCtx, config, rt) if err != nil { return nil, fmt.Errorf("error adding container to network: %v", err) } @@ -705,7 +708,11 @@ func (plugin *kubenetNetworkPlugin) delContainerFromNetwork(config *libcni.Netwo } klog.V(3).Infof("Removing %s/%s from '%s' with CNI '%s' plugin and runtime: %+v", namespace, name, config.Network.Name, config.Network.Type, rt) - err = plugin.cniConfig.DelNetwork(context.TODO(), config, rt) + // Because the default remote runtime request timeout is 4 min,so set slightly less than 240 seconds + // Todo get the timeout from parent ctx + cniTimeoutCtx, cancelFunc := context.WithTimeout(context.Background(), network.CNITimeoutSec*time.Second) + defer cancelFunc() + err = plugin.cniConfig.DelNetwork(cniTimeoutCtx, config, rt) // The pod may not get deleted successfully at the first time. // Ignore "no such file or directory" error in case the network has already been deleted in previous attempts. if err != nil && !strings.Contains(err.Error(), "no such file or directory") { diff --git a/pkg/kubelet/dockershim/network/kubenet/kubenet_linux_test.go b/pkg/kubelet/dockershim/network/kubenet/kubenet_linux_test.go index bfe5eadf170..7ebec13803e 100644 --- a/pkg/kubelet/dockershim/network/kubenet/kubenet_linux_test.go +++ b/pkg/kubelet/dockershim/network/kubenet/kubenet_linux_test.go @@ -172,7 +172,7 @@ func TestTeardownCallsShaper(t *testing.T) { kubenet.bandwidthShaper = fshaper kubenet.hostportSyncer = hostporttest.NewFakeHostportSyncer() - mockcni.On("DelNetwork", mock.AnythingOfType("*context.emptyCtx"), mock.AnythingOfType("*libcni.NetworkConfig"), mock.AnythingOfType("*libcni.RuntimeConf")).Return(nil) + mockcni.On("DelNetwork", mock.AnythingOfType("*context.timerCtx"), mock.AnythingOfType("*libcni.NetworkConfig"), mock.AnythingOfType("*libcni.RuntimeConf")).Return(nil) details := make(map[string]interface{}) details[network.NET_PLUGIN_EVENT_POD_CIDR_CHANGE_DETAIL_CIDR] = "10.0.0.1/24" @@ -301,7 +301,7 @@ func TestTearDownWithoutRuntime(t *testing.T) { existingContainerID := kubecontainer.BuildContainerID("docker", "123") kubenet.podIPs[existingContainerID] = utilsets.NewString(tc.ip) - mockcni.On("DelNetwork", mock.AnythingOfType("*context.emptyCtx"), mock.AnythingOfType("*libcni.NetworkConfig"), mock.AnythingOfType("*libcni.RuntimeConf")).Return(nil) + mockcni.On("DelNetwork", mock.AnythingOfType("*context.timerCtx"), mock.AnythingOfType("*libcni.NetworkConfig"), mock.AnythingOfType("*libcni.RuntimeConf")).Return(nil) if err := kubenet.TearDownPod("namespace", "name", existingContainerID); err != nil { t.Fatalf("Unexpected error in TearDownPod: %v", err) diff --git a/pkg/kubelet/dockershim/network/network.go b/pkg/kubelet/dockershim/network/network.go index 217fc762525..fbd1ba12550 100644 --- a/pkg/kubelet/dockershim/network/network.go +++ b/pkg/kubelet/dockershim/network/network.go @@ -19,6 +19,9 @@ package network // TODO: Consider making this value configurable. const DefaultInterfaceName = "eth0" +// CNITimeoutSec is set to be slightly less than 240sec/4mins, which is the default remote runtime request timeout. +const CNITimeoutSec = 220 + // UseDefaultMTU is a marker value that indicates the plugin should determine its own MTU // It is the zero value, so a non-initialized value will mean "UseDefault" const UseDefaultMTU = 0