From 2036868a7d8b1345b37de56ad7d83bec42cf8c1e Mon Sep 17 00:00:00 2001
From: Dan Williams
Date: Thu, 30 Mar 2017 14:04:15 -0500
Subject: [PATCH 1/3] Revert "Return empty network namespace if the infra container has exited"

This reverts commit fee4c9a7d94e4a44a3b54f04f4e6e17fe2dde3f5.

This is not the correct fix for the problem, and it causes other problems of its own, like a continuous stream of:

docker_sandbox.go:234] NetworkPlugin cni failed on the status hook for pod "someotherdc-1-deploy_default": Unexpected command output nsenter: cannot open : No such file or directory
 with error: exit status 1

These errors occur because GetNetNS() returns an empty network namespace. That is neither helpful nor something that should be allowed; signaling failure is what the error return from GetNetNS() is for.
---
 pkg/kubelet/dockershim/helpers.go | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/pkg/kubelet/dockershim/helpers.go b/pkg/kubelet/dockershim/helpers.go
index bb4acd53cbc..e488c79fe15 100644
--- a/pkg/kubelet/dockershim/helpers.go
+++ b/pkg/kubelet/dockershim/helpers.go
@@ -264,12 +264,6 @@ func getApparmorSecurityOpts(sc *runtimeapi.LinuxContainerSecurityContext, separ
 }
 
 func getNetworkNamespace(c *dockertypes.ContainerJSON) string {
-	if c.State.Pid == 0 {
-		// Docker reports pid 0 for an exited container. We can't use it to
-		// check the network namespace, so return an empty string instead.
-		glog.V(4).Infof("Cannot find network namespace for the terminated container %q", c.ID)
-		return ""
-	}
 	return fmt.Sprintf(dockerNetNSFmt, c.State.Pid)
 }
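Taken together, this revert and patch 2 below restore a simple contract for GetNetNS(): an exited container yields an explicit error, never an empty string that callers would blindly hand to nsenter. A minimal, self-contained sketch of that contract (the helper below is a stand-in, not the dockershim code; only dockerNetNSFmt mirrors the real format string):

package main

import "fmt"

// dockerNetNSFmt mirrors the dockershim format string for a container's
// network namespace path.
const dockerNetNSFmt = "/proc/%v/ns/net"

// getNetworkNamespace is a stand-in for the patched helper: pid 0, which
// Docker reports for an exited container, is an error rather than "".
func getNetworkNamespace(pid int, id string) (string, error) {
	if pid == 0 {
		// There is no namespace to report, so fail loudly instead of
		// returning an empty path.
		return "", fmt.Errorf("cannot find network namespace for the terminated container %q", id)
	}
	return fmt.Sprintf(dockerNetNSFmt, pid), nil
}

func main() {
	if _, err := getNetworkNamespace(0, "exited-infra"); err != nil {
		// The caller now sees a real error instead of running `nsenter ""`.
		fmt.Println("expected failure:", err)
	}
	ns, _ := getNetworkNamespace(4242, "running-infra")
	fmt.Println("netns:", ns) // /proc/4242/ns/net
}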
+ return "", fmt.Errorf("Cannot find network namespace for the terminated container %q", c.ID) + } + return fmt.Sprintf(dockerNetNSFmt, c.State.Pid), nil } // dockerFilter wraps around dockerfilters.Args and provides methods to modify diff --git a/pkg/kubelet/network/cni/cni.go b/pkg/kubelet/network/cni/cni.go index d3c19f62410..f42ba334644 100644 --- a/pkg/kubelet/network/cni/cni.go +++ b/pkg/kubelet/network/cni/cni.go @@ -251,9 +251,11 @@ func (plugin *cniNetworkPlugin) TearDownPod(namespace string, name string, id ku if err := plugin.checkInitialized(); err != nil { return err } + + // Lack of namespace should not be fatal on teardown netnsPath, err := plugin.host.GetNetNS(id.ID) if err != nil { - return fmt.Errorf("CNI failed to retrieve network namespace path: %v", err) + glog.Warningf("CNI failed to retrieve network namespace path: %v", err) } return plugin.deleteFromNetwork(plugin.getDefaultNetwork(), name, namespace, id, netnsPath) diff --git a/pkg/kubelet/network/kubenet/kubenet_linux.go b/pkg/kubelet/network/kubenet/kubenet_linux.go index 86224c3e46e..a321a63189e 100644 --- a/pkg/kubelet/network/kubenet/kubenet_linux.go +++ b/pkg/kubelet/network/kubenet/kubenet_linux.go @@ -739,9 +739,9 @@ func podIsExited(p *kubecontainer.Pod) bool { return true } -func (plugin *kubenetNetworkPlugin) buildCNIRuntimeConf(ifName string, id kubecontainer.ContainerID) (*libcni.RuntimeConf, error) { +func (plugin *kubenetNetworkPlugin) buildCNIRuntimeConf(ifName string, id kubecontainer.ContainerID, needNetNs bool) (*libcni.RuntimeConf, error) { netnsPath, err := plugin.host.GetNetNS(id.ID) - if err != nil { + if needNetNs && err != nil { glog.Errorf("Kubenet failed to retrieve network namespace path: %v", err) } @@ -753,7 +753,7 @@ func (plugin *kubenetNetworkPlugin) buildCNIRuntimeConf(ifName string, id kubeco } func (plugin *kubenetNetworkPlugin) addContainerToNetwork(config *libcni.NetworkConfig, ifName, namespace, name string, id kubecontainer.ContainerID) (cnitypes.Result, error) { - rt, err := plugin.buildCNIRuntimeConf(ifName, id) + rt, err := plugin.buildCNIRuntimeConf(ifName, id, true) if err != nil { return nil, fmt.Errorf("Error building CNI config: %v", err) } @@ -767,7 +767,7 @@ func (plugin *kubenetNetworkPlugin) addContainerToNetwork(config *libcni.Network } func (plugin *kubenetNetworkPlugin) delContainerFromNetwork(config *libcni.NetworkConfig, ifName, namespace, name string, id kubecontainer.ContainerID) error { - rt, err := plugin.buildCNIRuntimeConf(ifName, id) + rt, err := plugin.buildCNIRuntimeConf(ifName, id, false) if err != nil { return fmt.Errorf("Error building CNI config: %v", err) } diff --git a/pkg/kubelet/network/plugins.go b/pkg/kubelet/network/plugins.go index 859ff46e567..a71f7ef837f 100644 --- a/pkg/kubelet/network/plugins.go +++ b/pkg/kubelet/network/plugins.go @@ -144,6 +144,8 @@ type Host interface { // CNI plugin wrappers like kubenet. type NamespaceGetter interface { // GetNetNS returns network namespace information for the given containerID. + // Runtimes should *never* return an empty namespace and nil error for + // a container; if error is nil then the namespace string must be valid. GetNetNS(containerID string) (string, error) } From 02200ba7521ad9fda33c0878c9d6db5f7ed8e33f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 4 Apr 2017 19:31:54 -0500 Subject: [PATCH 3/3] dockershim: don't spam logs with pod IP errors before networking is ready GenericPLEG's 1s relist() loop races against pod network setup. 
From 02200ba7521ad9fda33c0878c9d6db5f7ed8e33f Mon Sep 17 00:00:00 2001
From: Dan Williams
Date: Tue, 4 Apr 2017 19:31:54 -0500
Subject: [PATCH 3/3] dockershim: don't spam logs with pod IP errors before networking is ready

GenericPLEG's 1s relist() loop races against pod network setup. It may
be called after the infra container has started but before network
setup is done, since PLEG and the runtime's SyncPod() run in different
goroutines. Track network setup status and don't bother trying to read
the pod's IP address if networking is not yet ready.

See also: https://bugzilla.redhat.com/show_bug.cgi?id=1434950

Mar 22 12:18:17 ip-172-31-43-89 atomic-openshift-node: E0322
12:18:17.651013 25624 docker_manager.go:378] NetworkPlugin cni failed
on the status hook for pod 'pausepods22' - Unexpected command output
Device "eth0" does not exist.
---
 pkg/kubelet/dockershim/docker_sandbox.go      | 63 +++++++++++++++----
 pkg/kubelet/dockershim/docker_sandbox_test.go | 45 +++++++++++++
 pkg/kubelet/dockershim/docker_service.go      | 11 +++-
 pkg/kubelet/dockershim/docker_service_test.go | 10 ++-
 4 files changed, 113 insertions(+), 16 deletions(-)

diff --git a/pkg/kubelet/dockershim/docker_sandbox.go b/pkg/kubelet/dockershim/docker_sandbox.go
index 1f6b9b426a2..2c72296596e 100644
--- a/pkg/kubelet/dockershim/docker_sandbox.go
+++ b/pkg/kubelet/dockershim/docker_sandbox.go
@@ -48,6 +48,26 @@ const (
 	runtimeName = "docker"
 )
 
+// Returns whether the sandbox network is ready, and whether the sandbox is known
+func (ds *dockerService) getNetworkReady(podSandboxID string) (bool, bool) {
+	ds.networkReadyLock.Lock()
+	defer ds.networkReadyLock.Unlock()
+	ready, ok := ds.networkReady[podSandboxID]
+	return ready, ok
+}
+
+func (ds *dockerService) setNetworkReady(podSandboxID string, ready bool) {
+	ds.networkReadyLock.Lock()
+	defer ds.networkReadyLock.Unlock()
+	ds.networkReady[podSandboxID] = ready
+}
+
+func (ds *dockerService) clearNetworkReady(podSandboxID string) {
+	ds.networkReadyLock.Lock()
+	defer ds.networkReadyLock.Unlock()
+	delete(ds.networkReady, podSandboxID)
+}
+
 // RunPodSandbox creates and starts a pod-level sandbox. Runtimes should ensure
 // the sandbox is in ready state.
 // For docker, PodSandbox is implemented by a container holding the network
@@ -82,6 +102,8 @@ func (ds *dockerService) RunPodSandbox(config *runtimeapi.PodSandboxConfig) (str
 		return "", fmt.Errorf("failed to create a sandbox for pod %q: %v", config.Metadata.Name, err)
 	}
 
+	ds.setNetworkReady(createResp.ID, false)
+
 	// Step 3: Create Sandbox Checkpoint.
 	if err = ds.checkpointHandler.CreateCheckpoint(createResp.ID, constructPodSandboxCheckpoint(config)); err != nil {
 		return createResp.ID, err
@@ -114,6 +136,7 @@ func (ds *dockerService) RunPodSandbox(config *runtimeapi.PodSandboxConfig) (str
 
 	// Do not invoke network plugins if in hostNetwork mode.
 	if nsOptions := config.GetLinux().GetSecurityContext().GetNamespaceOptions(); nsOptions != nil && nsOptions.HostNetwork {
+		ds.setNetworkReady(createResp.ID, true)
 		return createResp.ID, nil
 	}
 
@@ -126,12 +149,15 @@ func (ds *dockerService) RunPodSandbox(config *runtimeapi.PodSandboxConfig) (str
 	// recognized by the CNI standard yet.
 	cID := kubecontainer.BuildContainerID(runtimeName, createResp.ID)
 	err = ds.network.SetUpPod(config.GetMetadata().Namespace, config.GetMetadata().Name, cID, config.Annotations)
-	if err != nil {
+	if err == nil {
+		ds.setNetworkReady(createResp.ID, true)
+	} else {
 		// TODO(random-liu): Do we need to teardown network here?
 		if err := ds.client.StopContainer(createResp.ID, defaultSandboxGracePeriod); err != nil {
 			glog.Warningf("Failed to stop sandbox container %q for pod %q: %v", createResp.ID, config.Metadata.Name, err)
 		}
 	}
+
 	return createResp.ID, err
 }
 
@@ -199,7 +225,10 @@ func (ds *dockerService) StopPodSandbox(podSandboxID string) error {
 	errList := []error{}
 	if needNetworkTearDown {
 		cID := kubecontainer.BuildContainerID(runtimeName, podSandboxID)
-		if err := ds.network.TearDownPod(namespace, name, cID); err != nil {
+		err := ds.network.TearDownPod(namespace, name, cID)
+		if err == nil {
+			ds.setNetworkReady(podSandboxID, false)
+		} else {
 			errList = append(errList, err)
 		}
 	}
@@ -241,6 +270,8 @@ func (ds *dockerService) RemovePodSandbox(podSandboxID string) error {
 		errs = append(errs, err)
 	}
 
+	ds.clearNetworkReady(podSandboxID)
+
 	// Remove the checkpoint of the sandbox.
 	if err := ds.checkpointHandler.RemoveCheckpoint(podSandboxID); err != nil {
 		errs = append(errs, err)
@@ -258,9 +289,6 @@ func (ds *dockerService) getIPFromPlugin(sandbox *dockertypes.ContainerJSON) (st
 	cID := kubecontainer.BuildContainerID(runtimeName, sandbox.ID)
 	networkStatus, err := ds.network.GetPodNetworkStatus(metadata.Namespace, metadata.Name, cID)
 	if err != nil {
-		// This might be a sandbox that somehow ended up without a default
-		// interface (eth0). We can't distinguish this from a more serious
-		// error, so callers should probably treat it as non-fatal.
 		return "", err
 	}
 	if networkStatus == nil {
@@ -272,7 +300,7 @@ func (ds *dockerService) getIPFromPlugin(sandbox *dockertypes.ContainerJSON) (st
 // getIP returns the ip given the output of `docker inspect` on a pod sandbox,
 // first interrogating any registered plugins, then simply trusting the ip
 // in the sandbox itself. We look for an ipv4 address before ipv6.
-func (ds *dockerService) getIP(sandbox *dockertypes.ContainerJSON) (string, error) {
+func (ds *dockerService) getIP(podSandboxID string, sandbox *dockertypes.ContainerJSON) (string, error) {
 	if sandbox.NetworkSettings == nil {
 		return "", nil
 	}
@@ -281,11 +309,18 @@ func (ds *dockerService) getIP(sandbox *dockertypes.ContainerJSON) (string, erro
 		// reporting the IP.
 		return "", nil
 	}
-	if IP, err := ds.getIPFromPlugin(sandbox); err != nil {
-		glog.Warningf("%v", err)
-	} else if IP != "" {
-		return IP, nil
+
+	// Don't bother getting IP if the pod is known and networking isn't ready
+	ready, ok := ds.getNetworkReady(podSandboxID)
+	if ok && !ready {
+		return "", nil
 	}
+
+	ip, err := ds.getIPFromPlugin(sandbox)
+	if err == nil {
+		return ip, nil
+	}
+
 	// TODO: trusting the docker ip is not a great idea. However docker uses
 	// eth0 by default and so does CNI, so if we find a docker IP here, we
 	// conclude that the plugin must have failed setup, or forgotten its ip.
@@ -294,7 +329,11 @@ func (ds *dockerService) getIP(sandbox *dockertypes.ContainerJSON) (string, erro
 	if sandbox.NetworkSettings.IPAddress != "" {
 		return sandbox.NetworkSettings.IPAddress, nil
 	}
-	return sandbox.NetworkSettings.GlobalIPv6Address, nil
+	if sandbox.NetworkSettings.GlobalIPv6Address != "" {
+		return sandbox.NetworkSettings.GlobalIPv6Address, nil
+	}
+
+	return "", fmt.Errorf("failed to read pod IP from plugin/docker: %v", err)
 }
 
 // PodSandboxStatus returns the status of the PodSandbox.
@@ -317,7 +356,7 @@ func (ds *dockerService) PodSandboxStatus(podSandboxID string) (*runtimeapi.PodS
 	if r.State.Running {
 		state = runtimeapi.PodSandboxState_SANDBOX_READY
 	}
-	IP, err := ds.getIP(r)
+	IP, err := ds.getIP(podSandboxID, r)
 	if err != nil {
 		return nil, err
 	}
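The heart of patch 3 is the decision chain in getIP(). A condensed sketch of that chain follows (names and helpers invented; the real code is in docker_sandbox.go above): skip sandboxes that are known but not yet networked, then trust the plugin, then fall back to docker's IPv4 and IPv6 addresses, and only then report an error.

package main

import "fmt"

type networkSettings struct {
	IPAddress         string
	GlobalIPv6Address string
}

func getIP(known, ready bool, settings networkSettings, pluginIP func() (string, error)) (string, error) {
	// Don't ask the plugin while setup is still in flight; this is what
	// stops PLEG's relist() from spamming status-hook errors.
	if known && !ready {
		return "", nil
	}
	ip, err := pluginIP()
	if err == nil {
		return ip, nil
	}
	// Plugin failed: fall back to what docker reported, IPv4 before IPv6.
	if settings.IPAddress != "" {
		return settings.IPAddress, nil
	}
	if settings.GlobalIPv6Address != "" {
		return settings.GlobalIPv6Address, nil
	}
	return "", fmt.Errorf("failed to read pod IP from plugin/docker: %v", err)
}

func main() {
	failing := func() (string, error) { return "", fmt.Errorf(`Device "eth0" does not exist`) }

	// Known sandbox, network setup still running: no IP, no error, no log spam.
	fmt.Println(getIP(true, false, networkSettings{}, failing))

	// Unknown sandbox (e.g. kubelet restarted) with a failing plugin: docker fallback.
	fmt.Println(getIP(false, false, networkSettings{IPAddress: "2.3.4.5"}, failing))
}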
diff --git a/pkg/kubelet/dockershim/docker_sandbox_test.go b/pkg/kubelet/dockershim/docker_sandbox_test.go
index 64de76f7b3b..0ce30c8d601 100644
--- a/pkg/kubelet/dockershim/docker_sandbox_test.go
+++ b/pkg/kubelet/dockershim/docker_sandbox_test.go
@@ -133,6 +133,8 @@ func TestSandboxStatus(t *testing.T) {
 	expected.State = runtimeapi.PodSandboxState_SANDBOX_NOTREADY
 	err = ds.StopPodSandbox(id)
 	assert.NoError(t, err)
+	// IP not valid after sandbox stop
+	expected.Network.Ip = ""
 	status, err = ds.PodSandboxStatus(id)
 	assert.Equal(t, expected, status)
 
@@ -143,6 +145,49 @@ func TestSandboxStatus(t *testing.T) {
 	assert.Error(t, err, fmt.Sprintf("status of sandbox: %+v", status))
 }
 
+// TestSandboxStatusAfterRestart tests that retrieving sandbox status returns
+// an IP address even if RunPodSandbox() was not yet called for this pod, as
+// would happen on kubelet restart
+func TestSandboxStatusAfterRestart(t *testing.T) {
+	ds, _, fClock := newTestDockerService()
+	config := makeSandboxConfig("foo", "bar", "1", 0)
+
+	// TODO: The following variables depend on the internal
+	// implementation of FakeDockerClient, and should be fixed.
+	fakeIP := "2.3.4.5"
+
+	state := runtimeapi.PodSandboxState_SANDBOX_READY
+	ct := int64(0)
+	hostNetwork := false
+	expected := &runtimeapi.PodSandboxStatus{
+		State:       state,
+		CreatedAt:   ct,
+		Metadata:    config.Metadata,
+		Network:     &runtimeapi.PodSandboxNetworkStatus{Ip: fakeIP},
+		Linux:       &runtimeapi.LinuxPodSandboxStatus{Namespaces: &runtimeapi.Namespace{Options: &runtimeapi.NamespaceOption{HostNetwork: hostNetwork}}},
+		Labels:      map[string]string{},
+		Annotations: map[string]string{},
+	}
+
+	// Create the sandbox.
+	fClock.SetTime(time.Now())
+	expected.CreatedAt = fClock.Now().UnixNano()
+
+	createConfig, err := ds.makeSandboxDockerConfig(config, defaultSandboxImage)
+	assert.NoError(t, err)
+
+	createResp, err := ds.client.CreateContainer(*createConfig)
+	assert.NoError(t, err)
+	err = ds.client.StartContainer(createResp.ID)
+	assert.NoError(t, err)
+
+	// Check status without RunPodSandbox() having set up networking
+	expected.Id = createResp.ID // ID is only known after the creation.
+	status, err := ds.PodSandboxStatus(createResp.ID)
+	assert.NoError(t, err)
+	assert.Equal(t, expected, status)
+}
+
 // TestNetworkPluginInvocation checks that the right SetUpPod and TearDownPod
 // calls are made when we run/stop a sandbox.
 func TestNetworkPluginInvocation(t *testing.T) {
diff --git a/pkg/kubelet/dockershim/docker_service.go b/pkg/kubelet/dockershim/docker_service.go
index 2818e43f936..bb2cfa267ef 100644
--- a/pkg/kubelet/dockershim/docker_service.go
+++ b/pkg/kubelet/dockershim/docker_service.go
@@ -21,6 +21,7 @@ import (
 	"io"
 	"net/http"
 	"strconv"
+	"sync"
 	"time"
 
 	"github.com/blang/semver"
@@ -175,6 +176,7 @@ func NewDockerService(client libdocker.Interface, seccompProfileRoot string, pod
 		containerManager:  cm.NewContainerManager(cgroupsName, client),
 		checkpointHandler: checkpointHandler,
 		disableSharedPID:  disableSharedPID,
+		networkReady:      make(map[string]bool),
 	}
 
 	// check docker version compatibility.
@@ -248,8 +250,13 @@ type dockerService struct { podSandboxImage string streamingRuntime *streamingRuntime streamingServer streaming.Server - network *network.PluginManager - containerManager cm.ContainerManager + + network *network.PluginManager + // Map of podSandboxID :: network-is-ready + networkReady map[string]bool + networkReadyLock sync.Mutex + + containerManager cm.ContainerManager // cgroup driver used by Docker runtime. cgroupDriver string checkpointHandler CheckpointHandler diff --git a/pkg/kubelet/dockershim/docker_service_test.go b/pkg/kubelet/dockershim/docker_service_test.go index 9969bffa968..634c2d4675b 100644 --- a/pkg/kubelet/dockershim/docker_service_test.go +++ b/pkg/kubelet/dockershim/docker_service_test.go @@ -46,8 +46,14 @@ func newTestDockerService() (*dockerService, *libdocker.FakeDockerClient, *clock fakeClock := clock.NewFakeClock(time.Time{}) c := libdocker.NewFakeDockerClient().WithClock(fakeClock).WithVersion("1.11.2", "1.23") pm := network.NewPluginManager(&network.NoopNetworkPlugin{}) - return &dockerService{client: c, os: &containertest.FakeOS{}, network: pm, - legacyCleanup: legacyCleanupFlag{done: 1}, checkpointHandler: NewTestPersistentCheckpointHandler()}, c, fakeClock + return &dockerService{ + client: c, + os: &containertest.FakeOS{}, + network: pm, + legacyCleanup: legacyCleanupFlag{done: 1}, + checkpointHandler: NewTestPersistentCheckpointHandler(), + networkReady: make(map[string]bool), + }, c, fakeClock } func newTestDockerServiceWithVersionCache() (*dockerService, *libdocker.FakeDockerClient, *clock.FakeClock) {
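Putting patch 3 together, a sandbox's networkReady entry follows a small lifecycle: created false in RunPodSandbox(), flipped to true once SetUpPod() succeeds (or immediately for hostNetwork pods), flipped back to false by a successful TearDownPod() in StopPodSandbox(), and deleted in RemovePodSandbox(). The sketch below walks that lifecycle with invented stand-ins (this is not the dockershim API); the map is mutex-guarded because PLEG's relist() and the runtime's SyncPod() touch it from different goroutines.

package main

import (
	"fmt"
	"sync"
)

// sandboxTracker stands in for the dockerService fields this series adds:
// a mutex-guarded podSandboxID -> network-is-ready map.
type sandboxTracker struct {
	mu    sync.Mutex
	ready map[string]bool
}

func newSandboxTracker() *sandboxTracker {
	return &sandboxTracker{ready: make(map[string]bool)}
}

func (t *sandboxTracker) set(id string, ready bool) {
	t.mu.Lock()
	defer t.mu.Unlock()
	t.ready[id] = ready
}

func (t *sandboxTracker) clear(id string) {
	t.mu.Lock()
	defer t.mu.Unlock()
	delete(t.ready, id)
}

func (t *sandboxTracker) get(id string) (ready, known bool) {
	t.mu.Lock()
	defer t.mu.Unlock()
	r, ok := t.ready[id]
	return r, ok
}

func main() {
	t := newSandboxTracker()
	id := "sandbox1"

	t.set(id, false) // RunPodSandbox: sandbox created, network not set up yet
	t.set(id, true)  // SetUpPod() succeeded (or hostNetwork pod)
	t.set(id, false) // StopPodSandbox: TearDownPod() succeeded
	t.clear(id)      // RemovePodSandbox: forget the sandbox entirely

	// A restarted kubelet has no entry for pre-existing sandboxes, so status
	// checks fall through to the plugin: exactly the case that
	// TestSandboxStatusAfterRestart exercises.
	_, known := t.get(id)
	fmt.Println("known after remove:", known) // false
}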