Merge pull request #43879 from dcbw/fix-up-runtime-GetNetNS

Automatic merge from submit-queue (batch tested with PRs 46076, 43879, 44897, 46556, 46654)

kubelet/network: report but tolerate errors returned from GetNetNS()

Runtimes should never return "" and nil errors, since network plugin
drivers need to treat netns differently in different cases.  So return
errors when we can't get the netns, and fix up the plugins to do the
right thing.

Namely, we don't need a NetNS on pod network teardown.  We do need
a netns for pod Status checks and for network setup.

@kubernetes/rh-networking @kubernetes/sig-network-bugs @DirectXMan12
This commit is contained in:
Kubernetes Submit Queue 2017-05-30 23:19:59 -07:00 committed by GitHub
commit 3702352aa9
8 changed files with 127 additions and 28 deletions

View File

@ -48,6 +48,26 @@ const (
runtimeName = "docker" runtimeName = "docker"
) )
// Returns whether the sandbox network is ready, and whether the sandbox is known
func (ds *dockerService) getNetworkReady(podSandboxID string) (bool, bool) {
ds.networkReadyLock.Lock()
defer ds.networkReadyLock.Unlock()
ready, ok := ds.networkReady[podSandboxID]
return ready, ok
}
func (ds *dockerService) setNetworkReady(podSandboxID string, ready bool) {
ds.networkReadyLock.Lock()
defer ds.networkReadyLock.Unlock()
ds.networkReady[podSandboxID] = ready
}
func (ds *dockerService) clearNetworkReady(podSandboxID string) {
ds.networkReadyLock.Lock()
defer ds.networkReadyLock.Unlock()
delete(ds.networkReady, podSandboxID)
}
// RunPodSandbox creates and starts a pod-level sandbox. Runtimes should ensure // RunPodSandbox creates and starts a pod-level sandbox. Runtimes should ensure
// the sandbox is in ready state. // the sandbox is in ready state.
// For docker, PodSandbox is implemented by a container holding the network // For docker, PodSandbox is implemented by a container holding the network
@ -82,6 +102,8 @@ func (ds *dockerService) RunPodSandbox(config *runtimeapi.PodSandboxConfig) (str
return "", fmt.Errorf("failed to create a sandbox for pod %q: %v", config.Metadata.Name, err) return "", fmt.Errorf("failed to create a sandbox for pod %q: %v", config.Metadata.Name, err)
} }
ds.setNetworkReady(createResp.ID, false)
// Step 3: Create Sandbox Checkpoint. // Step 3: Create Sandbox Checkpoint.
if err = ds.checkpointHandler.CreateCheckpoint(createResp.ID, constructPodSandboxCheckpoint(config)); err != nil { if err = ds.checkpointHandler.CreateCheckpoint(createResp.ID, constructPodSandboxCheckpoint(config)); err != nil {
return createResp.ID, err return createResp.ID, err
@ -114,6 +136,7 @@ func (ds *dockerService) RunPodSandbox(config *runtimeapi.PodSandboxConfig) (str
// Do not invoke network plugins if in hostNetwork mode. // Do not invoke network plugins if in hostNetwork mode.
if nsOptions := config.GetLinux().GetSecurityContext().GetNamespaceOptions(); nsOptions != nil && nsOptions.HostNetwork { if nsOptions := config.GetLinux().GetSecurityContext().GetNamespaceOptions(); nsOptions != nil && nsOptions.HostNetwork {
ds.setNetworkReady(createResp.ID, true)
return createResp.ID, nil return createResp.ID, nil
} }
@ -126,12 +149,15 @@ func (ds *dockerService) RunPodSandbox(config *runtimeapi.PodSandboxConfig) (str
// recognized by the CNI standard yet. // recognized by the CNI standard yet.
cID := kubecontainer.BuildContainerID(runtimeName, createResp.ID) cID := kubecontainer.BuildContainerID(runtimeName, createResp.ID)
err = ds.network.SetUpPod(config.GetMetadata().Namespace, config.GetMetadata().Name, cID, config.Annotations) err = ds.network.SetUpPod(config.GetMetadata().Namespace, config.GetMetadata().Name, cID, config.Annotations)
if err != nil { if err == nil {
ds.setNetworkReady(createResp.ID, true)
} else {
// TODO(random-liu): Do we need to teardown network here? // TODO(random-liu): Do we need to teardown network here?
if err := ds.client.StopContainer(createResp.ID, defaultSandboxGracePeriod); err != nil { if err := ds.client.StopContainer(createResp.ID, defaultSandboxGracePeriod); err != nil {
glog.Warningf("Failed to stop sandbox container %q for pod %q: %v", createResp.ID, config.Metadata.Name, err) glog.Warningf("Failed to stop sandbox container %q for pod %q: %v", createResp.ID, config.Metadata.Name, err)
} }
} }
return createResp.ID, err return createResp.ID, err
} }
@ -199,7 +225,10 @@ func (ds *dockerService) StopPodSandbox(podSandboxID string) error {
errList := []error{} errList := []error{}
if needNetworkTearDown { if needNetworkTearDown {
cID := kubecontainer.BuildContainerID(runtimeName, podSandboxID) cID := kubecontainer.BuildContainerID(runtimeName, podSandboxID)
if err := ds.network.TearDownPod(namespace, name, cID); err != nil { err := ds.network.TearDownPod(namespace, name, cID)
if err == nil {
ds.setNetworkReady(podSandboxID, false)
} else {
errList = append(errList, err) errList = append(errList, err)
} }
} }
@ -241,6 +270,8 @@ func (ds *dockerService) RemovePodSandbox(podSandboxID string) error {
errs = append(errs, err) errs = append(errs, err)
} }
ds.clearNetworkReady(podSandboxID)
// Remove the checkpoint of the sandbox. // Remove the checkpoint of the sandbox.
if err := ds.checkpointHandler.RemoveCheckpoint(podSandboxID); err != nil { if err := ds.checkpointHandler.RemoveCheckpoint(podSandboxID); err != nil {
errs = append(errs, err) errs = append(errs, err)
@ -258,9 +289,6 @@ func (ds *dockerService) getIPFromPlugin(sandbox *dockertypes.ContainerJSON) (st
cID := kubecontainer.BuildContainerID(runtimeName, sandbox.ID) cID := kubecontainer.BuildContainerID(runtimeName, sandbox.ID)
networkStatus, err := ds.network.GetPodNetworkStatus(metadata.Namespace, metadata.Name, cID) networkStatus, err := ds.network.GetPodNetworkStatus(metadata.Namespace, metadata.Name, cID)
if err != nil { if err != nil {
// This might be a sandbox that somehow ended up without a default
// interface (eth0). We can't distinguish this from a more serious
// error, so callers should probably treat it as non-fatal.
return "", err return "", err
} }
if networkStatus == nil { if networkStatus == nil {
@ -272,7 +300,7 @@ func (ds *dockerService) getIPFromPlugin(sandbox *dockertypes.ContainerJSON) (st
// getIP returns the ip given the output of `docker inspect` on a pod sandbox, // getIP returns the ip given the output of `docker inspect` on a pod sandbox,
// first interrogating any registered plugins, then simply trusting the ip // first interrogating any registered plugins, then simply trusting the ip
// in the sandbox itself. We look for an ipv4 address before ipv6. // in the sandbox itself. We look for an ipv4 address before ipv6.
func (ds *dockerService) getIP(sandbox *dockertypes.ContainerJSON) (string, error) { func (ds *dockerService) getIP(podSandboxID string, sandbox *dockertypes.ContainerJSON) (string, error) {
if sandbox.NetworkSettings == nil { if sandbox.NetworkSettings == nil {
return "", nil return "", nil
} }
@ -281,11 +309,18 @@ func (ds *dockerService) getIP(sandbox *dockertypes.ContainerJSON) (string, erro
// reporting the IP. // reporting the IP.
return "", nil return "", nil
} }
if IP, err := ds.getIPFromPlugin(sandbox); err != nil {
glog.Warningf("%v", err) // Don't bother getting IP if the pod is known and networking isn't ready
} else if IP != "" { ready, ok := ds.getNetworkReady(podSandboxID)
return IP, nil if ok && !ready {
return "", nil
} }
ip, err := ds.getIPFromPlugin(sandbox)
if err == nil {
return ip, nil
}
// TODO: trusting the docker ip is not a great idea. However docker uses // TODO: trusting the docker ip is not a great idea. However docker uses
// eth0 by default and so does CNI, so if we find a docker IP here, we // eth0 by default and so does CNI, so if we find a docker IP here, we
// conclude that the plugin must have failed setup, or forgotten its ip. // conclude that the plugin must have failed setup, or forgotten its ip.
@ -294,7 +329,11 @@ func (ds *dockerService) getIP(sandbox *dockertypes.ContainerJSON) (string, erro
if sandbox.NetworkSettings.IPAddress != "" { if sandbox.NetworkSettings.IPAddress != "" {
return sandbox.NetworkSettings.IPAddress, nil return sandbox.NetworkSettings.IPAddress, nil
} }
if sandbox.NetworkSettings.GlobalIPv6Address != "" {
return sandbox.NetworkSettings.GlobalIPv6Address, nil return sandbox.NetworkSettings.GlobalIPv6Address, nil
}
return "", fmt.Errorf("failed to read pod IP from plugin/docker: %v", err)
} }
// PodSandboxStatus returns the status of the PodSandbox. // PodSandboxStatus returns the status of the PodSandbox.
@ -317,7 +356,7 @@ func (ds *dockerService) PodSandboxStatus(podSandboxID string) (*runtimeapi.PodS
if r.State.Running { if r.State.Running {
state = runtimeapi.PodSandboxState_SANDBOX_READY state = runtimeapi.PodSandboxState_SANDBOX_READY
} }
IP, err := ds.getIP(r) IP, err := ds.getIP(podSandboxID, r)
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@ -133,6 +133,8 @@ func TestSandboxStatus(t *testing.T) {
expected.State = runtimeapi.PodSandboxState_SANDBOX_NOTREADY expected.State = runtimeapi.PodSandboxState_SANDBOX_NOTREADY
err = ds.StopPodSandbox(id) err = ds.StopPodSandbox(id)
assert.NoError(t, err) assert.NoError(t, err)
// IP not valid after sandbox stop
expected.Network.Ip = ""
status, err = ds.PodSandboxStatus(id) status, err = ds.PodSandboxStatus(id)
assert.Equal(t, expected, status) assert.Equal(t, expected, status)
@ -143,6 +145,49 @@ func TestSandboxStatus(t *testing.T) {
assert.Error(t, err, fmt.Sprintf("status of sandbox: %+v", status)) assert.Error(t, err, fmt.Sprintf("status of sandbox: %+v", status))
} }
// TestSandboxStatusAfterRestart tests that retrieving sandbox status returns
// an IP address even if RunPodSandbox() was not yet called for this pod, as
// would happen on kubelet restart
func TestSandboxStatusAfterRestart(t *testing.T) {
ds, _, fClock := newTestDockerService()
config := makeSandboxConfig("foo", "bar", "1", 0)
// TODO: The following variables depend on the internal
// implementation of FakeDockerClient, and should be fixed.
fakeIP := "2.3.4.5"
state := runtimeapi.PodSandboxState_SANDBOX_READY
ct := int64(0)
hostNetwork := false
expected := &runtimeapi.PodSandboxStatus{
State: state,
CreatedAt: ct,
Metadata: config.Metadata,
Network: &runtimeapi.PodSandboxNetworkStatus{Ip: fakeIP},
Linux: &runtimeapi.LinuxPodSandboxStatus{Namespaces: &runtimeapi.Namespace{Options: &runtimeapi.NamespaceOption{HostNetwork: hostNetwork}}},
Labels: map[string]string{},
Annotations: map[string]string{},
}
// Create the sandbox.
fClock.SetTime(time.Now())
expected.CreatedAt = fClock.Now().UnixNano()
createConfig, err := ds.makeSandboxDockerConfig(config, defaultSandboxImage)
assert.NoError(t, err)
createResp, err := ds.client.CreateContainer(*createConfig)
assert.NoError(t, err)
err = ds.client.StartContainer(createResp.ID)
assert.NoError(t, err)
// Check status without RunPodSandbox() having set up networking
expected.Id = createResp.ID // ID is only known after the creation.
status, err := ds.PodSandboxStatus(createResp.ID)
assert.NoError(t, err)
assert.Equal(t, expected, status)
}
// TestNetworkPluginInvocation checks that the right SetUpPod and TearDownPod // TestNetworkPluginInvocation checks that the right SetUpPod and TearDownPod
// calls are made when we run/stop a sandbox. // calls are made when we run/stop a sandbox.
func TestNetworkPluginInvocation(t *testing.T) { func TestNetworkPluginInvocation(t *testing.T) {

View File

@ -21,6 +21,7 @@ import (
"io" "io"
"net/http" "net/http"
"strconv" "strconv"
"sync"
"time" "time"
"github.com/blang/semver" "github.com/blang/semver"
@ -175,6 +176,7 @@ func NewDockerService(client libdocker.Interface, seccompProfileRoot string, pod
containerManager: cm.NewContainerManager(cgroupsName, client), containerManager: cm.NewContainerManager(cgroupsName, client),
checkpointHandler: checkpointHandler, checkpointHandler: checkpointHandler,
disableSharedPID: disableSharedPID, disableSharedPID: disableSharedPID,
networkReady: make(map[string]bool),
} }
// check docker version compatibility. // check docker version compatibility.
@ -248,7 +250,12 @@ type dockerService struct {
podSandboxImage string podSandboxImage string
streamingRuntime *streamingRuntime streamingRuntime *streamingRuntime
streamingServer streaming.Server streamingServer streaming.Server
network *network.PluginManager network *network.PluginManager
// Map of podSandboxID :: network-is-ready
networkReady map[string]bool
networkReadyLock sync.Mutex
containerManager cm.ContainerManager containerManager cm.ContainerManager
// cgroup driver used by Docker runtime. // cgroup driver used by Docker runtime.
cgroupDriver string cgroupDriver string
@ -315,7 +322,7 @@ func (ds *dockerService) GetNetNS(podSandboxID string) (string, error) {
if err != nil { if err != nil {
return "", err return "", err
} }
return getNetworkNamespace(r), nil return getNetworkNamespace(r)
} }
// GetPodPortMappings returns the port mappings of the given podSandbox ID. // GetPodPortMappings returns the port mappings of the given podSandbox ID.

View File

@ -46,8 +46,14 @@ func newTestDockerService() (*dockerService, *libdocker.FakeDockerClient, *clock
fakeClock := clock.NewFakeClock(time.Time{}) fakeClock := clock.NewFakeClock(time.Time{})
c := libdocker.NewFakeDockerClient().WithClock(fakeClock).WithVersion("1.11.2", "1.23") c := libdocker.NewFakeDockerClient().WithClock(fakeClock).WithVersion("1.11.2", "1.23")
pm := network.NewPluginManager(&network.NoopNetworkPlugin{}) pm := network.NewPluginManager(&network.NoopNetworkPlugin{})
return &dockerService{client: c, os: &containertest.FakeOS{}, network: pm, return &dockerService{
legacyCleanup: legacyCleanupFlag{done: 1}, checkpointHandler: NewTestPersistentCheckpointHandler()}, c, fakeClock client: c,
os: &containertest.FakeOS{},
network: pm,
legacyCleanup: legacyCleanupFlag{done: 1},
checkpointHandler: NewTestPersistentCheckpointHandler(),
networkReady: make(map[string]bool),
}, c, fakeClock
} }
func newTestDockerServiceWithVersionCache() (*dockerService, *libdocker.FakeDockerClient, *clock.FakeClock) { func newTestDockerServiceWithVersionCache() (*dockerService, *libdocker.FakeDockerClient, *clock.FakeClock) {

View File

@ -266,14 +266,12 @@ func getApparmorSecurityOpts(sc *runtimeapi.LinuxContainerSecurityContext, separ
return fmtOpts, nil return fmtOpts, nil
} }
func getNetworkNamespace(c *dockertypes.ContainerJSON) string { func getNetworkNamespace(c *dockertypes.ContainerJSON) (string, error) {
if c.State.Pid == 0 { if c.State.Pid == 0 {
// Docker reports pid 0 for an exited container. We can't use it to // Docker reports pid 0 for an exited container.
// check the network namespace, so return an empty string instead. return "", fmt.Errorf("Cannot find network namespace for the terminated container %q", c.ID)
glog.V(4).Infof("Cannot find network namespace for the terminated container %q", c.ID)
return ""
} }
return fmt.Sprintf(dockerNetNSFmt, c.State.Pid) return fmt.Sprintf(dockerNetNSFmt, c.State.Pid), nil
} }
// dockerFilter wraps around dockerfilters.Args and provides methods to modify // dockerFilter wraps around dockerfilters.Args and provides methods to modify

View File

@ -251,9 +251,11 @@ func (plugin *cniNetworkPlugin) TearDownPod(namespace string, name string, id ku
if err := plugin.checkInitialized(); err != nil { if err := plugin.checkInitialized(); err != nil {
return err return err
} }
// Lack of namespace should not be fatal on teardown
netnsPath, err := plugin.host.GetNetNS(id.ID) netnsPath, err := plugin.host.GetNetNS(id.ID)
if err != nil { if err != nil {
return fmt.Errorf("CNI failed to retrieve network namespace path: %v", err) glog.Warningf("CNI failed to retrieve network namespace path: %v", err)
} }
return plugin.deleteFromNetwork(plugin.getDefaultNetwork(), name, namespace, id, netnsPath) return plugin.deleteFromNetwork(plugin.getDefaultNetwork(), name, namespace, id, netnsPath)

View File

@ -741,9 +741,9 @@ func podIsExited(p *kubecontainer.Pod) bool {
return true return true
} }
func (plugin *kubenetNetworkPlugin) buildCNIRuntimeConf(ifName string, id kubecontainer.ContainerID) (*libcni.RuntimeConf, error) { func (plugin *kubenetNetworkPlugin) buildCNIRuntimeConf(ifName string, id kubecontainer.ContainerID, needNetNs bool) (*libcni.RuntimeConf, error) {
netnsPath, err := plugin.host.GetNetNS(id.ID) netnsPath, err := plugin.host.GetNetNS(id.ID)
if err != nil { if needNetNs && err != nil {
glog.Errorf("Kubenet failed to retrieve network namespace path: %v", err) glog.Errorf("Kubenet failed to retrieve network namespace path: %v", err)
} }
@ -755,7 +755,7 @@ func (plugin *kubenetNetworkPlugin) buildCNIRuntimeConf(ifName string, id kubeco
} }
func (plugin *kubenetNetworkPlugin) addContainerToNetwork(config *libcni.NetworkConfig, ifName, namespace, name string, id kubecontainer.ContainerID) (cnitypes.Result, error) { func (plugin *kubenetNetworkPlugin) addContainerToNetwork(config *libcni.NetworkConfig, ifName, namespace, name string, id kubecontainer.ContainerID) (cnitypes.Result, error) {
rt, err := plugin.buildCNIRuntimeConf(ifName, id) rt, err := plugin.buildCNIRuntimeConf(ifName, id, true)
if err != nil { if err != nil {
return nil, fmt.Errorf("Error building CNI config: %v", err) return nil, fmt.Errorf("Error building CNI config: %v", err)
} }
@ -769,7 +769,7 @@ func (plugin *kubenetNetworkPlugin) addContainerToNetwork(config *libcni.Network
} }
func (plugin *kubenetNetworkPlugin) delContainerFromNetwork(config *libcni.NetworkConfig, ifName, namespace, name string, id kubecontainer.ContainerID) error { func (plugin *kubenetNetworkPlugin) delContainerFromNetwork(config *libcni.NetworkConfig, ifName, namespace, name string, id kubecontainer.ContainerID) error {
rt, err := plugin.buildCNIRuntimeConf(ifName, id) rt, err := plugin.buildCNIRuntimeConf(ifName, id, false)
if err != nil { if err != nil {
return fmt.Errorf("Error building CNI config: %v", err) return fmt.Errorf("Error building CNI config: %v", err)
} }

View File

@ -144,6 +144,8 @@ type Host interface {
// CNI plugin wrappers like kubenet. // CNI plugin wrappers like kubenet.
type NamespaceGetter interface { type NamespaceGetter interface {
// GetNetNS returns network namespace information for the given containerID. // GetNetNS returns network namespace information for the given containerID.
// Runtimes should *never* return an empty namespace and nil error for
// a container; if error is nil then the namespace string must be valid.
GetNetNS(containerID string) (string, error) GetNetNS(containerID string) (string, error)
} }