Merge pull request #25062 from dcbw/kubenet-rkt

Automatic merge from submit-queue

Hook rkt kubelet runtime up to network plugins
k8s-merge-robot 2016-05-21 00:45:19 -07:00
commit 423a4154be
11 changed files with 343 additions and 96 deletions

View File

@@ -97,6 +97,12 @@ type Runtime interface {
 	RemoveImage(image ImageSpec) error
 	// Returns Image statistics.
 	ImageStats() (*ImageStats, error)
+	// Returns the filesystem path of the pod's network namespace; if the
+	// runtime does not handle namespace creation itself, or cannot return
+	// the network namespace path, it should return an error.
+	// TODO: Change ContainerID to a Pod ID since the namespace is shared
+	// by all containers in the pod.
+	GetNetNS(containerID ContainerID) (string, error)
 	// TODO(vmarmol): Unify pod and containerID args.
 	// GetContainerLogs returns logs of a specific container. By
 	// default, it returns a snapshot of the container log. Set 'follow' to true to
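
To make the new contract concrete, here is a small hypothetical sketch (not part of this PR) of a consumer that depends only on GetNetNS. The local ContainerID struct and the /var/run/netns/k8s_<uid> path layout are assumptions borrowed from the rkt changes further down in this diff.

    package main

    import "fmt"

    type ContainerID struct{ ID string }

    // netnsProvider is the slice of the Runtime interface a network plugin would need.
    type netnsProvider interface {
        GetNetNS(containerID ContainerID) (string, error)
    }

    // rktLikeRuntime derives the path from the pod UID, mirroring the idea that
    // the namespace is per-pod rather than per-container.
    type rktLikeRuntime struct{}

    func (rktLikeRuntime) GetNetNS(id ContainerID) (string, error) {
        return fmt.Sprintf("/var/run/netns/k8s_%s", id.ID), nil
    }

    func main() {
        var r netnsProvider = rktLikeRuntime{}
        path, err := r.GetNetNS(ContainerID{ID: "0f554e65"}) // made-up ID
        fmt.Println(path, err)
    }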

View File

@@ -338,6 +338,14 @@ func (f *FakeRuntime) PortForward(pod *Pod, port uint16, stream io.ReadWriteClos
 	return f.Err
 }
 
+func (f *FakeRuntime) GetNetNS(containerID ContainerID) (string, error) {
+	f.Lock()
+	defer f.Unlock()
+	f.CalledFunctions = append(f.CalledFunctions, "GetNetNS")
+	return "", f.Err
+}
+
 func (f *FakeRuntime) GarbageCollect(gcPolicy ContainerGCPolicy) error {
 	f.Lock()
 	defer f.Unlock()

View File

@@ -128,6 +128,11 @@ func (r *Mock) PortForward(pod *Pod, port uint16, stream io.ReadWriteCloser) err
 	return args.Error(0)
 }
 
+func (r *Mock) GetNetNS(containerID ContainerID) (string, error) {
+	args := r.Called(containerID)
+	return "", args.Error(0)
+}
+
 func (r *Mock) GarbageCollect(gcPolicy ContainerGCPolicy) error {
 	args := r.Called(gcPolicy)
 	return args.Error(0)

View File

@@ -1959,7 +1959,7 @@ func (dm *DockerManager) SyncPod(pod *api.Pod, _ api.PodStatus, podStatus *kubec
 		}
 
 		if dm.configureHairpinMode {
-			if err = hairpin.SetUpContainer(podInfraContainer.State.Pid, network.DefaultInterfaceName); err != nil {
+			if err = hairpin.SetUpContainerPid(podInfraContainer.State.Pid, network.DefaultInterfaceName); err != nil {
 				glog.Warningf("Hairpin setup failed for pod %q: %v", format.Pod(pod), err)
 			}
 		}

View File

@@ -439,6 +439,8 @@ func NewMainKubelet(
 			klet.livenessManager,
 			klet.volumeManager,
 			klet.httpClient,
+			klet.networkPlugin,
+			klet.hairpinMode == componentconfig.HairpinVeth,
 			utilexec.New(),
 			kubecontainer.RealOS{},
 			imageBackOff,

View File

@@ -40,13 +40,23 @@ var (
 	ethtoolOutputRegex = regexp.MustCompile("peer_ifindex: (\\d+)")
 )
 
-func SetUpContainer(containerPid int, containerInterfaceName string) error {
-	e := exec.New()
-	return setUpContainerInternal(e, containerPid, containerInterfaceName)
+func SetUpContainerPid(containerPid int, containerInterfaceName string) error {
+	pidStr := fmt.Sprintf("%d", containerPid)
+	nsenterArgs := []string{"-t", pidStr, "-n"}
+	return setUpContainerInternal(containerInterfaceName, pidStr, nsenterArgs)
+}
+
+func SetUpContainerPath(netnsPath string, containerInterfaceName string) error {
+	if netnsPath[0] != '/' {
+		return fmt.Errorf("netnsPath path '%s' was invalid", netnsPath)
+	}
+	nsenterArgs := []string{"-n", netnsPath}
+	return setUpContainerInternal(containerInterfaceName, netnsPath, nsenterArgs)
 }
 
-func setUpContainerInternal(e exec.Interface, containerPid int, containerInterfaceName string) error {
-	hostIfName, err := findPairInterfaceOfContainerInterface(e, containerPid, containerInterfaceName)
+func setUpContainerInternal(containerInterfaceName, containerDesc string, nsenterArgs []string) error {
+	e := exec.New()
+	hostIfName, err := findPairInterfaceOfContainerInterface(e, containerInterfaceName, containerDesc, nsenterArgs)
 	if err != nil {
 		glog.Infof("Unable to find pair interface, setting up all interfaces: %v", err)
 		return setUpAllInterfaces()
@@ -54,7 +64,7 @@ func setUpContainerInternal(e exec.Interface, containerPid int, containerInterfa
 	return setUpInterface(hostIfName)
 }
 
-func findPairInterfaceOfContainerInterface(e exec.Interface, containerPid int, containerInterfaceName string) (string, error) {
+func findPairInterfaceOfContainerInterface(e exec.Interface, containerInterfaceName, containerDesc string, nsenterArgs []string) (string, error) {
 	nsenterPath, err := e.LookPath("nsenter")
 	if err != nil {
 		return "", err
@@ -63,15 +73,16 @@ func findPairInterfaceOfContainerInterface(e exec.Interface, containerPid int, c
 	if err != nil {
 		return "", err
 	}
 	// Get container's interface index
-	output, err := e.Command(nsenterPath, "-t", fmt.Sprintf("%d", containerPid), "-n", "-F", "--", ethtoolPath, "--statistics", containerInterfaceName).CombinedOutput()
+	nsenterArgs = append(nsenterArgs, "-F", "--", ethtoolPath, "--statistics", containerInterfaceName)
+	output, err := e.Command(nsenterPath, nsenterArgs...).CombinedOutput()
 	if err != nil {
-		return "", fmt.Errorf("Unable to query interface %s of container %d: %v: %s", containerInterfaceName, containerPid, err, string(output))
+		return "", fmt.Errorf("Unable to query interface %s of container %s: %v: %s", containerInterfaceName, containerDesc, err, string(output))
 	}
 	// look for peer_ifindex
 	match := ethtoolOutputRegex.FindSubmatch(output)
 	if match == nil {
-		return "", fmt.Errorf("No peer_ifindex in interface statistics for %s of container %d", containerInterfaceName, containerPid)
+		return "", fmt.Errorf("No peer_ifindex in interface statistics for %s of container %s", containerInterfaceName, containerDesc)
 	}
 	peerIfIndex, err := strconv.Atoi(string(match[1]))
 	if err != nil { // seems impossible (\d+ not numeric)
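
For illustration, a standalone sketch of the command both entry points end up building: the nsenter arguments select the namespace either by PID or by an explicit netns path, and everything after "--" is the ethtool statistics query. The absolute binary paths and the sample PID/netns name below are made up; the real code resolves the binaries with LookPath.

    package main

    import (
        "fmt"
        "os/exec"
    )

    // peerIfindexArgs mirrors the argument assembly: namespace selector first,
    // then the ethtool invocation that reports peer_ifindex for the veth pair.
    func peerIfindexArgs(nsenterArgs []string, ifName string) []string {
        return append(nsenterArgs, "-F", "--", "/usr/sbin/ethtool", "--statistics", ifName)
    }

    func main() {
        // Enter the namespace of a process (SetUpContainerPid style).
        byPid := exec.Command("/usr/bin/nsenter", peerIfindexArgs([]string{"-t", "1234", "-n"}, "eth0")...)
        // Enter a named namespace by path (SetUpContainerPath style).
        byPath := exec.Command("/usr/bin/nsenter", peerIfindexArgs([]string{"-n", "/var/run/netns/k8s_abc"}, "eth0")...)
        fmt.Println(byPid.Args)
        fmt.Println(byPath.Args)
    }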

View File

@@ -69,7 +69,8 @@ func TestFindPairInterfaceOfContainerInterface(t *testing.T) {
 				return fmt.Sprintf("/fake-bin/%s", file), nil
 			},
 		}
-		name, err := findPairInterfaceOfContainerInterface(&fexec, 123, "eth0")
+		nsenterArgs := []string{"-t", "123", "-n"}
+		name, err := findPairInterfaceOfContainerInterface(&fexec, "eth0", "123", nsenterArgs)
 		if test.expectErr {
 			if err == nil {
 				t.Errorf("unexpected non-error")

View File

@@ -31,10 +31,10 @@ import (
 	"github.com/vishvananda/netlink/nl"
 	"github.com/appc/cni/libcni"
+	cnitypes "github.com/appc/cni/pkg/types"
 	"github.com/golang/glog"
 	"k8s.io/kubernetes/pkg/apis/componentconfig"
 	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
-	"k8s.io/kubernetes/pkg/kubelet/dockertools"
 	"k8s.io/kubernetes/pkg/kubelet/network"
 	"k8s.io/kubernetes/pkg/util/bandwidth"
 	utilexec "k8s.io/kubernetes/pkg/util/exec"
@@ -55,6 +55,7 @@ type kubenetNetworkPlugin struct {
 	host      network.Host
 	netConfig *libcni.NetworkConfig
+	loConfig  *libcni.NetworkConfig
 	cniConfig *libcni.CNIConfig
 	shaper    bandwidth.BandwidthShaper
 	podCIDRs  map[kubecontainer.ContainerID]string
@@ -94,10 +95,20 @@ func (plugin *kubenetNetworkPlugin) Init(host network.Host, hairpinMode componen
 	// was built-in, we simply ignore the error here. A better thing to do is
 	// to check the kernel version in the future.
 	plugin.execer.Command("modprobe", "br-netfilter").CombinedOutput()
-	if err := utilsysctl.SetSysctl(sysctlBridgeCallIptables, 1); err != nil {
+	err := utilsysctl.SetSysctl(sysctlBridgeCallIptables, 1)
+	if err != nil {
 		glog.Warningf("can't set sysctl %s: %v", sysctlBridgeCallIptables, err)
 	}
+
+	plugin.loConfig, err = libcni.ConfFromBytes([]byte(`{
+  "cniVersion": "0.1.0",
+  "name": "kubenet-loopback",
+  "type": "loopback"
+}`))
+	if err != nil {
+		return fmt.Errorf("Failed to generate loopback config: %v", err)
+	}
+
 	return nil
 }
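
The inline loopback config added above is ordinary CNI JSON. A minimal sketch of what it parses to is below; the struct is a simplified stand-in for libcni's parsed NetworkConfig type, not the real one.

    package main

    import (
        "encoding/json"
        "fmt"
    )

    // cniNetConf captures only the fields present in the loopback config.
    type cniNetConf struct {
        CNIVersion string `json:"cniVersion"`
        Name       string `json:"name"`
        Type       string `json:"type"`
    }

    func main() {
        raw := []byte(`{
      "cniVersion": "0.1.0",
      "name": "kubenet-loopback",
      "type": "loopback"
    }`)
        var conf cniNetConf
        if err := json.Unmarshal(raw, &conf); err != nil {
            panic(err)
        }
        // The "loopback" type names the standard CNI loopback plugin, which
        // simply brings up "lo" inside the target network namespace.
        fmt.Printf("%+v\n", conf)
    }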
@@ -250,7 +261,7 @@ func (plugin *kubenetNetworkPlugin) SetUpPod(namespace string, name string, id k
 	start := time.Now()
 	defer func() {
-		glog.V(4).Infof("TearDownPod took %v for %s/%s", time.Since(start), namespace, name)
+		glog.V(4).Infof("SetUpPod took %v for %s/%s", time.Since(start), namespace, name)
 	}()
 
 	pod, ok := plugin.host.GetPodByName(namespace, name)
@@ -266,24 +277,21 @@ func (plugin *kubenetNetworkPlugin) SetUpPod(namespace string, name string, id k
 		return fmt.Errorf("Kubenet cannot SetUpPod: %v", err)
 	}
 
-	runtime, ok := plugin.host.GetRuntime().(*dockertools.DockerManager)
-	if !ok {
-		return fmt.Errorf("Kubenet execution called on non-docker runtime")
-	}
-	netnsPath, err := runtime.GetNetNS(id)
-	if err != nil {
-		return fmt.Errorf("Kubenet failed to retrieve network namespace path: %v", err)
-	}
-
-	rt := buildCNIRuntimeConf(name, namespace, id, netnsPath)
-	if err != nil {
-		return fmt.Errorf("Error building CNI config: %v", err)
-	}
-
-	if err = plugin.addContainerToNetwork(id, rt); err != nil {
+	// Bring up container loopback interface
+	if _, err := plugin.addContainerToNetwork(plugin.loConfig, "lo", namespace, name, id); err != nil {
 		return err
 	}
 
+	// Hook container up with our bridge
+	res, err := plugin.addContainerToNetwork(plugin.netConfig, network.DefaultInterfaceName, namespace, name, id)
+	if err != nil {
+		return err
+	}
+	if res.IP4 == nil || res.IP4.IP.String() == "" {
+		return fmt.Errorf("CNI plugin reported no IPv4 address for container %v.", id)
+	}
+	plugin.podCIDRs[id] = res.IP4.IP.String()
+
 	// Put the container bridge into promiscuous mode to force it to accept hairpin packets.
 	// TODO: Remove this once the kernel bug (#20096) is fixed.
 	// TODO: check and set promiscuous mode with netlink once vishvananda/netlink supports it
@@ -330,20 +338,6 @@ func (plugin *kubenetNetworkPlugin) TearDownPod(namespace string, name string, i
 		return fmt.Errorf("Kubenet needs a PodCIDR to tear down pods")
 	}
 
-	runtime, ok := plugin.host.GetRuntime().(*dockertools.DockerManager)
-	if !ok {
-		return fmt.Errorf("Kubenet execution called on non-docker runtime")
-	}
-	netnsPath, err := runtime.GetNetNS(id)
-	if err != nil {
-		return err
-	}
-
-	rt := buildCNIRuntimeConf(name, namespace, id, netnsPath)
-	if err != nil {
-		return fmt.Errorf("Error building CNI config: %v", err)
-	}
-
 	// no cached CIDR is Ok during teardown
 	if cidr, ok := plugin.podCIDRs[id]; ok {
 		glog.V(5).Infof("Removing pod CIDR %s from shaper", cidr)
@@ -354,9 +348,10 @@ func (plugin *kubenetNetworkPlugin) TearDownPod(namespace string, name string, i
 			}
 		}
 	}
-	if err = plugin.delContainerFromNetwork(id, rt); err != nil {
+	if err := plugin.delContainerFromNetwork(plugin.netConfig, network.DefaultInterfaceName, namespace, name, id); err != nil {
 		return err
 	}
+	delete(plugin.podCIDRs, id)
 	return nil
 }
@@ -373,12 +368,8 @@ func (plugin *kubenetNetworkPlugin) GetPodNetworkStatus(namespace string, name s
 			return &network.PodNetworkStatus{IP: ip}, nil
 		}
 	}
-	// TODO: remove type conversion once kubenet supports multiple runtime
-	runtime, ok := plugin.host.GetRuntime().(*dockertools.DockerManager)
-	if !ok {
-		return nil, fmt.Errorf("Kubenet execution called on non-docker runtime")
-	}
-	netnsPath, err := runtime.GetNetNS(id)
+
+	netnsPath, err := plugin.host.GetRuntime().GetNetNS(id)
 	if err != nil {
 		return nil, fmt.Errorf("Kubenet failed to retrieve network namespace path: %v", err)
 	}
@@ -412,37 +403,43 @@ func (plugin *kubenetNetworkPlugin) Status() error {
 	return nil
 }
 
-func buildCNIRuntimeConf(podName string, podNs string, podInfraContainerID kubecontainer.ContainerID, podNetnsPath string) *libcni.RuntimeConf {
-	glog.V(4).Infof("Kubenet: using netns path %v", podNetnsPath)
-	glog.V(4).Infof("Kubenet: using podns path %v", podNs)
+func (plugin *kubenetNetworkPlugin) buildCNIRuntimeConf(ifName string, id kubecontainer.ContainerID) (*libcni.RuntimeConf, error) {
+	netnsPath, err := plugin.host.GetRuntime().GetNetNS(id)
+	if err != nil {
+		return nil, fmt.Errorf("Kubenet failed to retrieve network namespace path: %v", err)
+	}
 
 	return &libcni.RuntimeConf{
-		ContainerID: podInfraContainerID.ID,
-		NetNS:       podNetnsPath,
-		IfName:      network.DefaultInterfaceName,
-	}
+		ContainerID: id.ID,
+		NetNS:       netnsPath,
+		IfName:      ifName,
+	}, nil
 }
 
-func (plugin *kubenetNetworkPlugin) addContainerToNetwork(id kubecontainer.ContainerID, rt *libcni.RuntimeConf) error {
-	glog.V(3).Infof("Calling cni plugins to add container to network with cni runtime: %+v", rt)
-	res, err := plugin.cniConfig.AddNetwork(plugin.netConfig, rt)
+func (plugin *kubenetNetworkPlugin) addContainerToNetwork(config *libcni.NetworkConfig, ifName, namespace, name string, id kubecontainer.ContainerID) (*cnitypes.Result, error) {
+	rt, err := plugin.buildCNIRuntimeConf(ifName, id)
 	if err != nil {
-		return fmt.Errorf("Error adding container to network: %v", err)
-	}
-	if res.IP4 == nil || res.IP4.IP.String() == "" {
-		return fmt.Errorf("CNI plugin reported no IPv4 address for container %v.", id)
+		return nil, fmt.Errorf("Error building CNI config: %v", err)
 	}
 
-	plugin.podCIDRs[id] = res.IP4.IP.String()
-	return nil
+	glog.V(3).Infof("Adding %s/%s to '%s' with CNI '%s' plugin and runtime: %+v", namespace, name, config.Network.Name, config.Network.Type, rt)
+	res, err := plugin.cniConfig.AddNetwork(config, rt)
+	if err != nil {
+		return nil, fmt.Errorf("Error adding container to network: %v", err)
+	}
+	return res, nil
 }
 
-func (plugin *kubenetNetworkPlugin) delContainerFromNetwork(id kubecontainer.ContainerID, rt *libcni.RuntimeConf) error {
-	glog.V(3).Infof("Calling cni plugins to remove container from network with cni runtime: %+v", rt)
-	if err := plugin.cniConfig.DelNetwork(plugin.netConfig, rt); err != nil {
+func (plugin *kubenetNetworkPlugin) delContainerFromNetwork(config *libcni.NetworkConfig, ifName, namespace, name string, id kubecontainer.ContainerID) error {
+	rt, err := plugin.buildCNIRuntimeConf(ifName, id)
+	if err != nil {
+		return fmt.Errorf("Error building CNI config: %v", err)
+	}
+
+	glog.V(3).Infof("Removing %s/%s from '%s' with CNI '%s' plugin and runtime: %+v", namespace, name, config.Network.Name, config.Network.Type, rt)
+	if err := plugin.cniConfig.DelNetwork(config, rt); err != nil {
 		return fmt.Errorf("Error removing container from network: %v", err)
 	}
-	delete(plugin.podCIDRs, id)
 	return nil
 }
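
A simplified stand-in (not the real libcni type) for the per-interface runtime configuration buildCNIRuntimeConf now produces: same container ID and netns path for both CNI invocations, with only IfName differing between the loopback and bridge calls. The ID and path below are made up.

    package main

    import "fmt"

    type runtimeConf struct {
        ContainerID string
        NetNS       string
        IfName      string
    }

    func buildRuntimeConf(ifName, containerID, netnsPath string) runtimeConf {
        return runtimeConf{ContainerID: containerID, NetNS: netnsPath, IfName: ifName}
    }

    func main() {
        netns := "/var/run/netns/k8s_0f554e65"
        fmt.Printf("%+v\n", buildRuntimeConf("lo", "0f554e65", netns))   // loopback pass
        fmt.Printf("%+v\n", buildRuntimeConf("eth0", "0f554e65", netns)) // bridge pass
    }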

View File

@@ -17,6 +17,8 @@ limitations under the License.
 package kubenet
 
 import (
+	"fmt"
+
 	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
 	"k8s.io/kubernetes/pkg/kubelet/network"
 	nettest "k8s.io/kubernetes/pkg/kubelet/network/testing"
@@ -38,9 +40,6 @@ func TestGetPodNetworkStatus(t *testing.T) {
 	podIPMap[kubecontainer.ContainerID{ID: "1"}] = "10.245.0.2/32"
 	podIPMap[kubecontainer.ContainerID{ID: "2"}] = "10.245.0.3/32"
 
-	fhost := nettest.NewFakeHost(nil)
-	fakeKubenet := newFakeKubenetPlugin(podIPMap, nil, fhost)
-
 	testCases := []struct {
 		id          string
 		expectError bool
@@ -66,6 +65,34 @@
 		//TODO: add test cases for retrieving ip inside container network namespace
 	}
 
+	fakeCmds := make([]exec.FakeCommandAction, 0)
+	for _, t := range testCases {
+		// the fake commands return the IP from the given index, or an error
+		fCmd := exec.FakeCmd{
+			CombinedOutputScript: []exec.FakeCombinedOutputAction{
+				func() ([]byte, error) {
+					ip, ok := podIPMap[kubecontainer.ContainerID{ID: t.id}]
+					if !ok {
+						return nil, fmt.Errorf("Pod IP %q not found", t.id)
+					}
+					return []byte(ip), nil
+				},
+			},
+		}
+		fakeCmds = append(fakeCmds, func(cmd string, args ...string) exec.Cmd {
+			return exec.InitFakeCmd(&fCmd, cmd, args...)
+		})
+	}
+	fexec := exec.FakeExec{
+		CommandScript: fakeCmds,
+		LookPathFunc: func(file string) (string, error) {
+			return fmt.Sprintf("/fake-bin/%s", file), nil
+		},
+	}
+
+	fhost := nettest.NewFakeHost(nil)
+	fakeKubenet := newFakeKubenetPlugin(podIPMap, &fexec, fhost)
+
 	for i, tc := range testCases {
 		out, err := fakeKubenet.GetPodNetworkStatus("", "", kubecontainer.ContainerID{ID: tc.id})
 		if tc.expectError {
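
The fake exec wired up above stands in for a real command that reads the pod IP from inside the container's network namespace. Below is a hedged, standalone sketch of that kind of lookup; the exact command and flags kubenet uses are not shown in this diff, so treat this purely as an illustration of the idea.

    package main

    import (
        "fmt"
        "os/exec"
        "strings"
    )

    // podIPFromNetns enters the named netns and reads the IPv4 address of ifName.
    func podIPFromNetns(netnsPath, ifName string) (string, error) {
        out, err := exec.Command("nsenter", "--net="+netnsPath, "-F", "--",
            "ip", "-o", "-4", "addr", "show", "dev", ifName).CombinedOutput()
        if err != nil {
            return "", fmt.Errorf("reading %s in %s: %v: %s", ifName, netnsPath, err, out)
        }
        // Output looks like: "2: eth0    inet 10.245.0.2/32 scope global eth0 ..."
        fields := strings.Fields(string(out))
        for i, f := range fields {
            if f == "inet" && i+1 < len(fields) {
                return fields[i+1], nil
            }
        }
        return "", fmt.Errorf("no IPv4 address found for %s", ifName)
    }

    func main() {
        ip, err := podIPFromNetns("/var/run/netns/k8s_demo", "eth0") // made-up netns
        fmt.Println(ip, err)
    }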

View File

@@ -45,6 +45,8 @@ import (
 	"k8s.io/kubernetes/pkg/credentialprovider"
 	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
 	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
+	"k8s.io/kubernetes/pkg/kubelet/network"
+	"k8s.io/kubernetes/pkg/kubelet/network/hairpin"
 	proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results"
 	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
 	"k8s.io/kubernetes/pkg/kubelet/util/format"
@@ -74,8 +76,9 @@ const (
 	kubernetesUnitPrefix  = "k8s_"
 	unitKubernetesSection = "X-Kubernetes"
-	unitPodName           = "POD"
-	unitRktID             = "RktID"
+	unitPodUID            = "PodUID"
+	unitPodName           = "PodName"
+	unitPodNamespace      = "PodNamespace"
 	unitRestartCount      = "RestartCount"
 
 	k8sRktKubeletAnno = "rkt.kubernetes.io/managed-by-kubelet"
@@ -133,8 +136,17 @@ type Runtime struct {
 	execer utilexec.Interface
 	os     kubecontainer.OSInterface
 
+	// Network plugin.
+	networkPlugin network.NetworkPlugin
+
+	// If true, the "hairpin mode" flag is set on container interfaces.
+	// A false value means the kubelet just backs off from setting it,
+	// it might already be true.
+	configureHairpinMode bool
+
 	// used for a systemd Exec, which requires the full path.
 	touchPath   string
+	nsenterPath string
 
 	versions versions
 }
@@ -171,6 +183,8 @@ func New(
 	livenessManager proberesults.Manager,
 	volumeGetter volumeGetter,
 	httpClient kubetypes.HttpGetter,
+	networkPlugin network.NetworkPlugin,
+	hairpinMode bool,
 	execer utilexec.Interface,
 	os kubecontainer.OSInterface,
 	imageBackOff *flowcontrol.Backoff,
@@ -203,6 +217,11 @@ func New(
 		return nil, fmt.Errorf("cannot find touch binary: %v", err)
 	}
 
+	nsenterPath, err := execer.LookPath("nsenter")
+	if err != nil {
+		return nil, fmt.Errorf("cannot find nsenter binary: %v", err)
+	}
+
 	rkt := &Runtime{
 		os:      kubecontainer.RealOS{},
 		systemd: systemd,
@@ -216,8 +235,10 @@ func New(
 		recorder:        recorder,
 		livenessManager: livenessManager,
 		volumeGetter:    volumeGetter,
+		networkPlugin:   networkPlugin,
 		execer:          execer,
 		touchPath:       touchPath,
+		nsenterPath:     nsenterPath,
 	}
 
 	rkt.config, err = rkt.getConfig(rkt.config)
@@ -862,23 +883,22 @@ func serviceFilePath(serviceName string) string {
 }
 
 // generateRunCommand crafts a 'rkt run-prepared' command with necessary parameters.
-func (r *Runtime) generateRunCommand(pod *api.Pod, uuid string) (string, error) {
+func (r *Runtime) generateRunCommand(pod *api.Pod, uuid, netnsName string) (string, error) {
 	runPrepared := r.buildCommand("run-prepared").Args
 
+	// Network namespace set up in kubelet; rkt networking not used
+	runPrepared = append(runPrepared, "--net=host")
+
 	var hostname string
 	var err error
-	// Setup network configuration.
-	if kubecontainer.IsHostNetworkPod(pod) {
-		runPrepared = append(runPrepared, "--net=host")
+	// Setup DNS and hostname configuration.
+	if len(netnsName) == 0 {
 		// TODO(yifan): Let runtimeHelper.GeneratePodHostNameAndDomain() to handle this.
 		hostname, err = r.os.Hostname()
 		if err != nil {
 			return "", err
 		}
 	} else {
-		runPrepared = append(runPrepared, fmt.Sprintf("--net=%s", defaultNetworkName))
-
 		// Setup DNS.
 		dnsServers, dnsSearches, err := r.runtimeHelper.GetClusterDNS(pod)
 		if err != nil {
@@ -899,12 +919,37 @@ func (r *Runtime) generateRunCommand(pod *api.Pod, uuid string) (string, error)
 		if err != nil {
 			return "", err
 		}
+
+		// Drop the `rkt run-prepared` into the network namespace we
+		// created.
+		// TODO: switch to 'ip netns exec' once we can depend on a new
+		// enough version that doesn't have bugs like
+		// https://bugzilla.redhat.com/show_bug.cgi?id=882047
+		nsenterExec := []string{r.nsenterPath, "--net=\"" + netnsPathFromName(netnsName) + "\"", "--"}
+		runPrepared = append(nsenterExec, runPrepared...)
 	}
 
 	runPrepared = append(runPrepared, fmt.Sprintf("--hostname=%s", hostname))
 	runPrepared = append(runPrepared, uuid)
 	return strings.Join(runPrepared, " "), nil
 }
 
+func (r *Runtime) cleanupPodNetwork(pod *api.Pod) error {
+	glog.V(3).Infof("Calling network plugin %s to tear down pod for %s", r.networkPlugin.Name(), format.Pod(pod))
+
+	var teardownErr error
+	containerID := kubecontainer.ContainerID{ID: string(pod.UID)}
+	if err := r.networkPlugin.TearDownPod(pod.Namespace, pod.Name, containerID); err != nil {
+		teardownErr = fmt.Errorf("rkt: failed to tear down network for pod %s: %v", format.Pod(pod), err)
+	}
+
+	if _, err := r.execer.Command("ip", "netns", "del", makePodNetnsName(pod.UID)).Output(); err != nil {
+		return fmt.Errorf("rkt: Failed to remove network namespace for pod %s: %v", format.Pod(pod), err)
+	}
+
+	return teardownErr
+}
+
 // preparePod will:
 //
 // 1. Invoke 'rkt prepare' to prepare the pod, and get the rkt pod uuid.
@@ -912,7 +957,7 @@ func (r *Runtime) generateRunCommand(pod *api.Pod, uuid string) (string, error)
 //
 // On success, it will return a string that represents name of the unit file
 // and the runtime pod.
-func (r *Runtime) preparePod(pod *api.Pod, pullSecrets []api.Secret) (string, *kubecontainer.Pod, error) {
+func (r *Runtime) preparePod(pod *api.Pod, pullSecrets []api.Secret, netnsName string) (string, *kubecontainer.Pod, error) {
 	// Generate the pod manifest from the pod spec.
 	manifest, err := r.makePodManifest(pod, pullSecrets)
 	if err != nil {
@@ -957,7 +1002,7 @@ func (r *Runtime) preparePod(pod *api.Pod, pullSecrets []api.Secret) (string, *k
 	glog.V(4).Infof("'rkt prepare' returns %q", uuid)
 
 	// Create systemd service file for the rkt pod.
-	runPrepared, err := r.generateRunCommand(pod, uuid)
+	runPrepared, err := r.generateRunCommand(pod, uuid, netnsName)
 	if err != nil {
 		return "", nil, fmt.Errorf("failed to generate 'rkt run-prepared' command: %v", err)
 	}
@@ -972,6 +1017,10 @@ func (r *Runtime) preparePod(pod *api.Pod, pullSecrets []api.Secret) (string, *k
 		newUnitOption("Service", "ExecStopPost", markPodFinished),
 		// This enables graceful stop.
 		newUnitOption("Service", "KillMode", "mixed"),
+		// Track pod info for garbage collection
+		newUnitOption(unitKubernetesSection, unitPodUID, string(pod.UID)),
+		newUnitOption(unitKubernetesSection, unitPodName, pod.Name),
+		newUnitOption(unitKubernetesSection, unitPodNamespace, pod.Namespace),
 	}
 
 	serviceName := makePodServiceFileName(uuid)
@@ -1024,12 +1073,57 @@ func (r *Runtime) generateEvents(runtimePod *kubecontainer.Pod, reason string, f
 	return
 }
 
+func makePodNetnsName(podID types.UID) string {
+	return fmt.Sprintf("%s_%s", kubernetesUnitPrefix, string(podID))
+}
+
+func netnsPathFromName(netnsName string) string {
+	return fmt.Sprintf("/var/run/netns/%s", netnsName)
+}
+
+func (r *Runtime) setupPodNetwork(pod *api.Pod) (string, error) {
+	netnsName := makePodNetnsName(pod.UID)
+
+	// Create a new network namespace for the pod
+	r.execer.Command("ip", "netns", "del", netnsName).Output()
+	_, err := r.execer.Command("ip", "netns", "add", netnsName).Output()
+	if err != nil {
+		return "", fmt.Errorf("failed to create pod network namespace: %v", err)
+	}
+
+	// Set up networking with the network plugin
+	glog.V(3).Infof("Calling network plugin %s to setup pod for %s", r.networkPlugin.Name(), format.Pod(pod))
+	containerID := kubecontainer.ContainerID{ID: string(pod.UID)}
+	err = r.networkPlugin.SetUpPod(pod.Namespace, pod.Name, containerID)
+	if err != nil {
+		return "", fmt.Errorf("failed to set up pod network: %v", err)
+	}
+
+	if r.configureHairpinMode {
+		if err = hairpin.SetUpContainerPath(netnsPathFromName(netnsName), network.DefaultInterfaceName); err != nil {
+			glog.Warningf("Hairpin setup failed for pod %q: %v", format.Pod(pod), err)
+		}
+	}
+
+	return netnsName, nil
+}
+
 // RunPod first creates the unit file for a pod, and then
 // starts the unit over d-bus.
 func (r *Runtime) RunPod(pod *api.Pod, pullSecrets []api.Secret) error {
 	glog.V(4).Infof("Rkt starts to run pod: name %q.", format.Pod(pod))
 
-	name, runtimePod, prepareErr := r.preparePod(pod, pullSecrets)
+	var err error
+	var netnsName string
+
+	if !kubecontainer.IsHostNetworkPod(pod) {
+		netnsName, err = r.setupPodNetwork(pod)
+		if err != nil {
+			r.cleanupPodNetwork(pod)
+			return err
+		}
+	}
+
+	name, runtimePod, prepareErr := r.preparePod(pod, pullSecrets, netnsName)
 
 	// Set container references and generate events.
 	// If preparedPod fails, then send out 'failed' events for each container.
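
A standalone sketch of the namespace lifecycle that setupPodNetwork and generateRunCommand drive: create a named netns for the pod, then prefix the real `rkt run-prepared` invocation with nsenter so it executes inside that namespace. The binary paths and the pod UID below are made up, and actually creating the namespace would require root; this is only an illustration of the command shapes.

    package main

    import (
        "fmt"
        "os/exec"
        "strings"
    )

    func netnsName(podUID string) string { return "k8s_" + podUID }

    func netnsPath(name string) string { return "/var/run/netns/" + name }

    // setupNetns mirrors the "delete stale, then add" pattern used above.
    func setupNetns(podUID string) (string, error) {
        name := netnsName(podUID)
        // Remove any stale namespace first; the error is deliberately ignored.
        exec.Command("ip", "netns", "del", name).Run()
        if out, err := exec.Command("ip", "netns", "add", name).CombinedOutput(); err != nil {
            return "", fmt.Errorf("ip netns add %s: %v: %s", name, err, out)
        }
        return name, nil
    }

    // wrapWithNsenter builds the final command line string handed to systemd.
    func wrapWithNsenter(name string, cmd []string) string {
        prefix := []string{"/usr/bin/nsenter", "--net=\"" + netnsPath(name) + "\"", "--"}
        return strings.Join(append(prefix, cmd...), " ")
    }

    func main() {
        fmt.Println(wrapWithNsenter(netnsName("0f554e65"),
            []string{"/usr/bin/rkt", "run-prepared", "--net=host", "rkt-uuid-foo"}))
    }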
@@ -1049,6 +1143,7 @@ func (r *Runtime) RunPod(pod *api.Pod, pullSecrets []api.Secret) error {
 	}
 
 	if prepareErr != nil {
+		r.cleanupPodNetwork(pod)
 		return prepareErr
 	}
 
@@ -1057,9 +1152,10 @@
 	// RestartUnit has the same effect as StartUnit if the unit is not running, besides it can restart
 	// a unit if the unit file is changed and reloaded.
 	reschan := make(chan string)
-	_, err := r.systemd.RestartUnit(name, "replace", reschan)
+	_, err = r.systemd.RestartUnit(name, "replace", reschan)
 	if err != nil {
 		r.generateEvents(runtimePod, "Failed", err)
+		r.cleanupPodNetwork(pod)
 		return err
 	}
 
@@ -1067,6 +1163,7 @@ func (r *Runtime) RunPod(pod *api.Pod, pullSecrets []api.Secret) error {
 	if res != "done" {
 		err := fmt.Errorf("Failed to restart unit %q: %s", name, res)
 		r.generateEvents(runtimePod, "Failed", err)
+		r.cleanupPodNetwork(pod)
 		return err
 	}
 
@@ -1078,6 +1175,7 @@ func (r *Runtime) RunPod(pod *api.Pod, pullSecrets []api.Secret) error {
 		if errKill := r.KillPod(pod, *runtimePod, nil); errKill != nil {
 			return errors.NewAggregate([]error{err, errKill})
 		}
+		r.cleanupPodNetwork(pod)
 		return err
 	}
 
@@ -1364,6 +1462,21 @@ func (r *Runtime) KillPod(pod *api.Pod, runningPod kubecontainer.Pod, gracePerio
 		return err
 	}
 
+	// Clean up networking; use running pod details since 'pod' can be nil
+	if pod == nil || !kubecontainer.IsHostNetworkPod(pod) {
+		err := r.cleanupPodNetwork(&api.Pod{
+			ObjectMeta: api.ObjectMeta{
+				UID:       runningPod.ID,
+				Name:      runningPod.Name,
+				Namespace: runningPod.Namespace,
+			},
+		})
+		if err != nil {
+			glog.Errorf("rkt: failed to tear down network for unit %q: %v", serviceName, err)
+			return err
+		}
+	}
+
 	return nil
 }
@@ -1488,6 +1601,50 @@ func podIsActive(pod *rktapi.Pod) bool {
 		pod.State == rktapi.PodState_POD_STATE_RUNNING
 }
 
+// GetNetNS returns the network namespace path for the given container
+func (r *Runtime) GetNetNS(containerID kubecontainer.ContainerID) (string, error) {
+	// This is a slight hack, kubenet shouldn't be asking us about a container id
+	// but a pod id. This is because it knows too much about the infra container.
+	// We pretend the pod.UID is an infra container ID.
+	// This deception is only possible because we played the same trick in
+	// `networkPlugin.SetUpPod` and `networkPlugin.TearDownPod`.
+	return netnsPathFromName(makePodNetnsName(types.UID(containerID.ID))), nil
+}
+
+func podDetailsFromServiceFile(serviceFilePath string) (string, string, string, error) {
+	f, err := os.Open(serviceFilePath)
+	if err != nil {
+		return "", "", "", err
+	}
+	defer f.Close()
+
+	opts, err := unit.Deserialize(f)
+	if err != nil {
+		return "", "", "", err
+	}
+
+	var id, name, namespace string
+	for _, o := range opts {
+		if o.Section != unitKubernetesSection {
+			continue
+		}
+		switch o.Name {
+		case unitPodUID:
+			id = o.Value
+		case unitPodName:
+			name = o.Value
+		case unitPodNamespace:
+			namespace = o.Value
+		}
+
+		if id != "" && name != "" && namespace != "" {
+			return id, name, namespace, nil
+		}
+	}
+
+	return "", "", "", fmt.Errorf("failed to parse pod from file %s", serviceFilePath)
+}
+
 // GarbageCollect collects the pods/containers.
 // After one GC iteration:
 // - The deleted pods will be removed.
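
podDetailsFromServiceFile recovers the pod identity from the [X-Kubernetes] section that preparePod now writes into the unit file. Below is a self-contained sketch of that round trip, using a simplified line parser instead of the go-systemd deserializer used in the real code; the unit contents and pod UID are made up.

    package main

    import (
        "bufio"
        "fmt"
        "strings"
    )

    const unitText = `[Service]
    ExecStart=/usr/bin/rkt run-prepared rkt-uuid-foo

    [X-Kubernetes]
    PodUID=0f554e65-9c8d-11e5-b4fa-c2c9430d1c4c
    PodName=busybox
    PodNamespace=default
    `

    // podDetails pulls PodUID, PodName and PodNamespace out of the
    // [X-Kubernetes] section, mirroring what the GC path needs.
    func podDetails(unit string) (uid, name, namespace string) {
        in := false
        sc := bufio.NewScanner(strings.NewReader(unit))
        for sc.Scan() {
            line := strings.TrimSpace(sc.Text())
            if strings.HasPrefix(line, "[") {
                in = line == "[X-Kubernetes]"
                continue
            }
            if !in || !strings.Contains(line, "=") {
                continue
            }
            kv := strings.SplitN(line, "=", 2)
            switch kv[0] {
            case "PodUID":
                uid = kv[1]
            case "PodName":
                name = kv[1]
            case "PodNamespace":
                namespace = kv[1]
            }
        }
        return
    }

    func main() {
        fmt.Println(podDetails(unitText))
    }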
@@ -1548,7 +1705,13 @@ func (r *Runtime) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy) error
 		rktUUID := getRktUUIDFromServiceFileName(serviceName)
 		if _, ok := allPods[rktUUID]; !ok {
 			glog.V(4).Infof("rkt: No rkt pod found for service file %q, will remove it", serviceName)
-			if err := r.os.Remove(serviceFilePath(serviceName)); err != nil {
+
+			serviceFile := serviceFilePath(serviceName)
+
+			// Network may not be around anymore so errors are ignored
+			r.cleanupPodNetworkFromServiceFile(serviceFile)
+
+			if err := r.os.Remove(serviceFile); err != nil {
 				errlist = append(errlist, fmt.Errorf("rkt: Failed to remove service file %q: %v", serviceName, err))
 			}
 		}
@@ -1581,18 +1744,39 @@ func (r *Runtime) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy) error
 	return errors.NewAggregate(errlist)
 }
 
+// Read kubernetes pod UUID, namespace, and name from systemd service file and
+// use that to clean up any pod network that may still exist.
+func (r *Runtime) cleanupPodNetworkFromServiceFile(serviceFilePath string) {
+	id, name, namespace, err := podDetailsFromServiceFile(serviceFilePath)
+	if err == nil {
+		r.cleanupPodNetwork(&api.Pod{
+			ObjectMeta: api.ObjectMeta{
+				UID:       types.UID(id),
+				Name:      name,
+				Namespace: namespace,
+			},
+		})
+	}
+}
+
 // removePod calls 'rkt rm $UUID' to delete a rkt pod, it also remove the systemd service file
 // related to the pod.
 func (r *Runtime) removePod(uuid string) error {
 	var errlist []error
 	glog.V(4).Infof("rkt: GC is removing pod %q", uuid)
+
+	serviceName := makePodServiceFileName(uuid)
+	serviceFile := serviceFilePath(serviceName)
+
+	// Network may not be around anymore so errors are ignored
+	r.cleanupPodNetworkFromServiceFile(serviceFile)
+
 	if _, err := r.cli.RunCommand("rm", uuid); err != nil {
 		errlist = append(errlist, fmt.Errorf("rkt: Failed to remove pod %q: %v", uuid, err))
 	}
 
 	// GC systemd service files as well.
-	serviceName := makePodServiceFileName(uuid)
-	if err := r.os.Remove(serviceFilePath(serviceName)); err != nil {
+	if err := r.os.Remove(serviceFile); err != nil {
 		errlist = append(errlist, fmt.Errorf("rkt: Failed to remove service file %q for pod %q: %v", serviceName, uuid, err))
 	}

View File

@@ -1076,8 +1076,9 @@ func TestSetApp(t *testing.T) {
 func TestGenerateRunCommand(t *testing.T) {
 	hostName := "test-hostname"
 	tests := []struct {
 		pod       *api.Pod
 		uuid      string
+		netnsName string
 
 		dnsServers  []string
 		dnsSearches []string
@@ -1095,6 +1096,7 @@ func TestGenerateRunCommand(t *testing.T) {
 				Spec: api.PodSpec{},
 			},
 			"rkt-uuid-foo",
+			"default",
 			[]string{},
 			[]string{},
 			"",
@@ -1109,11 +1111,12 @@
 				},
 			},
 			"rkt-uuid-foo",
+			"default",
 			[]string{},
 			[]string{},
 			"pod-hostname-foo",
 			nil,
-			"/bin/rkt/rkt --insecure-options=image,ondisk --local-config=/var/rkt/local/data --dir=/var/data run-prepared --net=rkt.kubernetes.io --hostname=pod-hostname-foo rkt-uuid-foo",
+			" --net=\"/var/run/netns/default\" -- /bin/rkt/rkt --insecure-options=image,ondisk --local-config=/var/rkt/local/data --dir=/var/data run-prepared --net=host --hostname=pod-hostname-foo rkt-uuid-foo",
 		},
 		// Case #2, returns no dns, with host-net.
 		{
@@ -1128,6 +1131,7 @@ func TestGenerateRunCommand(t *testing.T) {
 				},
 			},
 			"rkt-uuid-foo",
+			"",
 			[]string{},
 			[]string{},
 			"",
@@ -1147,11 +1151,12 @@ func TestGenerateRunCommand(t *testing.T) {
 				},
 			},
 			"rkt-uuid-foo",
+			"default",
 			[]string{"127.0.0.1"},
 			[]string{"."},
 			"pod-hostname-foo",
 			nil,
-			"/bin/rkt/rkt --insecure-options=image,ondisk --local-config=/var/rkt/local/data --dir=/var/data run-prepared --net=rkt.kubernetes.io --dns=127.0.0.1 --dns-search=. --dns-opt=ndots:5 --hostname=pod-hostname-foo rkt-uuid-foo",
+			" --net=\"/var/run/netns/default\" -- /bin/rkt/rkt --insecure-options=image,ondisk --local-config=/var/rkt/local/data --dir=/var/data run-prepared --net=host --dns=127.0.0.1 --dns-search=. --dns-opt=ndots:5 --hostname=pod-hostname-foo rkt-uuid-foo",
 		},
 		// Case #4, returns no dns, dns searches, with host-network.
 		{
@@ -1166,6 +1171,7 @@ func TestGenerateRunCommand(t *testing.T) {
 				},
 			},
 			"rkt-uuid-foo",
+			"",
 			[]string{"127.0.0.1"},
 			[]string{"."},
 			"pod-hostname-foo",
@@ -1189,7 +1195,7 @@ func TestGenerateRunCommand(t *testing.T) {
 		testCaseHint := fmt.Sprintf("test case #%d", i)
 		rkt.runtimeHelper = &fakeRuntimeHelper{tt.dnsServers, tt.dnsSearches, tt.hostName, "", tt.err}
 
-		result, err := rkt.generateRunCommand(tt.pod, tt.uuid)
+		result, err := rkt.generateRunCommand(tt.pod, tt.uuid, tt.netnsName)
 		assert.Equal(t, tt.err, err, testCaseHint)
 		assert.Equal(t, tt.expect, result, testCaseHint)
 	}