sort and filter exposed Pod IPs

runtimes may return an arbitrary number of Pod IPs, however, kubernetes
only takes into consideration the first one of each IP family.

The order of the IPs are the one defined by the Kubelet:
- default prefer IPv4
- if NodeIPs are defined, matching the first nodeIP family

PodIP is always the first IP of PodIPs.

The downward API must expose the same IPs and in the same order than
the pod.Status API object.
This commit is contained in:
Antonio Ojea 2021-06-29 21:07:10 +02:00
parent 1151dc1ee5
commit a7469cf680
2 changed files with 255 additions and 20 deletions

View File

@ -809,6 +809,13 @@ func (kl *Kubelet) podFieldSelectorRuntimeValue(fs *v1.ObjectFieldSelector, pod
if err != nil {
return "", err
}
// make podIPs order match node IP family preference #97979
podIPs = kl.sortPodIPs(podIPs)
if len(podIPs) > 0 {
podIP = podIPs[0]
}
switch internalFieldPath {
case "spec.nodeName":
return pod.Spec.NodeName, nil
@ -1570,16 +1577,14 @@ func (kl *Kubelet) generateAPIPodStatus(pod *v1.Pod, podStatus *kubecontainer.Po
return *s
}
// convertStatusToAPIStatus creates an api PodStatus for the given pod from
// the given internal pod status. It is purely transformative and does not
// alter the kubelet state at all.
func (kl *Kubelet) convertStatusToAPIStatus(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *v1.PodStatus {
var apiPodStatus v1.PodStatus
// The runtime pod status may have an arbitrary number of IPs, in an arbitrary
// order. Pick out the first returned IP of the same IP family as the node IP
// first, followed by the first IP of the opposite IP family (if any).
podIPs := make([]v1.PodIP, 0, len(podStatus.IPs))
// sortPodIPs return the PodIPs sorted and truncated by the cluster IP family preference.
// The runtime pod status may have an arbitrary number of IPs, in an arbitrary order.
// PodIPs are obtained by: func (m *kubeGenericRuntimeManager) determinePodSandboxIPs()
// Pick out the first returned IP of the same IP family as the node IP
// first, followed by the first IP of the opposite IP family (if any)
// and use them for the Pod.Status.PodIPs and the Downward API environment variables
func (kl *Kubelet) sortPodIPs(podIPs []string) []string {
ips := make([]string, 0, 2)
var validPrimaryIP, validSecondaryIP func(ip string) bool
if len(kl.nodeIPs) == 0 || utilnet.IsIPv4(kl.nodeIPs[0]) {
validPrimaryIP = utilnet.IsIPv4String
@ -1588,21 +1593,40 @@ func (kl *Kubelet) convertStatusToAPIStatus(pod *v1.Pod, podStatus *kubecontaine
validPrimaryIP = utilnet.IsIPv6String
validSecondaryIP = utilnet.IsIPv4String
}
for _, ip := range podStatus.IPs {
for _, ip := range podIPs {
if validPrimaryIP(ip) {
podIPs = append(podIPs, v1.PodIP{IP: ip})
ips = append(ips, ip)
break
}
}
for _, ip := range podStatus.IPs {
for _, ip := range podIPs {
if validSecondaryIP(ip) {
podIPs = append(podIPs, v1.PodIP{IP: ip})
ips = append(ips, ip)
break
}
}
apiPodStatus.PodIPs = podIPs
if len(podIPs) > 0 {
apiPodStatus.PodIP = podIPs[0].IP
return ips
}
// convertStatusToAPIStatus creates an api PodStatus for the given pod from
// the given internal pod status. It is purely transformative and does not
// alter the kubelet state at all.
func (kl *Kubelet) convertStatusToAPIStatus(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *v1.PodStatus {
var apiPodStatus v1.PodStatus
// copy pod status IPs to avoid race conditions with PodStatus #102806
podIPs := make([]string, len(podStatus.IPs))
for j, ip := range podStatus.IPs {
podIPs[j] = ip
}
// make podIPs order match node IP family preference #97979
podIPs = kl.sortPodIPs(podIPs)
for _, ip := range podIPs {
apiPodStatus.PodIPs = append(apiPodStatus.PodIPs, v1.PodIP{IP: ip})
}
if len(apiPodStatus.PodIPs) > 0 {
apiPodStatus.PodIP = apiPodStatus.PodIPs[0].IP
}
// set status for Pods created on versions of kube older than 1.6

View File

@ -461,6 +461,7 @@ func TestMakeEnvironmentVariables(t *testing.T) {
unsyncedServices bool // whether the services should NOT be synced
configMap *v1.ConfigMap // an optional ConfigMap to pull from
secret *v1.Secret // an optional Secret to pull from
podIPs []string // the pod IPs
expectedEnvs []kubecontainer.EnvVar // a set of expected environment vars
expectedError bool // does the test fail
expectedEvent string // does the test emit an event
@ -766,6 +767,7 @@ func TestMakeEnvironmentVariables(t *testing.T) {
},
},
},
podIPs: []string{"1.2.3.4", "fd00::6"},
masterServiceNs: "nothing",
nilLister: true,
expectedEnvs: []kubecontainer.EnvVar{
@ -778,6 +780,94 @@ func TestMakeEnvironmentVariables(t *testing.T) {
{Name: "HOST_IP", Value: testKubeletHostIP},
},
},
{
name: "downward api pod ips reverse order",
ns: "downward-api",
enableServiceLinks: &falseValue,
container: &v1.Container{
Env: []v1.EnvVar{
{
Name: "POD_IP",
ValueFrom: &v1.EnvVarSource{
FieldRef: &v1.ObjectFieldSelector{
APIVersion: "v1",
FieldPath: "status.podIP",
},
},
},
{
Name: "POD_IPS",
ValueFrom: &v1.EnvVarSource{
FieldRef: &v1.ObjectFieldSelector{
APIVersion: "v1",
FieldPath: "status.podIPs",
},
},
},
{
Name: "HOST_IP",
ValueFrom: &v1.EnvVarSource{
FieldRef: &v1.ObjectFieldSelector{
APIVersion: "v1",
FieldPath: "status.hostIP",
},
},
},
},
},
podIPs: []string{"fd00::6", "1.2.3.4"},
masterServiceNs: "nothing",
nilLister: true,
expectedEnvs: []kubecontainer.EnvVar{
{Name: "POD_IP", Value: "1.2.3.4"},
{Name: "POD_IPS", Value: "1.2.3.4,fd00::6"},
{Name: "HOST_IP", Value: testKubeletHostIP},
},
},
{
name: "downward api pod ips multiple ips",
ns: "downward-api",
enableServiceLinks: &falseValue,
container: &v1.Container{
Env: []v1.EnvVar{
{
Name: "POD_IP",
ValueFrom: &v1.EnvVarSource{
FieldRef: &v1.ObjectFieldSelector{
APIVersion: "v1",
FieldPath: "status.podIP",
},
},
},
{
Name: "POD_IPS",
ValueFrom: &v1.EnvVarSource{
FieldRef: &v1.ObjectFieldSelector{
APIVersion: "v1",
FieldPath: "status.podIPs",
},
},
},
{
Name: "HOST_IP",
ValueFrom: &v1.EnvVarSource{
FieldRef: &v1.ObjectFieldSelector{
APIVersion: "v1",
FieldPath: "status.hostIP",
},
},
},
},
},
podIPs: []string{"1.2.3.4", "192.168.1.1.", "fd00::6"},
masterServiceNs: "nothing",
nilLister: true,
expectedEnvs: []kubecontainer.EnvVar{
{Name: "POD_IP", Value: "1.2.3.4"},
{Name: "POD_IPS", Value: "1.2.3.4,fd00::6"},
{Name: "HOST_IP", Value: testKubeletHostIP},
},
},
{
name: "env expansion",
ns: "test1",
@ -1685,13 +1775,15 @@ func TestMakeEnvironmentVariables(t *testing.T) {
EnableServiceLinks: tc.enableServiceLinks,
},
}
podIP := "1.2.3.4"
podIPs := []string{"1.2.3.4,fd00::6"}
podIP := ""
if len(tc.podIPs) > 0 {
podIP = tc.podIPs[0]
}
if tc.staticPod {
testPod.Annotations[kubetypes.ConfigSourceAnnotationKey] = "file"
}
result, err := kl.makeEnvironmentVariables(testPod, tc.container, podIP, podIPs)
result, err := kl.makeEnvironmentVariables(testPod, tc.container, podIP, tc.podIPs)
select {
case e := <-fakeRecorder.Events:
assert.Equal(t, tc.expectedEvent, e)
@ -2837,6 +2929,33 @@ func TestGenerateAPIPodStatusHostNetworkPodIPs(t *testing.T) {
{IP: "192.168.0.1"},
},
},
{
name: "CRI dual-stack PodIPs override NodeAddresses",
nodeAddresses: []v1.NodeAddress{
{Type: v1.NodeInternalIP, Address: "10.0.0.1"},
{Type: v1.NodeInternalIP, Address: "fd01::1234"},
},
dualStack: true,
criPodIPs: []string{"192.168.0.1", "2001:db8::2"},
podIPs: []v1.PodIP{
{IP: "192.168.0.1"},
{IP: "2001:db8::2"},
},
},
{
// by default the cluster prefers IPv4
name: "CRI dual-stack PodIPs override NodeAddresses prefer IPv4",
nodeAddresses: []v1.NodeAddress{
{Type: v1.NodeInternalIP, Address: "10.0.0.1"},
{Type: v1.NodeInternalIP, Address: "fd01::1234"},
},
dualStack: true,
criPodIPs: []string{"2001:db8::2", "192.168.0.1"},
podIPs: []v1.PodIP{
{IP: "192.168.0.1"},
{IP: "2001:db8::2"},
},
},
}
for _, tc := range testcases {
@ -3009,3 +3128,95 @@ func TestGenerateAPIPodStatusPodIPs(t *testing.T) {
})
}
}
func TestSortPodIPs(t *testing.T) {
testcases := []struct {
name string
nodeIP string
podIPs []string
expectedIPs []string
}{
{
name: "Simple",
nodeIP: "",
podIPs: []string{"10.0.0.1"},
expectedIPs: []string{"10.0.0.1"},
},
{
name: "Dual-stack",
nodeIP: "",
podIPs: []string{"10.0.0.1", "fd01::1234"},
expectedIPs: []string{"10.0.0.1", "fd01::1234"},
},
{
name: "Dual-stack with explicit node IP",
nodeIP: "192.168.1.1",
podIPs: []string{"10.0.0.1", "fd01::1234"},
expectedIPs: []string{"10.0.0.1", "fd01::1234"},
},
{
name: "Dual-stack with CRI returning wrong family first",
nodeIP: "",
podIPs: []string{"fd01::1234", "10.0.0.1"},
expectedIPs: []string{"10.0.0.1", "fd01::1234"},
},
{
name: "Dual-stack with explicit node IP with CRI returning wrong family first",
nodeIP: "192.168.1.1",
podIPs: []string{"fd01::1234", "10.0.0.1"},
expectedIPs: []string{"10.0.0.1", "fd01::1234"},
},
{
name: "Dual-stack with IPv6 node IP",
nodeIP: "fd00::5678",
podIPs: []string{"10.0.0.1", "fd01::1234"},
expectedIPs: []string{"fd01::1234", "10.0.0.1"},
},
{
name: "Dual-stack with IPv6 node IP, other CRI order",
nodeIP: "fd00::5678",
podIPs: []string{"fd01::1234", "10.0.0.1"},
expectedIPs: []string{"fd01::1234", "10.0.0.1"},
},
{
name: "No Pod IP matching Node IP",
nodeIP: "fd00::5678",
podIPs: []string{"10.0.0.1"},
expectedIPs: []string{"10.0.0.1"},
},
{
name: "No Pod IP matching (unspecified) Node IP",
nodeIP: "",
podIPs: []string{"fd01::1234"},
expectedIPs: []string{"fd01::1234"},
},
{
name: "Multiple IPv4 IPs",
nodeIP: "",
podIPs: []string{"10.0.0.1", "10.0.0.2", "10.0.0.3"},
expectedIPs: []string{"10.0.0.1"},
},
{
name: "Multiple Dual-Stack IPs",
nodeIP: "",
podIPs: []string{"10.0.0.1", "10.0.0.2", "fd01::1234", "10.0.0.3", "fd01::5678"},
expectedIPs: []string{"10.0.0.1", "fd01::1234"},
},
}
for _, tc := range testcases {
t.Run(tc.name, func(t *testing.T) {
testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
defer testKubelet.Cleanup()
kl := testKubelet.kubelet
if tc.nodeIP != "" {
kl.nodeIPs = []net.IP{net.ParseIP(tc.nodeIP)}
}
podIPs := kl.sortPodIPs(tc.podIPs)
if !reflect.DeepEqual(podIPs, tc.expectedIPs) {
t.Fatalf("Expected PodIPs %#v, got %#v", tc.expectedIPs, podIPs)
}
})
}
}