sort and filter exposed Pod IPs

Runtimes may return an arbitrary number of Pod IPs; however, Kubernetes
only takes into consideration the first one of each IP family.

The order of the IPs is the one defined by the Kubelet:
- by default, prefer IPv4
- if NodeIPs are defined, match the IP family of the first NodeIP

PodIP is always the first IP of PodIPs.

The downward API must expose the same IPs, in the same order, as
the pod.Status API object.
This commit is contained in:
Antonio Ojea 2021-06-29 21:07:10 +02:00
parent 1151dc1ee5
commit a7469cf680
2 changed files with 255 additions and 20 deletions

View File

@ -809,6 +809,13 @@ func (kl *Kubelet) podFieldSelectorRuntimeValue(fs *v1.ObjectFieldSelector, pod
if err != nil { if err != nil {
return "", err return "", err
} }
// make podIPs order match node IP family preference #97979
podIPs = kl.sortPodIPs(podIPs)
if len(podIPs) > 0 {
podIP = podIPs[0]
}
switch internalFieldPath { switch internalFieldPath {
case "spec.nodeName": case "spec.nodeName":
return pod.Spec.NodeName, nil return pod.Spec.NodeName, nil
@ -1570,16 +1577,14 @@ func (kl *Kubelet) generateAPIPodStatus(pod *v1.Pod, podStatus *kubecontainer.Po
return *s return *s
} }
// convertStatusToAPIStatus creates an api PodStatus for the given pod from // sortPodIPs return the PodIPs sorted and truncated by the cluster IP family preference.
// the given internal pod status. It is purely transformative and does not // The runtime pod status may have an arbitrary number of IPs, in an arbitrary order.
// alter the kubelet state at all. // PodIPs are obtained by: func (m *kubeGenericRuntimeManager) determinePodSandboxIPs()
func (kl *Kubelet) convertStatusToAPIStatus(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *v1.PodStatus { // Pick out the first returned IP of the same IP family as the node IP
var apiPodStatus v1.PodStatus // first, followed by the first IP of the opposite IP family (if any)
// and use them for the Pod.Status.PodIPs and the Downward API environment variables
// The runtime pod status may have an arbitrary number of IPs, in an arbitrary func (kl *Kubelet) sortPodIPs(podIPs []string) []string {
// order. Pick out the first returned IP of the same IP family as the node IP ips := make([]string, 0, 2)
// first, followed by the first IP of the opposite IP family (if any).
podIPs := make([]v1.PodIP, 0, len(podStatus.IPs))
var validPrimaryIP, validSecondaryIP func(ip string) bool var validPrimaryIP, validSecondaryIP func(ip string) bool
if len(kl.nodeIPs) == 0 || utilnet.IsIPv4(kl.nodeIPs[0]) { if len(kl.nodeIPs) == 0 || utilnet.IsIPv4(kl.nodeIPs[0]) {
validPrimaryIP = utilnet.IsIPv4String validPrimaryIP = utilnet.IsIPv4String
@ -1588,21 +1593,40 @@ func (kl *Kubelet) convertStatusToAPIStatus(pod *v1.Pod, podStatus *kubecontaine
validPrimaryIP = utilnet.IsIPv6String validPrimaryIP = utilnet.IsIPv6String
validSecondaryIP = utilnet.IsIPv4String validSecondaryIP = utilnet.IsIPv4String
} }
for _, ip := range podStatus.IPs { for _, ip := range podIPs {
if validPrimaryIP(ip) { if validPrimaryIP(ip) {
podIPs = append(podIPs, v1.PodIP{IP: ip}) ips = append(ips, ip)
break break
} }
} }
for _, ip := range podStatus.IPs { for _, ip := range podIPs {
if validSecondaryIP(ip) { if validSecondaryIP(ip) {
podIPs = append(podIPs, v1.PodIP{IP: ip}) ips = append(ips, ip)
break break
} }
} }
apiPodStatus.PodIPs = podIPs return ips
if len(podIPs) > 0 { }
apiPodStatus.PodIP = podIPs[0].IP
// convertStatusToAPIStatus creates an api PodStatus for the given pod from
// the given internal pod status. It is purely transformative and does not
// alter the kubelet state at all.
func (kl *Kubelet) convertStatusToAPIStatus(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *v1.PodStatus {
var apiPodStatus v1.PodStatus
// copy pod status IPs to avoid race conditions with PodStatus #102806
podIPs := make([]string, len(podStatus.IPs))
for j, ip := range podStatus.IPs {
podIPs[j] = ip
}
// make podIPs order match node IP family preference #97979
podIPs = kl.sortPodIPs(podIPs)
for _, ip := range podIPs {
apiPodStatus.PodIPs = append(apiPodStatus.PodIPs, v1.PodIP{IP: ip})
}
if len(apiPodStatus.PodIPs) > 0 {
apiPodStatus.PodIP = apiPodStatus.PodIPs[0].IP
} }
// set status for Pods created on versions of kube older than 1.6 // set status for Pods created on versions of kube older than 1.6

View File

@ -461,6 +461,7 @@ func TestMakeEnvironmentVariables(t *testing.T) {
unsyncedServices bool // whether the services should NOT be synced unsyncedServices bool // whether the services should NOT be synced
configMap *v1.ConfigMap // an optional ConfigMap to pull from configMap *v1.ConfigMap // an optional ConfigMap to pull from
secret *v1.Secret // an optional Secret to pull from secret *v1.Secret // an optional Secret to pull from
podIPs []string // the pod IPs
expectedEnvs []kubecontainer.EnvVar // a set of expected environment vars expectedEnvs []kubecontainer.EnvVar // a set of expected environment vars
expectedError bool // does the test fail expectedError bool // does the test fail
expectedEvent string // does the test emit an event expectedEvent string // does the test emit an event
@ -766,6 +767,7 @@ func TestMakeEnvironmentVariables(t *testing.T) {
}, },
}, },
}, },
podIPs: []string{"1.2.3.4", "fd00::6"},
masterServiceNs: "nothing", masterServiceNs: "nothing",
nilLister: true, nilLister: true,
expectedEnvs: []kubecontainer.EnvVar{ expectedEnvs: []kubecontainer.EnvVar{
@ -778,6 +780,94 @@ func TestMakeEnvironmentVariables(t *testing.T) {
{Name: "HOST_IP", Value: testKubeletHostIP}, {Name: "HOST_IP", Value: testKubeletHostIP},
}, },
}, },
{
name: "downward api pod ips reverse order",
ns: "downward-api",
enableServiceLinks: &falseValue,
container: &v1.Container{
Env: []v1.EnvVar{
{
Name: "POD_IP",
ValueFrom: &v1.EnvVarSource{
FieldRef: &v1.ObjectFieldSelector{
APIVersion: "v1",
FieldPath: "status.podIP",
},
},
},
{
Name: "POD_IPS",
ValueFrom: &v1.EnvVarSource{
FieldRef: &v1.ObjectFieldSelector{
APIVersion: "v1",
FieldPath: "status.podIPs",
},
},
},
{
Name: "HOST_IP",
ValueFrom: &v1.EnvVarSource{
FieldRef: &v1.ObjectFieldSelector{
APIVersion: "v1",
FieldPath: "status.hostIP",
},
},
},
},
},
podIPs: []string{"fd00::6", "1.2.3.4"},
masterServiceNs: "nothing",
nilLister: true,
expectedEnvs: []kubecontainer.EnvVar{
{Name: "POD_IP", Value: "1.2.3.4"},
{Name: "POD_IPS", Value: "1.2.3.4,fd00::6"},
{Name: "HOST_IP", Value: testKubeletHostIP},
},
},
{
name: "downward api pod ips multiple ips",
ns: "downward-api",
enableServiceLinks: &falseValue,
container: &v1.Container{
Env: []v1.EnvVar{
{
Name: "POD_IP",
ValueFrom: &v1.EnvVarSource{
FieldRef: &v1.ObjectFieldSelector{
APIVersion: "v1",
FieldPath: "status.podIP",
},
},
},
{
Name: "POD_IPS",
ValueFrom: &v1.EnvVarSource{
FieldRef: &v1.ObjectFieldSelector{
APIVersion: "v1",
FieldPath: "status.podIPs",
},
},
},
{
Name: "HOST_IP",
ValueFrom: &v1.EnvVarSource{
FieldRef: &v1.ObjectFieldSelector{
APIVersion: "v1",
FieldPath: "status.hostIP",
},
},
},
},
},
podIPs: []string{"1.2.3.4", "192.168.1.1.", "fd00::6"},
masterServiceNs: "nothing",
nilLister: true,
expectedEnvs: []kubecontainer.EnvVar{
{Name: "POD_IP", Value: "1.2.3.4"},
{Name: "POD_IPS", Value: "1.2.3.4,fd00::6"},
{Name: "HOST_IP", Value: testKubeletHostIP},
},
},
{ {
name: "env expansion", name: "env expansion",
ns: "test1", ns: "test1",
@ -1685,13 +1775,15 @@ func TestMakeEnvironmentVariables(t *testing.T) {
EnableServiceLinks: tc.enableServiceLinks, EnableServiceLinks: tc.enableServiceLinks,
}, },
} }
podIP := "1.2.3.4" podIP := ""
podIPs := []string{"1.2.3.4,fd00::6"} if len(tc.podIPs) > 0 {
podIP = tc.podIPs[0]
}
if tc.staticPod { if tc.staticPod {
testPod.Annotations[kubetypes.ConfigSourceAnnotationKey] = "file" testPod.Annotations[kubetypes.ConfigSourceAnnotationKey] = "file"
} }
result, err := kl.makeEnvironmentVariables(testPod, tc.container, podIP, podIPs) result, err := kl.makeEnvironmentVariables(testPod, tc.container, podIP, tc.podIPs)
select { select {
case e := <-fakeRecorder.Events: case e := <-fakeRecorder.Events:
assert.Equal(t, tc.expectedEvent, e) assert.Equal(t, tc.expectedEvent, e)
@ -2837,6 +2929,33 @@ func TestGenerateAPIPodStatusHostNetworkPodIPs(t *testing.T) {
{IP: "192.168.0.1"}, {IP: "192.168.0.1"},
}, },
}, },
{
name: "CRI dual-stack PodIPs override NodeAddresses",
nodeAddresses: []v1.NodeAddress{
{Type: v1.NodeInternalIP, Address: "10.0.0.1"},
{Type: v1.NodeInternalIP, Address: "fd01::1234"},
},
dualStack: true,
criPodIPs: []string{"192.168.0.1", "2001:db8::2"},
podIPs: []v1.PodIP{
{IP: "192.168.0.1"},
{IP: "2001:db8::2"},
},
},
{
// by default the cluster prefers IPv4
name: "CRI dual-stack PodIPs override NodeAddresses prefer IPv4",
nodeAddresses: []v1.NodeAddress{
{Type: v1.NodeInternalIP, Address: "10.0.0.1"},
{Type: v1.NodeInternalIP, Address: "fd01::1234"},
},
dualStack: true,
criPodIPs: []string{"2001:db8::2", "192.168.0.1"},
podIPs: []v1.PodIP{
{IP: "192.168.0.1"},
{IP: "2001:db8::2"},
},
},
} }
for _, tc := range testcases { for _, tc := range testcases {
@ -3009,3 +3128,95 @@ func TestGenerateAPIPodStatusPodIPs(t *testing.T) {
}) })
} }
} }
// TestSortPodIPs is a table-driven test for Kubelet.sortPodIPs. Each case
// feeds a list of pod IPs (as a runtime might report them) to sortPodIPs and
// compares the result with the expected list. The expectations encode that at
// most one IP per family is kept and that the family of the configured node IP
// comes first, with IPv4 first when the case sets no node IP.
func TestSortPodIPs(t *testing.T) {
	type sortCase struct {
		name   string
		nodeIP string   // node IP set on the test kubelet; empty leaves kubelet.nodeIPs untouched
		input  []string // pod IPs handed to sortPodIPs
		want   []string // pod IPs expected back, in order
	}

	cases := []sortCase{
		{
			name:   "Simple",
			nodeIP: "",
			input:  []string{"10.0.0.1"},
			want:   []string{"10.0.0.1"},
		},
		{
			name:   "Dual-stack",
			nodeIP: "",
			input:  []string{"10.0.0.1", "fd01::1234"},
			want:   []string{"10.0.0.1", "fd01::1234"},
		},
		{
			name:   "Dual-stack with explicit node IP",
			nodeIP: "192.168.1.1",
			input:  []string{"10.0.0.1", "fd01::1234"},
			want:   []string{"10.0.0.1", "fd01::1234"},
		},
		{
			name:   "Dual-stack with CRI returning wrong family first",
			nodeIP: "",
			input:  []string{"fd01::1234", "10.0.0.1"},
			want:   []string{"10.0.0.1", "fd01::1234"},
		},
		{
			name:   "Dual-stack with explicit node IP with CRI returning wrong family first",
			nodeIP: "192.168.1.1",
			input:  []string{"fd01::1234", "10.0.0.1"},
			want:   []string{"10.0.0.1", "fd01::1234"},
		},
		{
			name:   "Dual-stack with IPv6 node IP",
			nodeIP: "fd00::5678",
			input:  []string{"10.0.0.1", "fd01::1234"},
			want:   []string{"fd01::1234", "10.0.0.1"},
		},
		{
			name:   "Dual-stack with IPv6 node IP, other CRI order",
			nodeIP: "fd00::5678",
			input:  []string{"fd01::1234", "10.0.0.1"},
			want:   []string{"fd01::1234", "10.0.0.1"},
		},
		{
			name:   "No Pod IP matching Node IP",
			nodeIP: "fd00::5678",
			input:  []string{"10.0.0.1"},
			want:   []string{"10.0.0.1"},
		},
		{
			name:   "No Pod IP matching (unspecified) Node IP",
			nodeIP: "",
			input:  []string{"fd01::1234"},
			want:   []string{"fd01::1234"},
		},
		{
			name:   "Multiple IPv4 IPs",
			nodeIP: "",
			input:  []string{"10.0.0.1", "10.0.0.2", "10.0.0.3"},
			want:   []string{"10.0.0.1"},
		},
		{
			name:   "Multiple Dual-Stack IPs",
			nodeIP: "",
			input:  []string{"10.0.0.1", "10.0.0.2", "fd01::1234", "10.0.0.3", "fd01::5678"},
			want:   []string{"10.0.0.1", "fd01::1234"},
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			// A fresh test kubelet per case, so a node IP set by one case
			// cannot carry over into the next.
			tk := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
			defer tk.Cleanup()

			kubelet := tk.kubelet
			if len(c.nodeIP) > 0 {
				kubelet.nodeIPs = []net.IP{net.ParseIP(c.nodeIP)}
			}

			got := kubelet.sortPodIPs(c.input)
			if reflect.DeepEqual(got, c.want) {
				return
			}
			t.Fatalf("Expected PodIPs %#v, got %#v", c.want, got)
		})
	}
}