diff --git a/pkg/kubelet/kubelet_pods.go b/pkg/kubelet/kubelet_pods.go index f78fde8ee3c..f3ab4de692f 100644 --- a/pkg/kubelet/kubelet_pods.go +++ b/pkg/kubelet/kubelet_pods.go @@ -809,6 +809,13 @@ func (kl *Kubelet) podFieldSelectorRuntimeValue(fs *v1.ObjectFieldSelector, pod if err != nil { return "", err } + + // make podIPs order match node IP family preference #97979 + podIPs = kl.sortPodIPs(podIPs) + if len(podIPs) > 0 { + podIP = podIPs[0] + } + switch internalFieldPath { case "spec.nodeName": return pod.Spec.NodeName, nil @@ -1570,16 +1577,14 @@ func (kl *Kubelet) generateAPIPodStatus(pod *v1.Pod, podStatus *kubecontainer.Po return *s } -// convertStatusToAPIStatus creates an api PodStatus for the given pod from -// the given internal pod status. It is purely transformative and does not -// alter the kubelet state at all. -func (kl *Kubelet) convertStatusToAPIStatus(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *v1.PodStatus { - var apiPodStatus v1.PodStatus - - // The runtime pod status may have an arbitrary number of IPs, in an arbitrary - // order. Pick out the first returned IP of the same IP family as the node IP - // first, followed by the first IP of the opposite IP family (if any). - podIPs := make([]v1.PodIP, 0, len(podStatus.IPs)) +// sortPodIPs return the PodIPs sorted and truncated by the cluster IP family preference. +// The runtime pod status may have an arbitrary number of IPs, in an arbitrary order. +// PodIPs are obtained by: func (m *kubeGenericRuntimeManager) determinePodSandboxIPs() +// Pick out the first returned IP of the same IP family as the node IP +// first, followed by the first IP of the opposite IP family (if any) +// and use them for the Pod.Status.PodIPs and the Downward API environment variables +func (kl *Kubelet) sortPodIPs(podIPs []string) []string { + ips := make([]string, 0, 2) var validPrimaryIP, validSecondaryIP func(ip string) bool if len(kl.nodeIPs) == 0 || utilnet.IsIPv4(kl.nodeIPs[0]) { validPrimaryIP = utilnet.IsIPv4String @@ -1588,21 +1593,40 @@ func (kl *Kubelet) convertStatusToAPIStatus(pod *v1.Pod, podStatus *kubecontaine validPrimaryIP = utilnet.IsIPv6String validSecondaryIP = utilnet.IsIPv4String } - for _, ip := range podStatus.IPs { + for _, ip := range podIPs { if validPrimaryIP(ip) { - podIPs = append(podIPs, v1.PodIP{IP: ip}) + ips = append(ips, ip) break } } - for _, ip := range podStatus.IPs { + for _, ip := range podIPs { if validSecondaryIP(ip) { - podIPs = append(podIPs, v1.PodIP{IP: ip}) + ips = append(ips, ip) break } } - apiPodStatus.PodIPs = podIPs - if len(podIPs) > 0 { - apiPodStatus.PodIP = podIPs[0].IP + return ips +} + +// convertStatusToAPIStatus creates an api PodStatus for the given pod from +// the given internal pod status. It is purely transformative and does not +// alter the kubelet state at all. +func (kl *Kubelet) convertStatusToAPIStatus(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *v1.PodStatus { + var apiPodStatus v1.PodStatus + + // copy pod status IPs to avoid race conditions with PodStatus #102806 + podIPs := make([]string, len(podStatus.IPs)) + for j, ip := range podStatus.IPs { + podIPs[j] = ip + } + + // make podIPs order match node IP family preference #97979 + podIPs = kl.sortPodIPs(podIPs) + for _, ip := range podIPs { + apiPodStatus.PodIPs = append(apiPodStatus.PodIPs, v1.PodIP{IP: ip}) + } + if len(apiPodStatus.PodIPs) > 0 { + apiPodStatus.PodIP = apiPodStatus.PodIPs[0].IP } // set status for Pods created on versions of kube older than 1.6 diff --git a/pkg/kubelet/kubelet_pods_test.go b/pkg/kubelet/kubelet_pods_test.go index 102761b5768..0bb28d26a02 100644 --- a/pkg/kubelet/kubelet_pods_test.go +++ b/pkg/kubelet/kubelet_pods_test.go @@ -461,6 +461,7 @@ func TestMakeEnvironmentVariables(t *testing.T) { unsyncedServices bool // whether the services should NOT be synced configMap *v1.ConfigMap // an optional ConfigMap to pull from secret *v1.Secret // an optional Secret to pull from + podIPs []string // the pod IPs expectedEnvs []kubecontainer.EnvVar // a set of expected environment vars expectedError bool // does the test fail expectedEvent string // does the test emit an event @@ -766,6 +767,7 @@ func TestMakeEnvironmentVariables(t *testing.T) { }, }, }, + podIPs: []string{"1.2.3.4", "fd00::6"}, masterServiceNs: "nothing", nilLister: true, expectedEnvs: []kubecontainer.EnvVar{ @@ -778,6 +780,94 @@ func TestMakeEnvironmentVariables(t *testing.T) { {Name: "HOST_IP", Value: testKubeletHostIP}, }, }, + { + name: "downward api pod ips reverse order", + ns: "downward-api", + enableServiceLinks: &falseValue, + container: &v1.Container{ + Env: []v1.EnvVar{ + { + Name: "POD_IP", + ValueFrom: &v1.EnvVarSource{ + FieldRef: &v1.ObjectFieldSelector{ + APIVersion: "v1", + FieldPath: "status.podIP", + }, + }, + }, + { + Name: "POD_IPS", + ValueFrom: &v1.EnvVarSource{ + FieldRef: &v1.ObjectFieldSelector{ + APIVersion: "v1", + FieldPath: "status.podIPs", + }, + }, + }, + { + Name: "HOST_IP", + ValueFrom: &v1.EnvVarSource{ + FieldRef: &v1.ObjectFieldSelector{ + APIVersion: "v1", + FieldPath: "status.hostIP", + }, + }, + }, + }, + }, + podIPs: []string{"fd00::6", "1.2.3.4"}, + masterServiceNs: "nothing", + nilLister: true, + expectedEnvs: []kubecontainer.EnvVar{ + {Name: "POD_IP", Value: "1.2.3.4"}, + {Name: "POD_IPS", Value: "1.2.3.4,fd00::6"}, + {Name: "HOST_IP", Value: testKubeletHostIP}, + }, + }, + { + name: "downward api pod ips multiple ips", + ns: "downward-api", + enableServiceLinks: &falseValue, + container: &v1.Container{ + Env: []v1.EnvVar{ + { + Name: "POD_IP", + ValueFrom: &v1.EnvVarSource{ + FieldRef: &v1.ObjectFieldSelector{ + APIVersion: "v1", + FieldPath: "status.podIP", + }, + }, + }, + { + Name: "POD_IPS", + ValueFrom: &v1.EnvVarSource{ + FieldRef: &v1.ObjectFieldSelector{ + APIVersion: "v1", + FieldPath: "status.podIPs", + }, + }, + }, + { + Name: "HOST_IP", + ValueFrom: &v1.EnvVarSource{ + FieldRef: &v1.ObjectFieldSelector{ + APIVersion: "v1", + FieldPath: "status.hostIP", + }, + }, + }, + }, + }, + podIPs: []string{"1.2.3.4", "192.168.1.1.", "fd00::6"}, + masterServiceNs: "nothing", + nilLister: true, + expectedEnvs: []kubecontainer.EnvVar{ + {Name: "POD_IP", Value: "1.2.3.4"}, + {Name: "POD_IPS", Value: "1.2.3.4,fd00::6"}, + {Name: "HOST_IP", Value: testKubeletHostIP}, + }, + }, { name: "env expansion", ns: "test1", @@ -1685,13 +1775,15 @@ func TestMakeEnvironmentVariables(t *testing.T) { EnableServiceLinks: tc.enableServiceLinks, }, } - podIP := "1.2.3.4" - podIPs := []string{"1.2.3.4,fd00::6"} + podIP := "" + if len(tc.podIPs) > 0 { + podIP = tc.podIPs[0] + } if tc.staticPod { testPod.Annotations[kubetypes.ConfigSourceAnnotationKey] = "file" } - result, err := kl.makeEnvironmentVariables(testPod, tc.container, podIP, podIPs) + result, err := kl.makeEnvironmentVariables(testPod, tc.container, podIP, tc.podIPs) select { case e := <-fakeRecorder.Events: assert.Equal(t, tc.expectedEvent, e) @@ -2837,6 +2929,33 @@ func TestGenerateAPIPodStatusHostNetworkPodIPs(t *testing.T) { {IP: "192.168.0.1"}, }, }, + { + name: "CRI dual-stack PodIPs override NodeAddresses", + nodeAddresses: []v1.NodeAddress{ + {Type: v1.NodeInternalIP, Address: "10.0.0.1"}, + {Type: v1.NodeInternalIP, Address: "fd01::1234"}, + }, + dualStack: true, + criPodIPs: []string{"192.168.0.1", "2001:db8::2"}, + podIPs: []v1.PodIP{ + {IP: "192.168.0.1"}, + {IP: "2001:db8::2"}, + }, + }, + { + // by default the cluster prefers IPv4 + name: "CRI dual-stack PodIPs override NodeAddresses prefer IPv4", + nodeAddresses: []v1.NodeAddress{ + {Type: v1.NodeInternalIP, Address: "10.0.0.1"}, + {Type: v1.NodeInternalIP, Address: "fd01::1234"}, + }, + dualStack: true, + criPodIPs: []string{"2001:db8::2", "192.168.0.1"}, + podIPs: []v1.PodIP{ + {IP: "192.168.0.1"}, + {IP: "2001:db8::2"}, + }, + }, } for _, tc := range testcases { @@ -3009,3 +3128,95 @@ func TestGenerateAPIPodStatusPodIPs(t *testing.T) { }) } } + +func TestSortPodIPs(t *testing.T) { + testcases := []struct { + name string + nodeIP string + podIPs []string + expectedIPs []string + }{ + { + name: "Simple", + nodeIP: "", + podIPs: []string{"10.0.0.1"}, + expectedIPs: []string{"10.0.0.1"}, + }, + { + name: "Dual-stack", + nodeIP: "", + podIPs: []string{"10.0.0.1", "fd01::1234"}, + expectedIPs: []string{"10.0.0.1", "fd01::1234"}, + }, + { + name: "Dual-stack with explicit node IP", + nodeIP: "192.168.1.1", + podIPs: []string{"10.0.0.1", "fd01::1234"}, + expectedIPs: []string{"10.0.0.1", "fd01::1234"}, + }, + { + name: "Dual-stack with CRI returning wrong family first", + nodeIP: "", + podIPs: []string{"fd01::1234", "10.0.0.1"}, + expectedIPs: []string{"10.0.0.1", "fd01::1234"}, + }, + { + name: "Dual-stack with explicit node IP with CRI returning wrong family first", + nodeIP: "192.168.1.1", + podIPs: []string{"fd01::1234", "10.0.0.1"}, + expectedIPs: []string{"10.0.0.1", "fd01::1234"}, + }, + { + name: "Dual-stack with IPv6 node IP", + nodeIP: "fd00::5678", + podIPs: []string{"10.0.0.1", "fd01::1234"}, + expectedIPs: []string{"fd01::1234", "10.0.0.1"}, + }, + { + name: "Dual-stack with IPv6 node IP, other CRI order", + nodeIP: "fd00::5678", + podIPs: []string{"fd01::1234", "10.0.0.1"}, + expectedIPs: []string{"fd01::1234", "10.0.0.1"}, + }, + { + name: "No Pod IP matching Node IP", + nodeIP: "fd00::5678", + podIPs: []string{"10.0.0.1"}, + expectedIPs: []string{"10.0.0.1"}, + }, + { + name: "No Pod IP matching (unspecified) Node IP", + nodeIP: "", + podIPs: []string{"fd01::1234"}, + expectedIPs: []string{"fd01::1234"}, + }, + { + name: "Multiple IPv4 IPs", + nodeIP: "", + podIPs: []string{"10.0.0.1", "10.0.0.2", "10.0.0.3"}, + expectedIPs: []string{"10.0.0.1"}, + }, + { + name: "Multiple Dual-Stack IPs", + nodeIP: "", + podIPs: []string{"10.0.0.1", "10.0.0.2", "fd01::1234", "10.0.0.3", "fd01::5678"}, + expectedIPs: []string{"10.0.0.1", "fd01::1234"}, + }, + } + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */) + defer testKubelet.Cleanup() + kl := testKubelet.kubelet + if tc.nodeIP != "" { + kl.nodeIPs = []net.IP{net.ParseIP(tc.nodeIP)} + } + + podIPs := kl.sortPodIPs(tc.podIPs) + if !reflect.DeepEqual(podIPs, tc.expectedIPs) { + t.Fatalf("Expected PodIPs %#v, got %#v", tc.expectedIPs, podIPs) + } + }) + } +}