From 8ea9417a37af62f341f94201692e1a516fb8e316 Mon Sep 17 00:00:00 2001 From: Yang Guo Date: Thu, 9 Nov 2017 17:04:43 -0800 Subject: [PATCH 01/18] Adjust GKE spec to validate images with kernel version 4.10+ --- test/e2e_node/BUILD | 1 + test/e2e_node/gke_environment_test.go | 28 +++++++++++++++++++++++++++ test/e2e_node/system/specs/gke.yaml | 4 +++- 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/test/e2e_node/BUILD b/test/e2e_node/BUILD index 8dc19032913..e22300efd97 100644 --- a/test/e2e_node/BUILD +++ b/test/e2e_node/BUILD @@ -135,6 +135,7 @@ go_test( "//test/e2e_node/services:go_default_library", "//test/e2e_node/system:go_default_library", "//test/utils/image:go_default_library", + "//vendor/github.com/blang/semver:go_default_library", "//vendor/github.com/coreos/go-systemd/util:go_default_library", "//vendor/github.com/davecgh/go-spew/spew:go_default_library", "//vendor/github.com/golang/glog:go_default_library", diff --git a/test/e2e_node/gke_environment_test.go b/test/e2e_node/gke_environment_test.go index 1b48614a628..bd52964d1fd 100644 --- a/test/e2e_node/gke_environment_test.go +++ b/test/e2e_node/gke_environment_test.go @@ -28,6 +28,7 @@ import ( "k8s.io/kubernetes/test/e2e/framework" + "github.com/blang/semver" . "github.com/onsi/ginkgo" ) @@ -122,6 +123,18 @@ func checkDockerConfig() error { } missing = map[string]bool{} ) + + // Whitelists CONFIG_DEVPTS_MULTIPLE_INSTANCES (meaning allowing it to be + // absent) if the kernel version is >= 4.8, because this option has been + // removed from the 4.8 kernel. + kernelVersion, err := getKernelVersion() + if err != nil { + return err + } + if kernelVersion.GTE(semver.MustParse("4.8.0")) { + whitelist["CONFIG_DEVPTS_MULTIPLE_INSTANCES"] = true + } + for _, bin := range bins { if _, err := os.Stat(bin); os.IsNotExist(err) { continue @@ -400,3 +413,18 @@ func getCmdToProcessMap() (map[string][]process, error) { } return result, nil } + +// getKernelVersion returns the kernel version in the semantic version format. +func getKernelVersion() (*semver.Version, error) { + output, err := runCommand("uname", "-r") + if err != nil { + return nil, err + } + // An example 'output' could be "4.13.0-1001-gke". + v := strings.TrimSpace(strings.Split(output, "-")[0]) + kernelVersion, err := semver.Make(v) + if err != nil { + return nil, fmt.Errorf("failed to convert %q to semantic version: %s", v, err) + } + return &kernelVersion, nil +} diff --git a/test/e2e_node/system/specs/gke.yaml b/test/e2e_node/system/specs/gke.yaml index 3863634765f..0e795d4edfa 100644 --- a/test/e2e_node/system/specs/gke.yaml +++ b/test/e2e_node/system/specs/gke.yaml @@ -5,7 +5,9 @@ os: Linux kernelSpec: versions: # GKE requires kernel version 4.4+. - - 4\.[4-9].* + - '4\.[4-9].*' + - '4\.[1-9][0-9].*' + - '[5-9].*' # Required kernel configurations -- the configuration must be set to "y" or # "m". From 2827b7ffb7ecd5894307a772d74a3bcca6a4dc44 Mon Sep 17 00:00:00 2001 From: Dane LeBlanc Date: Tue, 19 Sep 2017 17:29:29 -0400 Subject: [PATCH 02/18] Add brackets around IPv6 addrs in e2e test IP:port endpoints There are several locations in the e2e tests where endpoints of the form IP:port use IPv6 addresses directly, without surrounding brackets. Brackets are required around IPv6 addresses in this case, in order to distinguish the colons in the IPv6 address from the colon immediately preceding the port. 
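The fix throughout this series is to build such endpoints with net.JoinHostPort from the Go standard library, which adds the brackets automatically whenever the host contains a colon (as IPv6 literals do). A minimal standalone sketch of that behavior, for illustration only (the addresses and port are made up): package main import ( "fmt" "net" "strconv" ) func main() { port := strconv.Itoa(8080) // IPv4 host: printed as-is, "10.0.0.1:8080". fmt.Println(net.JoinHostPort("10.0.0.1", port)) // IPv6 host: brackets are added, "[fd00::1]:8080". fmt.Println(net.JoinHostPort("fd00::1", port)) }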
Also, wherever the curl command might be used with an IPv6 address surrounded in brackets, the "-g" argument is added to the curl command line arguments so that the brackets can be interpreted correctly. fixes #52746 --- test/e2e/framework/networking_utils.go | 42 +++++++++++++++++--------- test/e2e/framework/service_util.go | 15 ++++++--- test/e2e/framework/util.go | 2 +- test/e2e/network/service.go | 15 ++++++--- test/images/nettest/nettest.go | 4 ++- 5 files changed, 54 insertions(+), 24 deletions(-) diff --git a/test/e2e/framework/networking_utils.go b/test/e2e/framework/networking_utils.go index 643805b4fc5..66d7e255271 100644 --- a/test/e2e/framework/networking_utils.go +++ b/test/e2e/framework/networking_utils.go @@ -179,9 +179,12 @@ func (config *NetworkingTestConfig) EndpointHostnames() sets.String { // more for maxTries. Use this if you want to eg: fail a readiness check on a // pod and confirm it doesn't show up as an endpoint. func (config *NetworkingTestConfig) DialFromContainer(protocol, containerIP, targetIP string, containerHttpPort, targetPort, maxTries, minTries int, expectedEps sets.String) { - cmd := fmt.Sprintf("curl -q -s 'http://%s:%d/dial?request=hostName&protocol=%s&host=%s&port=%d&tries=1'", - containerIP, - containerHttpPort, + ipPort := net.JoinHostPort(containerIP, strconv.Itoa(containerHttpPort)) + // The current versions of curl included in CentOS and RHEL distros + // misinterpret square brackets around IPv6 as globbing, so use the -g + // argument to disable globbing to handle the IPv6 case. + cmd := fmt.Sprintf("curl -g -q -s 'http://%s/dial?request=hostName&protocol=%s&host=%s&port=%d&tries=1'", + ipPort, protocol, targetIP, targetPort) @@ -234,9 +237,12 @@ func (config *NetworkingTestConfig) GetEndpointsFromTestContainer(protocol, targ // - tries is the number of curl attempts. If this many attempts pass and // we don't see any endpoints, the test fails. func (config *NetworkingTestConfig) GetEndpointsFromContainer(protocol, containerIP, targetIP string, containerHttpPort, targetPort, tries int) (sets.String, error) { - cmd := fmt.Sprintf("curl -q -s 'http://%s:%d/dial?request=hostName&protocol=%s&host=%s&port=%d&tries=1'", - containerIP, - containerHttpPort, + ipPort := net.JoinHostPort(containerIP, strconv.Itoa(containerHttpPort)) + // The current versions of curl included in CentOS and RHEL distros + // misinterpret square brackets around IPv6 as globbing, so use the -g + // argument to disable globbing to handle the IPv6 case. + cmd := fmt.Sprintf("curl -g -q -s 'http://%s/dial?request=hostName&protocol=%s&host=%s&port=%d&tries=1'", + ipPort, protocol, targetIP, targetPort) @@ -289,7 +295,11 @@ func (config *NetworkingTestConfig) DialFromNode(protocol, targetIP string, targ // busybox timeout doesn't support non-integer values. cmd = fmt.Sprintf("echo 'hostName' | timeout -t 2 nc -w 1 -u %s %d", targetIP, targetPort) } else { - cmd = fmt.Sprintf("timeout -t 15 curl -q -s --connect-timeout 1 http://%s:%d/hostName", targetIP, targetPort) + ipPort := net.JoinHostPort(targetIP, strconv.Itoa(targetPort)) + // The current versions of curl included in CentOS and RHEL distros + // misinterpret square brackets around IPv6 as globbing, so use the -g + // argument to disable globbing to handle the IPv6 case. + cmd = fmt.Sprintf("timeout -t 15 curl -g -q -s --connect-timeout 1 http://%s/hostName", ipPort) } // TODO: This simply tells us that we can reach the endpoints. 
Check that @@ -736,7 +746,8 @@ func TestReachableHTTPWithContentTimeout(ip string, port int, request string, ex func TestReachableHTTPWithContentTimeoutWithRetriableErrorCodes(ip string, port int, request string, expect string, content *bytes.Buffer, retriableErrCodes []int, timeout time.Duration) (bool, error) { - url := fmt.Sprintf("http://%s:%d%s", ip, port, request) + ipPort := net.JoinHostPort(ip, strconv.Itoa(port)) + url := fmt.Sprintf("http://%s%s", ipPort, request) if ip == "" { Failf("Got empty IP for reachability check (%s)", url) return false, nil @@ -783,7 +794,8 @@ func TestNotReachableHTTP(ip string, port int) (bool, error) { } func TestNotReachableHTTPTimeout(ip string, port int, timeout time.Duration) (bool, error) { - url := fmt.Sprintf("http://%s:%d", ip, port) + ipPort := net.JoinHostPort(ip, strconv.Itoa(port)) + url := fmt.Sprintf("http://%s", ipPort) if ip == "" { Failf("Got empty IP for non-reachability check (%s)", url) return false, nil @@ -805,7 +817,8 @@ func TestNotReachableHTTPTimeout(ip string, port int, timeout time.Duration) (bo } func TestReachableUDP(ip string, port int, request string, expect string) (bool, error) { - uri := fmt.Sprintf("udp://%s:%d", ip, port) + ipPort := net.JoinHostPort(ip, strconv.Itoa(port)) + uri := fmt.Sprintf("udp://%s", ipPort) if ip == "" { Failf("Got empty IP for reachability check (%s)", uri) return false, nil @@ -817,9 +830,9 @@ func TestReachableUDP(ip string, port int, request string, expect string) (bool, Logf("Testing UDP reachability of %v", uri) - con, err := net.Dial("udp", ip+":"+strconv.Itoa(port)) + con, err := net.Dial("udp", ipPort) if err != nil { - return false, fmt.Errorf("Failed to dial %s:%d: %v", ip, port, err) + return false, fmt.Errorf("Failed to dial %s: %v", ipPort, err) } _, err = con.Write([]byte(fmt.Sprintf("%s\n", request))) @@ -848,7 +861,8 @@ func TestReachableUDP(ip string, port int, request string, expect string) (bool, } func TestNotReachableUDP(ip string, port int, request string) (bool, error) { - uri := fmt.Sprintf("udp://%s:%d", ip, port) + ipPort := net.JoinHostPort(ip, strconv.Itoa(port)) + uri := fmt.Sprintf("udp://%s", ipPort) if ip == "" { Failf("Got empty IP for reachability check (%s)", uri) return false, nil @@ -860,7 +874,7 @@ func TestNotReachableUDP(ip string, port int, request string) (bool, error) { Logf("Testing UDP non-reachability of %v", uri) - con, err := net.Dial("udp", ip+":"+strconv.Itoa(port)) + con, err := net.Dial("udp", ipPort) if err != nil { Logf("Confirmed that %s is not reachable", uri) return true, nil diff --git a/test/e2e/framework/service_util.go b/test/e2e/framework/service_util.go index cfb3ebdf278..934c044a7e8 100644 --- a/test/e2e/framework/service_util.go +++ b/test/e2e/framework/service_util.go @@ -19,6 +19,7 @@ package framework import ( "bytes" "fmt" + "net" "sort" "strconv" "strings" @@ -878,7 +879,8 @@ func (j *ServiceTestJig) GetHTTPContent(host string, port int, timeout time.Dura } func testHTTPHealthCheckNodePort(ip string, port int, request string) (bool, error) { - url := fmt.Sprintf("http://%s:%d%s", ip, port, request) + ipPort := net.JoinHostPort(ip, strconv.Itoa(port)) + url := fmt.Sprintf("http://%s%s", ipPort, request) if ip == "" || port == 0 { Failf("Got empty IP for reachability check (%s)", url) return false, fmt.Errorf("Invalid input ip or port") @@ -1292,8 +1294,9 @@ func VerifyServeHostnameServiceUp(c clientset.Interface, ns, host string, expect // Loop a bunch of times - the proxy is randomized, so we want a good // chance of 
hitting each backend at least once. buildCommand := func(wget string) string { - return fmt.Sprintf("for i in $(seq 1 %d); do %s http://%s:%d 2>&1 || true; echo; done", - 50*len(expectedPods), wget, serviceIP, servicePort) + serviceIPPort := net.JoinHostPort(serviceIP, strconv.Itoa(servicePort)) + return fmt.Sprintf("for i in $(seq 1 %d); do %s http://%s 2>&1 || true; echo; done", + 50*len(expectedPods), wget, serviceIPPort) } commands := []func() string{ // verify service from node @@ -1360,8 +1363,12 @@ func VerifyServeHostnameServiceUp(c clientset.Interface, ns, host string, expect } func VerifyServeHostnameServiceDown(c clientset.Interface, host string, serviceIP string, servicePort int) error { + ipPort := net.JoinHostPort(serviceIP, strconv.Itoa(servicePort)) + // The current versions of curl included in CentOS and RHEL distros + // misinterpret square brackets around IPv6 as globbing, so use the -g + // argument to disable globbing to handle the IPv6 case. command := fmt.Sprintf( - "curl -s --connect-timeout 2 http://%s:%d && exit 99", serviceIP, servicePort) + "curl -g -s --connect-timeout 2 http://%s && exit 99", ipPort) for start := time.Now(); time.Since(start) < time.Minute; time.Sleep(5 * time.Second) { result, err := SSH(command, host, TestContext.Provider) diff --git a/test/e2e/framework/util.go b/test/e2e/framework/util.go index bddeeeb3637..4105e90cdda 100644 --- a/test/e2e/framework/util.go +++ b/test/e2e/framework/util.go @@ -4496,7 +4496,7 @@ func LaunchWebserverPod(f *Framework, podName, nodeName string) (ip string) { ExpectNoError(f.WaitForPodRunning(podName)) createdPod, err := podClient.Get(podName, metav1.GetOptions{}) ExpectNoError(err) - ip = fmt.Sprintf("%s:%d", createdPod.Status.PodIP, port) + ip = net.JoinHostPort(createdPod.Status.PodIP, strconv.Itoa(port)) Logf("Target pod IP:port is %s", ip) return } diff --git a/test/e2e/network/service.go b/test/e2e/network/service.go index 5bf65d1a806..2a9ec84eb5f 100644 --- a/test/e2e/network/service.go +++ b/test/e2e/network/service.go @@ -20,6 +20,8 @@ import ( "bytes" "fmt" "math/rand" + "net" + "strconv" "strings" "time" @@ -1641,7 +1643,9 @@ var _ = SIGDescribe("ESIPP [Slow]", func() { // Confirm traffic can reach backend through LB before checking healthcheck nodeport. 
jig.TestReachableHTTP(ingressIP, svcTCPPort, framework.KubeProxyLagTimeout) expectedSuccess := nodes.Items[n].Name == endpointNodeName - framework.Logf("Health checking %s, http://%s:%d%s, expectedSuccess %v", nodes.Items[n].Name, publicIP, healthCheckNodePort, path, expectedSuccess) + port := strconv.Itoa(healthCheckNodePort) + ipPort := net.JoinHostPort(publicIP, port) + framework.Logf("Health checking %s, http://%s%s, expectedSuccess %v", nodes.Items[n].Name, ipPort, path, expectedSuccess) Expect(jig.TestHTTPHealthCheckNodePort(publicIP, healthCheckNodePort, path, framework.KubeProxyEndpointLagTimeout, expectedSuccess, threshold)).NotTo(HaveOccurred()) } framework.ExpectNoError(framework.DeleteRCAndPods(f.ClientSet, f.InternalClientset, namespace, serviceName)) @@ -1662,7 +1666,9 @@ var _ = SIGDescribe("ESIPP [Slow]", func() { }() ingressIP := framework.GetIngressPoint(&svc.Status.LoadBalancer.Ingress[0]) - path := fmt.Sprintf("%s:%d/clientip", ingressIP, int(svc.Spec.Ports[0].Port)) + port := strconv.Itoa(int(svc.Spec.Ports[0].Port)) + ipPort := net.JoinHostPort(ingressIP, port) + path := fmt.Sprintf("%s/clientip", ipPort) nodeName := nodes.Items[0].Name podName := "execpod-sourceip" @@ -1813,9 +1819,10 @@ func execSourceipTest(f *framework.Framework, c clientset.Interface, ns, nodeNam framework.ExpectNoError(err) var stdout string + serviceIPPort := net.JoinHostPort(serviceIP, strconv.Itoa(servicePort)) timeout := 2 * time.Minute - framework.Logf("Waiting up to %v wget %s:%d", timeout, serviceIP, servicePort) - cmd := fmt.Sprintf(`wget -T 30 -qO- %s:%d | grep client_address`, serviceIP, servicePort) + framework.Logf("Waiting up to %v wget %s", timeout, serviceIPPort) + cmd := fmt.Sprintf(`wget -T 30 -qO- %s | grep client_address`, serviceIPPort) for start := time.Now(); time.Since(start) < timeout; time.Sleep(2) { stdout, err = framework.RunHostCmd(execPod.Namespace, execPod.Name, cmd) if err != nil { diff --git a/test/images/nettest/nettest.go b/test/images/nettest/nettest.go index bc065f83f95..09f92f47888 100644 --- a/test/images/nettest/nettest.go +++ b/test/images/nettest/nettest.go @@ -36,6 +36,7 @@ import ( "fmt" "io/ioutil" "log" + "net" "net/http" "os" "os/signal" @@ -278,7 +279,8 @@ func getWebserverEndpoints(client clientset.Interface) sets.String { for _, ss := range endpoints.Subsets { for _, a := range ss.Addresses { for _, p := range ss.Ports { - eps.Insert(fmt.Sprintf("http://%s:%d", a.IP, p.Port)) + ipPort := net.JoinHostPort(a.IP, fmt.Sprint(p.Port)) + eps.Insert(fmt.Sprintf("http://%s", ipPort)) } } } From e08c98b171ff9dd3982377103f9e43c311c7a78d Mon Sep 17 00:00:00 2001 From: Mike Danese Date: Mon, 27 Nov 2017 10:59:56 -0800 Subject: [PATCH 03/18] certs: add month buckets --- .../k8s.io/apiserver/pkg/authentication/request/x509/x509.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/staging/src/k8s.io/apiserver/pkg/authentication/request/x509/x509.go b/staging/src/k8s.io/apiserver/pkg/authentication/request/x509/x509.go index d583a321846..708a89e9eac 100644 --- a/staging/src/k8s.io/apiserver/pkg/authentication/request/x509/x509.go +++ b/staging/src/k8s.io/apiserver/pkg/authentication/request/x509/x509.go @@ -47,6 +47,10 @@ var clientCertificateExpirationHistogram = prometheus.NewHistogram( (2 * 24 * time.Hour).Seconds(), (4 * 24 * time.Hour).Seconds(), (7 * 24 * time.Hour).Seconds(), + (30 * 24 * time.Hour).Seconds(), + (3 * 30 * 24 * time.Hour).Seconds(), + (6 * 30 * 24 * time.Hour).Seconds(), + (12 * 30 * 24 * time.Hour).Seconds(), }, }, ) From 
2ca9a3185ca23a86b6c40f3a8d9eaea738183459 Mon Sep 17 00:00:00 2001 From: Isaac Hollander McCreery Date: Wed, 8 Nov 2017 14:12:02 -0800 Subject: [PATCH 04/18] Configure metadata concealment iptables rules in node startup. --- .../addons/metadata-proxy/gce/metadata-proxy.yaml | 2 +- cluster/gce/configure-vm.sh | 8 +++----- cluster/gce/gci/configure-helper.sh | 14 ++++++-------- 3 files changed, 10 insertions(+), 14 deletions(-) diff --git a/cluster/addons/metadata-proxy/gce/metadata-proxy.yaml b/cluster/addons/metadata-proxy/gce/metadata-proxy.yaml index 376fb65f174..767322549c0 100644 --- a/cluster/addons/metadata-proxy/gce/metadata-proxy.yaml +++ b/cluster/addons/metadata-proxy/gce/metadata-proxy.yaml @@ -38,7 +38,7 @@ spec: dnsPolicy: Default containers: - name: metadata-proxy - image: gcr.io/google_containers/metadata-proxy:v0.1.4 + image: gcr.io/google_containers/metadata-proxy:v0.1.5 securityContext: privileged: true resources: diff --git a/cluster/gce/configure-vm.sh b/cluster/gce/configure-vm.sh index 124cff8e9ce..4bc6dc5a67a 100755 --- a/cluster/gce/configure-vm.sh +++ b/cluster/gce/configure-vm.sh @@ -86,11 +86,9 @@ ensure-local-disks() { function config-ip-firewall { echo "Configuring IP firewall rules" - iptables -N KUBE-METADATA-SERVER - iptables -I FORWARD -p tcp -d 169.254.169.254 --dport 80 -j KUBE-METADATA-SERVER - if [[ "${ENABLE_METADATA_CONCEALMENT:-}" == "true" ]]; then - iptables -A KUBE-METADATA-SERVER -j DROP + echo "Add rule for metadata concealment" + iptables -w -t nat -I PREROUTING -p tcp -d 169.254.169.254 --dport 80 -m comment --comment "metadata-concealment: bridge traffic to metadata server goes to metadata proxy" -j DNAT --to-destination 127.0.0.1:988 fi } @@ -856,7 +854,6 @@ fi if [[ -z "${is_push}" ]]; then echo "== kube-up node config starting ==" set-broken-motd - config-ip-firewall ensure-basic-networking fix-apt-sources ensure-install-dir @@ -873,6 +870,7 @@ if [[ -z "${is_push}" ]]; then download-release configure-salt remove-docker-artifacts + config-ip-firewall run-salt reset-motd diff --git a/cluster/gce/gci/configure-helper.sh b/cluster/gce/gci/configure-helper.sh index cc0b3d20452..8b8488db324 100644 --- a/cluster/gce/gci/configure-helper.sh +++ b/cluster/gce/gci/configure-helper.sh @@ -57,18 +57,11 @@ function config-ip-firewall { iptables -A FORWARD -w -p ICMP -j ACCEPT fi - iptables -w -N KUBE-METADATA-SERVER - iptables -w -I FORWARD -p tcp -d 169.254.169.254 --dport 80 -j KUBE-METADATA-SERVER - - if [[ "${ENABLE_METADATA_CONCEALMENT:-}" == "true" ]]; then - iptables -w -A KUBE-METADATA-SERVER -j DROP - fi - # Flush iptables nat table iptables -w -t nat -F || true - echo "Add rules for ip masquerade" if [[ "${NON_MASQUERADE_CIDR:-}" == "0.0.0.0/0" ]]; then + echo "Add rules for ip masquerade" iptables -w -t nat -N IP-MASQ iptables -w -t nat -A POSTROUTING -m comment --comment "ip-masq: ensure nat POSTROUTING directs all non-LOCAL destination traffic to our custom IP-MASQ chain" -m addrtype ! 
--dst-type LOCAL -j IP-MASQ iptables -w -t nat -A IP-MASQ -d 169.254.0.0/16 -m comment --comment "ip-masq: local traffic is not subject to MASQUERADE" -j RETURN @@ -77,6 +70,11 @@ function config-ip-firewall { iptables -w -t nat -A IP-MASQ -d 192.168.0.0/16 -m comment --comment "ip-masq: local traffic is not subject to MASQUERADE" -j RETURN iptables -w -t nat -A IP-MASQ -m comment --comment "ip-masq: outbound traffic is subject to MASQUERADE (must be last in chain)" -j MASQUERADE fi + + if [[ "${ENABLE_METADATA_CONCEALMENT:-}" == "true" ]]; then + echo "Add rule for metadata concealment" + iptables -w -t nat -I PREROUTING -p tcp -d 169.254.169.254 --dport 80 -m comment --comment "metadata-concealment: bridge traffic to metadata server goes to metadata proxy" -j DNAT --to-destination 127.0.0.1:988 + fi } function create-dirs { From b314d188776109b80e353144c1dbf4e9862d9031 Mon Sep 17 00:00:00 2001 From: Karol Wychowaniec Date: Fri, 24 Nov 2017 10:22:59 +0100 Subject: [PATCH 05/18] Fix configuration of Metadata Agent daemon set --- .../addons/metadata-agent/stackdriver/metadata-agent.yaml | 2 +- cluster/gce/config-default.sh | 2 +- cluster/gce/container-linux/configure-helper.sh | 8 ++++---- cluster/gce/gci/configure-helper.sh | 8 ++++---- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/cluster/addons/metadata-agent/stackdriver/metadata-agent.yaml b/cluster/addons/metadata-agent/stackdriver/metadata-agent.yaml index 9f602e32b96..a7f2b1c55bd 100644 --- a/cluster/addons/metadata-agent/stackdriver/metadata-agent.yaml +++ b/cluster/addons/metadata-agent/stackdriver/metadata-agent.yaml @@ -22,7 +22,7 @@ spec: name: metadata-agent ports: - containerPort: 8000 - hostPort: 8000 + hostPort: 8799 protocol: TCP resources: requests: diff --git a/cluster/gce/config-default.sh b/cluster/gce/config-default.sh index f1dc45feecb..53ab3b42a9c 100755 --- a/cluster/gce/config-default.sh +++ b/cluster/gce/config-default.sh @@ -158,7 +158,7 @@ ENABLE_METRICS_SERVER="${KUBE_ENABLE_METRICS_SERVER:-true}" ENABLE_METADATA_AGENT="${KUBE_ENABLE_METADATA_AGENT:-none}" # Version tag of metadata agent -METADATA_AGENT_VERSION="${KUBE_METADATA_AGENT_VERSION:-0.2-0.0.13-5}" +METADATA_AGENT_VERSION="${KUBE_METADATA_AGENT_VERSION:-0.2-0.0.13-5-watch}" # One special node out of NUM_NODES would be created of this type if specified. # Useful for scheduling heapster in large clusters with nodes of small size. 
diff --git a/cluster/gce/container-linux/configure-helper.sh b/cluster/gce/container-linux/configure-helper.sh index 8c4f285505a..ddb55a27736 100755 --- a/cluster/gce/container-linux/configure-helper.sh +++ b/cluster/gce/container-linux/configure-helper.sh @@ -1305,10 +1305,10 @@ EOF metadata_agent_cpu_request="${METADATA_AGENT_CPU_REQUEST:-40m}" metadata_agent_memory_request="${METADATA_AGENT_MEMORY_REQUEST:-50Mi}" setup-addon-manifests "addons" "metadata-agent/stackdriver" - deployment_yaml="${dst_dir}/metadata-agent/stackdriver/metadata-agent.yaml" - sed -i -e "s@{{ metadata_agent_version }}@${METADATA_AGENT_VERSION}@g" "${deployment_yaml}" - sed -i -e "s@{{ metadata_agent_cpu_request }}@${metadata_agent_cpu_request}@g" "${deployment_yaml}" - sed -i -e "s@{{ metadata_agent_memory_request }}@${metadata_agent_memory_request}@g" "${deployment_yaml}" + daemon_set_yaml="${dst_dir}/metadata-agent/stackdriver/metadata-agent.yaml" + sed -i -e "s@{{ metadata_agent_version }}@${METADATA_AGENT_VERSION}@g" "${daemon_set_yaml}" + sed -i -e "s@{{ metadata_agent_cpu_request }}@${metadata_agent_cpu_request}@g" "${daemon_set_yaml}" + sed -i -e "s@{{ metadata_agent_memory_request }}@${metadata_agent_memory_request}@g" "${daemon_set_yaml}" fi fi if [[ "${ENABLE_METRICS_SERVER:-}" == "true" ]]; then diff --git a/cluster/gce/gci/configure-helper.sh b/cluster/gce/gci/configure-helper.sh index 0d9efdc6e97..46ee97ca36e 100644 --- a/cluster/gce/gci/configure-helper.sh +++ b/cluster/gce/gci/configure-helper.sh @@ -2084,10 +2084,10 @@ EOF metadata_agent_cpu_request="${METADATA_AGENT_CPU_REQUEST:-40m}" metadata_agent_memory_request="${METADATA_AGENT_MEMORY_REQUEST:-50Mi}" setup-addon-manifests "addons" "metadata-agent/stackdriver" - deployment_yaml="${dst_dir}/metadata-agent/stackdriver/metadata-agent.yaml" - sed -i -e "s@{{ metadata_agent_version }}@${METADATA_AGENT_VERSION}@g" "${deployment_yaml}" - sed -i -e "s@{{ metadata_agent_cpu_request }}@${metadata_agent_cpu_request}@g" "${deployment_yaml}" - sed -i -e "s@{{ metadata_agent_memory_request }}@${metadata_agent_memory_request}@g" "${deployment_yaml}" + daemon_set_yaml="${dst_dir}/metadata-agent/stackdriver/metadata-agent.yaml" + sed -i -e "s@{{ metadata_agent_version }}@${METADATA_AGENT_VERSION}@g" "${daemon_set_yaml}" + sed -i -e "s@{{ metadata_agent_cpu_request }}@${metadata_agent_cpu_request}@g" "${daemon_set_yaml}" + sed -i -e "s@{{ metadata_agent_memory_request }}@${metadata_agent_memory_request}@g" "${daemon_set_yaml}" fi fi if [[ "${ENABLE_METRICS_SERVER:-}" == "true" ]]; then From 19fc7742bbe0e82bb8f10c64144d620ebdf74032 Mon Sep 17 00:00:00 2001 From: m1093782566 Date: Thu, 30 Nov 2017 17:49:08 +0800 Subject: [PATCH 06/18] declare ipvs proxier beta --- pkg/features/kube_features.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index 5002f5e41ba..80599039bc4 100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -140,12 +140,6 @@ const ( // 'MemoryPressure', 'OutOfDisk' and 'DiskPressure'. 
TaintNodesByCondition utilfeature.Feature = "TaintNodesByCondition" - // owner: @haibinxie - // alpha: v1.8 - // - // Implement IPVS-based in-cluster service load balancing - SupportIPVSProxyMode utilfeature.Feature = "SupportIPVSProxyMode" - // owner: @jsafrane // alpha: v1.8 // @@ -212,6 +206,12 @@ const ( // // Enable resource limits priority function ResourceLimitsPriorityFunction utilfeature.Feature = "ResourceLimitsPriorityFunction" + + // owner: @m1093782566 + // beta: v1.9 + // + // Implement IPVS-based in-cluster service load balancing + SupportIPVSProxyMode utilfeature.Feature = "SupportIPVSProxyMode" ) func init() { @@ -251,6 +251,7 @@ var defaultKubernetesFeatureGates = map[utilfeature.Feature]utilfeature.FeatureS BlockVolume: {Default: false, PreRelease: utilfeature.Alpha}, PVCProtection: {Default: false, PreRelease: utilfeature.Alpha}, ResourceLimitsPriorityFunction: {Default: false, PreRelease: utilfeature.Alpha}, + SupportIPVSProxyMode: {Default: false, PreRelease: utilfeature.Beta}, // inherited features from generic apiserver, relisted here to get a conflict if it is changed // unintentionally on either side: @@ -263,5 +264,4 @@ var defaultKubernetesFeatureGates = map[utilfeature.Feature]utilfeature.FeatureS // inherited features from apiextensions-apiserver, relisted here to get a conflict if it is changed // unintentionally on either side: apiextensionsfeatures.CustomResourceValidation: {Default: true, PreRelease: utilfeature.Beta}, - SupportIPVSProxyMode: {Default: false, PreRelease: utilfeature.Alpha}, } From da8e85e28e1dac8dcb58e0761a00c5c11a7e1044 Mon Sep 17 00:00:00 2001 From: Dong Liu Date: Fri, 1 Dec 2017 18:34:47 +0800 Subject: [PATCH 07/18] Fix static IP issue for Azure internal LB --- .../providers/azure/azure_loadbalancer.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pkg/cloudprovider/providers/azure/azure_loadbalancer.go b/pkg/cloudprovider/providers/azure/azure_loadbalancer.go index 33ce1770f7f..c7fd51bc997 100644 --- a/pkg/cloudprovider/providers/azure/azure_loadbalancer.go +++ b/pkg/cloudprovider/providers/azure/azure_loadbalancer.go @@ -1134,9 +1134,13 @@ func deduplicate(collection *[]string) *[]string { func (az *Cloud) reconcilePublicIP(clusterName string, service *v1.Service, wantLb bool) (*network.PublicIPAddress, error) { isInternal := requiresInternalLoadBalancer(service) serviceName := getServiceName(service) - desiredPipName, err := az.determinePublicIPName(clusterName, service) - if err != nil { - return nil, err + var desiredPipName string + var err error + if !isInternal && wantLb { + desiredPipName, err = az.determinePublicIPName(clusterName, service) + if err != nil { + return nil, err + } } pips, err := az.ListPIPWithRetry() From 3f7c7626485853bfc3d6d08c7b338ae9b704dd31 Mon Sep 17 00:00:00 2001 From: "Dr. 
Stefan Schimanski" Date: Fri, 1 Dec 2017 11:28:43 +0100 Subject: [PATCH 08/18] kube-apiserver: enable admission registration v1beta by default --- hack/local-up-cluster.sh | 7 ------- pkg/master/BUILD | 1 + pkg/master/master.go | 4 +++- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/hack/local-up-cluster.sh b/hack/local-up-cluster.sh index 1528668174c..e6c99552f1e 100755 --- a/hack/local-up-cluster.sh +++ b/hack/local-up-cluster.sh @@ -464,13 +464,6 @@ function start_apiserver { RUNTIME_CONFIG+="admissionregistration.k8s.io/v1alpha1" fi - if [[ ${ADMISSION_CONTROL} == *"AdmissionWebhook"* ]]; then - if [[ -n "${RUNTIME_CONFIG}" ]]; then - RUNTIME_CONFIG+="," - fi - RUNTIME_CONFIG+="admissionregistration.k8s.io/v1beta1" - fi - runtime_config="" if [[ -n "${RUNTIME_CONFIG}" ]]; then runtime_config="--runtime-config=${RUNTIME_CONFIG}" diff --git a/pkg/master/BUILD b/pkg/master/BUILD index eee5265c18f..bcb36660bba 100644 --- a/pkg/master/BUILD +++ b/pkg/master/BUILD @@ -71,6 +71,7 @@ go_library( "//pkg/util/node:go_default_library", "//vendor/github.com/golang/glog:go_default_library", "//vendor/github.com/prometheus/client_golang/prometheus:go_default_library", + "//vendor/k8s.io/api/admissionregistration/v1beta1:go_default_library", "//vendor/k8s.io/api/apps/v1:go_default_library", "//vendor/k8s.io/api/apps/v1beta1:go_default_library", "//vendor/k8s.io/api/apps/v1beta2:go_default_library", diff --git a/pkg/master/master.go b/pkg/master/master.go index b20a6f5e929..174e150e7ce 100644 --- a/pkg/master/master.go +++ b/pkg/master/master.go @@ -24,6 +24,7 @@ import ( "strconv" "time" + admissionregistrationv1beta1 "k8s.io/api/admissionregistration/v1beta1" appsv1 "k8s.io/api/apps/v1" appsv1beta1 "k8s.io/api/apps/v1beta1" appsv1beta2 "k8s.io/api/apps/v1beta2" @@ -53,6 +54,7 @@ import ( "k8s.io/apiserver/pkg/server/healthz" serverstorage "k8s.io/apiserver/pkg/server/storage" storagefactory "k8s.io/apiserver/pkg/storage/storagebackend/factory" + "k8s.io/client-go/informers" corev1client "k8s.io/client-go/kubernetes/typed/core/v1" api "k8s.io/kubernetes/pkg/apis/core" coreclient "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/typed/core/internalversion" @@ -69,7 +71,6 @@ import ( "github.com/prometheus/client_golang/prometheus" // RESTStorage installers - "k8s.io/client-go/informers" admissionregistrationrest "k8s.io/kubernetes/pkg/registry/admissionregistration/rest" appsrest "k8s.io/kubernetes/pkg/registry/apps/rest" authenticationrest "k8s.io/kubernetes/pkg/registry/authentication/rest" @@ -483,6 +484,7 @@ func DefaultAPIResourceConfigSource() *serverstorage.ResourceConfig { authorizationapiv1beta1.SchemeGroupVersion, networkingapiv1.SchemeGroupVersion, eventsv1beta1.SchemeGroupVersion, + admissionregistrationv1beta1.SchemeGroupVersion, ) // all extensions resources except these are disabled by default From 5b214bbac5468b377437788b49a042adc4e97898 Mon Sep 17 00:00:00 2001 From: "Dr. 
Stefan Schimanski" Date: Fri, 1 Dec 2017 14:14:39 +0100 Subject: [PATCH 09/18] admission: do not require v1alpha1 for v1beta1 --- pkg/registry/admissionregistration/rest/storage_apiserver.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkg/registry/admissionregistration/rest/storage_apiserver.go b/pkg/registry/admissionregistration/rest/storage_apiserver.go index a428a124d73..5b26e830bd7 100644 --- a/pkg/registry/admissionregistration/rest/storage_apiserver.go +++ b/pkg/registry/admissionregistration/rest/storage_apiserver.go @@ -39,6 +39,9 @@ func (p RESTStorageProvider) NewRESTStorage(apiResourceConfigSource serverstorag if apiResourceConfigSource.AnyResourcesForVersionEnabled(admissionregistrationv1alpha1.SchemeGroupVersion) { apiGroupInfo.VersionedResourcesStorageMap[admissionregistrationv1alpha1.SchemeGroupVersion.Version] = p.v1alpha1Storage(apiResourceConfigSource, restOptionsGetter) + apiGroupInfo.GroupMeta.GroupVersion = admissionregistrationv1alpha1.SchemeGroupVersion + } + if apiResourceConfigSource.AnyResourcesForVersionEnabled(admissionregistrationv1beta1.SchemeGroupVersion) { apiGroupInfo.VersionedResourcesStorageMap[admissionregistrationv1beta1.SchemeGroupVersion.Version] = p.v1beta1Storage(apiResourceConfigSource, restOptionsGetter) apiGroupInfo.GroupMeta.GroupVersion = admissionregistrationv1beta1.SchemeGroupVersion } } From af243f482437020b90174d9849e93d4a278a0683 Mon Sep 17 00:00:00 2001 From: Harry Zhang Date: Thu, 30 Nov 2017 21:57:10 +0800 Subject: [PATCH 10/18] Fix PV counter predicate in eclass --- .../pkg/scheduler/core/equivalence_cache.go | 17 +++++++ plugin/pkg/scheduler/factory/factory.go | 50 +++++++++++++------ 2 files changed, 53 insertions(+), 14 deletions(-) diff --git a/plugin/pkg/scheduler/core/equivalence_cache.go b/plugin/pkg/scheduler/core/equivalence_cache.go index 9977fe18d30..fafe1298a82 100644 --- a/plugin/pkg/scheduler/core/equivalence_cache.go +++ b/plugin/pkg/scheduler/core/equivalence_cache.go @@ -188,6 +188,23 @@ func (ec *EquivalenceCache) InvalidateCachedPredicateItemForPodAdd(pod *v1.Pod, // GeneralPredicates: will always be affected by adding a new pod invalidPredicates := sets.NewString("GeneralPredicates") + + // MaxPDVolumeCountPredicate: we check the volumes of the pod to make a decision. 
+ for _, vol := range pod.Spec.Volumes { + if vol.PersistentVolumeClaim != nil { + invalidPredicates.Insert("MaxEBSVolumeCount", "MaxGCEPDVolumeCount", "MaxAzureDiskVolumeCount") + } else { + if vol.AWSElasticBlockStore != nil { + invalidPredicates.Insert("MaxEBSVolumeCount") + } + if vol.GCEPersistentDisk != nil { + invalidPredicates.Insert("MaxGCEPDVolumeCount") + } + if vol.AzureDisk != nil { + invalidPredicates.Insert("MaxAzureDiskVolumeCount") + } + } + } ec.InvalidateCachedPredicateItem(nodeName, invalidPredicates) } diff --git a/plugin/pkg/scheduler/factory/factory.go b/plugin/pkg/scheduler/factory/factory.go index 764f449ccc4..0c3449590b1 100644 --- a/plugin/pkg/scheduler/factory/factory.go +++ b/plugin/pkg/scheduler/factory/factory.go @@ -71,11 +71,11 @@ const ( ) var ( - serviceAffinitySet = sets.NewString("ServiceAffinity") - maxPDVolumeCountPredicateSet = sets.NewString("MaxPDVolumeCountPredicate") - matchInterPodAffinitySet = sets.NewString("MatchInterPodAffinity") - generalPredicatesSets = sets.NewString("GeneralPredicates") - noDiskConflictSet = sets.NewString("NoDiskConflict") + serviceAffinitySet = sets.NewString("ServiceAffinity") + matchInterPodAffinitySet = sets.NewString("MatchInterPodAffinity") + generalPredicatesSets = sets.NewString("GeneralPredicates") + noDiskConflictSet = sets.NewString("NoDiskConflict") + maxPDVolumeCountPredicateKeys = []string{"MaxGCEPDVolumeCount", "MaxAzureDiskVolumeCount", "MaxEBSVolumeCount"} ) // configFactory is the default implementation of the scheduler.Configurator interface. @@ -384,7 +384,11 @@ func (c *configFactory) onPvDelete(obj interface{}) { } func (c *configFactory) invalidatePredicatesForPv(pv *v1.PersistentVolume) { - invalidPredicates := sets.NewString("MaxPDVolumeCountPredicate") + // You could have a PVC that points to a PV, but the PV object doesn't exist. + // So when the PV object gets added, we can recount. + invalidPredicates := sets.NewString() + + // PV types which impact MaxPDVolumeCountPredicate if pv.Spec.AWSElasticBlockStore != nil { invalidPredicates.Insert("MaxEBSVolumeCount") } @@ -395,6 +399,14 @@ func (c *configFactory) invalidatePredicatesForPv(pv *v1.PersistentVolume) { invalidPredicates.Insert("MaxAzureDiskVolumeCount") } + // If PV contains zone related label, it may impact cached NoVolumeZoneConflict + for k := range pv.ObjectMeta.Labels { + if k == kubeletapis.LabelZoneFailureDomain || k == kubeletapis.LabelZoneRegion { + invalidPredicates.Insert("NoVolumeZoneConflict") + break + } + } + if utilfeature.DefaultFeatureGate.Enabled(features.VolumeScheduling) { // Add/delete impacts the available PVs to choose from invalidPredicates.Insert(predicates.CheckVolumeBinding) @@ -458,24 +470,34 @@ func (c *configFactory) onPvcDelete(obj interface{}) { } func (c *configFactory) invalidatePredicatesForPvc(pvc *v1.PersistentVolumeClaim) { - if pvc.Spec.VolumeName != "" { - c.equivalencePodCache.InvalidateCachedPredicateItemOfAllNodes(maxPDVolumeCountPredicateSet) + // We need to do this here because the ecache uses PVC uid as part of equivalence hash of pod + // The binded volume type may change + invalidPredicates := sets.NewString(maxPDVolumeCountPredicateKeys...) 
+ // // The binded volume's label may change + invalidPredicates.Insert("NoVolumeZoneConflict") + + if utilfeature.DefaultFeatureGate.Enabled(features.VolumeScheduling) { + // Add/delete impacts the available PVs to choose from + invalidPredicates.Insert(predicates.CheckVolumeBinding) } + c.equivalencePodCache.InvalidateCachedPredicateItemOfAllNodes(invalidPredicates) } func (c *configFactory) invalidatePredicatesForPvcUpdate(old, new *v1.PersistentVolumeClaim) { invalidPredicates := sets.NewString() - if utilfeature.DefaultFeatureGate.Enabled(features.VolumeScheduling) { - if old.Spec.VolumeName != new.Spec.VolumeName { + if old.Spec.VolumeName != new.Spec.VolumeName { + if utilfeature.DefaultFeatureGate.Enabled(features.VolumeScheduling) { // PVC volume binding has changed invalidPredicates.Insert(predicates.CheckVolumeBinding) } + // The binded volume type may change + invalidPredicates.Insert(maxPDVolumeCountPredicateKeys...) + // The binded volume's label may change + invalidPredicates.Insert("NoVolumeZoneConflict") } - if invalidPredicates.Len() > 0 { - c.equivalencePodCache.InvalidateCachedPredicateItemOfAllNodes(invalidPredicates) - } + c.equivalencePodCache.InvalidateCachedPredicateItemOfAllNodes(invalidPredicates) } func (c *configFactory) onServiceAdd(obj interface{}) { @@ -541,7 +563,7 @@ func (c *configFactory) addPodToCache(obj interface{}) { c.podQueue.AssignedPodAdded(pod) // NOTE: Updating equivalence cache of addPodToCache has been - // handled optimistically in InvalidateCachedPredicateItemForPodAdd. + // handled optimistically in: plugin/pkg/scheduler/scheduler.go#assume() } func (c *configFactory) updatePodInCache(oldObj, newObj interface{}) { From e4055c0df23abce921bc5f1d44a3599f28b64d1e Mon Sep 17 00:00:00 2001 From: Harry Zhang Date: Wed, 29 Nov 2017 21:18:10 +0800 Subject: [PATCH 11/18] Add pvc as part of equivalence hash Use factory to generate the getEquivalencePod func --- .../algorithm/predicates/predicates_test.go | 52 ---- .../algorithm/predicates/testing_helper.go | 75 ++++++ .../scheduler/algorithm/predicates/utils.go | 52 +++- .../algorithmprovider/defaults/defaults.go | 8 +- .../pkg/scheduler/core/equivalence_cache.go | 4 +- .../scheduler/core/equivalence_cache_test.go | 241 ++++++++++++++++-- plugin/pkg/scheduler/factory/factory.go | 24 +- plugin/pkg/scheduler/factory/plugins.go | 10 +- 8 files changed, 371 insertions(+), 95 deletions(-) create mode 100644 plugin/pkg/scheduler/algorithm/predicates/testing_helper.go diff --git a/plugin/pkg/scheduler/algorithm/predicates/predicates_test.go b/plugin/pkg/scheduler/algorithm/predicates/predicates_test.go index c9808dec6e0..0ed3709e51b 100644 --- a/plugin/pkg/scheduler/algorithm/predicates/predicates_test.go +++ b/plugin/pkg/scheduler/algorithm/predicates/predicates_test.go @@ -17,7 +17,6 @@ limitations under the License. 
package predicates import ( - "fmt" "os" "reflect" "strconv" @@ -36,57 +35,6 @@ import ( schedutil "k8s.io/kubernetes/plugin/pkg/scheduler/util" ) -type FakeNodeInfo v1.Node - -func (n FakeNodeInfo) GetNodeInfo(nodeName string) (*v1.Node, error) { - node := v1.Node(n) - return &node, nil -} - -type FakeNodeListInfo []v1.Node - -func (nodes FakeNodeListInfo) GetNodeInfo(nodeName string) (*v1.Node, error) { - for _, node := range nodes { - if node.Name == nodeName { - return &node, nil - } - } - return nil, fmt.Errorf("Unable to find node: %s", nodeName) -} - -type FakePersistentVolumeClaimInfo []v1.PersistentVolumeClaim - -func (pvcs FakePersistentVolumeClaimInfo) GetPersistentVolumeClaimInfo(namespace string, pvcID string) (*v1.PersistentVolumeClaim, error) { - for _, pvc := range pvcs { - if pvc.Name == pvcID && pvc.Namespace == namespace { - return &pvc, nil - } - } - return nil, fmt.Errorf("Unable to find persistent volume claim: %s/%s", namespace, pvcID) -} - -type FakePersistentVolumeInfo []v1.PersistentVolume - -func (pvs FakePersistentVolumeInfo) GetPersistentVolumeInfo(pvID string) (*v1.PersistentVolume, error) { - for _, pv := range pvs { - if pv.Name == pvID { - return &pv, nil - } - } - return nil, fmt.Errorf("Unable to find persistent volume: %s", pvID) -} - -type FakeStorageClassInfo []storagev1.StorageClass - -func (classes FakeStorageClassInfo) GetStorageClassInfo(name string) (*storagev1.StorageClass, error) { - for _, sc := range classes { - if sc.Name == name { - return &sc, nil - } - } - return nil, fmt.Errorf("Unable to find storage class: %s", name) -} - var ( extendedResourceA = v1.ResourceName("example.com/aaa") extendedResourceB = v1.ResourceName("example.com/bbb") diff --git a/plugin/pkg/scheduler/algorithm/predicates/testing_helper.go b/plugin/pkg/scheduler/algorithm/predicates/testing_helper.go new file mode 100644 index 00000000000..57306c58aad --- /dev/null +++ b/plugin/pkg/scheduler/algorithm/predicates/testing_helper.go @@ -0,0 +1,75 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package predicates + +import ( + "fmt" + + "k8s.io/api/core/v1" + storagev1 "k8s.io/api/storage/v1" +) + +type FakePersistentVolumeClaimInfo []v1.PersistentVolumeClaim + +func (pvcs FakePersistentVolumeClaimInfo) GetPersistentVolumeClaimInfo(namespace string, pvcID string) (*v1.PersistentVolumeClaim, error) { + for _, pvc := range pvcs { + if pvc.Name == pvcID && pvc.Namespace == namespace { + return &pvc, nil + } + } + return nil, fmt.Errorf("Unable to find persistent volume claim: %s/%s", namespace, pvcID) +} + +type FakeNodeInfo v1.Node + +func (n FakeNodeInfo) GetNodeInfo(nodeName string) (*v1.Node, error) { + node := v1.Node(n) + return &node, nil +} + +type FakeNodeListInfo []v1.Node + +func (nodes FakeNodeListInfo) GetNodeInfo(nodeName string) (*v1.Node, error) { + for _, node := range nodes { + if node.Name == nodeName { + return &node, nil + } + } + return nil, fmt.Errorf("Unable to find node: %s", nodeName) +} + +type FakePersistentVolumeInfo []v1.PersistentVolume + +func (pvs FakePersistentVolumeInfo) GetPersistentVolumeInfo(pvID string) (*v1.PersistentVolume, error) { + for _, pv := range pvs { + if pv.Name == pvID { + return &pv, nil + } + } + return nil, fmt.Errorf("Unable to find persistent volume: %s", pvID) +} + +type FakeStorageClassInfo []storagev1.StorageClass + +func (classes FakeStorageClassInfo) GetStorageClassInfo(name string) (*storagev1.StorageClass, error) { + for _, sc := range classes { + if sc.Name == name { + return &sc, nil + } + } + return nil, fmt.Errorf("Unable to find storage class: %s", name) +} diff --git a/plugin/pkg/scheduler/algorithm/predicates/utils.go b/plugin/pkg/scheduler/algorithm/predicates/utils.go index d51f6cd633b..622bdc68359 100644 --- a/plugin/pkg/scheduler/algorithm/predicates/utils.go +++ b/plugin/pkg/scheduler/algorithm/predicates/utils.go @@ -19,9 +19,13 @@ package predicates import ( "strings" + "github.com/golang/glog" + "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm" schedutil "k8s.io/kubernetes/plugin/pkg/scheduler/util" ) @@ -69,28 +73,66 @@ func CreateSelectorFromLabels(aL map[string]string) labels.Selector { return labels.Set(aL).AsSelector() } +// EquivalencePodGenerator is a generator of equivalence class for pod with consideration of PVC info. +type EquivalencePodGenerator struct { + pvcInfo PersistentVolumeClaimInfo +} + +// NewEquivalencePodGenerator returns a getEquivalencePod method with consideration of PVC info. +func NewEquivalencePodGenerator(pvcInfo PersistentVolumeClaimInfo) algorithm.GetEquivalencePodFunc { + g := &EquivalencePodGenerator{ + pvcInfo: pvcInfo, + } + return g.getEquivalencePod +} + // GetEquivalencePod returns a EquivalencePod which contains a group of pod attributes which can be reused. -func GetEquivalencePod(pod *v1.Pod) interface{} { +func (e *EquivalencePodGenerator) getEquivalencePod(pod *v1.Pod) interface{} { // For now we only consider pods: // 1. OwnerReferences is Controller // 2. with same OwnerReferences + // 3. with same PVC claim // to be equivalent - if len(pod.OwnerReferences) != 0 { - for _, ref := range pod.OwnerReferences { - if *ref.Controller { - // a pod can only belongs to one controller + for _, ref := range pod.OwnerReferences { + if ref.Controller != nil && *ref.Controller { + if pvcSet, err := e.getPVCSet(pod); err == nil { + // A pod can only belong to one controller, so let's return. 
return &EquivalencePod{ ControllerRef: ref, + PVCSet: pvcSet, } + } else { + // If error encountered, log warning and return nil (i.e. no equivalent pod found) + glog.Warningf("[EquivalencePodGenerator] for pod: %v failed due to: %v", pod.GetName(), err) + return nil } } } return nil } +// getPVCSet returns a set of PVC UIDs of given pod. +func (e *EquivalencePodGenerator) getPVCSet(pod *v1.Pod) (sets.String, error) { + result := sets.NewString() + for _, volume := range pod.Spec.Volumes { + if volume.PersistentVolumeClaim == nil { + continue + } + pvcName := volume.PersistentVolumeClaim.ClaimName + pvc, err := e.pvcInfo.GetPersistentVolumeClaimInfo(pod.GetNamespace(), pvcName) + if err != nil { + return nil, err + } + result.Insert(string(pvc.UID)) + } + + return result, nil +} + // EquivalencePod is a group of pod attributes which can be reused as equivalence to schedule other pods. type EquivalencePod struct { ControllerRef metav1.OwnerReference + PVCSet sets.String } type hostPortInfo struct { diff --git a/plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go b/plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go index 1b7b34d96bd..9860efdd710 100644 --- a/plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go +++ b/plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go @@ -80,8 +80,12 @@ func init() { // Fit is determined by node selector query. factory.RegisterFitPredicate("MatchNodeSelector", predicates.PodMatchNodeSelector) - // Use equivalence class to speed up predicates & priorities - factory.RegisterGetEquivalencePodFunction(predicates.GetEquivalencePod) + // Use equivalence class to speed up heavy predicates phase. + factory.RegisterGetEquivalencePodFunction( + func(args factory.PluginFactoryArgs) algorithm.GetEquivalencePodFunc { + return predicates.NewEquivalencePodGenerator(args.PVCInfo) + }, + ) // ServiceSpreadingPriority is a priority config factory that spreads pods by minimizing // the number of pods (belonging to the same service) on the same node. diff --git a/plugin/pkg/scheduler/core/equivalence_cache.go b/plugin/pkg/scheduler/core/equivalence_cache.go index fafe1298a82..ca27f40d57a 100644 --- a/plugin/pkg/scheduler/core/equivalence_cache.go +++ b/plugin/pkg/scheduler/core/equivalence_cache.go @@ -173,7 +173,7 @@ func (ec *EquivalenceCache) InvalidateAllCachedPredicateItemOfNode(nodeName stri // InvalidateCachedPredicateItemForPodAdd is a wrapper of InvalidateCachedPredicateItem for pod add case func (ec *EquivalenceCache) InvalidateCachedPredicateItemForPodAdd(pod *v1.Pod, nodeName string) { - // MatchInterPodAffinity: we assume scheduler can make sure newly binded pod + // MatchInterPodAffinity: we assume scheduler can make sure newly bound pod // will not break the existing inter pod affinity. So we does not need to invalidate // MatchInterPodAffinity when pod added. // @@ -210,7 +210,7 @@ func (ec *EquivalenceCache) InvalidateCachedPredicateItemForPodAdd(pod *v1.Pod, // getHashEquivalencePod returns the hash of equivalence pod. // 1. equivalenceHash -// 2. if equivalence pod is found +// 2. 
if equivalence hash is valid func (ec *EquivalenceCache) getHashEquivalencePod(pod *v1.Pod) (uint64, bool) { equivalencePod := ec.getEquivalencePod(pod) if equivalencePod != nil { diff --git a/plugin/pkg/scheduler/core/equivalence_cache_test.go b/plugin/pkg/scheduler/core/equivalence_cache_test.go index 20f2ed6d238..3b098c1d2b1 100644 --- a/plugin/pkg/scheduler/core/equivalence_cache_test.go +++ b/plugin/pkg/scheduler/core/equivalence_cache_test.go @@ -238,13 +238,37 @@ func TestPredicateWithECache(t *testing.T) { } func TestGetHashEquivalencePod(t *testing.T) { - // use default equivalence class calculator - ecache := NewEquivalenceCache(predicates.GetEquivalencePod) + + testNamespace := "test" + + pvcInfo := predicates.FakePersistentVolumeClaimInfo{ + { + ObjectMeta: metav1.ObjectMeta{UID: "someEBSVol1", Name: "someEBSVol1", Namespace: testNamespace}, + Spec: v1.PersistentVolumeClaimSpec{VolumeName: "someEBSVol1"}, + }, + { + ObjectMeta: metav1.ObjectMeta{UID: "someEBSVol2", Name: "someEBSVol2", Namespace: testNamespace}, + Spec: v1.PersistentVolumeClaimSpec{VolumeName: "someNonEBSVol"}, + }, + { + ObjectMeta: metav1.ObjectMeta{UID: "someEBSVol3-0", Name: "someEBSVol3-0", Namespace: testNamespace}, + Spec: v1.PersistentVolumeClaimSpec{VolumeName: "pvcWithDeletedPV"}, + }, + { + ObjectMeta: metav1.ObjectMeta{UID: "someEBSVol3-1", Name: "someEBSVol3-1", Namespace: testNamespace}, + Spec: v1.PersistentVolumeClaimSpec{VolumeName: "anotherPVCWithDeletedPV"}, + }, + } + + // use default equivalence class generator + ecache := NewEquivalenceCache(predicates.NewEquivalencePodGenerator(pvcInfo)) isController := true + pod1 := &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ - Name: "pod1", + Name: "pod1", + Namespace: testNamespace, OwnerReferences: []metav1.OwnerReference{ { APIVersion: "v1", @@ -255,11 +279,30 @@ func TestGetHashEquivalencePod(t *testing.T) { }, }, }, + Spec: v1.PodSpec{ + Volumes: []v1.Volume{ + { + VolumeSource: v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: "someEBSVol1", + }, + }, + }, + { + VolumeSource: v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: "someEBSVol2", + }, + }, + }, + }, + }, } pod2 := &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ - Name: "pod2", + Name: "pod2", + Namespace: testNamespace, OwnerReferences: []metav1.OwnerReference{ { APIVersion: "v1", @@ -270,11 +313,118 @@ func TestGetHashEquivalencePod(t *testing.T) { }, }, }, + Spec: v1.PodSpec{ + Volumes: []v1.Volume{ + { + VolumeSource: v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: "someEBSVol2", + }, + }, + }, + { + VolumeSource: v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: "someEBSVol1", + }, + }, + }, + }, + }, } pod3 := &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ - Name: "pod3", + Name: "pod3", + Namespace: testNamespace, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "v1", + Kind: "ReplicationController", + Name: "rc", + UID: "567", + Controller: &isController, + }, + }, + }, + Spec: v1.PodSpec{ + Volumes: []v1.Volume{ + { + VolumeSource: v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: "someEBSVol3-1", + }, + }, + }, + }, + }, + } + + pod4 := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod4", + Namespace: testNamespace, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "v1", + Kind: "ReplicationController", + Name: "rc", + UID: "567", + Controller: 
&isController, + }, + }, + }, + Spec: v1.PodSpec{ + Volumes: []v1.Volume{ + { + VolumeSource: v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: "someEBSVol3-0", + }, + }, + }, + }, + }, + } + + pod5 := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod5", + Namespace: testNamespace, + }, + } + + pod6 := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod6", + Namespace: testNamespace, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "v1", + Kind: "ReplicationController", + Name: "rc", + UID: "567", + Controller: &isController, + }, + }, + }, + Spec: v1.PodSpec{ + Volumes: []v1.Volume{ + { + VolumeSource: v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: "no-exists-pvc", + }, + }, + }, + }, + }, + } + + pod7 := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod7", + Namespace: testNamespace, OwnerReferences: []metav1.OwnerReference{ { APIVersion: "v1", @@ -287,28 +437,73 @@ func TestGetHashEquivalencePod(t *testing.T) { }, } - hash1, _ := ecache.getHashEquivalencePod(pod1) - hash2, _ := ecache.getHashEquivalencePod(pod2) - hash3, _ := ecache.getHashEquivalencePod(pod3) - - if hash1 != hash2 { - t.Errorf("Failed: pod %v and %v is expected to be equivalent", pod1.Name, pod2.Name) + type podInfo struct { + pod *v1.Pod + hashIsValid bool } - if hash2 == hash3 { - t.Errorf("Failed: pod %v and %v is not expected to be equivalent", pod2.Name, pod3.Name) - } - - // pod4 is a pod without controller ref - pod4 := &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "pod4", + tests := []struct { + podInfoList []podInfo + isEquivalent bool + }{ + // pods with same controllerRef and same pvc claim + { + podInfoList: []podInfo{ + {pod: pod1, hashIsValid: true}, + {pod: pod2, hashIsValid: true}, + }, + isEquivalent: true, + }, + // pods with same controllerRef but different pvc claim + { + podInfoList: []podInfo{ + {pod: pod3, hashIsValid: true}, + {pod: pod4, hashIsValid: true}, + }, + isEquivalent: false, + }, + // pod without controllerRef + { + podInfoList: []podInfo{ + {pod: pod5, hashIsValid: false}, + }, + isEquivalent: false, + }, + // pods with same controllerRef but one has non-exists pvc claim + { + podInfoList: []podInfo{ + {pod: pod6, hashIsValid: false}, + {pod: pod7, hashIsValid: true}, + }, + isEquivalent: false, }, } - _, found := ecache.getHashEquivalencePod(pod4) - if found { - t.Errorf("Failed: equivalence hash of pod %v is not expected to be found, but got: %v", - pod4.Name, found) + + var ( + targetPodInfo podInfo + targetHash uint64 + ) + + for _, test := range tests { + for i, podInfo := range test.podInfoList { + testPod := podInfo.pod + hash, isValid := ecache.getHashEquivalencePod(testPod) + if isValid != podInfo.hashIsValid { + t.Errorf("Failed: pod %v is expected to have valid hash", testPod) + } + // NOTE(harry): the first element will be used as target so + // this logic can't verify more than two inequivalent pods + if i == 0 { + targetHash = hash + targetPodInfo = podInfo + } else { + if targetHash != hash { + if test.isEquivalent { + t.Errorf("Failed: pod: %v is expected to be equivalent to: %v", testPod, targetPodInfo.pod) + } + } + } + } } } diff --git a/plugin/pkg/scheduler/factory/factory.go b/plugin/pkg/scheduler/factory/factory.go index 0c3449590b1..865762388e9 100644 --- a/plugin/pkg/scheduler/factory/factory.go +++ b/plugin/pkg/scheduler/factory/factory.go @@ -471,9 +471,11 @@ func (c *configFactory) onPvcDelete(obj interface{}) { func (c *configFactory) 
invalidatePredicatesForPvc(pvc *v1.PersistentVolumeClaim) { // We need to do this here because the ecache uses PVC uid as part of equivalence hash of pod - // The binded volume type may change + + // The bound volume type may change invalidPredicates := sets.NewString(maxPDVolumeCountPredicateKeys...) - // // The binded volume's label may change + + // The bound volume's label may change invalidPredicates.Insert("NoVolumeZoneConflict") if utilfeature.DefaultFeatureGate.Enabled(features.VolumeScheduling) { @@ -491,9 +493,9 @@ func (c *configFactory) invalidatePredicatesForPvcUpdate(old, new *v1.Persistent // PVC volume binding has changed invalidPredicates.Insert(predicates.CheckVolumeBinding) } - // The binded volume type may change + // The bound volume type may change invalidPredicates.Insert(maxPDVolumeCountPredicateKeys...) - // The binded volume's label may change + // The bound volume's label may change invalidPredicates.Insert("NoVolumeZoneConflict") } @@ -588,8 +590,8 @@ func (c *configFactory) updatePodInCache(oldObj, newObj interface{}) { func (c *configFactory) invalidateCachedPredicatesOnUpdatePod(newPod *v1.Pod, oldPod *v1.Pod) { if c.enableEquivalenceClassCache { - // if the pod does not have binded node, updating equivalence cache is meaningless; - // if pod's binded node has been changed, that case should be handled by pod add & delete. + // if the pod does not have bound node, updating equivalence cache is meaningless; + // if pod's bound node has been changed, that case should be handled by pod add & delete. if len(newPod.Spec.NodeName) != 0 && newPod.Spec.NodeName == oldPod.Spec.NodeName { if !reflect.DeepEqual(oldPod.GetLabels(), newPod.GetLabels()) { // MatchInterPodAffinity need to be reconsidered for this node, @@ -920,8 +922,14 @@ func (f *configFactory) CreateFromKeys(predicateKeys, priorityKeys sets.String, } // Init equivalence class cache - if f.enableEquivalenceClassCache && getEquivalencePodFunc != nil { - f.equivalencePodCache = core.NewEquivalenceCache(getEquivalencePodFunc) + if f.enableEquivalenceClassCache && getEquivalencePodFuncFactory != nil { + pluginArgs, err := f.getPluginArgs() + if err != nil { + return nil, err + } + f.equivalencePodCache = core.NewEquivalenceCache( + getEquivalencePodFuncFactory(*pluginArgs), + ) glog.Info("Created equivalence class cache") } diff --git a/plugin/pkg/scheduler/factory/plugins.go b/plugin/pkg/scheduler/factory/plugins.go index a0de0f67fda..6c7a7ab7d5f 100644 --- a/plugin/pkg/scheduler/factory/plugins.go +++ b/plugin/pkg/scheduler/factory/plugins.go @@ -76,6 +76,9 @@ type PriorityConfigFactory struct { Weight int } +// EquivalencePodFuncFactory produces a function to get equivalence class for given pod. +type EquivalencePodFuncFactory func(PluginFactoryArgs) algorithm.GetEquivalencePodFunc + var ( schedulerFactoryMutex sync.Mutex @@ -90,7 +93,7 @@ var ( predicateMetadataProducer PredicateMetadataProducerFactory // get equivalence pod function - getEquivalencePodFunc algorithm.GetEquivalencePodFunc + getEquivalencePodFuncFactory EquivalencePodFuncFactory ) const ( @@ -339,8 +342,9 @@ func RegisterCustomPriorityFunction(policy schedulerapi.PriorityPolicy) string { return RegisterPriorityConfigFactory(policy.Name, *pcf) } -func RegisterGetEquivalencePodFunction(equivalenceFunc algorithm.GetEquivalencePodFunc) { - getEquivalencePodFunc = equivalenceFunc +// RegisterGetEquivalencePodFunction registers equivalenceFuncFactory to produce equivalence class for given pod. 
+func RegisterGetEquivalencePodFunction(equivalenceFuncFactory EquivalencePodFuncFactory) {
+	getEquivalencePodFuncFactory = equivalenceFuncFactory
 }
 
 // IsPriorityFunctionRegistered is useful for testing providers.

From b3bb74e3a324bd492b3639e4069187c2eb96a82a Mon Sep 17 00:00:00 2001
From: Harry Zhang
Date: Thu, 30 Nov 2017 09:17:46 +0000
Subject: [PATCH 12/18] Update generated bazel

---
 plugin/pkg/scheduler/algorithm/predicates/BUILD | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/plugin/pkg/scheduler/algorithm/predicates/BUILD b/plugin/pkg/scheduler/algorithm/predicates/BUILD
index 5ccf2d1f4c4..453fbafb726 100644
--- a/plugin/pkg/scheduler/algorithm/predicates/BUILD
+++ b/plugin/pkg/scheduler/algorithm/predicates/BUILD
@@ -12,6 +12,7 @@ go_library(
         "error.go",
         "metadata.go",
         "predicates.go",
+        "testing_helper.go",
         "utils.go",
     ],
     importpath = "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates",
@@ -34,6 +35,7 @@ go_library(
         "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
         "//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
         "//vendor/k8s.io/apimachinery/pkg/util/rand:go_default_library",
+        "//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
         "//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
         "//vendor/k8s.io/client-go/listers/core/v1:go_default_library",
        "//vendor/k8s.io/client-go/listers/storage/v1:go_default_library",

From 724f926f8c67d9ef358baa1704b8dcffa17eacba Mon Sep 17 00:00:00 2001
From: Maciej Pytel
Date: Mon, 4 Dec 2017 13:08:07 +0100
Subject: [PATCH 13/18] Enable SD custom metric autoscaling e2e on GKE

---
 test/e2e/autoscaling/custom_metrics_autoscaling.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/e2e/autoscaling/custom_metrics_autoscaling.go b/test/e2e/autoscaling/custom_metrics_autoscaling.go
index 24c5a09e584..9b9a55889d9 100644
--- a/test/e2e/autoscaling/custom_metrics_autoscaling.go
+++ b/test/e2e/autoscaling/custom_metrics_autoscaling.go
@@ -42,7 +42,7 @@ const (
 
 var _ = SIGDescribe("[HPA] Horizontal pod autoscaling (scale resource: Custom Metrics from Stackdriver)", func() {
 	BeforeEach(func() {
-		framework.SkipUnlessProviderIs("gce")
+		framework.SkipUnlessProviderIs("gce", "gke")
 	})
 
 	f := framework.NewDefaultFramework("horizontal-pod-autoscaling")

From 7a159835bc62f54555d1ee6bcb76ea07b0fc6d63 Mon Sep 17 00:00:00 2001
From: Marian Lobur
Date: Mon, 4 Dec 2017 13:58:25 +0100
Subject: [PATCH 14/18] Add DisabledForLargeClusters tag to audit tests.

Remove this tag once functionality from feature request #53455 is
implemented.
---
 test/e2e/auth/audit.go | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/e2e/auth/audit.go b/test/e2e/auth/audit.go
index c6057f07f3d..f9a0720f651 100644
--- a/test/e2e/auth/audit.go
+++ b/test/e2e/auth/audit.go
@@ -56,7 +56,8 @@ var _ = SIGDescribe("Advanced Audit", func() {
 		framework.SkipUnlessProviderIs("gce")
 	})
 
-	It("should audit API calls", func() {
+	// TODO: Get rid of [DisabledForLargeClusters] when feature request #53455 is ready.
+	It("should audit API calls [DisabledForLargeClusters]", func() {
 		namespace := f.Namespace.Name
 
 		config, err := framework.LoadConfig()

From 514f219c229647ad2a3d6223fbcb596ea032363c Mon Sep 17 00:00:00 2001
From: Hemant Kumar
Date: Fri, 1 Dec 2017 10:15:30 -0500
Subject: [PATCH 15/18] cloud-provider needs cluster-role to apply taint to the node

When a volume is stuck in the attaching state on AWS, the cloud provider
needs to taint the node, but the node cannot be tainted without proper
access.
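
For context, tainting a node from the provider reduces to a "get" of the
node followed by a "patch" of its taint list -- exactly the two verbs the
new cluster role grants. A minimal sketch (a hypothetical helper for
illustration, not the upstream implementation; the taint key is made up):

    // Assumed imports: "encoding/json"; v1 "k8s.io/api/core/v1";
    // metav1 "k8s.io/apimachinery/pkg/apis/meta/v1";
    // "k8s.io/apimachinery/pkg/types"; "k8s.io/client-go/kubernetes".

    // taintNode reads the node, appends the taint, and patches the taint
    // list back. Both calls are authorized by get/patch on "nodes".
    func taintNode(client kubernetes.Interface, nodeName string, taint v1.Taint) error {
        node, err := client.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
        if err != nil {
            return err
        }
        // Build a strategic merge patch that replaces spec.taints with the
        // extended list.
        patch, err := json.Marshal(map[string]interface{}{
            "spec": map[string]interface{}{
                "taints": append(node.Spec.Taints, taint),
            },
        })
        if err != nil {
            return err
        }
        _, err = client.CoreV1().Nodes().Patch(nodeName, types.StrategicMergePatchType, patch)
        return err
    }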
---
 pkg/cloudprovider/providers/aws/aws.go             |  4 +--
 .../authorizer/rbac/bootstrappolicy/policy.go      |  8 ++++++
 .../testdata/cluster-role-bindings.yaml            | 17 +++++++++++++
 .../testdata/cluster-roles.yaml                    | 25 +++++++++++++++++++
 4 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/pkg/cloudprovider/providers/aws/aws.go b/pkg/cloudprovider/providers/aws/aws.go
index 034a26b6b7e..ad55f5383d7 100644
--- a/pkg/cloudprovider/providers/aws/aws.go
+++ b/pkg/cloudprovider/providers/aws/aws.go
@@ -1118,11 +1118,11 @@ func newAWSCloud(config io.Reader, awsServices Services) (*Cloud, error) {
 // Initialize passes a Kubernetes clientBuilder interface to the cloud provider
 func (c *Cloud) Initialize(clientBuilder controller.ControllerClientBuilder) {
 	c.clientBuilder = clientBuilder
-	c.kubeClient = clientBuilder.ClientOrDie("cloud-provider")
+	c.kubeClient = clientBuilder.ClientOrDie("aws-cloud-provider")
 	c.eventBroadcaster = record.NewBroadcaster()
 	c.eventBroadcaster.StartLogging(glog.Infof)
 	c.eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: v1core.New(c.kubeClient.CoreV1().RESTClient()).Events("")})
-	c.eventRecorder = c.eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "aws-cloudprovider"})
+	c.eventRecorder = c.eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "aws-cloud-provider"})
 }
 
 // Clusters returns the list of clusters.
diff --git a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/policy.go b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/policy.go
index 02c896128aa..a74b0220385 100644
--- a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/policy.go
+++ b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/policy.go
@@ -422,6 +422,13 @@ func ClusterRoles() []rbac.ClusterRole {
 			eventsRule(),
 		},
 	},
+	{
+		ObjectMeta: metav1.ObjectMeta{Name: "system:aws-cloud-provider"},
+		Rules: []rbac.PolicyRule{
+			rbac.NewRule("get", "patch").Groups(legacyGroup).Resources("nodes").RuleOrDie(),
+			eventsRule(),
+		},
+	},
 	{
 		// a role making the csrapprover controller approve a node client CSR
 		ObjectMeta: metav1.ObjectMeta{Name: "system:certificates.k8s.io:certificatesigningrequests:nodeclient"},
@@ -476,6 +483,7 @@ func ClusterRoleBindings() []rbac.ClusterRoleBinding {
 		rbac.NewClusterBinding("system:kube-controller-manager").Users(user.KubeControllerManager).BindingOrDie(),
 		rbac.NewClusterBinding("system:kube-dns").SAs("kube-system", "kube-dns").BindingOrDie(),
 		rbac.NewClusterBinding("system:kube-scheduler").Users(user.KubeScheduler).BindingOrDie(),
+		rbac.NewClusterBinding("system:aws-cloud-provider").SAs("kube-system", "aws-cloud-provider").BindingOrDie(),
 
 		// This default binding of the system:node role to the system:nodes group is deprecated in 1.7 with the availability of the Node authorizer.
 		// This leaves the binding, but with an empty set of subjects, so that tightening reconciliation can remove the subject.
diff --git a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/testdata/cluster-role-bindings.yaml b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/testdata/cluster-role-bindings.yaml
index 1dbc33dd449..dc25b090ebe 100644
--- a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/testdata/cluster-role-bindings.yaml
+++ b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/testdata/cluster-role-bindings.yaml
@@ -17,6 +17,23 @@ items:
   - apiGroup: rbac.authorization.k8s.io
     kind: Group
     name: system:masters
+- apiVersion: rbac.authorization.k8s.io/v1
+  kind: ClusterRoleBinding
+  metadata:
+    annotations:
+      rbac.authorization.kubernetes.io/autoupdate: "true"
+    creationTimestamp: null
+    labels:
+      kubernetes.io/bootstrapping: rbac-defaults
+    name: system:aws-cloud-provider
+  roleRef:
+    apiGroup: rbac.authorization.k8s.io
+    kind: ClusterRole
+    name: system:aws-cloud-provider
+  subjects:
+  - kind: ServiceAccount
+    name: aws-cloud-provider
+    namespace: kube-system
 - apiVersion: rbac.authorization.k8s.io/v1
   kind: ClusterRoleBinding
   metadata:
diff --git a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/testdata/cluster-roles.yaml b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/testdata/cluster-roles.yaml
index 4db6a8a1130..1e2a36c6289 100644
--- a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/testdata/cluster-roles.yaml
+++ b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/testdata/cluster-roles.yaml
@@ -508,6 +508,31 @@ items:
     - subjectaccessreviews
     verbs:
     - create
+- apiVersion: rbac.authorization.k8s.io/v1
+  kind: ClusterRole
+  metadata:
+    annotations:
+      rbac.authorization.kubernetes.io/autoupdate: "true"
+    creationTimestamp: null
+    labels:
+      kubernetes.io/bootstrapping: rbac-defaults
+    name: system:aws-cloud-provider
+  rules:
+  - apiGroups:
+    - ""
+    resources:
+    - nodes
+    verbs:
+    - get
+    - patch
+  - apiGroups:
+    - ""
+    resources:
+    - events
+    verbs:
+    - create
+    - patch
+    - update
 - apiVersion: rbac.authorization.k8s.io/v1
   kind: ClusterRole
   metadata:

From d832e6ae6316aace481182cb8022e10896f5e68b Mon Sep 17 00:00:00 2001
From: Sandeep Rajan
Date: Mon, 4 Dec 2017 11:57:18 -0500
Subject: [PATCH 16/18] coredns 1.0.0

---
 cmd/kubeadm/app/phases/addons/dns/versions.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cmd/kubeadm/app/phases/addons/dns/versions.go b/cmd/kubeadm/app/phases/addons/dns/versions.go
index ed3fcf19609..b68a0b7eee9 100644
--- a/cmd/kubeadm/app/phases/addons/dns/versions.go
+++ b/cmd/kubeadm/app/phases/addons/dns/versions.go
@@ -27,14 +27,14 @@ const (
 	kubeDNSProbeSRV = "SRV"
 	kubeDNSProbeA   = "A"
 
-	coreDNSVersion = "0.9.10"
+	coreDNSVersion = "1.0.0"
 )
 
 // GetDNSVersion returns the right kube-dns version for a specific k8s version
 func GetDNSVersion(kubeVersion *version.Version, dns string) string {
 	// v1.8.0+ uses kube-dns 1.14.5
 	// v1.9.0+ uses kube-dns 1.14.7
-	// v1.9.0+ uses CoreDNS 0.9.10
+	// v1.9.0+ uses CoreDNS 1.0.0
 	// In the future when the version is bumped at HEAD; add conditional logic to return the right versions
 	// Also, the version might be bumped for different k8s releases on the same branch

From 0e38a0e7dd97e7d95c479cfe9a197ca49ed500ec Mon Sep 17 00:00:00 2001
From: David Ashpole
Date: Mon, 4 Dec 2017 14:18:32 -0800
Subject: [PATCH 17/18] fake docker client can remove containers which have
 not been started

---
 pkg/kubelet/dockershim/libdocker/fake_client.go | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/pkg/kubelet/dockershim/libdocker/fake_client.go b/pkg/kubelet/dockershim/libdocker/fake_client.go
index 4474fa4a5b6..21e27eba8fc 100644
--- a/pkg/kubelet/dockershim/libdocker/fake_client.go
+++ b/pkg/kubelet/dockershim/libdocker/fake_client.go
@@ -661,6 +661,15 @@ func (f *FakeDockerClient) RemoveContainer(id string, opts dockertypes.Container
 		}
 	}
 
+	for i := range f.RunningContainerList {
+		// allow removal of containers that are in the running list but were
+		// never actually started (i.e. created but not in a running state)
+		if f.RunningContainerList[i].ID == id && !f.ContainerMap[id].State.Running {
+			delete(f.ContainerMap, id)
+			f.RunningContainerList = append(f.RunningContainerList[:i], f.RunningContainerList[i+1:]...)
+			f.appendContainerTrace("Removed", id)
+			return nil
+		}
+	}
 	// To be a good fake, report error if container is not stopped.
 	return fmt.Errorf("container not stopped")
 }

From af42fbd3d086ac2240e50fd132a54607a9a7bdfd Mon Sep 17 00:00:00 2001
From: Minhan Xia
Date: Tue, 5 Dec 2017 11:29:35 -0800
Subject: [PATCH 18/18] fix gce.conf multi-value parameter processing

---
 cluster/gce/container-linux/configure-helper.sh | 17 +++++++++++++----
 cluster/gce/gci/configure-helper.sh             | 17 +++++++++++++----
 pkg/cloudprovider/providers/gce/gce.go          |  3 ++-
 3 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/cluster/gce/container-linux/configure-helper.sh b/cluster/gce/container-linux/configure-helper.sh
index a760917354f..d433865e3c9 100755
--- a/cluster/gce/container-linux/configure-helper.sh
+++ b/cluster/gce/container-linux/configure-helper.sh
@@ -215,14 +215,19 @@ EOF
   if [[ -n "${NODE_INSTANCE_PREFIX:-}" ]]; then
     use_cloud_config="true"
     if [[ -n "${NODE_TAGS:-}" ]]; then
-      local -r node_tags="${NODE_TAGS}"
+      # split NODE_TAGS into an array by comma.
+      IFS=',' read -r -a node_tags <<< ${NODE_TAGS}
     else
       local -r node_tags="${NODE_INSTANCE_PREFIX}"
     fi
     cat <<EOF >>/etc/gce.conf
-node-tags = ${node_tags}
 node-instance-prefix = ${NODE_INSTANCE_PREFIX}
 EOF
+    for tag in ${node_tags[@]}; do
+      cat <<EOF >>/etc/gce.conf
+node-tags = ${tag}
+EOF
+    done
   fi
   if [[ -n "${MULTIZONE:-}" ]]; then
     use_cloud_config="true"
@@ -232,9 +237,13 @@ EOF
   fi
   if [[ -n "${GCE_ALPHA_FEATURES:-}" ]]; then
     use_cloud_config="true"
-    cat <<EOF >>/etc/gce.conf
-alpha-features = ${GCE_ALPHA_FEATURES}
+    # split GCE_ALPHA_FEATURES into an array by comma.
+    IFS=',' read -r -a alpha_features <<< ${GCE_ALPHA_FEATURES}
+    for feature in ${alpha_features[@]}; do
+      cat <<EOF >>/etc/gce.conf
+alpha-features = ${feature}
 EOF
+    done
   fi
   if [[ -n "${SECONDARY_RANGE_NAME:-}" ]]; then
     use_cloud_config="true"
diff --git a/cluster/gce/gci/configure-helper.sh b/cluster/gce/gci/configure-helper.sh
index 5af388c4d8e..f7518f3c893 100644
--- a/cluster/gce/gci/configure-helper.sh
+++ b/cluster/gce/gci/configure-helper.sh
@@ -585,14 +585,19 @@ EOF
   if [[ -n "${NODE_INSTANCE_PREFIX:-}" ]]; then
     use_cloud_config="true"
     if [[ -n "${NODE_TAGS:-}" ]]; then
-      local -r node_tags="${NODE_TAGS}"
+      # split NODE_TAGS into an array by comma.
+      IFS=',' read -r -a node_tags <<< ${NODE_TAGS}
     else
       local -r node_tags="${NODE_INSTANCE_PREFIX}"
     fi
     cat <<EOF >>/etc/gce.conf
-node-tags = ${node_tags}
 node-instance-prefix = ${NODE_INSTANCE_PREFIX}
 EOF
+    for tag in ${node_tags[@]}; do
+      cat <<EOF >>/etc/gce.conf
+node-tags = ${tag}
+EOF
+    done
   fi
   if [[ -n "${MULTIZONE:-}" ]]; then
     use_cloud_config="true"
@@ -602,9 +607,13 @@ EOF
   fi
   if [[ -n "${GCE_ALPHA_FEATURES:-}" ]]; then
     use_cloud_config="true"
-    cat <<EOF >>/etc/gce.conf
-alpha-features = ${GCE_ALPHA_FEATURES}
+    # split GCE_ALPHA_FEATURES into an array by comma.
+    IFS=',' read -r -a alpha_features <<< ${GCE_ALPHA_FEATURES}
+    for feature in ${alpha_features[@]}; do
+      cat <<EOF >>/etc/gce.conf
+alpha-features = ${feature}
 EOF
+    done
   fi
   if [[ -n "${SECONDARY_RANGE_NAME:-}" ]]; then
     use_cloud_config="true"
diff --git a/pkg/cloudprovider/providers/gce/gce.go b/pkg/cloudprovider/providers/gce/gce.go
index d1cb1092714..8447092f164 100644
--- a/pkg/cloudprovider/providers/gce/gce.go
+++ b/pkg/cloudprovider/providers/gce/gce.go
@@ -149,6 +149,7 @@ type GCECloud struct {
 	AlphaFeatureGate *AlphaFeatureGate
 }
 
+// TODO: replace gcfg with json
 type ConfigGlobal struct {
 	TokenURL  string `gcfg:"token-url"`
 	TokenBody string `gcfg:"token-body"`
@@ -173,7 +174,7 @@ type ConfigGlobal struct {
 	// located in (i.e. where the controller will be running). If this is
 	// blank, then the local zone will be discovered via the metadata server.
 	LocalZone string `gcfg:"local-zone"`
-	// Possible values: List of api names separated by comma. Default to none.
+	// Defaults to none.
 	// For example: MyFeatureFlag
 	AlphaFeatures []string `gcfg:"alpha-features"`
 }
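
Why the repeated-key format: gcfg maps a []string field to a multi-valued
variable and collects one element per occurrence of the key, so a single
comma-separated "alpha-features = a,b" line would be parsed as one string.
A minimal standalone sketch of that behavior (not part of the patch;
assumes the gopkg.in/gcfg.v1 package the GCE provider uses):

    package main

    import (
        "fmt"

        gcfg "gopkg.in/gcfg.v1"
    )

    // config mirrors the []string fields above: each repeated key
    // contributes one element to the slice.
    type config struct {
        Global struct {
            NodeTags      []string `gcfg:"node-tags"`
            AlphaFeatures []string `gcfg:"alpha-features"`
        }
    }

    func main() {
        // The format the fixed loops now write into /etc/gce.conf.
        raw := "[global]\nnode-tags = tag-a\nnode-tags = tag-b\nalpha-features = MyFeatureFlag\n"
        var cfg config
        if err := gcfg.ReadStringInto(&cfg, raw); err != nil {
            panic(err)
        }
        fmt.Println(cfg.Global.NodeTags)      // [tag-a tag-b]
        fmt.Println(cfg.Global.AlphaFeatures) // [MyFeatureFlag]
    }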