mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-23 11:50:44 +00:00
Merge pull request #56718 from foxish/fix-network-partition-test-gce-2
Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>. Fix for the network partition tests Fix https://github.com/kubernetes/kubernetes/issues/56416 The underlying issue was that after cluster upgrade, the nodes talk to the master using the in-cluster IP. The IPTables rules used for blocking were thus far only effective when the nodes used the external network interface. Reasoning: api-server.log [from gce upgrade cluster](https://storage.googleapis.com/kubernetes-jenkins/logs/ci-kubernetes-e2e-gce-stable1-beta-upgrade-cluster-new/35/artifacts/bootstrap-e2e-master/kube-apiserver.log) > I1201 13:56:34.287956 5 wrap.go:42] PATCH /api/v1/nodes/bootstrap-e2e-minion-group-hv6p/status: (18.100082ms) 200 [[node-problem-detector/v1.4.0 (linux/amd64) kubernetes/$Format] **10.128.0.4:53766**] > I1201 13:56:34.287956 5 wrap.go:42] PATCH /api/v1/nodes/bootstrap-e2e-minion-group-hv6p/status: (18.100082ms) 200 [[node-problem-detector/v1.4.0 (linux/amd64) kubernetes/$Format] **10.128.0.4:53766**] > I1201 13:56:34.515042 5 wrap.go:42] PATCH /api/v1/nodes/bootstrap-e2e-master/status: (4.327563ms) 200 [[kubelet/v1.9.0 (linux/amd64) kubernetes/e067596] **10.128.0.2:41898**] api-server.log [from gce serial](https://storage.googleapis.com/kubernetes-jenkins/logs/ci-kubernetes-e2e-gce-cos-k8sbeta-serial/70/artifacts/test-34cf3ed1e3-master/kube-apiserver.log) > I1201 15:59:46.863961 5 wrap.go:42] GET /api/v1/nodes/test-34cf3ed1e3-minion-group-zr99?resourceVersion=0: (926.753µs) 200 [[kubelet/v1.9.0 (linux/amd64) kubernetes/e067596] **104.154.254.154:40220**] > I1201 15:59:46.881810 5 wrap.go:42] PATCH /api/v1/nodes/test-34cf3ed1e3-minion-group-zr99/status: (10.157704ms) 200 [[kubelet/v1.9.0 (linux/amd64) kubernetes/e067596] **104.154.254.154:40220**] The underlying issue is one of cluster setup - but we can make the test more resilient with this change. cc @krzyzacy @spiffxp @enisoc @jberkus @kubernetes/sig-autoscaling-misc
This commit is contained in:
commit
a5d2a025b7
@ -935,7 +935,9 @@ func TestHitNodesFromOutsideWithCount(externalIP string, httpPort int32, timeout
|
||||
// This function executes commands on a node so it will work only for some
|
||||
// environments.
|
||||
func TestUnderTemporaryNetworkFailure(c clientset.Interface, ns string, node *v1.Node, testFunc func()) {
|
||||
host := GetNodeExternalIP(node)
|
||||
externalIP := GetNodeExternalIP(node)
|
||||
internalIP := GetNodeInternalIP(node)
|
||||
|
||||
master := GetMasterAddress(c)
|
||||
By(fmt.Sprintf("block network traffic from node %s to the master", node.Name))
|
||||
defer func() {
|
||||
@ -944,14 +946,16 @@ func TestUnderTemporaryNetworkFailure(c clientset.Interface, ns string, node *v1
|
||||
// had been inserted. (yes, we could look at the error code and ssh error
|
||||
// separately, but I prefer to stay on the safe side).
|
||||
By(fmt.Sprintf("Unblock network traffic from node %s to the master", node.Name))
|
||||
UnblockNetwork(host, master)
|
||||
UnblockNetwork(externalIP, master)
|
||||
UnblockNetwork(internalIP, master)
|
||||
}()
|
||||
|
||||
Logf("Waiting %v to ensure node %s is ready before beginning test...", resizeNodeReadyTimeout, node.Name)
|
||||
if !WaitForNodeToBe(c, node.Name, v1.NodeReady, true, resizeNodeReadyTimeout) {
|
||||
Failf("Node %s did not become ready within %v", node.Name, resizeNodeReadyTimeout)
|
||||
}
|
||||
BlockNetwork(host, master)
|
||||
BlockNetwork(externalIP, master)
|
||||
BlockNetwork(internalIP, master)
|
||||
|
||||
Logf("Waiting %v for node %s to be not ready after simulated network failure", resizeNodeNotReadyTimeout, node.Name)
|
||||
if !WaitForNodeToBe(c, node.Name, v1.NodeReady, false, resizeNodeNotReadyTimeout) {
|
||||
|
@ -4925,6 +4925,23 @@ func GetNodeExternalIP(node *v1.Node) string {
|
||||
return host
|
||||
}
|
||||
|
||||
// GetNodeInternalIP returns node internal IP concatenated with port 22 for ssh
|
||||
// e.g. 1.2.3.4:22
|
||||
func GetNodeInternalIP(node *v1.Node) string {
|
||||
Logf("Getting internal IP address for %s", node.Name)
|
||||
host := ""
|
||||
for _, a := range node.Status.Addresses {
|
||||
if a.Type == v1.NodeInternalIP {
|
||||
host = net.JoinHostPort(a.Address, sshPort)
|
||||
break
|
||||
}
|
||||
}
|
||||
if host == "" {
|
||||
Failf("Couldn't get the internal IP of host %s with addresses %v", node.Name, node.Status.Addresses)
|
||||
}
|
||||
return host
|
||||
}
|
||||
|
||||
// SimpleGET executes a get on the given url, returns error if non-200 returned.
|
||||
func SimpleGET(c *http.Client, url, host string) (string, error) {
|
||||
req, err := http.NewRequest("GET", url, nil)
|
||||
|
Loading…
Reference in New Issue
Block a user