From 7b6e4e4d8b12bfa0770f649611c4968b610d2b87 Mon Sep 17 00:00:00 2001 From: daschott Date: Tue, 1 Nov 2022 16:42:07 -0700 Subject: [PATCH] added retries to winkernel proxy rules deletion --- pkg/proxy/winkernel/hns.go | 6 +++ pkg/proxy/winkernel/hns_test.go | 75 +++++++++++++++++++++++++++++++++ pkg/proxy/winkernel/proxier.go | 45 ++++++++++++++------ 3 files changed, 114 insertions(+), 12 deletions(-) diff --git a/pkg/proxy/winkernel/hns.go b/pkg/proxy/winkernel/hns.go index 443dfb09572..9d3889d60cc 100644 --- a/pkg/proxy/winkernel/hns.go +++ b/pkg/proxy/winkernel/hns.go @@ -367,6 +367,12 @@ func (hns hns) deleteLoadBalancer(hnsID string) error { } err = lb.Delete() + if err != nil { + // There is a bug in Windows Server 2019, that can cause the delete call to fail sometimes. We retry one more time. + // TODO: The logic in syncProxyRules should be rewritten in the future to better stage and handle a call like this failing using the policyApplied fields. + klog.V(1).ErrorS(err, "Error deleting Hns loadbalancer policy resource. Attempting one more time...", "loadBalancer", lb) + return lb.Delete() + } return err } diff --git a/pkg/proxy/winkernel/hns_test.go b/pkg/proxy/winkernel/hns_test.go index f18fb40d1e1..b310c3e73f1 100644 --- a/pkg/proxy/winkernel/hns_test.go +++ b/pkg/proxy/winkernel/hns_test.go @@ -37,6 +37,7 @@ const ( gatewayAddress = "192.168.1.1" epMacAddress = "00-11-22-33-44-55" epIpAddress = "192.168.1.3" + epIpAddressB = "192.168.1.4" epIpAddressRemote = "192.168.2.3" epPaAddress = "10.0.0.3" protocol = 6 @@ -457,6 +458,80 @@ func mustTestNetwork(t *testing.T) *hcn.HostComputeNetwork { return network } +func TestHashEndpoints(t *testing.T) { + Network := mustTestNetwork(t) + // Create endpoint A + ipConfigA := &hcn.IpConfig{ + IpAddress: epIpAddress, + } + endpointASpec := &hcn.HostComputeEndpoint{ + IpConfigurations: []hcn.IpConfig{*ipConfigA}, + MacAddress: epMacAddress, + SchemaVersion: hcn.SchemaVersion{ + Major: 2, + Minor: 0, + }, + } + endpointA, err := Network.CreateEndpoint(endpointASpec) + if err != nil { + t.Error(err) + } + endpointInfoA := &endpointsInfo{ + ip: endpointA.IpConfigurations[0].IpAddress, + hnsID: endpointA.Id, + } + // Create Endpoint B + ipConfigB := &hcn.IpConfig{ + IpAddress: epIpAddressB, + } + endpointBSpec := &hcn.HostComputeEndpoint{ + IpConfigurations: []hcn.IpConfig{*ipConfigB}, + MacAddress: epMacAddress, + SchemaVersion: hcn.SchemaVersion{ + Major: 2, + Minor: 0, + }, + } + endpointB, err := Network.CreateEndpoint(endpointBSpec) + if err != nil { + t.Error(err) + } + endpointInfoB := &endpointsInfo{ + ip: endpointB.IpConfigurations[0].IpAddress, + hnsID: endpointB.Id, + } + endpoints := []endpointsInfo{*endpointInfoA, *endpointInfoB} + endpointsReverse := []endpointsInfo{*endpointInfoB, *endpointInfoA} + h1, err := hashEndpoints(endpoints) + if err != nil { + t.Error(err) + } else if len(h1) < 1 { + t.Error("HashEndpoints failed for endpoints", endpoints) + } + + h2, err := hashEndpoints(endpointsReverse) + if err != nil { + t.Error(err) + } + if h1 != h2 { + t.Errorf("%x does not match %x", h1, h2) + } + + // Clean up + err = endpointA.Delete() + if err != nil { + t.Error(err) + } + err = endpointB.Delete() + if err != nil { + t.Error(err) + } + err = Network.Delete() + if err != nil { + t.Error(err) + } +} + func createTestNetwork() (*hcn.HostComputeNetwork, error) { network := &hcn.HostComputeNetwork{ Type: NETWORK_TYPE_OVERLAY, diff --git a/pkg/proxy/winkernel/proxier.go b/pkg/proxy/winkernel/proxier.go index 38e0ec67cb3..235ea7680d2 100644 --- a/pkg/proxy/winkernel/proxier.go +++ b/pkg/proxy/winkernel/proxier.go @@ -783,7 +783,7 @@ func CleanupLeftovers() (encounteredError bool) { func (svcInfo *serviceInfo) cleanupAllPolicies(endpoints []proxy.Endpoint) { klog.V(3).InfoS("Service cleanup", "serviceInfo", svcInfo) // Skip the svcInfo.policyApplied check to remove all the policies - svcInfo.deleteAllHnsLoadBalancerPolicy() + svcInfo.deleteLoadBalancerPolicy() // Cleanup Endpoints references for _, ep := range endpoints { epInfo, ok := ep.(*endpointsInfo) @@ -798,25 +798,46 @@ func (svcInfo *serviceInfo) cleanupAllPolicies(endpoints []proxy.Endpoint) { svcInfo.policyApplied = false } -func (svcInfo *serviceInfo) deleteAllHnsLoadBalancerPolicy() { +func (svcInfo *serviceInfo) deleteLoadBalancerPolicy() { // Remove the Hns Policy corresponding to this service hns := svcInfo.hns - hns.deleteLoadBalancer(svcInfo.hnsID) - svcInfo.hnsID = "" + if err := hns.deleteLoadBalancer(svcInfo.hnsID); err != nil { + klog.V(1).ErrorS(err, "Error deleting Hns loadbalancer policy resource.", "hnsID", svcInfo.hnsID, "ClusterIP", svcInfo.ClusterIP()) + } else { + // On successful delete, remove hnsId + svcInfo.hnsID = "" + } - hns.deleteLoadBalancer(svcInfo.nodePorthnsID) - svcInfo.nodePorthnsID = "" + if err := hns.deleteLoadBalancer(svcInfo.nodePorthnsID); err != nil { + klog.V(1).ErrorS(err, "Error deleting Hns NodePort policy resource.", "hnsID", svcInfo.nodePorthnsID, "NodePort", svcInfo.NodePort()) + } else { + // On successful delete, remove hnsId + svcInfo.nodePorthnsID = "" + } for _, externalIP := range svcInfo.externalIPs { - hns.deleteLoadBalancer(externalIP.hnsID) - externalIP.hnsID = "" + if err := hns.deleteLoadBalancer(externalIP.hnsID); err != nil { + klog.V(1).ErrorS(err, "Error deleting Hns ExternalIP policy resource.", "hnsID", externalIP.hnsID, "IP", externalIP.ip) + } else { + // On successful delete, remove hnsId + externalIP.hnsID = "" + } } for _, lbIngressIP := range svcInfo.loadBalancerIngressIPs { - hns.deleteLoadBalancer(lbIngressIP.hnsID) - lbIngressIP.hnsID = "" + if err := hns.deleteLoadBalancer(lbIngressIP.hnsID); err != nil { + klog.V(1).ErrorS(err, "Error deleting Hns IngressIP policy resource.", "hnsID", lbIngressIP.hnsID, "IP", lbIngressIP.ip) + } else { + // On successful delete, remove hnsId + lbIngressIP.hnsID = "" + } + if lbIngressIP.healthCheckHnsID != "" { - hns.deleteLoadBalancer(lbIngressIP.healthCheckHnsID) - lbIngressIP.healthCheckHnsID = "" + if err := hns.deleteLoadBalancer(lbIngressIP.healthCheckHnsID); err != nil { + klog.V(1).ErrorS(err, "Error deleting Hns IngressIP HealthCheck policy resource.", "hnsID", lbIngressIP.healthCheckHnsID, "IP", lbIngressIP.ip) + } else { + // On successful delete, remove hnsId + lbIngressIP.healthCheckHnsID = "" + } } } }