diff --git a/cmd/kube-proxy/app/server.go b/cmd/kube-proxy/app/server.go index c32e561d8a0..3098df3c0ea 100644 --- a/cmd/kube-proxy/app/server.go +++ b/cmd/kube-proxy/app/server.go @@ -181,6 +181,9 @@ func (o *Options) AddFlags(fs *pflag.FlagSet) { fs.DurationVar(&o.config.IPTables.MinSyncPeriod.Duration, "iptables-min-sync-period", o.config.IPTables.MinSyncPeriod.Duration, "The minimum interval of how often the iptables rules can be refreshed as endpoints and services change (e.g. '5s', '1m', '2h22m').") fs.DurationVar(&o.config.IPVS.SyncPeriod.Duration, "ipvs-sync-period", o.config.IPVS.SyncPeriod.Duration, "The maximum interval of how often ipvs rules are refreshed (e.g. '5s', '1m', '2h22m'). Must be greater than 0.") fs.DurationVar(&o.config.IPVS.MinSyncPeriod.Duration, "ipvs-min-sync-period", o.config.IPVS.MinSyncPeriod.Duration, "The minimum interval of how often the ipvs rules can be refreshed as endpoints and services change (e.g. '5s', '1m', '2h22m').") + fs.DurationVar(&o.config.IPVS.TCPTimeout.Duration, "ipvs-tcp-timeout", o.config.IPVS.TCPTimeout.Duration, "The timeout for idle IPVS TCP connections, 0 to leave as-is. (e.g. '5s', '1m', '2h22m').") + fs.DurationVar(&o.config.IPVS.TCPFinTimeout.Duration, "ipvs-tcpfin-timeout", o.config.IPVS.TCPFinTimeout.Duration, "The timeout for IPVS TCP connections after receiving a FIN packet, 0 to leave as-is. (e.g. '5s', '1m', '2h22m').") + fs.DurationVar(&o.config.IPVS.UDPTimeout.Duration, "ipvs-udp-timeout", o.config.IPVS.UDPTimeout.Duration, "The timeout for IPVS UDP packets, 0 to leave as-is. (e.g. 
'5s', '1m', '2h22m').") fs.DurationVar(&o.config.Conntrack.TCPEstablishedTimeout.Duration, "conntrack-tcp-timeout-established", o.config.Conntrack.TCPEstablishedTimeout.Duration, "Idle timeout for established TCP connections (0 to leave as-is)") fs.DurationVar( &o.config.Conntrack.TCPCloseWaitTimeout.Duration, "conntrack-tcp-timeout-close-wait", diff --git a/cmd/kube-proxy/app/server_others.go b/cmd/kube-proxy/app/server_others.go index fd622863a0a..52e5fc65b81 100644 --- a/cmd/kube-proxy/app/server_others.go +++ b/cmd/kube-proxy/app/server_others.go @@ -193,6 +193,9 @@ func newProxyServer( config.IPVS.MinSyncPeriod.Duration, config.IPVS.ExcludeCIDRs, config.IPVS.StrictARP, + config.IPVS.TCPTimeout.Duration, + config.IPVS.TCPFinTimeout.Duration, + config.IPVS.UDPTimeout.Duration, config.IPTables.MasqueradeAll, int(*config.IPTables.MasqueradeBit), cidrTuple(config.ClusterCIDR), @@ -214,6 +217,9 @@ func newProxyServer( config.IPVS.MinSyncPeriod.Duration, config.IPVS.ExcludeCIDRs, config.IPVS.StrictARP, + config.IPVS.TCPTimeout.Duration, + config.IPVS.TCPFinTimeout.Duration, + config.IPVS.UDPTimeout.Duration, config.IPTables.MasqueradeAll, int(*config.IPTables.MasqueradeBit), config.ClusterCIDR, diff --git a/cmd/kubeadm/app/componentconfigs/kubeproxy_test.go b/cmd/kubeadm/app/componentconfigs/kubeproxy_test.go index a3ab50d7e0a..b298697bdea 100644 --- a/cmd/kubeadm/app/componentconfigs/kubeproxy_test.go +++ b/cmd/kubeadm/app/componentconfigs/kubeproxy_test.go @@ -77,6 +77,9 @@ var kubeProxyMarshalCases = []struct { scheduler: "" strictARP: false syncPeriod: 0s + tcpFinTimeout: 0s + tcpTimeout: 0s + udpTimeout: 0s kind: KubeProxyConfiguration metricsBindAddress: "" mode: "" @@ -128,6 +131,9 @@ var kubeProxyMarshalCases = []struct { scheduler: "" strictARP: false syncPeriod: 0s + tcpFinTimeout: 0s + tcpTimeout: 0s + udpTimeout: 0s kind: KubeProxyConfiguration metricsBindAddress: "" mode: "" diff --git a/kind.yaml b/kind.yaml new file mode 100644 index 
00000000000..2804cebd2c8 --- /dev/null +++ b/kind.yaml @@ -0,0 +1,32 @@ +# three node (two workers) cluster config +kind: Cluster +apiVersion: kind.sigs.k8s.io/v1alpha3 +nodes: +- role: control-plane +- role: worker +- role: worker +kubeadmConfigPatches: +- | + apiVersion: kubeadm.k8s.io/v1beta2 + kind: ClusterConfiguration + metadata: + name: config + apiServer: + extraArgs: + "feature-gates": "EndpointSlice=true,ServiceTopology=true" + scheduler: + extraArgs: + "feature-gates": "EndpointSlice=true,ServiceTopology=true" + controllerManager: + extraArgs: + "feature-gates": "EndpointSlice=true,ServiceTopology=true" +- | + apiVersion: kubeadm.k8s.io/v1beta2 + kind: InitConfiguration + metadata: + name: config + nodeRegistration: + kubeletExtraArgs: + "feature-gates": "EndpointSlice=true,ServiceTopology=true" +# 1 control plane node and 2 workers + diff --git a/pkg/proxy/apis/config/scheme/testdata/KubeProxyConfiguration/after/__internal.yaml b/pkg/proxy/apis/config/scheme/testdata/KubeProxyConfiguration/after/__internal.yaml index c20883b95ac..70227337684 100755 --- a/pkg/proxy/apis/config/scheme/testdata/KubeProxyConfiguration/after/__internal.yaml +++ b/pkg/proxy/apis/config/scheme/testdata/KubeProxyConfiguration/after/__internal.yaml @@ -27,6 +27,9 @@ IPVS: Scheduler: "" StrictARP: false SyncPeriod: 0s + TCPFinTimeout: 0s + TCPTimeout: 0s + UDPTimeout: 0s MetricsBindAddress: "" Mode: "" NodePortAddresses: null diff --git a/pkg/proxy/apis/config/scheme/testdata/KubeProxyConfiguration/after/v1alpha1.yaml b/pkg/proxy/apis/config/scheme/testdata/KubeProxyConfiguration/after/v1alpha1.yaml index 20a467ec023..5271fe62dd9 100755 --- a/pkg/proxy/apis/config/scheme/testdata/KubeProxyConfiguration/after/v1alpha1.yaml +++ b/pkg/proxy/apis/config/scheme/testdata/KubeProxyConfiguration/after/v1alpha1.yaml @@ -27,6 +27,9 @@ ipvs: scheduler: "" strictARP: false syncPeriod: 30s + tcpFinTimeout: 0s + tcpTimeout: 0s + udpTimeout: 0s kind: KubeProxyConfiguration metricsBindAddress: 
127.0.0.1:10249 mode: "" diff --git a/pkg/proxy/apis/config/scheme/testdata/KubeProxyConfiguration/v1alpha1To__internal/empty.yaml.after_roundtrip b/pkg/proxy/apis/config/scheme/testdata/KubeProxyConfiguration/v1alpha1To__internal/empty.yaml.after_roundtrip index 341efcd3235..db7b93a2c83 100755 --- a/pkg/proxy/apis/config/scheme/testdata/KubeProxyConfiguration/v1alpha1To__internal/empty.yaml.after_roundtrip +++ b/pkg/proxy/apis/config/scheme/testdata/KubeProxyConfiguration/v1alpha1To__internal/empty.yaml.after_roundtrip @@ -27,6 +27,9 @@ IPVS: Scheduler: "" StrictARP: false SyncPeriod: 30s + TCPFinTimeout: 0s + TCPTimeout: 0s + UDPTimeout: 0s MetricsBindAddress: 127.0.0.1:10249 Mode: "" NodePortAddresses: null diff --git a/pkg/proxy/apis/config/scheme/testdata/KubeProxyConfiguration/v1alpha1Tov1alpha1/empty.yaml.after_roundtrip b/pkg/proxy/apis/config/scheme/testdata/KubeProxyConfiguration/v1alpha1Tov1alpha1/empty.yaml.after_roundtrip index 20a467ec023..5271fe62dd9 100755 --- a/pkg/proxy/apis/config/scheme/testdata/KubeProxyConfiguration/v1alpha1Tov1alpha1/empty.yaml.after_roundtrip +++ b/pkg/proxy/apis/config/scheme/testdata/KubeProxyConfiguration/v1alpha1Tov1alpha1/empty.yaml.after_roundtrip @@ -27,6 +27,9 @@ ipvs: scheduler: "" strictARP: false syncPeriod: 30s + tcpFinTimeout: 0s + tcpTimeout: 0s + udpTimeout: 0s kind: KubeProxyConfiguration metricsBindAddress: 127.0.0.1:10249 mode: "" diff --git a/pkg/proxy/apis/config/types.go b/pkg/proxy/apis/config/types.go index b1314fc2e88..dd00dd12e5e 100644 --- a/pkg/proxy/apis/config/types.go +++ b/pkg/proxy/apis/config/types.go @@ -58,6 +58,15 @@ type KubeProxyIPVSConfiguration struct { // strict ARP configure arp_ignore and arp_announce to avoid answering ARP queries // from kube-ipvs0 interface StrictARP bool + // tcpTimeout is the timeout value used for idle IPVS TCP sessions. + // The default value is 0, which preserves the current timeout value on the system. 
+ TCPTimeout metav1.Duration + // tcpFinTimeout is the timeout value used for IPVS TCP sessions after receiving a FIN. + // The default value is 0, which preserves the current timeout value on the system. + TCPFinTimeout metav1.Duration + // udpTimeout is the timeout value used for IPVS UDP packets. + // The default value is 0, which preserves the current timeout value on the system. + UDPTimeout metav1.Duration } // KubeProxyConntrackConfiguration contains conntrack settings for diff --git a/pkg/proxy/apis/config/v1alpha1/zz_generated.conversion.go b/pkg/proxy/apis/config/v1alpha1/zz_generated.conversion.go index 0f6248af45c..ca2fcdea112 100644 --- a/pkg/proxy/apis/config/v1alpha1/zz_generated.conversion.go +++ b/pkg/proxy/apis/config/v1alpha1/zz_generated.conversion.go @@ -223,6 +223,9 @@ func autoConvert_v1alpha1_KubeProxyIPVSConfiguration_To_config_KubeProxyIPVSConf out.Scheduler = in.Scheduler out.ExcludeCIDRs = *(*[]string)(unsafe.Pointer(&in.ExcludeCIDRs)) out.StrictARP = in.StrictARP + out.TCPTimeout = in.TCPTimeout + out.TCPFinTimeout = in.TCPFinTimeout + out.UDPTimeout = in.UDPTimeout return nil } @@ -237,6 +240,9 @@ func autoConvert_config_KubeProxyIPVSConfiguration_To_v1alpha1_KubeProxyIPVSConf out.Scheduler = in.Scheduler out.ExcludeCIDRs = *(*[]string)(unsafe.Pointer(&in.ExcludeCIDRs)) out.StrictARP = in.StrictARP + out.TCPTimeout = in.TCPTimeout + out.TCPFinTimeout = in.TCPFinTimeout + out.UDPTimeout = in.UDPTimeout return nil } diff --git a/pkg/proxy/apis/config/zz_generated.deepcopy.go b/pkg/proxy/apis/config/zz_generated.deepcopy.go index 2745d60c203..d35f7989c12 100644 --- a/pkg/proxy/apis/config/zz_generated.deepcopy.go +++ b/pkg/proxy/apis/config/zz_generated.deepcopy.go @@ -165,6 +165,9 @@ func (in *KubeProxyIPVSConfiguration) DeepCopyInto(out *KubeProxyIPVSConfigurati *out = make([]string, len(*in)) copy(*out, *in) } + out.TCPTimeout = in.TCPTimeout + out.TCPFinTimeout = in.TCPFinTimeout + out.UDPTimeout = in.UDPTimeout return } diff --git 
a/pkg/proxy/ipvs/proxier.go b/pkg/proxy/ipvs/proxier.go index 45505b6b0db..e83f0647be5 100644 --- a/pkg/proxy/ipvs/proxier.go +++ b/pkg/proxy/ipvs/proxier.go @@ -327,6 +327,9 @@ func NewProxier(ipt utiliptables.Interface, minSyncPeriod time.Duration, excludeCIDRs []string, strictARP bool, + tcpTimeout time.Duration, + tcpFinTimeout time.Duration, + udpTimeout time.Duration, masqueradeAll bool, masqueradeBit int, clusterCIDR string, @@ -402,6 +405,15 @@ func NewProxier(ipt utiliptables.Interface, } } + // Configure IPVS timeouts if any one of the timeout parameters has been set. + // This is the equivalent to running ipvsadm --set; a value of 0 indicates the + // current system timeout should be preserved. + if tcpTimeout > 0 || tcpFinTimeout > 0 || udpTimeout > 0 { + if err := ipvs.ConfigureTimeouts(tcpTimeout, tcpFinTimeout, udpTimeout); err != nil { + klog.Warningf("failed to configure IPVS timeouts: %v", err) + } + } + // Generate the masquerade mark to use for SNAT rules. masqueradeValue := 1 << uint(masqueradeBit) masqueradeMark := fmt.Sprintf("%#08x/%#08x", masqueradeValue, masqueradeValue) @@ -483,6 +495,9 @@ func NewDualStackProxier( minSyncPeriod time.Duration, excludeCIDRs []string, strictARP bool, + tcpTimeout time.Duration, + tcpFinTimeout time.Duration, + udpTimeout time.Duration, masqueradeAll bool, masqueradeBit int, clusterCIDR [2]string, @@ -499,7 +514,8 @@ func NewDualStackProxier( // Create an ipv4 instance of the single-stack proxier ipv4Proxier, err := NewProxier(ipt[0], ipvs, safeIpset, sysctl, exec, syncPeriod, minSyncPeriod, filterCIDRs(false, excludeCIDRs), strictARP, - masqueradeAll, masqueradeBit, clusterCIDR[0], hostname, nodeIP[0], + tcpTimeout, tcpFinTimeout, udpTimeout, masqueradeAll, masqueradeBit, + clusterCIDR[0], hostname, nodeIP[0], recorder, healthzServer, scheduler, nodePortAddresses) if err != nil { return nil, fmt.Errorf("unable to create ipv4 proxier: %v", err) @@ -507,7 +523,8 @@ func NewDualStackProxier( ipv6Proxier, err 
:= NewProxier(ipt[1], ipvs, safeIpset, sysctl, exec, syncPeriod, minSyncPeriod, filterCIDRs(true, excludeCIDRs), strictARP, - masqueradeAll, masqueradeBit, clusterCIDR[1], hostname, nodeIP[1], + tcpTimeout, tcpFinTimeout, udpTimeout, masqueradeAll, masqueradeBit, + clusterCIDR[1], hostname, nodeIP[1], nil, nil, scheduler, nodePortAddresses) if err != nil { return nil, fmt.Errorf("unable to create ipv6 proxier: %v", err) diff --git a/pkg/util/ipvs/ipvs.go b/pkg/util/ipvs/ipvs.go index 5e7bacc8c6b..13eef323c70 100644 --- a/pkg/util/ipvs/ipvs.go +++ b/pkg/util/ipvs/ipvs.go @@ -19,6 +19,7 @@ package ipvs import ( "net" "strconv" + "time" "k8s.io/apimachinery/pkg/util/version" ) @@ -45,6 +46,8 @@ type Interface interface { DeleteRealServer(*VirtualServer, *RealServer) error // UpdateRealServer updates the specified real server from the specified virtual server. UpdateRealServer(*VirtualServer, *RealServer) error + // ConfigureTimeouts is the equivalent to running "ipvsadm --set" to configure tcp, tcpfin and udp timeouts + ConfigureTimeouts(time.Duration, time.Duration, time.Duration) error } // VirtualServer is an user-oriented definition of an IPVS virtual server in its entirety. 
diff --git a/pkg/util/ipvs/ipvs_linux.go b/pkg/util/ipvs/ipvs_linux.go index 54dac294262..9acfc570a4a 100644 --- a/pkg/util/ipvs/ipvs_linux.go +++ b/pkg/util/ipvs/ipvs_linux.go @@ -25,6 +25,7 @@ import ( "strings" "sync" "syscall" + "time" libipvs "github.com/docker/libnetwork/ipvs" "k8s.io/klog" @@ -201,6 +202,17 @@ func (runner *runner) GetRealServers(vs *VirtualServer) ([]*RealServer, error) { return rss, nil } +// ConfigureTimeouts is the equivalent to running "ipvsadm --set" to configure tcp, tcpfin and udp timeouts +func (runner *runner) ConfigureTimeouts(tcpTimeout, tcpFinTimeout, udpTimeout time.Duration) error { + ipvsConfig := &libipvs.Config{ + TimeoutTCP: tcpTimeout, + TimeoutTCPFin: tcpFinTimeout, + TimeoutUDP: udpTimeout, + } + + return runner.ipvsHandle.SetConfig(ipvsConfig) +} + // toVirtualServer converts an IPVS Service to the equivalent VirtualServer structure. func toVirtualServer(svc *libipvs.Service) (*VirtualServer, error) { if svc == nil { diff --git a/pkg/util/ipvs/ipvs_unsupported.go b/pkg/util/ipvs/ipvs_unsupported.go index 86447d57c59..31a86eb25c5 100644 --- a/pkg/util/ipvs/ipvs_unsupported.go +++ b/pkg/util/ipvs/ipvs_unsupported.go @@ -20,6 +20,7 @@ package ipvs import ( "fmt" + "time" utilexec "k8s.io/utils/exec" ) @@ -72,4 +73,8 @@ func (runner *runner) UpdateRealServer(*VirtualServer, *RealServer) error { return fmt.Errorf("IPVS not supported for this platform") } +func (runner *runner) ConfigureTimeouts(time.Duration, time.Duration, time.Duration) error { + return fmt.Errorf("IPVS not supported for this platform") +} + var _ = Interface(&runner{}) diff --git a/pkg/util/ipvs/testing/fake.go b/pkg/util/ipvs/testing/fake.go index 14d16e893d7..f9badbd282a 100644 --- a/pkg/util/ipvs/testing/fake.go +++ b/pkg/util/ipvs/testing/fake.go @@ -20,6 +20,7 @@ import ( "fmt" "net" "strconv" + "time" utilipvs "k8s.io/kubernetes/pkg/util/ipvs" ) @@ -204,4 +205,9 @@ func (f *FakeIPVS) UpdateRealServer(serv *utilipvs.VirtualServer, dest *utilipvs 
return f.AddRealServer(serv, dest) } +// ConfigureTimeouts is not supported for fake IPVS +func (f *FakeIPVS) ConfigureTimeouts(time.Duration, time.Duration, time.Duration) error { + return fmt.Errorf("not supported in fake IPVS") +} + var _ = utilipvs.Interface(&FakeIPVS{}) diff --git a/staging/src/k8s.io/kube-proxy/config/v1alpha1/types.go b/staging/src/k8s.io/kube-proxy/config/v1alpha1/types.go index 8bc7894e481..48435c642ca 100644 --- a/staging/src/k8s.io/kube-proxy/config/v1alpha1/types.go +++ b/staging/src/k8s.io/kube-proxy/config/v1alpha1/types.go @@ -54,6 +54,15 @@ type KubeProxyIPVSConfiguration struct { // strict ARP configure arp_ignore and arp_announce to avoid answering ARP queries // from kube-ipvs0 interface StrictARP bool `json:"strictARP"` + // tcpTimeout is the timeout value used for idle IPVS TCP sessions. + // The default value is 0, which preserves the current timeout value on the system. + TCPTimeout metav1.Duration `json:"tcpTimeout"` + // tcpFinTimeout is the timeout value used for IPVS TCP sessions after receiving a FIN. + // The default value is 0, which preserves the current timeout value on the system. + TCPFinTimeout metav1.Duration `json:"tcpFinTimeout"` + // udpTimeout is the timeout value used for IPVS UDP packets. + // The default value is 0, which preserves the current timeout value on the system. 
+ UDPTimeout metav1.Duration `json:"udpTimeout"` } // KubeProxyConntrackConfiguration contains conntrack settings for diff --git a/staging/src/k8s.io/kube-proxy/config/v1alpha1/zz_generated.deepcopy.go b/staging/src/k8s.io/kube-proxy/config/v1alpha1/zz_generated.deepcopy.go index 5241f4820d2..307e068c2bf 100644 --- a/staging/src/k8s.io/kube-proxy/config/v1alpha1/zz_generated.deepcopy.go +++ b/staging/src/k8s.io/kube-proxy/config/v1alpha1/zz_generated.deepcopy.go @@ -143,6 +143,9 @@ func (in *KubeProxyIPVSConfiguration) DeepCopyInto(out *KubeProxyIPVSConfigurati *out = make([]string, len(*in)) copy(*out, *in) } + out.TCPTimeout = in.TCPTimeout + out.TCPFinTimeout = in.TCPFinTimeout + out.UDPTimeout = in.UDPTimeout return }