diff --git a/cmd/kube-proxy/app/server_linux.go b/cmd/kube-proxy/app/server_linux.go index 46873e821ea..56fb9d5e45e 100644 --- a/cmd/kube-proxy/app/server_linux.go +++ b/cmd/kube-proxy/app/server_linux.go @@ -36,13 +36,11 @@ import ( "k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/watch" - utilfeature "k8s.io/apiserver/pkg/util/feature" clientset "k8s.io/client-go/kubernetes" "k8s.io/client-go/tools/cache" toolswatch "k8s.io/client-go/tools/watch" utilsysctl "k8s.io/component-helpers/node/util/sysctl" "k8s.io/klog/v2" - "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/proxy" proxyconfigapi "k8s.io/kubernetes/pkg/proxy/apis/config" "k8s.io/kubernetes/pkg/proxy/iptables" @@ -527,11 +525,9 @@ func platformCleanup(ctx context.Context, mode proxyconfigapi.ProxyMode, cleanup } } - if utilfeature.DefaultFeatureGate.Enabled(features.NFTablesProxyMode) { - // Clean up nftables rules when switching to iptables or ipvs, or if cleanupAndExit - if isIPTablesBased(mode) || cleanupAndExit { - encounteredError = nftables.CleanupLeftovers(ctx) || encounteredError - } + // Clean up nftables rules when switching to iptables or ipvs, or if cleanupAndExit + if isIPTablesBased(mode) || cleanupAndExit { + encounteredError = nftables.CleanupLeftovers(ctx) || encounteredError } if encounteredError { diff --git a/pkg/features/versioned_kube_features.go b/pkg/features/versioned_kube_features.go index 79b7ff156a0..163de6476fa 100644 --- a/pkg/features/versioned_kube_features.go +++ b/pkg/features/versioned_kube_features.go @@ -540,6 +540,7 @@ var defaultVersionedKubernetesFeatureGates = map[featuregate.Feature]featuregate NFTablesProxyMode: { {Version: version.MustParse("1.29"), Default: false, PreRelease: featuregate.Alpha}, {Version: version.MustParse("1.31"), Default: true, PreRelease: featuregate.Beta}, + {Version: version.MustParse("1.33"), Default: true, PreRelease: featuregate.GA, LockToDefault: true}, }, 
NodeInclusionPolicyInPodTopologySpread: { diff --git a/pkg/proxy/apis/config/validation/validation.go b/pkg/proxy/apis/config/validation/validation.go index 3568f50b29e..6ce772d2f2c 100644 --- a/pkg/proxy/apis/config/validation/validation.go +++ b/pkg/proxy/apis/config/validation/validation.go @@ -30,7 +30,6 @@ import ( logsapi "k8s.io/component-base/logs/api/v1" "k8s.io/component-base/metrics" apivalidation "k8s.io/kubernetes/pkg/apis/core/validation" - "k8s.io/kubernetes/pkg/features" kubeproxyconfig "k8s.io/kubernetes/pkg/proxy/apis/config" netutils "k8s.io/utils/net" ) @@ -173,12 +172,9 @@ func validateProxyModeLinux(mode kubeproxyconfig.ProxyMode, fldPath *field.Path) validModes := sets.New[string]( string(kubeproxyconfig.ProxyModeIPTables), string(kubeproxyconfig.ProxyModeIPVS), + string(kubeproxyconfig.ProxyModeNFTables), ) - if utilfeature.DefaultFeatureGate.Enabled(features.NFTablesProxyMode) { - validModes.Insert(string(kubeproxyconfig.ProxyModeNFTables)) - } - if mode == "" || validModes.Has(string(mode)) { return nil } diff --git a/pkg/proxy/nftables/README.md b/pkg/proxy/nftables/README.md index 57e0369ad66..d97fb872179 100644 --- a/pkg/proxy/nftables/README.md +++ b/pkg/proxy/nftables/README.md @@ -106,4 +106,34 @@ This is implemented as follows: rule for ClusterIPs belonging to any of the ServiceCIDRs in `forward` and `output` hook, with a higher (i.e. less urgent) priority than the DNAT chains making sure all valid traffic directed for ClusterIPs is already DNATed. Drop rule will only - be installed if `MultiCIDRServiceAllocator` feature is enabled. \ No newline at end of file + be installed if `MultiCIDRServiceAllocator` feature is enabled. + +## Integrating with kube-proxy's nftables mode + +Implementations of pod networking, NetworkPolicy, service meshes, etc, may need to be +aware of some slightly lower-level details of kube-proxy's implementation. 
+ +Components other than kube-proxy should *never* make any modifications to the +`kube-proxy` nftables table, or any of the chains, sets, maps, etc, within it. Every +component should create its own table and only work within that table. However, +you can ensure that rules in your own table will run before or after kube-proxy's rules +by setting appropriate `priority` values for your base chains. In particular: + + - Service traffic that needs to be DNATted will be DNATted by kube-proxy on a chain of + `type nat` with `priority dstnat` and either `hook output` (for traffic on the + "output" path) or `hook prerouting` (for traffic on the "input" or "forward" paths). + (So chains in other tables that run before this will see traffic addressed to service + IPs, while chains that run after this will see traffic addressed to endpoint IPs.) + + - Service traffic that needs to be masqueraded will be SNATted on a chain of `type + nat`, `hook postrouting`, and `priority srcnat`. (So chains in other tables that run + before this will always see the original client IP, while chains that run after this + will see masqueraded source IPs for some traffic.) + + - Traffic to services with no endpoints will be dropped or rejected from a chain with + `type filter`, `priority dstnat-10`, and any of `hook input`, `hook output`, or `hook + forward`. + +Note that the use of `mark` to indicate what traffic needs to be masqueraded is *not* +part of kube-proxy's public API, and you should not assume that you can cause traffic to +be masqueraded (or not) by setting or clearing a particular mark bit. 
diff --git a/test/featuregates_linter/test_data/versioned_feature_list.yaml b/test/featuregates_linter/test_data/versioned_feature_list.yaml index df71e5d3a1e..565290d616c 100644 --- a/test/featuregates_linter/test_data/versioned_feature_list.yaml +++ b/test/featuregates_linter/test_data/versioned_feature_list.yaml @@ -842,6 +842,10 @@ lockToDefault: false preRelease: Beta version: "1.31" + - default: true + lockToDefault: true + preRelease: GA + version: "1.33" - name: NodeInclusionPolicyInPodTopologySpread versionedSpecs: - default: false