mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-03 09:22:44 +00:00
Merge pull request #129653 from danwinship/nftables-ga
KEP-3866 nftables kube-proxy to GA
This commit is contained in:
commit
d7774fce9a
@ -36,13 +36,11 @@ import (
|
|||||||
"k8s.io/apimachinery/pkg/fields"
|
"k8s.io/apimachinery/pkg/fields"
|
||||||
"k8s.io/apimachinery/pkg/runtime"
|
"k8s.io/apimachinery/pkg/runtime"
|
||||||
"k8s.io/apimachinery/pkg/watch"
|
"k8s.io/apimachinery/pkg/watch"
|
||||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
|
||||||
clientset "k8s.io/client-go/kubernetes"
|
clientset "k8s.io/client-go/kubernetes"
|
||||||
"k8s.io/client-go/tools/cache"
|
"k8s.io/client-go/tools/cache"
|
||||||
toolswatch "k8s.io/client-go/tools/watch"
|
toolswatch "k8s.io/client-go/tools/watch"
|
||||||
utilsysctl "k8s.io/component-helpers/node/util/sysctl"
|
utilsysctl "k8s.io/component-helpers/node/util/sysctl"
|
||||||
"k8s.io/klog/v2"
|
"k8s.io/klog/v2"
|
||||||
"k8s.io/kubernetes/pkg/features"
|
|
||||||
"k8s.io/kubernetes/pkg/proxy"
|
"k8s.io/kubernetes/pkg/proxy"
|
||||||
proxyconfigapi "k8s.io/kubernetes/pkg/proxy/apis/config"
|
proxyconfigapi "k8s.io/kubernetes/pkg/proxy/apis/config"
|
||||||
"k8s.io/kubernetes/pkg/proxy/iptables"
|
"k8s.io/kubernetes/pkg/proxy/iptables"
|
||||||
@ -527,12 +525,10 @@ func platformCleanup(ctx context.Context, mode proxyconfigapi.ProxyMode, cleanup
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if utilfeature.DefaultFeatureGate.Enabled(features.NFTablesProxyMode) {
|
|
||||||
// Clean up nftables rules when switching to iptables or ipvs, or if cleanupAndExit
|
// Clean up nftables rules when switching to iptables or ipvs, or if cleanupAndExit
|
||||||
if isIPTablesBased(mode) || cleanupAndExit {
|
if isIPTablesBased(mode) || cleanupAndExit {
|
||||||
encounteredError = nftables.CleanupLeftovers(ctx) || encounteredError
|
encounteredError = nftables.CleanupLeftovers(ctx) || encounteredError
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if encounteredError {
|
if encounteredError {
|
||||||
return errors.New("encountered an error while tearing down rules")
|
return errors.New("encountered an error while tearing down rules")
|
||||||
|
@ -540,6 +540,7 @@ var defaultVersionedKubernetesFeatureGates = map[featuregate.Feature]featuregate
|
|||||||
NFTablesProxyMode: {
|
NFTablesProxyMode: {
|
||||||
{Version: version.MustParse("1.29"), Default: false, PreRelease: featuregate.Alpha},
|
{Version: version.MustParse("1.29"), Default: false, PreRelease: featuregate.Alpha},
|
||||||
{Version: version.MustParse("1.31"), Default: true, PreRelease: featuregate.Beta},
|
{Version: version.MustParse("1.31"), Default: true, PreRelease: featuregate.Beta},
|
||||||
|
{Version: version.MustParse("1.33"), Default: true, PreRelease: featuregate.GA, LockToDefault: true},
|
||||||
},
|
},
|
||||||
|
|
||||||
NodeInclusionPolicyInPodTopologySpread: {
|
NodeInclusionPolicyInPodTopologySpread: {
|
||||||
|
@ -30,7 +30,6 @@ import (
|
|||||||
logsapi "k8s.io/component-base/logs/api/v1"
|
logsapi "k8s.io/component-base/logs/api/v1"
|
||||||
"k8s.io/component-base/metrics"
|
"k8s.io/component-base/metrics"
|
||||||
apivalidation "k8s.io/kubernetes/pkg/apis/core/validation"
|
apivalidation "k8s.io/kubernetes/pkg/apis/core/validation"
|
||||||
"k8s.io/kubernetes/pkg/features"
|
|
||||||
kubeproxyconfig "k8s.io/kubernetes/pkg/proxy/apis/config"
|
kubeproxyconfig "k8s.io/kubernetes/pkg/proxy/apis/config"
|
||||||
netutils "k8s.io/utils/net"
|
netutils "k8s.io/utils/net"
|
||||||
)
|
)
|
||||||
@ -173,12 +172,9 @@ func validateProxyModeLinux(mode kubeproxyconfig.ProxyMode, fldPath *field.Path)
|
|||||||
validModes := sets.New[string](
|
validModes := sets.New[string](
|
||||||
string(kubeproxyconfig.ProxyModeIPTables),
|
string(kubeproxyconfig.ProxyModeIPTables),
|
||||||
string(kubeproxyconfig.ProxyModeIPVS),
|
string(kubeproxyconfig.ProxyModeIPVS),
|
||||||
|
string(kubeproxyconfig.ProxyModeNFTables),
|
||||||
)
|
)
|
||||||
|
|
||||||
if utilfeature.DefaultFeatureGate.Enabled(features.NFTablesProxyMode) {
|
|
||||||
validModes.Insert(string(kubeproxyconfig.ProxyModeNFTables))
|
|
||||||
}
|
|
||||||
|
|
||||||
if mode == "" || validModes.Has(string(mode)) {
|
if mode == "" || validModes.Has(string(mode)) {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -107,3 +107,33 @@ This is implemented as follows:
|
|||||||
higher (i.e. less urgent) priority than the DNAT chains making sure all valid
|
higher (i.e. less urgent) priority than the DNAT chains making sure all valid
|
||||||
traffic directed for ClusterIPs is already DNATed. Drop rule will only
|
traffic directed for ClusterIPs is already DNATed. Drop rule will only
|
||||||
be installed if `MultiCIDRServiceAllocator` feature is enabled.
|
be installed if `MultiCIDRServiceAllocator` feature is enabled.
|
||||||
|
|
||||||
|
## Integrating with kube-proxy's nftables mode
|
||||||
|
|
||||||
|
Implementations of pod networking, NetworkPolicy, service meshes, etc, may need to be
|
||||||
|
aware of some slightly lower-level details of kube-proxy's implementation.
|
||||||
|
|
||||||
|
Components other than kube-proxy should *never* make any modifications to the
|
||||||
|
`kube-proxy` nftables table, or any of the chains, sets, maps, etc, within it. Every
|
||||||
|
component should create its own table and only work within that table. However,
|
||||||
|
you can ensure that rules in your own table will run before or after kube-proxy's rules
|
||||||
|
by setting appropriate `priority` values for your base chains. In particular:
|
||||||
|
|
||||||
|
- Service traffic that needs to be DNATted will be DNATted by kube-proxy on a chain of
|
||||||
|
`type nat` with `priority dstnat` and either `hook output` (for traffic on the
|
||||||
|
"output" path) or `hook prerouting` (for traffic on the "input" or "forward" paths).
|
||||||
|
(So chains in other tables that run before this will see traffic addressed to service
|
||||||
|
IPs, while chains that run after this will see traffic addressed to endpoint IPs.)
|
||||||
|
|
||||||
|
- Service traffic that needs to be masqueraded will be SNATted on a chain of `type
|
||||||
|
nat`, `hook postrouting`, and `priority srcnat`. (So chains in other tables that run
|
||||||
|
before this will always see the original client IP, while chains that run after this
|
||||||
|
will see masqueraded source IPs for some traffic.)
|
||||||
|
|
||||||
|
- Traffic to services with no endpoints will be dropped or rejected from a chain with
|
||||||
|
`type filter`, `priority dstnat-10`, and any of `hook input`, `hook output`, or `hook
|
||||||
|
forward`.
|
||||||
|
|
||||||
|
Note that the use of `mark` to indicate what traffic needs to be masqueraded is *not*
|
||||||
|
part of kube-proxy's public API, and you should not assume that you can cause traffic to
|
||||||
|
be masqueraded (or not) by setting or clearing a particular mark bit.
|
||||||
|
@ -842,6 +842,10 @@
|
|||||||
lockToDefault: false
|
lockToDefault: false
|
||||||
preRelease: Beta
|
preRelease: Beta
|
||||||
version: "1.31"
|
version: "1.31"
|
||||||
|
- default: true
|
||||||
|
lockToDefault: true
|
||||||
|
preRelease: GA
|
||||||
|
version: "1.33"
|
||||||
- name: NodeInclusionPolicyInPodTopologySpread
|
- name: NodeInclusionPolicyInPodTopologySpread
|
||||||
versionedSpecs:
|
versionedSpecs:
|
||||||
- default: false
|
- default: false
|
||||||
|
Loading…
Reference in New Issue
Block a user