diff --git a/cmd/kubelet/app/server.go b/cmd/kubelet/app/server.go index 3ed0a2c0d21..fcd5de89419 100644 --- a/cmd/kubelet/app/server.go +++ b/cmd/kubelet/app/server.go @@ -200,6 +200,18 @@ func UnsecuredKubeletConfig(s *options.KubeletServer) (*KubeletConfig, error) { Thresholds: thresholds, } + if s.MakeIPTablesUtilChains { + if s.IPTablesMasqueradeBit > 31 || s.IPTablesMasqueradeBit < 0 { + return nil, fmt.Errorf("iptables-masquerade-bit is not valid. Must be within [0, 31]") + } + if s.IPTablesDropBit > 31 || s.IPTablesDropBit < 0 { + return nil, fmt.Errorf("iptables-drop-bit is not valid. Must be within [0, 31]") + } + if s.IPTablesDropBit == s.IPTablesMasqueradeBit { + return nil, fmt.Errorf("iptables-masquerade-bit and iptables-drop-bit must be different") + } + } + return &KubeletConfig{ Address: net.ParseIP(s.Address), AllowPrivileged: s.AllowPrivileged, @@ -282,10 +294,13 @@ func UnsecuredKubeletConfig(s *options.KubeletServer) (*KubeletConfig, error) { HairpinMode: s.HairpinMode, BabysitDaemons: s.BabysitDaemons, ExperimentalFlannelOverlay: s.ExperimentalFlannelOverlay, - NodeIP: net.ParseIP(s.NodeIP), - EvictionConfig: evictionConfig, - PodsPerCore: int(s.PodsPerCore), - ProtectKernelDefaults: s.ProtectKernelDefaults, + NodeIP: net.ParseIP(s.NodeIP), + EvictionConfig: evictionConfig, + PodsPerCore: int(s.PodsPerCore), + ProtectKernelDefaults: s.ProtectKernelDefaults, + MakeIPTablesUtilChains: s.MakeIPTablesUtilChains, + iptablesMasqueradeBit: int(s.IPTablesMasqueradeBit), + iptablesDropBit: int(s.IPTablesDropBit), }, nil } @@ -891,8 +906,10 @@ type KubeletConfig struct { HairpinMode string BabysitDaemons bool Options []kubelet.Option - - ProtectKernelDefaults bool + ProtectKernelDefaults bool + MakeIPTablesUtilChains bool + iptablesMasqueradeBit int + iptablesDropBit int } func CreateAndInitKubelet(kc *KubeletConfig) (k KubeletBootstrap, pc *config.PodConfig, err error) { @@ -990,6 +1007,9 @@ func CreateAndInitKubelet(kc *KubeletConfig) (k 
KubeletBootstrap, pc *config.Pod kc.EvictionConfig, kc.Options, kc.EnableControllerAttachDetach, + kc.MakeIPTablesUtilChains, + kc.iptablesMasqueradeBit, + kc.iptablesDropBit, ) if err != nil { diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index 67f12d7c57d..dbeda1543ec 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -243,6 +243,9 @@ func NewMainKubelet( evictionConfig eviction.Config, kubeOptions []Option, enableControllerAttachDetach bool, + makeIPTablesUtilChains bool, + iptablesMasqueradeBit int, + iptablesDropBit int, ) (*Kubelet, error) { if rootDirectory == "" { return nil, fmt.Errorf("invalid root directory %q", rootDirectory) @@ -367,6 +370,9 @@ func NewMainKubelet( babysitDaemons: babysitDaemons, enableControllerAttachDetach: enableControllerAttachDetach, iptClient: utilipt.New(utilexec.New(), utildbus.New(), utilipt.ProtocolIpv4), + makeIPTablesUtilChains: makeIPTablesUtilChains, + iptablesMasqueradeBit: iptablesMasqueradeBit, + iptablesDropBit: iptablesDropBit, } if klet.flannelExperimentalOverlay { @@ -840,6 +846,15 @@ type Kubelet struct { // trigger deleting containers in a pod containerDeletor *podContainerDeletor + + // config iptables util rules + makeIPTablesUtilChains bool + + // The bit of the fwmark space to mark packets for SNAT. + iptablesMasqueradeBit int + + // The bit of the fwmark space to mark packets for dropping. + iptablesDropBit int } // setupDataDirs creates: @@ -966,6 +981,11 @@ func (kl *Kubelet) Run(updates <-chan kubetypes.PodUpdate) { go wait.Until(kl.syncNetworkStatus, 30*time.Second, wait.NeverStop) go wait.Until(kl.updateRuntimeUp, 5*time.Second, wait.NeverStop) + // Start loop to sync iptables util rules + if kl.makeIPTablesUtilChains { + go wait.Until(kl.syncNetworkUtil, 1*time.Minute, wait.NeverStop) + } + // Start a goroutine responsible for killing pods (that are not properly // handled by pod workers). 
go wait.Until(kl.podKiller, 1*time.Second, wait.NeverStop)
diff --git a/pkg/kubelet/kubelet_network.go b/pkg/kubelet/kubelet_network.go
index 3a303487436..ed668e951d4 100644
--- a/pkg/kubelet/kubelet_network.go
+++ b/pkg/kubelet/kubelet_network.go
@@ -28,9 +28,25 @@ import (
 	"k8s.io/kubernetes/pkg/apis/componentconfig"
 	"k8s.io/kubernetes/pkg/kubelet/network"
 	"k8s.io/kubernetes/pkg/util/bandwidth"
+	utiliptables "k8s.io/kubernetes/pkg/util/iptables"
 	"k8s.io/kubernetes/pkg/util/sets"
 )
 
+const (
+	// KubeMarkMasqChain is the mark-for-masquerade chain.
+	// TODO: clean up this logic in kube-proxy
+	KubeMarkMasqChain utiliptables.Chain = "KUBE-MARK-MASQ"
+
+	// KubeMarkDropChain is the mark-for-drop chain.
+	KubeMarkDropChain utiliptables.Chain = "KUBE-MARK-DROP"
+
+	// KubePostroutingChain holds the kubernetes postrouting rules.
+	KubePostroutingChain utiliptables.Chain = "KUBE-POSTROUTING"
+
+	// KubeFirewallChain holds the kubernetes firewall rules.
+	KubeFirewallChain utiliptables.Chain = "KUBE-FIREWALL"
+)
+
 // effectiveHairpinMode determines the effective hairpin mode given the
 // configured mode, container runtime, and whether cbr0 should be configured.
 func effectiveHairpinMode(hairpinMode componentconfig.HairpinMode, containerRuntime string, configureCBR0 bool, networkPlugin string) (componentconfig.HairpinMode, error) {
@@ -303,3 +319,94 @@ func (kl *Kubelet) shapingEnabled() bool {
 	}
 	return true
 }
+
+// syncNetworkUtil ensures the network utility rules are present on the host.
+// Network utilities include:
+// 1. In the nat table, a KUBE-MARK-DROP rule to mark connections for dropping.
+//    Marked connections are dropped by KUBE-FIREWALL, which is reached from
+//    the INPUT/OUTPUT chains in the filter table.
+// 2. In the nat table, a KUBE-MARK-MASQ rule to mark connections for SNAT.
+//    Marked connections are masqueraded by KUBE-POSTROUTING, which is reached
+//    from the POSTROUTING chain in the nat table.
+// Any failure is logged and aborts the current sync.
+func (kl *Kubelet) syncNetworkUtil() {
+	if kl.iptablesMasqueradeBit < 0 || kl.iptablesMasqueradeBit > 31 {
+		glog.Errorf("invalid iptables-masquerade-bit %v not in [0, 31]", kl.iptablesMasqueradeBit)
+		return
+	}
+
+	if kl.iptablesDropBit < 0 || kl.iptablesDropBit > 31 {
+		glog.Errorf("invalid iptables-drop-bit %v not in [0, 31]", kl.iptablesDropBit)
+		return
+	}
+
+	if kl.iptablesDropBit == kl.iptablesMasqueradeBit {
+		glog.Errorf("iptables-masquerade-bit %v and iptables-drop-bit %v must be different", kl.iptablesMasqueradeBit, kl.iptablesDropBit)
+		return
+	}
+
+	// Setup KUBE-MARK-DROP rules
+	dropMark := getIPTablesMark(kl.iptablesDropBit)
+	if _, err := kl.iptClient.EnsureChain(utiliptables.TableNAT, KubeMarkDropChain); err != nil {
+		glog.Errorf("Failed to ensure that %s chain %s exists: %v", utiliptables.TableNAT, KubeMarkDropChain, err)
+		return
+	}
+	if _, err := kl.iptClient.EnsureRule(utiliptables.Append, utiliptables.TableNAT, KubeMarkDropChain, "-j", "MARK", "--set-xmark", dropMark); err != nil {
+		glog.Errorf("Failed to ensure marking rule for %v: %v", KubeMarkDropChain, err)
+		return
+	}
+	if _, err := kl.iptClient.EnsureChain(utiliptables.TableFilter, KubeFirewallChain); err != nil {
+		glog.Errorf("Failed to ensure that %s chain %s exists: %v", utiliptables.TableFilter, KubeFirewallChain, err)
+		return
+	}
+	if _, err := kl.iptClient.EnsureRule(utiliptables.Append, utiliptables.TableFilter, KubeFirewallChain,
+		"-m", "comment", "--comment", "kubernetes firewall for dropping marked packets",
+		"-m", "mark", "--mark", dropMark,
+		"-j", "DROP"); err != nil {
+		glog.Errorf("Failed to ensure rule to drop packet marked by %v in %v chain %v: %v", KubeMarkDropChain, utiliptables.TableFilter, KubeFirewallChain, err)
+		return
+	}
+	if _, err := kl.iptClient.EnsureRule(utiliptables.Prepend, utiliptables.TableFilter, utiliptables.ChainOutput, "-j", string(KubeFirewallChain)); err != nil {
+		glog.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", utiliptables.TableFilter, utiliptables.ChainOutput, KubeFirewallChain, err)
+		return
+	}
+	if _, err := kl.iptClient.EnsureRule(utiliptables.Prepend, utiliptables.TableFilter, utiliptables.ChainInput, "-j", string(KubeFirewallChain)); err != nil {
+		glog.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", utiliptables.TableFilter, utiliptables.ChainInput, KubeFirewallChain, err)
+		return
+	}
+
+	// Setup KUBE-MARK-MASQ rules
+	masqueradeMark := getIPTablesMark(kl.iptablesMasqueradeBit)
+	if _, err := kl.iptClient.EnsureChain(utiliptables.TableNAT, KubeMarkMasqChain); err != nil {
+		glog.Errorf("Failed to ensure that %s chain %s exists: %v", utiliptables.TableNAT, KubeMarkMasqChain, err)
+		return
+	}
+	if _, err := kl.iptClient.EnsureChain(utiliptables.TableNAT, KubePostroutingChain); err != nil {
+		glog.Errorf("Failed to ensure that %s chain %s exists: %v", utiliptables.TableNAT, KubePostroutingChain, err)
+		return
+	}
+	if _, err := kl.iptClient.EnsureRule(utiliptables.Append, utiliptables.TableNAT, KubeMarkMasqChain, "-j", "MARK", "--set-xmark", masqueradeMark); err != nil {
+		glog.Errorf("Failed to ensure marking rule for %v: %v", KubeMarkMasqChain, err)
+		return
+	}
+	if _, err := kl.iptClient.EnsureRule(utiliptables.Prepend, utiliptables.TableNAT, utiliptables.ChainPostrouting,
+		"-m", "comment", "--comment", "kubernetes postrouting rules", "-j", string(KubePostroutingChain)); err != nil {
+		glog.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", utiliptables.TableNAT, utiliptables.ChainPostrouting, KubePostroutingChain, err)
+		return
+	}
+	if _, err := kl.iptClient.EnsureRule(utiliptables.Append, utiliptables.TableNAT, KubePostroutingChain,
+		"-m", "comment", "--comment", "kubernetes service traffic requiring SNAT",
+		"-m", "mark", "--mark", masqueradeMark, "-j", "MASQUERADE"); err != nil {
+		glog.Errorf("Failed to ensure SNAT rule for packets marked by %v in %v chain %v: %v", KubeMarkMasqChain, utiliptables.TableNAT, KubePostroutingChain, err)
+		return
+	}
+}
+
+// getIPTablesMark returns the fwmark in "value/mask" form for the given bit.
+// The value is computed as a uint32 so that bit 31 does not turn negative
+// on platforms where int is 32 bits wide. Callers are expected to pass a bit
+// within [0, 31].
+func getIPTablesMark(bit int) string {
+	value := uint32(1) << uint(bit)
+	return fmt.Sprintf("%#08x/%#08x", value, value)
+}
diff --git a/pkg/kubelet/kubelet_network_test.go b/pkg/kubelet/kubelet_network_test.go
index d546757a15a..dbb3fa1cba8 100644
--- a/pkg/kubelet/kubelet_network_test.go
+++ b/pkg/kubelet/kubelet_network_test.go
@@ -217,3 +217,34 @@ func TestCleanupBandwidthLimits(t *testing.T) {
 		}
 	}
 }
+
+// TestGetIPTablesMark checks the mark string for typical and boundary bits.
+func TestGetIPTablesMark(t *testing.T) {
+	tests := []struct {
+		bit    int
+		expect string
+	}{
+		{
+			0,
+			"0x00000001/0x00000001",
+		},
+		{
+			14,
+			"0x00004000/0x00004000",
+		},
+		{
+			15,
+			"0x00008000/0x00008000",
+		},
+		{
+			31,
+			"0x80000000/0x80000000",
+		},
+	}
+	for _, tc := range tests {
+		res := getIPTablesMark(tc.bit)
+		if res != tc.expect {
+			t.Errorf("getIPTablesMark output unexpected result: %v when input bit is %d. Expect result: %v", res, tc.bit, tc.expect)
+		}
+	}
+}