Change iptables fwmark to use single configurable bit instead of whole mark space

This commit is contained in:
Matt Dupre
2016-02-08 11:12:09 +00:00
parent 1e7db4a174
commit 9925cddc11
13 changed files with 2420 additions and 2232 deletions

View File

@@ -71,6 +71,7 @@ func (s *ProxyServerConfig) AddFlags(fs *pflag.FlagSet) {
fs.Var(componentconfig.PortRangeVar{&s.PortRange}, "proxy-port-range", "Range of host ports (beginPort-endPort, inclusive) that may be consumed in order to proxy service traffic. If unspecified (0-0) then ports will be randomly chosen.") fs.Var(componentconfig.PortRangeVar{&s.PortRange}, "proxy-port-range", "Range of host ports (beginPort-endPort, inclusive) that may be consumed in order to proxy service traffic. If unspecified (0-0) then ports will be randomly chosen.")
fs.StringVar(&s.HostnameOverride, "hostname-override", s.HostnameOverride, "If non-empty, will use this string as identification instead of the actual hostname.") fs.StringVar(&s.HostnameOverride, "hostname-override", s.HostnameOverride, "If non-empty, will use this string as identification instead of the actual hostname.")
fs.Var(&s.Mode, "proxy-mode", "Which proxy mode to use: 'userspace' (older) or 'iptables' (faster). If blank, look at the Node object on the Kubernetes API and respect the '"+ExperimentalProxyModeAnnotation+"' annotation if provided. Otherwise use the best-available proxy (currently iptables). If the iptables proxy is selected, regardless of how, but the system's kernel or iptables versions are insufficient, this always falls back to the userspace proxy.") fs.Var(&s.Mode, "proxy-mode", "Which proxy mode to use: 'userspace' (older) or 'iptables' (faster). If blank, look at the Node object on the Kubernetes API and respect the '"+ExperimentalProxyModeAnnotation+"' annotation if provided. Otherwise use the best-available proxy (currently iptables). If the iptables proxy is selected, regardless of how, but the system's kernel or iptables versions are insufficient, this always falls back to the userspace proxy.")
fs.IntVar(s.IPTablesMasqueradeBit, "iptables-masquerade-bit", util.IntPtrDerefOr(s.IPTablesMasqueradeBit, 14), "If using the pure iptables proxy, the bit of the fwmark space to mark packets requiring SNAT with. Must be within the range [0, 31].")
fs.DurationVar(&s.IPTablesSyncPeriod.Duration, "iptables-sync-period", s.IPTablesSyncPeriod.Duration, "How often iptables rules are refreshed (e.g. '5s', '1m', '2h22m'). Must be greater than 0.") fs.DurationVar(&s.IPTablesSyncPeriod.Duration, "iptables-sync-period", s.IPTablesSyncPeriod.Duration, "How often iptables rules are refreshed (e.g. '5s', '1m', '2h22m'). Must be greater than 0.")
fs.DurationVar(&s.ConfigSyncPeriod, "config-sync-period", s.ConfigSyncPeriod, "How often configuration from the apiserver is refreshed. Must be greater than 0.") fs.DurationVar(&s.ConfigSyncPeriod, "config-sync-period", s.ConfigSyncPeriod, "How often configuration from the apiserver is refreshed. Must be greater than 0.")
fs.BoolVar(&s.MasqueradeAll, "masquerade-all", s.MasqueradeAll, "If using the pure iptables proxy, SNAT everything") fs.BoolVar(&s.MasqueradeAll, "masquerade-all", s.MasqueradeAll, "If using the pure iptables proxy, SNAT everything")

View File

@@ -191,7 +191,12 @@ func NewProxyServerDefault(config *options.ProxyServerConfig) (*ProxyServer, err
proxyMode := getProxyMode(string(config.Mode), client.Nodes(), hostname, iptInterface) proxyMode := getProxyMode(string(config.Mode), client.Nodes(), hostname, iptInterface)
if proxyMode == proxyModeIptables { if proxyMode == proxyModeIptables {
glog.V(0).Info("Using iptables Proxier.") glog.V(0).Info("Using iptables Proxier.")
proxierIptables, err := iptables.NewProxier(iptInterface, execer, config.IPTablesSyncPeriod.Duration, config.MasqueradeAll) if config.IPTablesMasqueradeBit == nil {
// IPTablesMasqueradeBit must be specified or defaulted.
return nil, fmt.Errorf("Unable to read IPTablesMasqueradeBit from config")
}
proxierIptables, err := iptables.NewProxier(iptInterface, execer, config.IPTablesSyncPeriod.Duration, config.MasqueradeAll, *config.IPTablesMasqueradeBit)
if err != nil { if err != nil {
glog.Fatalf("Unable to create proxier: %v", err) glog.Fatalf("Unable to create proxier: %v", err)
} }

View File

@@ -63,6 +63,7 @@ kube-proxy
--healthz-bind-address=127.0.0.1: The IP address for the health check server to serve on, defaulting to 127.0.0.1 (set to 0.0.0.0 for all interfaces) --healthz-bind-address=127.0.0.1: The IP address for the health check server to serve on, defaulting to 127.0.0.1 (set to 0.0.0.0 for all interfaces)
--healthz-port=10249: The port to bind the health check server. Use 0 to disable. --healthz-port=10249: The port to bind the health check server. Use 0 to disable.
--hostname-override="": If non-empty, will use this string as identification instead of the actual hostname. --hostname-override="": If non-empty, will use this string as identification instead of the actual hostname.
--iptables-masquerade-bit=14: If using the pure iptables proxy, the bit of the fwmark space to mark packets requiring SNAT with. Must be within the range [0, 31].
--iptables-sync-period=30s: How often iptables rules are refreshed (e.g. '5s', '1m', '2h22m'). Must be greater than 0. --iptables-sync-period=30s: How often iptables rules are refreshed (e.g. '5s', '1m', '2h22m'). Must be greater than 0.
--kube-api-burst=10: Burst to use while talking with kubernetes apiserver --kube-api-burst=10: Burst to use while talking with kubernetes apiserver
--kube-api-qps=5: QPS to use while talking with kubernetes apiserver --kube-api-qps=5: QPS to use while talking with kubernetes apiserver
@@ -76,7 +77,7 @@ kube-proxy
--udp-timeout=250ms: How long an idle UDP connection will be kept open (e.g. '250ms', '2s'). Must be greater than 0. Only applicable for proxy-mode=userspace --udp-timeout=250ms: How long an idle UDP connection will be kept open (e.g. '250ms', '2s'). Must be greater than 0. Only applicable for proxy-mode=userspace
``` ```
###### Auto generated by spf13/cobra on 1-Feb-2016 ###### Auto generated by spf13/cobra on 7-Feb-2016
<!-- BEGIN MUNGE: GENERATED_ANALYTICS --> <!-- BEGIN MUNGE: GENERATED_ANALYTICS -->

View File

@@ -513,11 +513,11 @@ then look at the logs again.
```console ```console
u@node$ iptables-save | grep hostnames u@node$ iptables-save | grep hostnames
-A KUBE-SEP-57KPRZ3JQVENLNBR -s 10.244.3.6/32 -m comment --comment "default/hostnames:" -j MARK --set-xmark 0x4d415351/0xffffffff -A KUBE-SEP-57KPRZ3JQVENLNBR -s 10.244.3.6/32 -m comment --comment "default/hostnames:" -j MARK --set-xmark 0x00004000/0x00004000
-A KUBE-SEP-57KPRZ3JQVENLNBR -p tcp -m comment --comment "default/hostnames:" -m tcp -j DNAT --to-destination 10.244.3.6:9376 -A KUBE-SEP-57KPRZ3JQVENLNBR -p tcp -m comment --comment "default/hostnames:" -m tcp -j DNAT --to-destination 10.244.3.6:9376
-A KUBE-SEP-WNBA2IHDGP2BOBGZ -s 10.244.1.7/32 -m comment --comment "default/hostnames:" -j MARK --set-xmark 0x4d415351/0xffffffff -A KUBE-SEP-WNBA2IHDGP2BOBGZ -s 10.244.1.7/32 -m comment --comment "default/hostnames:" -j MARK --set-xmark 0x00004000/0x00004000
-A KUBE-SEP-WNBA2IHDGP2BOBGZ -p tcp -m comment --comment "default/hostnames:" -m tcp -j DNAT --to-destination 10.244.1.7:9376 -A KUBE-SEP-WNBA2IHDGP2BOBGZ -p tcp -m comment --comment "default/hostnames:" -m tcp -j DNAT --to-destination 10.244.1.7:9376
-A KUBE-SEP-X3P2623AGDH6CDF3 -s 10.244.2.3/32 -m comment --comment "default/hostnames:" -j MARK --set-xmark 0x4d415351/0xffffffff -A KUBE-SEP-X3P2623AGDH6CDF3 -s 10.244.2.3/32 -m comment --comment "default/hostnames:" -j MARK --set-xmark 0x00004000/0x00004000
-A KUBE-SEP-X3P2623AGDH6CDF3 -p tcp -m comment --comment "default/hostnames:" -m tcp -j DNAT --to-destination 10.244.2.3:9376 -A KUBE-SEP-X3P2623AGDH6CDF3 -p tcp -m comment --comment "default/hostnames:" -m tcp -j DNAT --to-destination 10.244.2.3:9376
-A KUBE-SERVICES -d 10.0.1.175/32 -p tcp -m comment --comment "default/hostnames: cluster IP" -m tcp --dport 80 -j KUBE-SVC-NWV5X2332I4OT4T3 -A KUBE-SERVICES -d 10.0.1.175/32 -p tcp -m comment --comment "default/hostnames: cluster IP" -m tcp --dport 80 -j KUBE-SVC-NWV5X2332I4OT4T3
-A KUBE-SVC-NWV5X2332I4OT4T3 -m comment --comment "default/hostnames:" -m statistic --mode random --probability 0.33332999982 -j KUBE-SEP-WNBA2IHDGP2BOBGZ -A KUBE-SVC-NWV5X2332I4OT4T3 -m comment --comment "default/hostnames:" -m statistic --mode random --probability 0.33332999982 -j KUBE-SEP-WNBA2IHDGP2BOBGZ

View File

@@ -148,6 +148,7 @@ input-dirs
insecure-bind-address insecure-bind-address
insecure-port insecure-port
insecure-skip-tls-verify insecure-skip-tls-verify
iptables-masquerade-bit
iptables-sync-period iptables-sync-period
ir-data-source ir-data-source
ir-dbname ir-dbname

File diff suppressed because it is too large Load Diff

View File

@@ -31,6 +31,9 @@ type KubeProxyConfiguration struct {
HealthzPort int `json:"healthzPort"` HealthzPort int `json:"healthzPort"`
// hostnameOverride, if non-empty, will be used as the identity instead of the actual hostname. // hostnameOverride, if non-empty, will be used as the identity instead of the actual hostname.
HostnameOverride string `json:"hostnameOverride"` HostnameOverride string `json:"hostnameOverride"`
// iptablesMasqueradeBit is the bit of the iptables fwmark space to use for SNAT if using
// the pure iptables proxy mode. Values must be within the range [0, 31].
IPTablesMasqueradeBit *int `json:"iptablesMasqueradeBit"`
// iptablesSyncPeriod is the period that iptables rules are refreshed (e.g. '5s', '1m', // iptablesSyncPeriod is the period that iptables rules are refreshed (e.g. '5s', '1m',
// '2h22m'). Must be greater than 0. // '2h22m'). Must be greater than 0.
IPTablesSyncPeriod unversioned.Duration `json:"iptablesSyncPeriodSeconds"` IPTablesSyncPeriod unversioned.Duration `json:"iptablesSyncPeriodSeconds"`

View File

@@ -37,6 +37,12 @@ func autoConvert_componentconfig_KubeProxyConfiguration_To_v1alpha1_KubeProxyCon
out.HealthzBindAddress = in.HealthzBindAddress out.HealthzBindAddress = in.HealthzBindAddress
out.HealthzPort = int32(in.HealthzPort) out.HealthzPort = int32(in.HealthzPort)
out.HostnameOverride = in.HostnameOverride out.HostnameOverride = in.HostnameOverride
if in.IPTablesMasqueradeBit != nil {
out.IPTablesMasqueradeBit = new(int32)
*out.IPTablesMasqueradeBit = int32(*in.IPTablesMasqueradeBit)
} else {
out.IPTablesMasqueradeBit = nil
}
if err := s.Convert(&in.IPTablesSyncPeriod, &out.IPTablesSyncPeriod, 0); err != nil { if err := s.Convert(&in.IPTablesSyncPeriod, &out.IPTablesSyncPeriod, 0); err != nil {
return err return err
} }
@@ -127,6 +133,12 @@ func autoConvert_v1alpha1_KubeProxyConfiguration_To_componentconfig_KubeProxyCon
out.HealthzBindAddress = in.HealthzBindAddress out.HealthzBindAddress = in.HealthzBindAddress
out.HealthzPort = int(in.HealthzPort) out.HealthzPort = int(in.HealthzPort)
out.HostnameOverride = in.HostnameOverride out.HostnameOverride = in.HostnameOverride
if in.IPTablesMasqueradeBit != nil {
out.IPTablesMasqueradeBit = new(int)
*out.IPTablesMasqueradeBit = int(*in.IPTablesMasqueradeBit)
} else {
out.IPTablesMasqueradeBit = nil
}
if err := s.Convert(&in.IPTablesSyncPeriod, &out.IPTablesSyncPeriod, 0); err != nil { if err := s.Convert(&in.IPTablesSyncPeriod, &out.IPTablesSyncPeriod, 0); err != nil {
return err return err
} }

View File

@@ -43,6 +43,12 @@ func deepCopy_v1alpha1_KubeProxyConfiguration(in KubeProxyConfiguration, out *Ku
out.HealthzBindAddress = in.HealthzBindAddress out.HealthzBindAddress = in.HealthzBindAddress
out.HealthzPort = in.HealthzPort out.HealthzPort = in.HealthzPort
out.HostnameOverride = in.HostnameOverride out.HostnameOverride = in.HostnameOverride
if in.IPTablesMasqueradeBit != nil {
out.IPTablesMasqueradeBit = new(int32)
*out.IPTablesMasqueradeBit = *in.IPTablesMasqueradeBit
} else {
out.IPTablesMasqueradeBit = nil
}
if err := deepCopy_unversioned_Duration(in.IPTablesSyncPeriod, &out.IPTablesSyncPeriod, c); err != nil { if err := deepCopy_unversioned_Duration(in.IPTablesSyncPeriod, &out.IPTablesSyncPeriod, c); err != nil {
return err return err
} }

View File

@@ -55,6 +55,10 @@ func addDefaultingFuncs(scheme *runtime.Scheme) {
if obj.ConntrackMax == 0 { if obj.ConntrackMax == 0 {
obj.ConntrackMax = 256 * 1024 // 4x default (64k) obj.ConntrackMax = 256 * 1024 // 4x default (64k)
} }
if obj.IPTablesMasqueradeBit == nil {
temp := int32(14)
obj.IPTablesMasqueradeBit = &temp
}
if obj.ConntrackTCPEstablishedTimeout == zero { if obj.ConntrackTCPEstablishedTimeout == zero {
obj.ConntrackTCPEstablishedTimeout = unversioned.Duration{Duration: 24 * time.Hour} // 1 day (1/5 default) obj.ConntrackTCPEstablishedTimeout = unversioned.Duration{Duration: 24 * time.Hour} // 1 day (1/5 default)
} }

File diff suppressed because it is too large Load Diff

View File

@@ -31,6 +31,9 @@ type KubeProxyConfiguration struct {
HealthzPort int32 `json:"healthzPort"` HealthzPort int32 `json:"healthzPort"`
// hostnameOverride, if non-empty, will be used as the identity instead of the actual hostname. // hostnameOverride, if non-empty, will be used as the identity instead of the actual hostname.
HostnameOverride string `json:"hostnameOverride"` HostnameOverride string `json:"hostnameOverride"`
// iptablesMasqueradeBit is the bit of the iptables fwmark space to use for SNAT if using
// the pure iptables proxy mode. Values must be within the range [0, 31].
IPTablesMasqueradeBit *int32 `json:"iptablesMasqueradeBit"`
// iptablesSyncPeriod is the period that iptables rules are refreshed (e.g. '5s', '1m', // iptablesSyncPeriod is the period that iptables rules are refreshed (e.g. '5s', '1m',
// '2h22m'). Must be greater than 0. // '2h22m'). Must be greater than 0.
IPTablesSyncPeriod unversioned.Duration `json:"iptablesSyncPeriodSeconds"` IPTablesSyncPeriod unversioned.Duration `json:"iptablesSyncPeriodSeconds"`

View File

@@ -66,7 +66,8 @@ const kubePostroutingChain utiliptables.Chain = "KUBE-POSTROUTING"
const kubeMarkMasqChain utiliptables.Chain = "KUBE-MARK-MASQ" const kubeMarkMasqChain utiliptables.Chain = "KUBE-MARK-MASQ"
// the mark we apply to traffic needing SNAT // the mark we apply to traffic needing SNAT
const iptablesMasqueradeMark = "0x4d415351" // TODO(thockin): Remove this for v1.3 or v1.4.
const oldIptablesMasqueradeMark = "0x4d415351"
// IptablesVersioner can query the current iptables version. // IptablesVersioner can query the current iptables version.
type IptablesVersioner interface { type IptablesVersioner interface {
@@ -143,9 +144,10 @@ type Proxier struct {
haveReceivedEndpointsUpdate bool // true once we've seen an OnEndpointsUpdate event haveReceivedEndpointsUpdate bool // true once we've seen an OnEndpointsUpdate event
// These are effectively const and do not need the mutex to be held. // These are effectively const and do not need the mutex to be held.
syncPeriod time.Duration syncPeriod time.Duration
iptables utiliptables.Interface iptables utiliptables.Interface
masqueradeAll bool masqueradeAll bool
masqueradeMark string
} }
type localPort struct { type localPort struct {
@@ -171,7 +173,7 @@ var _ proxy.ProxyProvider = &Proxier{}
// An error will be returned if iptables fails to update or acquire the initial lock. // An error will be returned if iptables fails to update or acquire the initial lock.
// Once a proxier is created, it will keep iptables up to date in the background and // Once a proxier is created, it will keep iptables up to date in the background and
// will not terminate if a particular iptables call fails. // will not terminate if a particular iptables call fails.
func NewProxier(ipt utiliptables.Interface, exec utilexec.Interface, syncPeriod time.Duration, masqueradeAll bool) (*Proxier, error) { func NewProxier(ipt utiliptables.Interface, exec utilexec.Interface, syncPeriod time.Duration, masqueradeAll bool, masqueradeBit int) (*Proxier, error) {
// Set the route_localnet sysctl we need for // Set the route_localnet sysctl we need for
if err := utilsysctl.SetSysctl(sysctlRouteLocalnet, 1); err != nil { if err := utilsysctl.SetSysctl(sysctlRouteLocalnet, 1); err != nil {
return nil, fmt.Errorf("can't set sysctl %s: %v", sysctlRouteLocalnet, err) return nil, fmt.Errorf("can't set sysctl %s: %v", sysctlRouteLocalnet, err)
@@ -185,13 +187,21 @@ func NewProxier(ipt utiliptables.Interface, exec utilexec.Interface, syncPeriod
glog.Warningf("can't set sysctl %s: %v", sysctlBridgeCallIptables, err) glog.Warningf("can't set sysctl %s: %v", sysctlBridgeCallIptables, err)
} }
// Generate the masquerade mark to use for SNAT rules.
if masqueradeBit < 0 || masqueradeBit > 31 {
return nil, fmt.Errorf("invalid iptables-masquerade-bit %v not in [0, 31]", masqueradeBit)
}
masqueradeValue := 1 << uint(masqueradeBit)
masqueradeMark := fmt.Sprintf("%#08x/%#08x", masqueradeValue, masqueradeValue)
return &Proxier{ return &Proxier{
serviceMap: make(map[proxy.ServicePortName]*serviceInfo), serviceMap: make(map[proxy.ServicePortName]*serviceInfo),
endpointsMap: make(map[proxy.ServicePortName][]string), endpointsMap: make(map[proxy.ServicePortName][]string),
portsMap: make(map[localPort]closeable), portsMap: make(map[localPort]closeable),
syncPeriod: syncPeriod, syncPeriod: syncPeriod,
iptables: ipt, iptables: ipt,
masqueradeAll: masqueradeAll, masqueradeAll: masqueradeAll,
masqueradeMark: masqueradeMark,
}, nil }, nil
} }
@@ -283,7 +293,7 @@ func CleanupLeftovers(ipt utiliptables.Interface) (encounteredError bool) {
// TODO(thockin): Remove this for v1.3 or v1.4. // TODO(thockin): Remove this for v1.3 or v1.4.
args = []string{ args = []string{
"-m", "comment", "--comment", "kubernetes service traffic requiring SNAT", "-m", "comment", "--comment", "kubernetes service traffic requiring SNAT",
"-m", "mark", "--mark", iptablesMasqueradeMark, "-m", "mark", "--mark", oldIptablesMasqueradeMark,
"-j", "MASQUERADE", "-j", "MASQUERADE",
} }
if err := ipt.DeleteRule(utiliptables.TableNAT, utiliptables.ChainPostrouting, args...); err != nil { if err := ipt.DeleteRule(utiliptables.TableNAT, utiliptables.ChainPostrouting, args...); err != nil {
@@ -577,7 +587,7 @@ func (proxier *Proxier) syncProxyRules() {
comment := "kubernetes postrouting rules" comment := "kubernetes postrouting rules"
args := []string{"-m", "comment", "--comment", comment, "-j", string(kubePostroutingChain)} args := []string{"-m", "comment", "--comment", comment, "-j", string(kubePostroutingChain)}
if _, err := proxier.iptables.EnsureRule(utiliptables.Prepend, utiliptables.TableNAT, utiliptables.ChainPostrouting, args...); err != nil { if _, err := proxier.iptables.EnsureRule(utiliptables.Prepend, utiliptables.TableNAT, utiliptables.ChainPostrouting, args...); err != nil {
glog.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", utiliptables.TableNAT, utiliptables.ChainPostrouting, kubeServicesChain, err) glog.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", utiliptables.TableNAT, utiliptables.ChainPostrouting, kubePostroutingChain, err)
return return
} }
} }
@@ -643,7 +653,7 @@ func (proxier *Proxier) syncProxyRules() {
writeLine(natRules, []string{ writeLine(natRules, []string{
"-A", string(kubePostroutingChain), "-A", string(kubePostroutingChain),
"-m", "comment", "--comment", `"kubernetes service traffic requiring SNAT"`, "-m", "comment", "--comment", `"kubernetes service traffic requiring SNAT"`,
"-m", "mark", "--mark", iptablesMasqueradeMark, "-m", "mark", "--mark", proxier.masqueradeMark,
"-j", "MASQUERADE", "-j", "MASQUERADE",
}...) }...)
@@ -652,7 +662,7 @@ func (proxier *Proxier) syncProxyRules() {
// value should ever change. // value should ever change.
writeLine(natRules, []string{ writeLine(natRules, []string{
"-A", string(kubeMarkMasqChain), "-A", string(kubeMarkMasqChain),
"-j", "MARK", "--set-xmark", fmt.Sprintf("%s/0xffffffff", iptablesMasqueradeMark), "-j", "MARK", "--set-xmark", proxier.masqueradeMark,
}...) }...)
// Accumulate NAT chains to keep. // Accumulate NAT chains to keep.
@@ -931,7 +941,7 @@ func (proxier *Proxier) syncProxyRules() {
// TODO(thockin): Remove this for v1.3 or v1.4. // TODO(thockin): Remove this for v1.3 or v1.4.
args := []string{ args := []string{
"-m", "comment", "--comment", "kubernetes service traffic requiring SNAT", "-m", "comment", "--comment", "kubernetes service traffic requiring SNAT",
"-m", "mark", "--mark", iptablesMasqueradeMark, "-m", "mark", "--mark", oldIptablesMasqueradeMark,
"-j", "MASQUERADE", "-j", "MASQUERADE",
} }
if err := proxier.iptables.DeleteRule(utiliptables.TableNAT, utiliptables.ChainPostrouting, args...); err != nil { if err := proxier.iptables.DeleteRule(utiliptables.TableNAT, utiliptables.ChainPostrouting, args...); err != nil {