diff --git a/cluster/saltbase/salt/_states/container_bridge.py b/cluster/saltbase/salt/_states/container_bridge.py index 63158926c25..ed64d71dd10 100644 --- a/cluster/saltbase/salt/_states/container_bridge.py +++ b/cluster/saltbase/salt/_states/container_bridge.py @@ -34,11 +34,22 @@ def ensure(name, cidr, mtu=1460): ''' ret = {'name': name, 'changes': {}, 'result': False, 'comment': ''} - iptables_rule = { - 'table': 'nat', - 'chain': 'POSTROUTING', - 'rule': '-o eth0 -j MASQUERADE \! -d 10.0.0.0/8' - } + # This is a little hacky. I should probably import a real library for this + # but this'll work for now. + try: + cidr_network = ipaddr.IPNetwork(cidr, strict=True) + except Exception: + raise salt.exceptions.SaltInvocationError( + 'Invalid CIDR \'{0}\''.format(cidr)) + + if cidr_network.version == 4: + iptables_rule = { + 'table': 'nat', + 'chain': 'POSTROUTING', + 'rule': '-o eth0 -j MASQUERADE \! -d 10.0.0.0/8' + } + else: + iptables_rule = None def bridge_exists(name): 'Determine if a bridge exists already.' @@ -90,20 +101,15 @@ def ensure(name, cidr, mtu=1460): ret['details'] = {} # This module function is strange and returns True if the rule exists. # If not, it returns a string with the error from the call to iptables. - ret['iptables_rule_exists'] = \ - __salt__['iptables.check'](**iptables_rule) == True + if iptables_rule: + ret['iptables_rule_exists'] = \ + __salt__['iptables.check'](**iptables_rule) == True + else: + ret['iptables_rule_exists'] = True return ret - # This is a little hacky. I should probably import a real library for this - # but this'll work for now. - try: - cidr_network = ipaddr.IPv4Network(cidr, strict=True) - except Exception: - raise salt.exceptions.SaltInvocationError( - 'Invalid CIDR \'{0}\''.format(cidr)) - desired_network = '{0}/{1}'.format( - str(ipaddr.IPv4Address(cidr_network._ip + 1)), + str(ipaddr.IPAddress(cidr_network._ip + 1)), str(cidr_network.prefixlen)) current_state = get_current_state() @@ -147,7 +153,7 @@ def ensure(name, cidr, mtu=1460): __salt__['cmd.run']( 'ip link set dev {0} up'.format(name)) new_state = get_current_state() - if not new_state['iptables_rule_exists']: + if iptables_rule and not new_state['iptables_rule_exists']: __salt__['iptables.append'](**iptables_rule) new_state = get_current_state() diff --git a/cluster/saltbase/salt/docker/docker-defaults b/cluster/saltbase/salt/docker/docker-defaults index 1317593c272..115a0bb2475 100644 --- a/cluster/saltbase/salt/docker/docker-defaults +++ b/cluster/saltbase/salt/docker/docker-defaults @@ -1 +1,6 @@ -DOCKER_OPTS="--bridge cbr0 --iptables=false --ip-masq=false -r=false" +{% if grains.docker_opts is defined %} + {% set docker_opts = grains.docker_opts %} +{% else %} + {% set docker_opts = "" %} +{% endif %} +DOCKER_OPTS="{{docker_opts}} --bridge cbr0 --iptables=false --ip-masq=false -r=false" diff --git a/cmd/proxy/proxy.go b/cmd/proxy/proxy.go index b9c3fdb1d9c..2d0e6c9734a 100644 --- a/cmd/proxy/proxy.go +++ b/cmd/proxy/proxy.go @@ -100,8 +100,12 @@ func main() { } } + protocol := iptables.ProtocolIpv4 + if net.IP(bindAddress).To4() == nil { + protocol = iptables.ProtocolIpv6 + } loadBalancer := proxy.NewLoadBalancerRR() - proxier := proxy.NewProxier(loadBalancer, net.IP(bindAddress), iptables.New(exec.New())) + proxier := proxy.NewProxier(loadBalancer, net.IP(bindAddress), iptables.New(exec.New(), protocol)) // Wire proxier to handle changes to services serviceConfig.RegisterHandler(proxier) // And wire loadBalancer to handle changes to endpoints to services diff --git a/pkg/proxy/proxier.go b/pkg/proxy/proxier.go index cad3d3af465..9ee86159246 100644 --- a/pkg/proxy/proxier.go +++ b/pkg/proxy/proxier.go @@ -532,8 +532,11 @@ func iptablesFlush(ipt iptables.Interface) error { } // Used below. -var zeroIP = net.ParseIP("0.0.0.0") -var localhostIP = net.ParseIP("127.0.0.1") +var zeroIPv4 = net.ParseIP("0.0.0.0") +var localhostIPv4 = net.ParseIP("127.0.0.1") + +var zeroIPv6 = net.ParseIP("::0") +var localhostIPv6 = net.ParseIP("::1") // Build a slice of iptables args for a portal rule. func iptablesPortalArgs(destIP net.IP, destPort int, protocol api.Protocol, proxyIP net.IP, proxyPort int, service string) []string { @@ -561,10 +564,13 @@ func iptablesPortalArgs(destIP net.IP, destPort int, protocol api.Protocol, prox // Unfortunately, I don't know of any way to listen on some (N > 1) // interfaces but not ALL interfaces, short of doing it manually, and // this is simpler than that. - if proxyIP.Equal(zeroIP) || proxyIP.Equal(localhostIP) { + if proxyIP.Equal(zeroIPv4) || proxyIP.Equal(zeroIPv6) || + proxyIP.Equal(localhostIPv4) || proxyIP.Equal(localhostIPv6) { + // TODO: Can we REDIRECT with IPv6? args = append(args, "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", proxyPort)) } else { - args = append(args, "-j", "DNAT", "--to-destination", fmt.Sprintf("%s:%d", proxyIP.String(), proxyPort)) + // TODO: Can we DNAT with IPv6? + args = append(args, "-j", "DNAT", "--to-destination", net.JoinHostPort(proxyIP.String(), strconv.Itoa(proxyPort))) } return args } diff --git a/pkg/proxy/proxier_test.go b/pkg/proxy/proxier_test.go index 2c845e42236..d5c59387317 100644 --- a/pkg/proxy/proxier_test.go +++ b/pkg/proxy/proxier_test.go @@ -94,6 +94,10 @@ func (fake *fakeIptables) DeleteRule(table iptables.Table, chain iptables.Chain, return nil } +func (fake *fakeIptables) IsIpv6() bool { + return false +} + var tcpServerPort string var udpServerPort string diff --git a/pkg/registry/service/ip_allocator.go b/pkg/registry/service/ip_allocator.go index 947c0a64534..6fb288c1b1f 100644 --- a/pkg/registry/service/ip_allocator.go +++ b/pkg/registry/service/ip_allocator.go @@ -18,17 +18,67 @@ package service import ( "fmt" + math_rand "math/rand" "net" "sync" + "time" "github.com/golang/glog" ) type ipAllocator struct { - subnet *net.IPNet - // TODO: This could be smarter, but for now a bitmap will suffice. lock sync.Mutex // protects 'used' - used []byte // a bitmap of allocated IPs + + subnet net.IPNet + ipSpaceSize int64 // Size of subnet, or -1 if it does not fit in an int64 + used ipAddrSet + randomAttempts int + + random *math_rand.Rand +} + +type ipAddrSet struct { + // We are pretty severely restricted in the types of things we can use as a key + ips map[string]bool +} + +func (s *ipAddrSet) Init() { + s.ips = map[string]bool{} +} + +// Adds to the ipAddrSet; returns true iff it was added (was not already in set) +func (s *ipAddrSet) Size() int { + return len(s.ips) +} + +func (s *ipAddrSet) Contains(ip net.IP) bool { + key := ip.String() + exists := s.ips[key] + return exists +} + +// Adds to the ipAddrSet; returns true iff it was added (was not already in set) +func (s *ipAddrSet) Add(ip net.IP) bool { + key := ip.String() + exists := s.ips[key] + if exists { + return false + } + s.ips[key] = true + return true +} + +// Removes from the ipAddrSet; returns true iff it was removed (was already in set) +func (s *ipAddrSet) Remove(ip net.IP) bool { + key := ip.String() + exists := s.ips[key] + if !exists { + return false + } + delete(s.ips, key) + // TODO: We probably should add this IP to an 'embargo' list for a limited amount of time + + return true } // The smallest number of IPs we accept. @@ -40,20 +90,42 @@ func newIPAllocator(subnet *net.IPNet) *ipAllocator { return nil } + seed := time.Now().UTC().UnixNano() + r := math_rand.New(math_rand.NewSource(seed)) + + ipSpaceSize := int64(-1) ones, bits := subnet.Mask.Size() - // TODO: some settings with IPv6 address could cause this to take - // an excessive amount of memory. - numIps := 1 << uint(bits-ones) - if numIps < minIPSpace { - glog.Errorf("IPAllocator requires at least %d IPs", minIPSpace) - return nil + if (bits - ones) < 63 { + ipSpaceSize = int64(1) << uint(bits-ones) + + if ipSpaceSize < minIPSpace { + glog.Errorf("IPAllocator requires at least %d IPs", minIPSpace) + return nil + } } + ipa := &ipAllocator{ - subnet: subnet, - used: make([]byte, numIps/8), + subnet: *subnet, + ipSpaceSize: ipSpaceSize, + random: r, + randomAttempts: 1000, } - ipa.used[0] = 0x01 // block the network addr - ipa.used[(numIps/8)-1] = 0x80 // block the broadcast addr + ipa.used.Init() + + zero := make(net.IP, len(subnet.IP), len(subnet.IP)) + for i := 0; i < len(subnet.IP); i++ { + zero[i] = subnet.IP[i] & subnet.Mask[i] + } + ipa.used.Add(zero) // block the zero addr + + ipa.used.Add(subnet.IP) // block the network addr + + broadcast := make(net.IP, len(subnet.IP), len(subnet.IP)) + for i := 0; i < len(subnet.IP); i++ { + broadcast[i] = subnet.IP[i] | ^subnet.Mask[i] + } + ipa.used.Add(broadcast) // block the broadcast addr + return ipa } @@ -65,13 +137,11 @@ func (ipa *ipAllocator) Allocate(ip net.IP) error { if !ipa.subnet.Contains(ip) { return fmt.Errorf("IP %s does not fall within subnet %s", ip, ipa.subnet) } - offset := ipSub(ip, ipa.subnet.IP) - i := offset / 8 - m := byte(1 << byte(offset%8)) - if ipa.used[i]&m != 0 { + + if !ipa.used.Add(ip) { return fmt.Errorf("IP %s is already allocated", ip) } - ipa.used[i] |= m + return nil } @@ -80,33 +150,48 @@ func (ipa *ipAllocator) AllocateNext() (net.IP, error) { ipa.lock.Lock() defer ipa.lock.Unlock() - for i := range ipa.used { - if ipa.used[i] != 0xff { - freeMask := ^ipa.used[i] - nextBit, err := ffs(freeMask) - if err != nil { - // If this happens, something really weird is going on. - glog.Errorf("ffs(%#x) had an unexpected error: %s", freeMask, err) - return nil, err - } - ipa.used[i] |= 1 << nextBit - offset := (i * 8) + int(nextBit) - ip := ipAdd(ipa.subnet.IP, offset) + if int64(ipa.used.Size()) == ipa.ipSpaceSize { + return nil, fmt.Errorf("can't find a free IP in %s", ipa.subnet) + } + + // Try randomly first + for i := 0; i < ipa.randomAttempts; i++ { + ip := ipa.createRandomIp() + + if ipa.used.Add(ip) { return ip, nil } } + + // If that doesn't work, try a linear search + ip := copyIP(ipa.subnet.IP) + for ipa.subnet.Contains(ip) { + ip = ipAdd(ip, 1) + if ipa.used.Add(ip) { + return ip, nil + } + } + return nil, fmt.Errorf("can't find a free IP in %s", ipa.subnet) } -// This is a really dumb implementation of find-first-set-bit. -func ffs(val byte) (uint, error) { - if val == 0 { - return 0, fmt.Errorf("Can't find-first-set on 0") +func (ipa *ipAllocator) createRandomIp() net.IP { + ip := ipa.subnet.IP + mask := ipa.subnet.Mask + n := len(ip) + + randomIp := make(net.IP, n, n) + + for i := 0; i < n; i++ { + if mask[i] == 0xff { + randomIp[i] = ipa.subnet.IP[i] + } else { + b := byte(ipa.random.Intn(256)) + randomIp[i] = (ipa.subnet.IP[i] & mask[i]) | (b &^ mask[i]) + } } - i := uint(0) - for ; i < 8 && (val&(1<