Merge pull request #44494 from wojtek-t/faster_kube_proxy
Automatic merge from submit-queue (batch tested with PRs 44722, 44704, 44681, 44494, 39732)

Don't rebuild the endpoints map in the iptables kube-proxy all the time.

@thockin - I think this PR should help with yours (https://github.com/kubernetes/kubernetes/pull/41030): besides the performance improvements, it clearly defines when an update because of endpoints is needed. If we do the same for services (I'm happy to help with that), I think it should be much simpler. But please take a look at whether it makes sense from your perspective too.
This commit is contained in commit 0a443ba4c7.
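The gist of the change, before the diff: instead of rebuilding the whole proxy-level endpoints map from every cached Endpoints object on each event, the proxier now records a per-object delta and folds only those deltas into the existing map at sync time; repeated events on the same object collapse into a single previous/current pair. A minimal, self-contained sketch of that accumulate-then-apply pattern (the change type and record helper here are illustrative stand-ins, not code from the commit):

package main

import "fmt"

// change keeps the state from before the first event and the state after
// the latest event; every intermediate state is deliberately dropped.
type change struct{ previous, current string }

func main() {
    changes := map[string]*change{}

    record := func(name, old, cur string) {
        c, ok := changes[name]
        if !ok {
            c = &change{previous: old} // remember only the pre-change state once
            changes[name] = c
        }
        c.current = cur // always overwrite with the newest state
    }

    // Three updates to the same object collapse into a single delta.
    record("default/app", "v1", "v2")
    record("default/app", "v2", "v3")
    record("default/app", "v3", "v4")

    c := changes["default/app"]
    fmt.Printf("previous=%s current=%s\n", c.previous, c.current) // previous=v1 current=v4
}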
@@ -51,6 +51,7 @@ go_test(
         "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
         "//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
         "//vendor/k8s.io/apimachinery/pkg/util/intstr:go_default_library",
+        "//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
     ],
 )
 
@@ -195,23 +195,43 @@ func newServiceInfo(serviceName proxy.ServicePortName, port *api.ServicePort, se
     return info
 }
 
-type endpointsMap map[types.NamespacedName]*api.Endpoints
+type endpointsChange struct {
+    previous *api.Endpoints
+    current  *api.Endpoints
+}
+type endpointsChangeMap map[types.NamespacedName]*endpointsChange
 type serviceMap map[types.NamespacedName]*api.Service
 type proxyServiceMap map[proxy.ServicePortName]*serviceInfo
-type proxyEndpointMap map[proxy.ServicePortName][]*endpointsInfo
+type proxyEndpointsMap map[proxy.ServicePortName][]*endpointsInfo
+
+func (em proxyEndpointsMap) merge(other proxyEndpointsMap) {
+    for svcPort := range other {
+        em[svcPort] = other[svcPort]
+    }
+}
+
+func (em proxyEndpointsMap) unmerge(other proxyEndpointsMap) {
+    for svcPort := range other {
+        delete(em, svcPort)
+    }
+}
 
 // Proxier is an iptables based proxy for connections between a localhost:lport
 // and services that provide the actual backends.
 type Proxier struct {
     mu           sync.Mutex // protects the following fields
     serviceMap   proxyServiceMap
-    endpointsMap proxyEndpointMap
+    endpointsMap proxyEndpointsMap
+    // endpointsChanges contains all changes to endpoints that happened since
+    // last syncProxyRules call. For a single object, changes are accumulated,
+    // i.e. previous is state from before all of them, current is state after
+    // applying all of those.
+    endpointsChanges endpointsChangeMap
     portsMap     map[localPort]closeable
-    // allServices and allEndpoints should never be modified by proxier - the
+    // allServices should never be modified by proxier - the
     // pointers are shared with higher layers of kube-proxy. They are guaranteed
     // to not be modified in the meantime, but also require to be not modified
     // by Proxier.
-    allEndpoints endpointsMap
     allServices  serviceMap
 
     // endpointsSynced and servicesSynced are set to true when corresponding
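A note on the merge/unmerge semantics introduced above: both operate at whole-key granularity. merge overwrites the endpoint slice for every service port present in other, and unmerge deletes those keys outright, so unmerge(oldMap) followed by merge(newMap) replaces exactly the entries contributed by one changed Endpoints object and leaves all other services untouched. A standalone sketch with a simplified element type (endpointsByPort and the plain string addresses are illustrative; the real map is keyed by proxy.ServicePortName and holds []*endpointsInfo):

package main

import "fmt"

type endpointsByPort map[string][]string

// merge installs other's entries, replacing any existing slice wholesale.
func (em endpointsByPort) merge(other endpointsByPort) {
    for k := range other {
        em[k] = other[k]
    }
}

// unmerge removes every key that other contributes.
func (em endpointsByPort) unmerge(other endpointsByPort) {
    for k := range other {
        delete(em, k)
    }
}

func main() {
    em := endpointsByPort{
        "default/web:http": {"10.0.0.1:80"},
        "default/dns:dns":  {"10.0.0.2:53"},
    }
    old := endpointsByPort{"default/web:http": {"10.0.0.1:80"}}
    updated := endpointsByPort{"default/web:http": {"10.0.0.3:80", "10.0.0.4:80"}}

    em.unmerge(old)   // drop the keys the old object contributed
    em.merge(updated) // install the new contributions
    fmt.Println(em["default/web:http"], em["default/dns:dns"])
    // [10.0.0.3:80 10.0.0.4:80] [10.0.0.2:53] - the dns entry is untouched
}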
@@ -331,9 +351,9 @@ func NewProxier(ipt utiliptables.Interface,
 
     return &Proxier{
         serviceMap:       make(proxyServiceMap),
-        endpointsMap:     make(proxyEndpointMap),
+        endpointsMap:     make(proxyEndpointsMap),
+        endpointsChanges: make(endpointsChangeMap),
         portsMap:         make(map[localPort]closeable),
-        allEndpoints:     make(endpointsMap),
         allServices:      make(serviceMap),
         syncPeriod:       syncPeriod,
         minSyncPeriod:    minSyncPeriod,
@@ -566,16 +586,32 @@ func (proxier *Proxier) OnEndpointsAdd(endpoints *api.Endpoints) {
 
     proxier.mu.Lock()
     defer proxier.mu.Unlock()
-    proxier.allEndpoints[namespacedName] = endpoints
+    change, exists := proxier.endpointsChanges[namespacedName]
+    if !exists {
+        change = &endpointsChange{}
+        change.previous = nil
+        proxier.endpointsChanges[namespacedName] = change
+    }
+    change.current = endpoints
 
     proxier.syncProxyRules(syncReasonEndpoints)
 }
 
-func (proxier *Proxier) OnEndpointsUpdate(_, endpoints *api.Endpoints) {
+func (proxier *Proxier) OnEndpointsUpdate(oldEndpoints, endpoints *api.Endpoints) {
     namespacedName := types.NamespacedName{Namespace: endpoints.Namespace, Name: endpoints.Name}
 
     proxier.mu.Lock()
     defer proxier.mu.Unlock()
-    proxier.allEndpoints[namespacedName] = endpoints
+    change, exists := proxier.endpointsChanges[namespacedName]
+    if !exists {
+        change = &endpointsChange{}
+        change.previous = oldEndpoints
+        proxier.endpointsChanges[namespacedName] = change
+    }
+    change.current = endpoints
 
     proxier.syncProxyRules(syncReasonEndpoints)
 }
 
@@ -584,7 +620,15 @@ func (proxier *Proxier) OnEndpointsDelete(endpoints *api.Endpoints) {
 
     proxier.mu.Lock()
     defer proxier.mu.Unlock()
-    delete(proxier.allEndpoints, namespacedName)
+    change, exists := proxier.endpointsChanges[namespacedName]
+    if !exists {
+        change = &endpointsChange{}
+        change.previous = endpoints
+        proxier.endpointsChanges[namespacedName] = change
+    }
+    change.current = nil
 
     proxier.syncProxyRules(syncReasonEndpoints)
 }
 
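All three handlers above share one shape: previous is captured only on the first change to an object between syncs, while current is overwritten on every event. A hypothetical sequence shows why that is the right accumulation rule: an object that is added and then deleted before syncProxyRules runs collapses to previous == nil and current == nil, which later compares as a no-op. A toy reproduction (the endpoints type and record helper are illustrative, not from the commit):

package main

import "fmt"

type endpoints struct{ addrs []string }

type endpointsChange struct {
    previous *endpoints
    current  *endpoints
}

// record mirrors the handler pattern: set previous once, overwrite current.
func record(changes map[string]*endpointsChange, name string, old, cur *endpoints) {
    change, exists := changes[name]
    if !exists {
        change = &endpointsChange{previous: old}
        changes[name] = change
    }
    change.current = cur
}

func main() {
    changes := map[string]*endpointsChange{}
    eps := &endpoints{addrs: []string{"10.0.0.1:80"}}

    record(changes, "default/app", nil, eps) // add: previous stays nil
    record(changes, "default/app", eps, nil) // delete before any sync ran

    c := changes["default/app"]
    fmt.Println(c.previous == nil, c.current == nil) // true true -> no-op at sync time
}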
@@ -595,45 +639,65 @@ func (proxier *Proxier) OnEndpointsSynced() {
     proxier.syncProxyRules(syncReasonEndpoints)
 }
 
-// Convert a slice of api.Endpoints objects into a map of service-port -> endpoints.
-func buildNewEndpointsMap(allEndpoints endpointsMap, curMap proxyEndpointMap, hostname string) (newMap proxyEndpointMap, hcEndpoints map[types.NamespacedName]int, staleSet map[endpointServicePair]bool) {
-
-    // return values
-    newMap = make(proxyEndpointMap)
-    hcEndpoints = make(map[types.NamespacedName]int)
+// <endpointsMap> is updated by this function (based on the given changes).
+// <changes> map is cleared after applying them.
+func updateEndpointsMap(
+    endpointsMap proxyEndpointsMap,
+    changes *endpointsChangeMap,
+    hostname string) (syncRequired bool, hcEndpoints map[types.NamespacedName]int, staleSet map[endpointServicePair]bool) {
+    syncRequired = false
     staleSet = make(map[endpointServicePair]bool)
-
-    // Update endpoints for services.
-    for _, endpoints := range allEndpoints {
-        accumulateEndpointsMap(endpoints, hostname, &newMap)
-    }
-    // Check stale connections against endpoints missing from the update.
-    // TODO: we should really only mark a connection stale if the proto was UDP
-    // and the (ip, port, proto) was removed from the endpoints.
-    for svcPort, epList := range curMap {
+    for _, change := range *changes {
+        oldEndpointsMap := endpointsToEndpointsMap(change.previous, hostname)
+        newEndpointsMap := endpointsToEndpointsMap(change.current, hostname)
+        if !reflect.DeepEqual(oldEndpointsMap, newEndpointsMap) {
+            endpointsMap.unmerge(oldEndpointsMap)
+            endpointsMap.merge(newEndpointsMap)
+            detectStaleConnections(oldEndpointsMap, newEndpointsMap, staleSet)
+            syncRequired = true
+        }
+    }
+    *changes = make(endpointsChangeMap)
+
+    if !utilfeature.DefaultFeatureGate.Enabled(features.ExternalTrafficLocalOnly) {
+        return
+    }
+
+    // TODO: If this will appear to be computationally expensive, consider
+    // computing this incrementally similarly to endpointsMap.
+    hcEndpoints = make(map[types.NamespacedName]int)
+    localIPs := getLocalIPs(endpointsMap)
+    for nsn, ips := range localIPs {
+        hcEndpoints[nsn] = len(ips)
+    }
+
+    return syncRequired, hcEndpoints, staleSet
+}
+
+// <staleEndpoints> are modified by this function with detected stale
+// connections.
+func detectStaleConnections(oldEndpointsMap, newEndpointsMap proxyEndpointsMap, staleEndpoints map[endpointServicePair]bool) {
+    for svcPort, epList := range oldEndpointsMap {
         for _, ep := range epList {
             stale := true
-            for i := range newMap[svcPort] {
-                if *newMap[svcPort][i] == *ep {
+            for i := range newEndpointsMap[svcPort] {
+                if *newEndpointsMap[svcPort][i] == *ep {
                     stale = false
                     break
                 }
             }
             if stale {
                 glog.V(4).Infof("Stale endpoint %v -> %v", svcPort, ep.endpoint)
-                staleSet[endpointServicePair{endpoint: ep.endpoint, servicePortName: svcPort}] = true
+                staleEndpoints[endpointServicePair{endpoint: ep.endpoint, servicePortName: svcPort}] = true
             }
         }
     }
-
-    if !utilfeature.DefaultFeatureGate.Enabled(features.ExternalTrafficLocalOnly) {
-        return
-    }
-
-    // accumulate local IPs per service, ignoring ports
-    localIPs := map[types.NamespacedName]sets.String{}
-    for svcPort := range newMap {
-        for _, ep := range newMap[svcPort] {
+}
+
+func getLocalIPs(endpointsMap proxyEndpointsMap) map[types.NamespacedName]sets.String {
+    localIPs := make(map[types.NamespacedName]sets.String)
+    for svcPort := range endpointsMap {
+        for _, ep := range endpointsMap[svcPort] {
             if ep.isLocal {
                 nsn := svcPort.NamespacedName
                 if localIPs[nsn] == nil {
@@ -644,25 +708,19 @@ func buildNewEndpointsMap(allEndpoints endpointsMap, curMap proxyEndpointMap, ho
             }
         }
     }
-    // produce a count per service
-    for nsn, ips := range localIPs {
-        hcEndpoints[nsn] = len(ips)
-    }
-
-    return newMap, hcEndpoints, staleSet
-}
+    return localIPs
+}
 
-// Gather information about all the endpoint state for a given api.Endpoints.
-// This can not report complete info on stale connections because it has limited
-// scope - it only knows one Endpoints, but sees the whole current map. That
-// cleanup has to be done above.
+// Translates single Endpoints object to proxyEndpointsMap.
+// This function is used for incremental updated of endpointsMap.
 //
 // NOTE: endpoints object should NOT be modified.
-//
-// TODO: this could be simplified:
-// - the test for this is overlapped by the test for buildNewEndpointsMap
-// - naming is poor and responsibilities are muddled
-func accumulateEndpointsMap(endpoints *api.Endpoints, hostname string, newEndpoints *proxyEndpointMap) {
+func endpointsToEndpointsMap(endpoints *api.Endpoints, hostname string) proxyEndpointsMap {
+    if endpoints == nil {
+        return nil
+    }
 
+    endpointsMap := make(proxyEndpointsMap)
     // We need to build a map of portname -> all ip:ports for that
     // portname. Explode Endpoints.Subsets[*] into this structure.
     for i := range endpoints.Subsets {
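For context on getLocalIPs: it feeds the health-check endpoint counts. When the ExternalTrafficLocalOnly feature gate is on, kube-proxy reports how many endpoint IPs of each service are local to this node, which is what lets external load balancers steer traffic away from nodes with zero local backends. A toy version of the per-service set accumulation and the counts derived from it (plain string keys stand in for types.NamespacedName; the real code checks endpointsInfo.isLocal and collects IPs while ignoring ports):

package main

import "fmt"

type endpoint struct {
    ip      string
    isLocal bool
}

// localIPCounts returns, per service, the number of distinct local endpoint
// IPs - the figure the health-check server would expose.
func localIPCounts(endpointsMap map[string][]endpoint) map[string]int {
    localIPs := map[string]map[string]bool{} // service -> set of local IPs
    for svc, eps := range endpointsMap {
        for _, ep := range eps {
            if ep.isLocal {
                if localIPs[svc] == nil {
                    localIPs[svc] = map[string]bool{}
                }
                localIPs[svc][ep.ip] = true
            }
        }
    }
    counts := map[string]int{}
    for svc, ips := range localIPs {
        counts[svc] = len(ips)
    }
    return counts
}

func main() {
    em := map[string][]endpoint{
        // the same IP on two ports counts once; non-local IPs are skipped
        "default/web": {{"10.0.0.1", true}, {"10.0.0.1", true}, {"10.0.0.2", false}},
    }
    fmt.Println(localIPCounts(em)) // map[default/web:1]
}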
@@ -687,17 +745,18 @@ func accumulateEndpointsMap(endpoints *api.Endpoints, hostname string, newEndpoi
                     endpoint: net.JoinHostPort(addr.IP, strconv.Itoa(int(port.Port))),
                     isLocal:  addr.NodeName != nil && *addr.NodeName == hostname,
                 }
-                (*newEndpoints)[svcPort] = append((*newEndpoints)[svcPort], epInfo)
+                endpointsMap[svcPort] = append(endpointsMap[svcPort], epInfo)
             }
             if glog.V(3) {
                 newEPList := []string{}
-                for _, ep := range (*newEndpoints)[svcPort] {
+                for _, ep := range endpointsMap[svcPort] {
                     newEPList = append(newEPList, ep.endpoint)
                 }
                 glog.Infof("Setting endpoints for %q to %+v", svcPort, newEPList)
             }
         }
     }
+    return endpointsMap
 }
 
 // portProtoHash takes the ServicePortName and protocol for a service
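detectStaleConnections, shown in an earlier hunk, flags every endpoint that existed for a service port in the old map but is absent from the new one; the sync path then clears connection state for those endpoints so established flows stop being steered to removed backends (the old TODO notes this should ideally apply only to UDP). A reduced sketch of the same quadratic per-port comparison over plain strings (staleEndpoints below is an illustrative stand-in for the real endpointServicePair set):

package main

import "fmt"

// staleEndpoints returns the endpoints present in old but absent from updated,
// per service port - these are the candidates for connection cleanup.
func staleEndpoints(old, updated map[string][]string) map[string]bool {
    stale := map[string]bool{}
    for svcPort, eps := range old {
        for _, ep := range eps {
            found := false
            for _, nep := range updated[svcPort] {
                if nep == ep {
                    found = true
                    break
                }
            }
            if !found {
                stale[svcPort+" -> "+ep] = true
            }
        }
    }
    return stale
}

func main() {
    old := map[string][]string{"svc:http": {"10.0.0.1:80", "10.0.0.2:80"}}
    updated := map[string][]string{"svc:http": {"10.0.0.2:80"}}
    fmt.Println(staleEndpoints(old, updated)) // map[svc:http -> 10.0.0.1:80:true]
}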
@@ -784,7 +843,7 @@ func (proxier *Proxier) syncProxyRules(reason syncReason) {
         glog.V(4).Infof("syncProxyRules(%s) took %v", reason, time.Since(start))
     }()
     // don't sync rules till we've received services and endpoints
-    if !proxier.endpointsSynced || proxier.allServices == nil {
+    if !proxier.endpointsSynced || !proxier.servicesSynced {
         glog.V(2).Info("Not syncing iptables until Services and Endpoints have been received from master")
         return
     }
@@ -798,11 +857,11 @@ func (proxier *Proxier) syncProxyRules(reason syncReason) {
         return
     }
 
-    // Figure out the new endpoints we need to activate.
-    newEndpoints, hcEndpoints, staleEndpoints := buildNewEndpointsMap(proxier.allEndpoints, proxier.endpointsMap, proxier.hostname)
+    endpointsSyncRequired, hcEndpoints, staleEndpoints := updateEndpointsMap(
+        proxier.endpointsMap, &proxier.endpointsChanges, proxier.hostname)
 
     // If this was called because of an endpoints update, but nothing actionable has changed, skip it.
-    if reason == syncReasonEndpoints && reflect.DeepEqual(newEndpoints, proxier.endpointsMap) {
+    if reason == syncReasonEndpoints && !endpointsSyncRequired {
         glog.V(3).Infof("Skipping iptables sync because nothing changed")
         return
     }
@@ -1157,7 +1216,7 @@ func (proxier *Proxier) syncProxyRules(reason syncReason) {
             // table doesn't currently have the same per-service structure that
             // the nat table does, so we just stick this into the kube-services
             // chain.
-            if len(newEndpoints[svcName]) == 0 {
+            if len(proxier.endpointsMap[svcName]) == 0 {
                 writeLine(filterRules,
                     "-A", string(kubeServicesChain),
                     "-m", "comment", "--comment", fmt.Sprintf(`"%s has no endpoints"`, svcNameString),
@@ -1170,7 +1229,7 @@ func (proxier *Proxier) syncProxyRules(reason syncReason) {
         }
 
         // If the service has no endpoints then reject packets.
-        if len(newEndpoints[svcName]) == 0 {
+        if len(proxier.endpointsMap[svcName]) == 0 {
             writeLine(filterRules,
                 "-A", string(kubeServicesChain),
                 "-m", "comment", "--comment", fmt.Sprintf(`"%s has no endpoints"`, svcNameString),
@@ -1189,7 +1248,7 @@ func (proxier *Proxier) syncProxyRules(reason syncReason) {
         // These two slices parallel each other - keep in sync
         endpoints := make([]*endpointsInfo, 0)
         endpointChains := make([]utiliptables.Chain, 0)
-        for _, ep := range newEndpoints[svcName] {
+        for _, ep := range proxier.endpointsMap[svcName] {
            endpoints = append(endpoints, ep)
            endpointChain := servicePortEndpointChainName(svcNameString, protocol, ep.endpoint)
            endpointChains = append(endpointChains, endpointChain)
@@ -1379,7 +1438,6 @@ func (proxier *Proxier) syncProxyRules(reason syncReason) {
 
     // Finish housekeeping.
     proxier.serviceMap = newServices
-    proxier.endpointsMap = newEndpoints
 
     // TODO: these and clearUDPConntrackForPort() could be made more consistent.
     utilproxy.DeleteServiceConnections(proxier.exec, staleServices.List())
File diff suppressed because it is too large.