Merge pull request #128886 from npinaeva/kube-proxy-debug-log

kube-proxy,nftables: add debug logging for failed transactions.
This commit is contained in:
Kubernetes Prow Robot 2024-12-14 08:14:41 +01:00 committed by GitHub
commit 30ef6110a1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 28 additions and 1 deletions

View File

@ -28,8 +28,10 @@ import (
"crypto/sha256" "crypto/sha256"
"encoding/base32" "encoding/base32"
"fmt" "fmt"
"golang.org/x/time/rate"
"net" "net"
"os" "os"
"os/exec"
"reflect" "reflect"
"strconv" "strconv"
"strings" "strings"
@ -194,6 +196,7 @@ type Proxier struct {
serviceCIDRs string serviceCIDRs string
logger klog.Logger logger klog.Logger
logRateLimiter *rate.Limiter
clusterIPs *nftElementStorage clusterIPs *nftElementStorage
serviceIPs *nftElementStorage serviceIPs *nftElementStorage
@ -266,6 +269,7 @@ func NewProxier(ctx context.Context,
networkInterfacer: proxyutil.RealNetwork{}, networkInterfacer: proxyutil.RealNetwork{},
staleChains: make(map[string]time.Time), staleChains: make(map[string]time.Time),
logger: logger, logger: logger,
logRateLimiter: rate.NewLimiter(rate.Every(24*time.Hour), 1),
clusterIPs: newNFTElementStorage("set", clusterIPsSet), clusterIPs: newNFTElementStorage("set", clusterIPsSet),
serviceIPs: newNFTElementStorage("map", serviceIPsMap), serviceIPs: newNFTElementStorage("map", serviceIPsMap),
firewallIPs: newNFTElementStorage("map", firewallIPsMap), firewallIPs: newNFTElementStorage("map", firewallIPsMap),
@ -1136,6 +1140,21 @@ func (s *nftElementStorage) cleanupLeftoverKeys(tx *knftables.Transaction) {
s.resetLeftoverKeys() s.resetLeftoverKeys()
} }
// logFailure logs the given (failed) transaction followed by a dump of the
// whole kube-proxy nftables table. It is a debugging aid: nothing is logged
// unless klog verbosity 4 is enabled and proxier.logRateLimiter currently
// allows an event.
func (proxier *Proxier) logFailure(tx *knftables.Transaction) {
	klogV4 := klog.V(4)
	// Verbosity is checked first so that a rate-limiter token is only consumed
	// when the messages will actually be emitted.
	if !klogV4.Enabled() || !proxier.logRateLimiter.Allow() {
		return
	}
	klogV4.InfoS("Failed transaction", "transaction", tx.String())

	// knftables doesn't support listing the full table yet, this is a workaround.
	out, err := exec.Command("nft", "list", "table", kubeProxyTable).Output()
	if err != nil {
		// Output() stores the command's stderr in ExitError.Stderr when the
		// command exits unsuccessfully; without it the log would only say
		// something like "exit status 1" and hide nft's real diagnostic.
		var stderr string
		if exitErr, ok := err.(*exec.ExitError); ok {
			stderr = string(exitErr.Stderr)
		}
		klogV4.InfoS("Listing full table failed", "error", err, "stderr", stderr)
		return
	}
	klogV4.InfoS("Listing full table", "result", string(out))
}
// This is where all of the nftables calls happen. // This is where all of the nftables calls happen.
// This assumes proxier.mu is NOT held // This assumes proxier.mu is NOT held
func (proxier *Proxier) syncProxyRules() { func (proxier *Proxier) syncProxyRules() {
@ -1209,6 +1228,10 @@ func (proxier *Proxier) syncProxyRules() {
// (with a later timestamp) at the end of the sync. // (with a later timestamp) at the end of the sync.
proxier.logger.Error(err, "Unable to delete stale chains; will retry later") proxier.logger.Error(err, "Unable to delete stale chains; will retry later")
metrics.NFTablesCleanupFailuresTotal.WithLabelValues(string(proxier.ipFamily)).Inc() metrics.NFTablesCleanupFailuresTotal.WithLabelValues(string(proxier.ipFamily)).Inc()
tryPartialSync = false
// Log failed transaction and list full kube-proxy table.
proxier.logFailure(tx)
} }
} }
} }
@ -1808,6 +1831,8 @@ func (proxier *Proxier) syncProxyRules() {
// staleChains is now incorrect since we didn't actually flush the // staleChains is now incorrect since we didn't actually flush the
// chains in it. We can recompute it next time. // chains in it. We can recompute it next time.
clear(proxier.staleChains) clear(proxier.staleChains)
// Log failed transaction and list full kube-proxy table.
proxier.logFailure(tx)
return return
} }
success = true success = true

View File

@ -21,6 +21,7 @@ package nftables
import ( import (
"fmt" "fmt"
"golang.org/x/time/rate"
"net" "net"
"reflect" "reflect"
"testing" "testing"
@ -132,6 +133,7 @@ func NewFakeProxier(ipFamily v1.IPFamily) (*knftables.Fake, *Proxier) {
networkInterfacer: networkInterfacer, networkInterfacer: networkInterfacer,
staleChains: make(map[string]time.Time), staleChains: make(map[string]time.Time),
serviceCIDRs: serviceCIDRs, serviceCIDRs: serviceCIDRs,
logRateLimiter: rate.NewLimiter(rate.Every(24*time.Hour), 1),
clusterIPs: newNFTElementStorage("set", clusterIPsSet), clusterIPs: newNFTElementStorage("set", clusterIPsSet),
serviceIPs: newNFTElementStorage("map", serviceIPsMap), serviceIPs: newNFTElementStorage("map", serviceIPsMap),
firewallIPs: newNFTElementStorage("map", firewallIPsMap), firewallIPs: newNFTElementStorage("map", firewallIPsMap),