Merge pull request #85350 from liggitt/mutation-detection

Enable mutation detection in test-cmd/test-integration/test-e2e-node, improve memory impact
This commit is contained in:
Kubernetes Prow Robot 2019-12-02 07:39:04 -08:00 committed by GitHub
commit ea385aa5e9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 43 additions and 1 deletions

View File

@ -21,6 +21,14 @@ set -o errexit
set -o nounset
set -o pipefail
# start the cache mutation detector by default so that cache mutators will be found
KUBE_CACHE_MUTATION_DETECTOR="${KUBE_CACHE_MUTATION_DETECTOR:-true}"
export KUBE_CACHE_MUTATION_DETECTOR
# panic the server on watch decode errors since they are considered coder mistakes
KUBE_PANIC_WATCH_DECODE_ERROR="${KUBE_PANIC_WATCH_DECODE_ERROR:-true}"
export KUBE_PANIC_WATCH_DECODE_ERROR
KUBE_ROOT=$(dirname "${BASH_SOURCE[0]}")/../..
source "${KUBE_ROOT}/hack/lib/init.sh"
source "${KUBE_ROOT}/hack/lib/test.sh"

View File

@ -17,6 +17,14 @@
KUBE_ROOT=$(dirname "${BASH_SOURCE[0]}")/../..
source "${KUBE_ROOT}/hack/lib/init.sh"
# start the cache mutation detector by default so that cache mutators will be found
KUBE_CACHE_MUTATION_DETECTOR="${KUBE_CACHE_MUTATION_DETECTOR:-true}"
export KUBE_CACHE_MUTATION_DETECTOR
# panic the server on watch decode errors since they are considered coder mistakes
KUBE_PANIC_WATCH_DECODE_ERROR="${KUBE_PANIC_WATCH_DECODE_ERROR:-true}"
export KUBE_PANIC_WATCH_DECODE_ERROR
focus=${FOCUS:-""}
skip=${SKIP-"\[Flaky\]|\[Slow\]|\[Serial\]"}
# The number of tests that can run in parallel depends on what tests

View File

@ -21,6 +21,14 @@ set -o pipefail
KUBE_ROOT=$(dirname "${BASH_SOURCE[0]}")/../..
source "${KUBE_ROOT}/hack/lib/init.sh"
# start the cache mutation detector by default so that cache mutators will be found
KUBE_CACHE_MUTATION_DETECTOR="${KUBE_CACHE_MUTATION_DETECTOR:-true}"
export KUBE_CACHE_MUTATION_DETECTOR
# panic the server on watch decode errors since they are considered coder mistakes
KUBE_PANIC_WATCH_DECODE_ERROR="${KUBE_PANIC_WATCH_DECODE_ERROR:-true}"
export KUBE_PANIC_WATCH_DECODE_ERROR
# Give integration tests longer to run by default.
KUBE_TIMEOUT=${KUBE_TIMEOUT:--timeout=600s}
KUBE_INTEGRATION_TEST_MAX_CONCURRENCY=${KUBE_INTEGRATION_TEST_MAX_CONCURRENCY:-"-1"}

View File

@ -48,7 +48,7 @@ func NewCacheMutationDetector(name string) MutationDetector {
return dummyMutationDetector{}
}
klog.Warningln("Mutation detector is enabled, this will result in memory leakage.")
return &defaultCacheMutationDetector{name: name, period: 1 * time.Second}
return &defaultCacheMutationDetector{name: name, period: 1 * time.Second, retainDuration: 2 * time.Minute}
}
type dummyMutationDetector struct{}
@ -68,6 +68,10 @@ type defaultCacheMutationDetector struct {
lock sync.Mutex
cachedObjs []cacheObj
retainDuration time.Duration
lastRotated time.Time
retainedCachedObjs []cacheObj
// failureFunc is injectable for unit testing. If you don't have it, the process will panic.
// This panic is intentional, since turning on this detection indicates you want a strong
// failure signal. This failure is effectively a p0 bug and you can't trust process results
@ -84,6 +88,14 @@ type cacheObj struct {
func (d *defaultCacheMutationDetector) Run(stopCh <-chan struct{}) {
// we DON'T want protection from panics. If we're running this code, we want to die
for {
if d.lastRotated.IsZero() {
d.lastRotated = time.Now()
} else if time.Now().Sub(d.lastRotated) > d.retainDuration {
d.retainedCachedObjs = d.cachedObjs
d.cachedObjs = nil
d.lastRotated = time.Now()
}
d.CompareObjects()
select {
@ -120,6 +132,12 @@ func (d *defaultCacheMutationDetector) CompareObjects() {
altered = true
}
}
for i, obj := range d.retainedCachedObjs {
if !reflect.DeepEqual(obj.cached, obj.copied) {
fmt.Printf("CACHE %s[%d] ALTERED!\n%v\n", d.name, i, diff.ObjectGoPrintSideBySide(obj.cached, obj.copied))
altered = true
}
}
if altered {
msg := fmt.Sprintf("cache %s modified", d.name)