diff --git a/go.mod b/go.mod index 97c3d7be344..40109f88625 100644 --- a/go.mod +++ b/go.mod @@ -74,6 +74,7 @@ require ( github.com/opencontainers/selinux v1.8.0 github.com/pkg/errors v0.9.1 github.com/pmezard/go-difflib v1.0.0 + github.com/prometheus/client_golang v1.7.1 github.com/prometheus/client_model v0.2.0 github.com/prometheus/common v0.10.0 github.com/quobyte/api v0.1.8 diff --git a/hack/verify-prometheus-imports.sh b/hack/verify-prometheus-imports.sh index 8768d8b8f94..b3510ffb5da 100755 --- a/hack/verify-prometheus-imports.sh +++ b/hack/verify-prometheus-imports.sh @@ -63,6 +63,7 @@ allowed_prometheus_importers=( ./staging/src/k8s.io/component-base/metrics/value.go ./staging/src/k8s.io/component-base/metrics/wrappers.go ./test/e2e/apimachinery/flowcontrol.go + ./test/e2e/node/pods.go ./test/e2e_node/dynamic_kubelet_config_test.go ./test/e2e_node/resource_metrics_test.go ./test/instrumentation/main_test.go diff --git a/test/e2e/node/pods.go b/test/e2e/node/pods.go index 471f2a39e3a..70fa4fbe082 100644 --- a/test/e2e/node/pods.go +++ b/test/e2e/node/pods.go @@ -17,6 +17,7 @@ limitations under the License. package node import ( + "bytes" "context" "encoding/json" "fmt" @@ -43,6 +44,8 @@ import ( imageutils "k8s.io/kubernetes/test/utils/image" "github.com/onsi/ginkgo" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/expfmt" ) var _ = SIGDescribe("Pods Extended", func() { @@ -215,6 +218,18 @@ var _ = SIGDescribe("Pods Extended", func() { wg sync.WaitGroup ) + r := prometheus.NewRegistry() + h := prometheus.NewSummaryVec(prometheus.SummaryOpts{ + Name: "start_latency", + Objectives: map[float64]float64{ + 0.5: 0.05, + 0.75: 0.025, + 0.9: 0.01, + 0.99: 0.001, + }, + }, []string{"node"}) + r.MustRegister(h) + const delay = 2000 const workers = 3 const pods = 15 @@ -352,10 +367,15 @@ var _ = SIGDescribe("Pods Extended", func() { return fmt.Errorf("pod %s on node %s was terminated and then had termination cleared: %#v", pod.Name, pod.Spec.NodeName, status) } } + var hasNoStartTime bool hasRunningContainers = status.State.Waiting == nil && status.State.Terminated == nil if t != nil { if !t.FinishedAt.Time.IsZero() { - duration = t.FinishedAt.Sub(t.StartedAt.Time) + if t.StartedAt.IsZero() { + hasNoStartTime = true + } else { + duration = t.FinishedAt.Sub(t.StartedAt.Time) + } completeDuration = t.FinishedAt.Sub(pod.CreationTimestamp.Time) } @@ -369,6 +389,14 @@ var _ = SIGDescribe("Pods Extended", func() { default: return fmt.Errorf("pod %s on node %s container unexpected exit code %d: start=%s end=%s reason=%s message=%s", pod.Name, pod.Spec.NodeName, t.ExitCode, t.StartedAt, t.FinishedAt, t.Reason, t.Message) } + switch { + case duration > time.Hour: + // problem with status reporting + return fmt.Errorf("pod %s container %s on node %s had very long duration %s: start=%s end=%s", pod.Name, status.Name, pod.Spec.NodeName, duration, t.StartedAt, t.FinishedAt) + case hasNoStartTime: + // should never happen + return fmt.Errorf("pod %s container %s on node %s had finish time but not start time: end=%s", pod.Name, status.Name, pod.Spec.NodeName, t.FinishedAt) + } } if pod.Status.Phase == v1.PodFailed || pod.Status.Phase == v1.PodSucceeded { hasTerminalPhase = true @@ -422,6 +450,7 @@ var _ = SIGDescribe("Pods Extended", func() { if duration > max || max == 0 { max = duration } + h.WithLabelValues(pod.Spec.NodeName).Observe(end.Sub(start).Seconds()) framework.Logf("Pod %s on node %s timings total=%s t=%s run=%s execute=%s", pod.Name, pod.Spec.NodeName, end.Sub(start), t, completeDuration, duration) } @@ -437,8 +466,13 @@ var _ = SIGDescribe("Pods Extended", func() { } framework.Failf("%d errors:\n%v", len(errs), strings.Join(messages, "\n")) } + values, _ := r.Gather() + var buf bytes.Buffer + for _, m := range values { + expfmt.MetricFamilyToText(&buf, m) + } + framework.Logf("Summary of latencies:\n%s", buf.String()) }) - }) ginkgo.Describe("Pod Container lifecycle", func() { diff --git a/vendor/modules.txt b/vendor/modules.txt index 2e5890fc886..4359e5d8e47 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -687,6 +687,7 @@ github.com/pmezard/go-difflib/difflib github.com/pquerna/cachecontrol github.com/pquerna/cachecontrol/cacheobject # github.com/prometheus/client_golang v1.7.1 => github.com/prometheus/client_golang v1.7.1 +## explicit github.com/prometheus/client_golang/prometheus github.com/prometheus/client_golang/prometheus/internal github.com/prometheus/client_golang/prometheus/promhttp