Merge pull request #87730 from marosset/windows-kubelet-stats-timeout-updates

Windows kubelet stats timeout updates
This commit is contained in:
Kubernetes Prow Robot 2020-02-06 13:57:24 -08:00 committed by GitHub
commit 6a92f19444
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 177 additions and 6 deletions

View File

@ -78,6 +78,7 @@ go_library(
"@io_bazel_rules_go//go/platform:windows": [
"//pkg/kubelet/apis:go_default_library",
"//pkg/kubelet/winstats:go_default_library",
"//vendor/github.com/Microsoft/hcsshim:go_default_library",
"//vendor/golang.org/x/sys/windows/registry:go_default_library",
],
"//conditions:default": [],

View File

@ -22,7 +22,9 @@ import (
"context"
"time"
"github.com/Microsoft/hcsshim"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
"k8s.io/klog"
)
func (ds *dockerService) getContainerStats(containerID string) (*runtimeapi.ContainerStats, error) {
@ -31,7 +33,18 @@ func (ds *dockerService) getContainerStats(containerID string) (*runtimeapi.Cont
return nil, err
}
statsJSON, err := ds.client.GetContainerStats(containerID)
hcsshim_container, err := hcsshim.OpenContainer(containerID)
if err != nil {
return nil, err
}
defer func() {
closeErr := hcsshim_container.Close()
if closeErr != nil {
klog.Errorf("Error closing container '%s': %v", containerID, closeErr)
}
}()
stats, err := hcsshim_container.Statistics()
if err != nil {
return nil, err
}
@ -47,7 +60,6 @@ func (ds *dockerService) getContainerStats(containerID string) (*runtimeapi.Cont
}
status := statusResp.GetStatus()
dockerStats := statsJSON.Stats
timestamp := time.Now().UnixNano()
containerStats := &runtimeapi.ContainerStats{
Attributes: &runtimeapi.ContainerAttributes{
@ -58,13 +70,12 @@ func (ds *dockerService) getContainerStats(containerID string) (*runtimeapi.Cont
},
Cpu: &runtimeapi.CpuUsage{
Timestamp: timestamp,
// have to multiply cpu usage by 100 since docker stats units is in 100's of nano seconds for Windows
// see https://github.com/moby/moby/blob/v1.13.1/api/types/stats.go#L22
UsageCoreNanoSeconds: &runtimeapi.UInt64Value{Value: dockerStats.CPUStats.CPUUsage.TotalUsage * 100},
// have to multiply cpu usage by 100 since stats units is in 100's of nano seconds for Windows
UsageCoreNanoSeconds: &runtimeapi.UInt64Value{Value: stats.Processor.TotalRuntime100ns * 100},
},
Memory: &runtimeapi.MemoryUsage{
Timestamp: timestamp,
WorkingSetBytes: &runtimeapi.UInt64Value{Value: dockerStats.MemoryStats.PrivateWorkingSet},
WorkingSetBytes: &runtimeapi.UInt64Value{Value: stats.Memory.UsagePrivateWorkingSetBytes},
},
WritableLayer: &runtimeapi.FilesystemUsage{
Timestamp: timestamp,

View File

@ -11,6 +11,7 @@ go_library(
"gmsa_full.go",
"gmsa_kubelet.go",
"hybrid_network.go",
"kubelet_stats.go",
"memory_limits.go",
"security_context.go",
"service.go",
@ -35,6 +36,7 @@ go_library(
"//staging/src/k8s.io/kubelet/config/v1beta1:go_default_library",
"//test/e2e/framework:go_default_library",
"//test/e2e/framework/kubectl:go_default_library",
"//test/e2e/framework/kubelet:go_default_library",
"//test/e2e/framework/metrics:go_default_library",
"//test/e2e/framework/node:go_default_library",
"//test/e2e/framework/pod:go_default_library",

View File

@ -0,0 +1,157 @@
/*
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package windows
import (
"time"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/uuid"
"k8s.io/kubernetes/test/e2e/framework"
e2ekubelet "k8s.io/kubernetes/test/e2e/framework/kubelet"
e2enode "k8s.io/kubernetes/test/e2e/framework/node"
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
imageutils "k8s.io/kubernetes/test/utils/image"
"github.com/onsi/ginkgo"
)
// Registers the Windows kubelet stats e2e test: it schedules 10 pods onto a
// single Windows node, fetches the kubelet stats summary several times, and
// verifies both the content of the per-pod stats and the average time taken
// to retrieve them.
var _ = SIGDescribe("[Feature:Windows] Kubelet-Stats", func() {
	f := framework.NewDefaultFramework("kubelet-stats-test-windows")

	ginkgo.Describe("Kubelet stats collection for Windows nodes", func() {
		ginkgo.Context("when running 10 pods", func() {
			// 10 seconds is the default scrape timeout for metrics-server and kube-prometheus
			ginkgo.It("should return within 10 seconds", func() {
				ginkgo.By("Selecting a Windows node")
				targetNode, err := findWindowsNode(f)
				framework.ExpectNoError(err, "Error finding Windows node")
				framework.Logf("Using node: %v", targetNode.Name)

				ginkgo.By("Scheduling 10 pods")
				busyboxImage := imageutils.GetConfig(imageutils.BusyBox)
				pods := newKubeletStatsTestPods(10, busyboxImage, targetNode.Name)
				f.PodClient().CreateBatch(pods)

				ginkgo.By("Waiting up to 3 minutes for pods to be running")
				timeout := 3 * time.Minute
				// Fail fast if the pods never become ready; otherwise the stats
				// assertions below would fail with a misleading message.
				err = e2epod.WaitForPodsRunningReady(f.ClientSet, f.Namespace.Name, 10, 0, timeout, make(map[string]string))
				framework.ExpectNoError(err, "Error waiting for pods to be running and ready")

				ginkgo.By("Getting kubelet stats 5 times and checking average duration")
				iterations := 5
				var totalDurationMs int64

				for i := 0; i < iterations; i++ {
					// Time only the stats request itself, not the sanity checks that follow.
					start := time.Now()
					nodeStats, err := e2ekubelet.GetStatsSummary(f.ClientSet, targetNode.Name)
					duration := time.Since(start)
					totalDurationMs += duration.Milliseconds()

					framework.ExpectNoError(err, "Error getting kubelet stats")

					// Perform some basic sanity checks on retrieved stats for pods in this test's namespace
					statsChecked := 0
					for _, podStats := range nodeStats.Pods {
						if podStats.PodRef.Namespace != f.Namespace.Name {
							continue
						}
						statsChecked++

						framework.ExpectEqual(*podStats.CPU.UsageCoreNanoSeconds > 0, true, "Pod stats should not report 0 cpu usage")
						framework.ExpectEqual(*podStats.Memory.WorkingSetBytes > 0, true, "Pod stats should not report 0 bytes for memory working set ")
					}
					framework.ExpectEqual(statsChecked, 10, "Should find stats for 10 pods in kubelet stats")

					// Pause between samples so consecutive requests are spread out.
					time.Sleep(5 * time.Second)
				}

				avgDurationMs := totalDurationMs / int64(iterations)

				durationMatch := avgDurationMs <= (10 * time.Second).Milliseconds()
				framework.Logf("Getting kubelet stats for node %v took an average of %v milliseconds over %v iterations", targetNode.Name, avgDurationMs, iterations)
				framework.ExpectEqual(durationMatch, true, "Collecting kubelet stats should not take longer than 10 seconds")
			})
		})
	})
})
// findWindowsNode returns the first node labeled kubernetes.io/os=windows that
// is both Ready and Schedulable. An error from listing nodes is returned to the
// caller. When no node qualifies, e2eskipper.Skipf is called and a zero-value
// node with a nil error is returned.
func findWindowsNode(f *framework.Framework) (v1.Node, error) {
	selector := labels.Set{"kubernetes.io/os": "windows"}.AsSelector()
	nodeList, err := f.ClientSet.CoreV1().Nodes().List(metav1.ListOptions{LabelSelector: selector.String()})
	if err != nil {
		return v1.Node{}, err
	}

	// Index into the slice so each candidate is inspected in place rather than
	// copying the node object (and taking the address of the loop variable).
	for i := range nodeList.Items {
		node := &nodeList.Items[i]
		if e2enode.IsNodeReady(node) && e2enode.IsNodeSchedulable(node) {
			return *node, nil
		}
	}

	e2eskipper.Skipf("Could not find a ready and schedulable Windows node")
	return v1.Node{}, nil
}
// newKubeletStatsTestPods builds numPods pod specifications pinned to nodeName.
// Each pod gets a unique "statscollectiontest-" name and runs a single
// container from the given image that executes a 600 second PowerShell sleep.
func newKubeletStatsTestPods(numPods int, image imageutils.Config, nodeName string) []*v1.Pod {
	var specs []*v1.Pod

	for n := 0; n < numPods; n++ {
		// The generated name is reused for the pod, its "name" label and its container.
		name := "statscollectiontest-" + string(uuid.NewUUID())

		sleeper := v1.Container{
			Image:   image.GetE2EImage(),
			Name:    name,
			Command: []string{"powershell.exe", "-Command", "sleep -Seconds 600"},
		}

		specs = append(specs, &v1.Pod{
			ObjectMeta: metav1.ObjectMeta{
				Name: name,
				Labels: map[string]string{
					"name":    name,
					"testapp": "stats-collection",
				},
			},
			Spec: v1.PodSpec{
				Containers: []v1.Container{sleeper},
				// Setting NodeName directly places the pod on the chosen node.
				NodeName: nodeName,
			},
		})
	}

	return specs
}