Adding per container stats for CRI runtimes

This commit collects per-container log stats for CRI runtimes. The
change was proposed as part of #55905. It moves each container's log
file from /var/pod/<pod uid>/containername_attempt.log
to /var/pod/<pod uid>/containername/attempt.log, so that every
container has its own log directory. Log usage is then measured by
reusing the volume package to collect metrics from that directory.

Signed-off-by: abhi <abhi@docker.com>
This commit is contained in:
abhi 2018-02-20 19:49:51 -08:00
parent 00070b5490
commit 6649d38c96
7 changed files with 93 additions and 23 deletions

View File

@ -694,7 +694,8 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
klet.podManager, klet.podManager,
klet.runtimeCache, klet.runtimeCache,
runtimeService, runtimeService,
imageService) imageService,
stats.NewLogMetricsService())
} }
} else { } else {
// rkt uses the legacy, non-CRI, integration. Configure it the old way. // rkt uses the legacy, non-CRI, integration. Configure it the old way.

View File

@ -207,7 +207,7 @@ func getStableKey(pod *v1.Pod, container *v1.Container) string {
// buildContainerLogsPath builds log path for container relative to pod logs directory. // buildContainerLogsPath builds log path for container relative to pod logs directory.
func buildContainerLogsPath(containerName string, restartCount int) string { func buildContainerLogsPath(containerName string, restartCount int) string {
return fmt.Sprintf("%s_%d.log", containerName, restartCount) return filepath.Join(containerName, fmt.Sprintf("%d.log", restartCount))
} }
// buildFullContainerLogsPath builds absolute log path for container. // buildFullContainerLogsPath builds absolute log path for container.
@ -215,6 +215,11 @@ func buildFullContainerLogsPath(podUID types.UID, containerName string, restartC
return filepath.Join(buildPodLogsDirectory(podUID), buildContainerLogsPath(containerName, restartCount)) return filepath.Join(buildPodLogsDirectory(podUID), buildContainerLogsPath(containerName, restartCount))
} }
// BuildContainerLogsDirectory returns the absolute path of the directory that
// holds the logs of the named container: the pod's logs directory (as built by
// buildPodLogsDirectory for podUID) joined with containerName.
func BuildContainerLogsDirectory(podUID types.UID, containerName string) string {
	podLogsDir := buildPodLogsDirectory(podUID)
	return filepath.Join(podLogsDir, containerName)
}
// buildPodLogsDirectory builds absolute log directory path for a pod sandbox. // buildPodLogsDirectory builds absolute log directory path for a pod sandbox.
func buildPodLogsDirectory(podUID types.UID) string { func buildPodLogsDirectory(podUID types.UID) string {
return filepath.Join(podLogsRootDirectory, string(podUID)) return filepath.Join(podLogsRootDirectory, string(podUID))

View File

@ -190,6 +190,11 @@ func (m *kubeGenericRuntimeManager) generateContainerConfig(container *v1.Contai
} }
command, args := kubecontainer.ExpandContainerCommandAndArgs(container, opts.Envs) command, args := kubecontainer.ExpandContainerCommandAndArgs(container, opts.Envs)
logDir := BuildContainerLogsDirectory(kubetypes.UID(pod.UID), container.Name)
err = m.osInterface.MkdirAll(logDir, 0755)
if err != nil {
return nil, fmt.Errorf("create container log directory for container %s failed: %v", container.Name, err)
}
containerLogsPath := buildContainerLogsPath(container.Name, restartCount) containerLogsPath := buildContainerLogsPath(container.Name, restartCount)
restartCountUint32 := uint32(restartCount) restartCountUint32 := uint32(restartCount)
config := &runtimeapi.ContainerConfig{ config := &runtimeapi.ContainerConfig{
@ -840,8 +845,7 @@ func (m *kubeGenericRuntimeManager) removeContainerLog(containerID string) error
return fmt.Errorf("failed to get container status %q: %v", containerID, err) return fmt.Errorf("failed to get container status %q: %v", containerID, err)
} }
labeledInfo := getContainerInfoFromLabels(status.Labels) labeledInfo := getContainerInfoFromLabels(status.Labels)
annotatedInfo := getContainerInfoFromAnnotations(status.Annotations) path := BuildContainerLogsDirectory(labeledInfo.PodUID, labeledInfo.ContainerName)
path := buildFullContainerLogsPath(labeledInfo.PodUID, labeledInfo.ContainerName, annotatedInfo.RestartCount)
if err := m.osInterface.Remove(path); err != nil && !os.IsNotExist(err) { if err := m.osInterface.Remove(path); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("failed to remove container %q log %q: %v", containerID, path, err) return fmt.Errorf("failed to remove container %q log %q: %v", containerID, path, err)
} }

View File

@ -6,6 +6,7 @@ go_library(
"cadvisor_stats_provider.go", "cadvisor_stats_provider.go",
"cri_stats_provider.go", "cri_stats_provider.go",
"helper.go", "helper.go",
"log_metrics_provider.go",
"stats_provider.go", "stats_provider.go",
], ],
importpath = "k8s.io/kubernetes/pkg/kubelet/stats", importpath = "k8s.io/kubernetes/pkg/kubelet/stats",
@ -17,11 +18,13 @@ go_library(
"//pkg/kubelet/cadvisor:go_default_library", "//pkg/kubelet/cadvisor:go_default_library",
"//pkg/kubelet/cm:go_default_library", "//pkg/kubelet/cm:go_default_library",
"//pkg/kubelet/container:go_default_library", "//pkg/kubelet/container:go_default_library",
"//pkg/kubelet/kuberuntime:go_default_library",
"//pkg/kubelet/leaky:go_default_library", "//pkg/kubelet/leaky:go_default_library",
"//pkg/kubelet/network:go_default_library", "//pkg/kubelet/network:go_default_library",
"//pkg/kubelet/pod:go_default_library", "//pkg/kubelet/pod:go_default_library",
"//pkg/kubelet/server/stats:go_default_library", "//pkg/kubelet/server/stats:go_default_library",
"//pkg/kubelet/types:go_default_library", "//pkg/kubelet/types:go_default_library",
"//pkg/volume:go_default_library",
"//vendor/github.com/golang/glog:go_default_library", "//vendor/github.com/golang/glog:go_default_library",
"//vendor/github.com/golang/protobuf/proto:go_default_library", "//vendor/github.com/golang/protobuf/proto:go_default_library",
"//vendor/github.com/google/cadvisor/fs:go_default_library", "//vendor/github.com/google/cadvisor/fs:go_default_library",
@ -52,6 +55,7 @@ go_test(
"cadvisor_stats_provider_test.go", "cadvisor_stats_provider_test.go",
"cri_stats_provider_test.go", "cri_stats_provider_test.go",
"helper_test.go", "helper_test.go",
"log_metrics_provider_test.go",
"stats_provider_test.go", "stats_provider_test.go",
], ],
embed = [":go_default_library"], embed = [":go_default_library"],
@ -63,16 +67,19 @@ go_test(
"//pkg/kubelet/cadvisor/testing:go_default_library", "//pkg/kubelet/cadvisor/testing:go_default_library",
"//pkg/kubelet/container:go_default_library", "//pkg/kubelet/container:go_default_library",
"//pkg/kubelet/container/testing:go_default_library", "//pkg/kubelet/container/testing:go_default_library",
"//pkg/kubelet/kuberuntime:go_default_library",
"//pkg/kubelet/leaky:go_default_library", "//pkg/kubelet/leaky:go_default_library",
"//pkg/kubelet/pod/testing:go_default_library", "//pkg/kubelet/pod/testing:go_default_library",
"//pkg/kubelet/server/stats:go_default_library", "//pkg/kubelet/server/stats:go_default_library",
"//pkg/kubelet/types:go_default_library", "//pkg/kubelet/types:go_default_library",
"//pkg/volume:go_default_library",
"//vendor/github.com/google/cadvisor/fs:go_default_library", "//vendor/github.com/google/cadvisor/fs:go_default_library",
"//vendor/github.com/google/cadvisor/info/v1:go_default_library", "//vendor/github.com/google/cadvisor/info/v1:go_default_library",
"//vendor/github.com/google/cadvisor/info/v2:go_default_library", "//vendor/github.com/google/cadvisor/info/v2:go_default_library",
"//vendor/github.com/google/gofuzz:go_default_library", "//vendor/github.com/google/gofuzz:go_default_library",
"//vendor/github.com/stretchr/testify/assert:go_default_library", "//vendor/github.com/stretchr/testify/assert:go_default_library",
"//vendor/github.com/stretchr/testify/require:go_default_library", "//vendor/github.com/stretchr/testify/require:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library", "//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
], ],

View File

@ -35,6 +35,7 @@ import (
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2" runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2"
statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1" statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
"k8s.io/kubernetes/pkg/kubelet/cadvisor" "k8s.io/kubernetes/pkg/kubelet/cadvisor"
"k8s.io/kubernetes/pkg/kubelet/kuberuntime"
"k8s.io/kubernetes/pkg/kubelet/server/stats" "k8s.io/kubernetes/pkg/kubelet/server/stats"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types" kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
) )
@ -53,6 +54,8 @@ type criStatsProvider struct {
runtimeService internalapi.RuntimeService runtimeService internalapi.RuntimeService
// imageService is used to get the stats of the image filesystem. // imageService is used to get the stats of the image filesystem.
imageService internalapi.ImageManagerService imageService internalapi.ImageManagerService
// logMetrics provides the metrics for container logs
logMetricsService LogMetricsService
} }
// newCRIStatsProvider returns a containerStatsProvider implementation that // newCRIStatsProvider returns a containerStatsProvider implementation that
@ -62,12 +65,14 @@ func newCRIStatsProvider(
resourceAnalyzer stats.ResourceAnalyzer, resourceAnalyzer stats.ResourceAnalyzer,
runtimeService internalapi.RuntimeService, runtimeService internalapi.RuntimeService,
imageService internalapi.ImageManagerService, imageService internalapi.ImageManagerService,
logMetricsService LogMetricsService,
) containerStatsProvider { ) containerStatsProvider {
return &criStatsProvider{ return &criStatsProvider{
cadvisor: cadvisor, cadvisor: cadvisor,
resourceAnalyzer: resourceAnalyzer, resourceAnalyzer: resourceAnalyzer,
runtimeService: runtimeService, runtimeService: runtimeService,
imageService: imageService, imageService: imageService,
logMetricsService: logMetricsService,
} }
} }
@ -94,7 +99,6 @@ func (p *criStatsProvider) ListPodStats() ([]statsapi.PodStats, error) {
for _, s := range podSandboxes { for _, s := range podSandboxes {
podSandboxMap[s.Id] = s podSandboxMap[s.Id] = s
} }
// fsIDtoInfo is a map from filesystem id to its stats. This will be used // fsIDtoInfo is a map from filesystem id to its stats. This will be used
// as a cache to avoid querying cAdvisor for the filesystem stats with the // as a cache to avoid querying cAdvisor for the filesystem stats with the
// same filesystem id many times. // same filesystem id many times.
@ -149,7 +153,7 @@ func (p *criStatsProvider) ListPodStats() ([]statsapi.PodStats, error) {
} }
sandboxIDToPodStats[podSandboxID] = ps sandboxIDToPodStats[podSandboxID] = ps
} }
cs := p.makeContainerStats(stats, container, &rootFsInfo, fsIDtoInfo) cs := p.makeContainerStats(stats, container, &rootFsInfo, fsIDtoInfo, podSandbox.GetMetadata().GetUid())
// If cadvisor stats is available for the container, use it to populate // If cadvisor stats is available for the container, use it to populate
// container stats // container stats
caStats, caFound := caInfos[containerID] caStats, caFound := caInfos[containerID]
@ -277,6 +281,7 @@ func (p *criStatsProvider) makeContainerStats(
container *runtimeapi.Container, container *runtimeapi.Container,
rootFsInfo *cadvisorapiv2.FsInfo, rootFsInfo *cadvisorapiv2.FsInfo,
fsIDtoInfo map[runtimeapi.FilesystemIdentifier]*cadvisorapiv2.FsInfo, fsIDtoInfo map[runtimeapi.FilesystemIdentifier]*cadvisorapiv2.FsInfo,
uid string,
) *statsapi.ContainerStats { ) *statsapi.ContainerStats {
result := &statsapi.ContainerStats{ result := &statsapi.ContainerStats{
Name: stats.Attributes.Metadata.Name, Name: stats.Attributes.Metadata.Name,
@ -291,17 +296,6 @@ func (p *criStatsProvider) makeContainerStats(
RSSBytes: proto.Uint64(0), RSSBytes: proto.Uint64(0),
}, },
Rootfs: &statsapi.FsStats{}, Rootfs: &statsapi.FsStats{},
Logs: &statsapi.FsStats{
Time: metav1.NewTime(rootFsInfo.Timestamp),
AvailableBytes: &rootFsInfo.Available,
CapacityBytes: &rootFsInfo.Capacity,
InodesFree: rootFsInfo.InodesFree,
Inodes: rootFsInfo.Inodes,
// UsedBytes and InodesUsed are unavailable from CRI stats.
//
// TODO(yguo0905): Get this information from kubelet and
// populate the two fields here.
},
// UserDefinedMetrics is not supported by CRI. // UserDefinedMetrics is not supported by CRI.
} }
if stats.Cpu != nil { if stats.Cpu != nil {
@ -343,7 +337,8 @@ func (p *criStatsProvider) makeContainerStats(
result.Rootfs.Inodes = imageFsInfo.Inodes result.Rootfs.Inodes = imageFsInfo.Inodes
} }
} }
containerLogPath := kuberuntime.BuildContainerLogsDirectory(types.UID(uid), container.GetMetadata().GetName())
result.Logs = p.getContainerLogStats(containerLogPath, rootFsInfo)
return result return result
} }
@ -423,3 +418,25 @@ func getCRICadvisorStats(ca cadvisor.Interface) (map[string]cadvisorapiv2.Contai
} }
return stats, nil return stats, nil
} }
// getContainerLogStats assembles the filesystem stats reported for the
// container log directory at path.
//
// The timestamp, available/capacity bytes and inode totals are copied from
// rootFsInfo; the used bytes and used inodes come from the metrics provider
// that logMetricsService creates for path. If that provider fails, the error
// is logged and nil is returned, so callers must tolerate a nil result.
//
// TODO Cache the metrics in container log manager
func (p *criStatsProvider) getContainerLogStats(path string, rootFsInfo *cadvisorapiv2.FsInfo) *statsapi.FsStats {
	logMetrics, err := p.logMetricsService.createLogMetricsProvider(path).GetMetrics()
	if err != nil {
		glog.Errorf("Unable to fetch container log stats for path %s: %v ", path, err)
		return nil
	}

	// FsStats stores usage as *uint64, so take addressable copies first.
	usedBytes := uint64(logMetrics.Used.Value())
	inodesUsed := uint64(logMetrics.InodesUsed.Value())

	return &statsapi.FsStats{
		Time:           metav1.NewTime(rootFsInfo.Timestamp),
		AvailableBytes: &rootFsInfo.Available,
		CapacityBytes:  &rootFsInfo.Capacity,
		InodesFree:     rootFsInfo.InodesFree,
		Inodes:         rootFsInfo.Inodes,
		UsedBytes:      &usedBytes,
		InodesUsed:     &inodesUsed,
	}
}

View File

@ -0,0 +1,35 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package stats
import (
"k8s.io/kubernetes/pkg/volume"
)
// LogMetricsService creates metrics providers for container log paths.
// Its only method is unexported, so implementations must live in this package.
type LogMetricsService interface {
// createLogMetricsProvider returns a volume.MetricsProvider for the given path.
createLogMetricsProvider(path string) volume.MetricsProvider
}
// logMetrics is the stateless LogMetricsService implementation returned by
// NewLogMetricsService.
type logMetrics struct{}
// NewLogMetricsService returns the default LogMetricsService, backed by the
// stateless logMetrics type.
func NewLogMetricsService() LogMetricsService {
	var service logMetrics
	return service
}
// createLogMetricsProvider returns the metrics provider that
// volume.NewMetricsDu builds for path.
func (l logMetrics) createLogMetricsProvider(path string) volume.MetricsProvider {
	provider := volume.NewMetricsDu(path)
	return provider
}

View File

@ -39,8 +39,9 @@ func NewCRIStatsProvider(
runtimeCache kubecontainer.RuntimeCache, runtimeCache kubecontainer.RuntimeCache,
runtimeService internalapi.RuntimeService, runtimeService internalapi.RuntimeService,
imageService internalapi.ImageManagerService, imageService internalapi.ImageManagerService,
logMetricsService LogMetricsService,
) *StatsProvider { ) *StatsProvider {
return newStatsProvider(cadvisor, podManager, runtimeCache, newCRIStatsProvider(cadvisor, resourceAnalyzer, runtimeService, imageService)) return newStatsProvider(cadvisor, podManager, runtimeCache, newCRIStatsProvider(cadvisor, resourceAnalyzer, runtimeService, imageService, logMetricsService))
} }
// NewCadvisorStatsProvider returns a containerStatsProvider that provides both // NewCadvisorStatsProvider returns a containerStatsProvider that provides both