Inject top level QoS cgroup creation in the Kubelet

Buddha Prakash 2016-06-27 11:46:20 -07:00
parent e967a773c4
commit 5000e74664
24 changed files with 1166 additions and 918 deletions

View File

@ -164,6 +164,7 @@ test-e2e: ginkgo generated_files
# Example:
# make test-e2e-node FOCUS=kubelet SKIP=container
# make test-e2e-node REMOTE=true DELETE_INSTANCES=true
# make test-e2e-node TEST_ARGS="--cgroups-per-qos=true"
# Build and run tests.
.PHONY: test-e2e-node
test-e2e-node: ginkgo generated_files

View File

@ -133,6 +133,7 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
fs.MarkDeprecated("system-container", "Use --system-cgroups instead. Will be removed in a future version.")
fs.StringVar(&s.SystemCgroups, "system-cgroups", s.SystemCgroups, "Optional absolute name of cgroups in which to place all non-kernel processes that are not already inside a cgroup under `/`. Empty for no container. Rolling back the flag requires a reboot. (Default: \"\").")
fs.BoolVar(&s.CgroupsPerQOS, "cgroups-per-qos", s.CgroupsPerQOS, "Enable creation of the QoS cgroup hierarchy; if true, top level QoS and pod cgroups are created.")
fs.StringVar(&s.CgroupRoot, "cgroup-root", s.CgroupRoot, "Optional root cgroup to use for pods. This is handled by the container runtime on a best effort basis. Default: '', which means use the container runtime default.")
fs.StringVar(&s.ContainerRuntime, "container-runtime", s.ContainerRuntime, "The container runtime to use. Possible values: 'docker', 'rkt'. Default: 'docker'.")
fs.DurationVar(&s.RuntimeRequestTimeout.Duration, "runtime-request-timeout", s.RuntimeRequestTimeout.Duration, "Timeout of all runtime requests except long running request - pull, logs, exec and attach. When timeout exceeded, kubelet will cancel the request, throw out an error and retry later. Default: 2m0s")

View File

@ -220,6 +220,7 @@ func UnsecuredKubeletConfig(s *options.KubeletServer) (*KubeletConfig, error) {
EnableControllerAttachDetach: s.EnableControllerAttachDetach,
EnableCustomMetrics: s.EnableCustomMetrics,
EnableDebuggingHandlers: s.EnableDebuggingHandlers,
CgroupsPerQOS: s.CgroupsPerQOS,
EnableServer: s.EnableServer,
EventBurst: int(s.EventBurst),
EventRecordQPS: float32(s.EventRecordQPS),
@ -363,12 +364,13 @@ func run(s *options.KubeletServer, kcfg *KubeletConfig) (err error) {
if kcfg.SystemCgroups != "" && kcfg.CgroupRoot == "" {
return fmt.Errorf("invalid configuration: system container was specified and cgroup root was not specified")
}
kcfg.ContainerManager, err = cm.NewContainerManager(kcfg.Mounter, kcfg.CAdvisorInterface, cm.NodeConfig{
RuntimeCgroupsName: kcfg.RuntimeCgroups,
SystemCgroupsName: kcfg.SystemCgroups,
KubeletCgroupsName: kcfg.KubeletCgroups,
ContainerRuntime: kcfg.ContainerRuntime,
CgroupsPerQOS: kcfg.CgroupsPerQOS,
CgroupRoot: kcfg.CgroupRoot,
})
if err != nil {
return err
@ -575,6 +577,7 @@ func SimpleKubelet(client *clientset.Clientset,
EnableCustomMetrics: false,
EnableDebuggingHandlers: true,
EnableServer: true,
CgroupsPerQOS: false,
FileCheckFrequency: fileCheckFrequency,
// Since this kubelet runs with --configure-cbr0=false, it needs to use
// hairpin-veth to allow hairpin packets. Note that this deviates from
@ -798,6 +801,7 @@ type KubeletConfig struct {
EnableControllerAttachDetach bool
EnableCustomMetrics bool
EnableDebuggingHandlers bool
CgroupsPerQOS bool
EnableServer bool
EventClient *clientset.Clientset
EventBurst int
@ -929,6 +933,7 @@ func CreateAndInitKubelet(kc *KubeletConfig) (k KubeletBootstrap, pc *config.Pod
kc.NodeLabels,
kc.NodeStatusUpdateFrequency,
kc.OSInterface,
kc.CgroupsPerQOS,
kc.CgroupRoot,
kc.ContainerRuntime,
kc.RuntimeRequestTimeout,

View File

@ -205,6 +205,14 @@ less useful for catching flakes related to creating the instance from an image.**
make test-e2e-node REMOTE=true RUN_UNTIL_FAILURE=true
```
## Additional QoS Cgroup Hierarchy testing
For testing with the QoS cgroup hierarchy enabled, pass the --cgroups-per-qos flag to the test binary via TEST_ARGS
```sh
make test-e2e-node TEST_ARGS="--cgroups-per-qos=true"
```
# Notes on tests run by the Kubernetes project during pre- and post-submit.
The node e2e tests are run by the PR builder for each Pull Request and the results published at

View File

@ -33,6 +33,7 @@ cleanup=${CLEANUP:-"true"}
delete_instances=${DELETE_INSTANCES:-"false"}
run_until_failure=${RUN_UNTIL_FAILURE:-"false"}
list_images=${LIST_IMAGES:-"false"}
test_args=${TEST_ARGS:-""}
if [[ $list_images == "true" ]]; then
gcloud compute images list --project="${image_project}" | grep "e2e-node"
@ -117,7 +118,7 @@ if [ $remote = true ] ; then
--hosts="$hosts" --images="$images" --cleanup="$cleanup" \
--results-dir="$artifacts" --ginkgo-flags="$ginkgoflags" \
--image-project="$image_project" --instance-name-prefix="$instance_prefix" --setup-node="true" \
--delete-instances="$delete_instances"
--delete-instances="$delete_instances" --test_args="$test_args"
exit $?
else
@ -129,6 +130,7 @@ else
# Test using the host the script was run on
# Provided for backwards compatibility
"${ginkgo}" --focus=$focus --skip=$skip "${KUBE_ROOT}/test/e2e_node/" --report-dir=${report} \
-- --alsologtostderr --v 2 --node-name $(hostname) --disable-kubenet=true --build-services=true --start-services=true --stop-services=true
-- --alsologtostderr --v 2 --node-name $(hostname) --disable-kubenet=true --build-services=true \
--start-services=true --stop-services=true "$test_args"
exit $?
fi

View File

@ -42,6 +42,7 @@ build-tag
cadvisor-port
cert-dir
certificate-authority
cgroups-per-qos
cgroup-root
chaos-chance
clean-start

File diff suppressed because it is too large

View File

@ -258,14 +258,18 @@ type KubeletConfiguration struct {
CloudConfigFile string `json:"cloudConfigFile,omitempty"`
// KubeletCgroups is the absolute name of cgroups to isolate the kubelet in.
KubeletCgroups string `json:"kubeletCgroups,omitempty"`
// Enable QoS based Cgroup hierarchy: top level cgroups for QoS classes
// are created, and all Burstable and BestEffort pods are brought up under
// their specific top level QoS cgroup.
CgroupsPerQOS bool `json:"CgroupsPerQOS,omitempty"`
// Cgroups that container runtime is expected to be isolated in.
RuntimeCgroups string `json:"runtimeCgroups,omitempty"`
// SystemCgroups is absolute name of cgroups in which to place
// all non-kernel processes that are not already in a container. Empty
// for no container. Rolling back the flag requires a reboot.
SystemCgroups string `json:"systemCgroups,omitempty"`
// cgroupRoot is the root cgroup to use for pods. This is handled by the
// container runtime on a best effort basis.
// CgroupRoot is the root cgroup to use for pods.
// If CgroupsPerQOS is enabled, this is the root of the QoS cgroup hierarchy.
CgroupRoot string `json:"cgroupRoot,omitempty"`
// containerRuntime is the container runtime to use.
ContainerRuntime string `json:"containerRuntime"`
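Taken together, the two fields above mean that with CgroupsPerQOS enabled, CgroupRoot becomes the parent under which the per-QoS cgroups are created. A minimal sketch of a configuration that opts in (not part of this commit; the import path for the internal componentconfig package is assumed):
```go
package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/apis/componentconfig"
)

func main() {
	// Opt into the QoS cgroup hierarchy and root it at "/", mirroring the
	// flags the node e2e harness passes further down in this commit.
	cfg := componentconfig.KubeletConfiguration{
		CgroupsPerQOS:    true,
		CgroupRoot:       "/",
		ContainerRuntime: "docker",
	}
	fmt.Printf("QoS cgroups enabled: %v, rooted at %q\n", cfg.CgroupsPerQOS, cfg.CgroupRoot)
}
```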

View File

@ -154,6 +154,9 @@ func SetDefaults_KubeletConfiguration(obj *KubeletConfiguration) {
if obj.ConfigureCBR0 == nil {
obj.ConfigureCBR0 = boolVar(false)
}
if obj.CgroupsPerQOS == nil {
obj.CgroupsPerQOS = boolVar(false)
}
if obj.ContainerRuntime == "" {
obj.ContainerRuntime = "docker"
}

View File

@ -322,6 +322,10 @@ type KubeletConfiguration struct {
// cgroupRoot is the root cgroup to use for pods. This is handled by the
// container runtime on a best effort basis.
CgroupRoot string `json:"cgroupRoot"`
// Enable QoS based Cgroup hierarchy: top level cgroups for QoS classes
// are created, and all Burstable and BestEffort pods are brought up under
// their specific top level QoS cgroup.
CgroupsPerQOS *bool `json:"CgroupsPerQOS,omitempty"`
// containerRuntime is the container runtime to use.
ContainerRuntime string `json:"containerRuntime"`
// runtimeRequestTimeout is the timeout for all runtime requests except long running

View File

@ -43,6 +43,8 @@ type NodeConfig struct {
SystemCgroupsName string
KubeletCgroupsName string
ContainerRuntime string
CgroupsPerQOS bool
CgroupRoot string
}
type Status struct {

View File

@ -37,6 +37,7 @@ import (
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/resource"
"k8s.io/kubernetes/pkg/kubelet/cadvisor"
"k8s.io/kubernetes/pkg/kubelet/qos"
"k8s.io/kubernetes/pkg/util"
utilerrors "k8s.io/kubernetes/pkg/util/errors"
"k8s.io/kubernetes/pkg/util/mount"
@ -97,7 +98,10 @@ type containerManagerImpl struct {
status Status
// External containers being managed.
systemContainers []*systemContainer
qosContainers QOSContainersInfo
periodicTasks []func()
// holds all the mounted cgroup subsystems
subsystems *cgroupSubsystems
}
type features struct {
@ -161,10 +165,24 @@ func validateSystemRequirements(mountUtil mount.Interface) (features, error) {
// Takes the absolute name of the specified containers.
// Empty container name disables use of the specified container.
func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.Interface, nodeConfig NodeConfig) (ContainerManager, error) {
// Check if Cgroup-root actually exists on the node
if nodeConfig.CgroupsPerQOS {
if nodeConfig.CgroupRoot == "" {
return nil, fmt.Errorf("invalid configuration: cgroups-per-qos was specified and cgroup-root was not specified. To enable the QoS cgroup hierarchy you need to specify a valid cgroup-root")
}
if _, err := os.Stat(nodeConfig.CgroupRoot); err != nil {
return nil, fmt.Errorf("invalid configuration: cgroup-root doesn't exist : %v", err)
}
}
subsystems, err := getCgroupSubsystems()
if err != nil {
return nil, fmt.Errorf("failed to get mounted subsystems: %v", err)
}
return &containerManagerImpl{
cadvisorInterface: cadvisorInterface,
mountUtil: mountUtil,
NodeConfig: nodeConfig,
subsystems: subsystems,
}, nil
}
@ -190,6 +208,41 @@ const (
KernelTunableModify KernelTunableBehavior = "modify"
)
// InitQOS creates the top level QoS cgroup containers.
// Top level QoS containers are created only for the Burstable and BestEffort
// classes, not for Guaranteed: all Guaranteed pods are nested under the
// root container by default. InitQOS is called only once during kubelet bootstrapping.
// TODO(@dubstack) Add support for cgroup-root to work on both systemd and cgroupfs
// drivers. Currently we only support systems running the cgroupfs driver.
func InitQOS(rootContainer string, subsystems *cgroupSubsystems) (QOSContainersInfo, error) {
cm := NewCgroupManager(subsystems)
// Top level QoS containers are created only for the Burstable
// and BestEffort classes
qosClasses := [2]qos.QOSClass{qos.Burstable, qos.BestEffort}
// Create containers for both qos classes
for _, qosClass := range qosClasses {
// get the container's absolute name
absoluteContainerName := path.Join(rootContainer, string(qosClass))
// containerConfig object stores the cgroup specifications
containerConfig := &CgroupConfig{
Name: absoluteContainerName,
ResourceParameters: &ResourceConfig{},
}
// TODO(@dubstack) Add support on systemd cgroups driver
if err := cm.Create(containerConfig); err != nil {
return QOSContainersInfo{}, fmt.Errorf("failed to create top level %v QOS cgroup : %v", qosClass, err)
}
}
// Store the top level qos container names
qosContainersInfo := QOSContainersInfo{
Guaranteed: rootContainer,
Burstable: path.Join(rootContainer, string(qos.Burstable)),
BestEffort: path.Join(rootContainer, string(qos.BestEffort)),
}
return qosContainersInfo, nil
}
// setupKernelTunables validates kernel tunable flags are set as expected
// depending upon the specified option, it will either warn, error, or modify the kernel tunable flags
func setupKernelTunables(option KernelTunableBehavior) error {
@ -240,6 +293,15 @@ func (cm *containerManagerImpl) setupNode() error {
return err
}
// Set up the top level QoS containers only if the CgroupsPerQOS flag is set to true
if cm.NodeConfig.CgroupsPerQOS {
qosContainersInfo, err := InitQOS(cm.NodeConfig.CgroupRoot, cm.subsystems)
if err != nil {
return fmt.Errorf("failed to initialise top level QOS containers: %v", err)
}
cm.qosContainers = qosContainersInfo
}
systemContainers := []*systemContainer{}
if cm.ContainerRuntime == "docker" {
if cm.RuntimeCgroupsName != "" {
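With the cgroupfs driver and a cgroup root of "/", the Create calls in InitQOS should leave a Burstable and a BestEffort directory under every mounted cgroup subsystem; the node e2e test added later in this commit verifies this from inside a pod. A standalone sketch of the same check (not part of this commit), assuming the memory subsystem is mounted at /sys/fs/cgroup/memory:
```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

func main() {
	// InitQOS with cgroup-root "/" is expected to have created these
	// directories under each mounted subsystem; check the memory subsystem.
	for _, qosClass := range []string{"Burstable", "BestEffort"} {
		dir := filepath.Join("/sys/fs/cgroup/memory", qosClass)
		if _, err := os.Stat(dir); err != nil {
			fmt.Printf("missing top level QoS cgroup %s: %v\n", qosClass, err)
			continue
		}
		fmt.Println("found", dir)
	}
}
```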

View File

@ -56,3 +56,10 @@ type CgroupManager interface {
// Exists checks if the cgroup already exists
Exists(string) bool
}
// QOSContainersInfo holds the names of the top level containers per QoS class
type QOSContainersInfo struct {
Guaranteed string
BestEffort string
Burstable string
}
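As a rough illustration of what this struct is for, a pod's QoS class can be mapped to the parent cgroup name recorded here. A minimal sketch (not part of this commit; the helper name parentCgroupFor is hypothetical, and Guaranteed pods fall through to the root container as described in InitQOS above):
```go
package cm

import "k8s.io/kubernetes/pkg/kubelet/qos"

// parentCgroupFor maps a pod's QoS class to the parent cgroup name
// recorded in QOSContainersInfo.
func parentCgroupFor(class qos.QOSClass, qosContainers QOSContainersInfo) string {
	switch class {
	case qos.Burstable:
		return qosContainers.Burstable
	case qos.BestEffort:
		return qosContainers.BestEffort
	default:
		// Guaranteed pods stay directly under the root container.
		return qosContainers.Guaranteed
	}
}
```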

View File

@ -206,6 +206,7 @@ func NewMainKubelet(
nodeLabels map[string]string,
nodeStatusUpdateFrequency time.Duration,
osInterface kubecontainer.OSInterface,
CgroupsPerQOS bool,
cgroupRoot string,
containerRuntime string,
runtimeRequestTimeout time.Duration,
@ -339,6 +340,7 @@ func NewMainKubelet(
nodeStatusUpdateFrequency: nodeStatusUpdateFrequency,
os: osInterface,
oomWatcher: oomWatcher,
CgroupsPerQOS: CgroupsPerQOS,
cgroupRoot: cgroupRoot,
mounter: mounter,
writer: writer,
@ -706,6 +708,9 @@ type Kubelet struct {
// Monitor resource usage
resourceAnalyzer stats.ResourceAnalyzer
// Whether or not we should have the QOS cgroup hierarchy for resource management
CgroupsPerQOS bool
// If non-empty, pass this to the container runtime as the root cgroup.
cgroupRoot string

View File

@ -67,6 +67,8 @@ type TestContextType struct {
DumpLogsOnFailure bool
// Name of the node to run tests on (node e2e suite only).
NodeName string
// Whether to enable the QoS Cgroup Hierarchy or not
CgroupsPerQOS bool
}
type CloudConfig struct {
@ -148,4 +150,5 @@ func RegisterClusterFlags() {
// Register flags specific to the node e2e test suite.
func RegisterNodeFlags() {
flag.StringVar(&TestContext.NodeName, "node-name", "", "Name of the node to run tests on (node e2e suite only).")
flag.BoolVar(&TestContext.CgroupsPerQOS, "cgroups-per-qos", false, "Enable creation of the QoS cgroup hierarchy; if true, top level QoS and pod cgroups are created.")
}

View File

@ -0,0 +1,77 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package e2e_node
import (
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/util"
"k8s.io/kubernetes/test/e2e/framework"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
)
var _ = framework.KubeDescribe("Kubelet Cgroup Manager", func() {
f := NewDefaultFramework("kubelet-cgroup-manager")
Describe("QOS containers", func() {
Context("On enabling QOS cgroup hierarchy", func() {
It("Top level QoS containers should have been created", func() {
if framework.TestContext.CgroupsPerQOS {
podName := "qos-pod" + string(util.NewUUID())
contName := "qos-container" + string(util.NewUUID())
pod := &api.Pod{
ObjectMeta: api.ObjectMeta{
Name: podName,
Namespace: f.Namespace.Name,
},
Spec: api.PodSpec{
// Don't restart the Pod since it is expected to exit
RestartPolicy: api.RestartPolicyNever,
Containers: []api.Container{
{
Image: "gcr.io/google_containers/busybox:1.24",
Name: contName,
Command: []string{"sh", "-c", "if [ -d /tmp/memory/Burstable ] && [ -d /tmp/memory/BestEffort ]; then exit 0; else exit 1; fi"},
VolumeMounts: []api.VolumeMount{
{
Name: "sysfscgroup",
MountPath: "/tmp",
},
},
},
},
Volumes: []api.Volume{
{
Name: "sysfscgroup",
VolumeSource: api.VolumeSource{
HostPath: &api.HostPathVolumeSource{Path: "/sys/fs/cgroup"},
},
},
},
},
}
f.MungePodSpec(pod)
podClient := f.Client.Pods(f.Namespace.Name)
_, err := podClient.Create(pod)
Expect(err).NotTo(HaveOccurred())
err = framework.WaitForPodSuccessInNamespace(f.Client, podName, contName, f.Namespace.Name)
Expect(err).NotTo(HaveOccurred())
}
})
})
})
})

View File

@ -96,7 +96,7 @@ var _ = BeforeSuite(func() {
maskLocksmithdOnCoreos()
if *startServices {
e2es = newE2eService(framework.TestContext.NodeName)
e2es = newE2eService(framework.TestContext.NodeName, framework.TestContext.CgroupsPerQOS)
if err := e2es.start(); err != nil {
Fail(fmt.Sprintf("Unable to start node services.\n%v", err))
}

View File

@ -146,7 +146,7 @@ func CreateTestArchive() (string, error) {
}
// Returns the command output, whether the exit was ok, and any errors
func RunRemote(archive string, host string, cleanup bool, junitFileNumber int, setupNode bool) (string, bool, error) {
func RunRemote(archive string, host string, cleanup bool, junitFileNumber int, setupNode bool, testArgs string) (string, bool, error) {
if setupNode {
uname, err := user.Current()
if err != nil {
@ -211,11 +211,10 @@ func RunRemote(archive string, host string, cleanup bool, junitFileNumber int, s
// Exit failure with the error
return "", false, err
}
// Run the tests
cmd = getSshCommand(" && ",
fmt.Sprintf("cd %s", tmp),
fmt.Sprintf("timeout -k 30s %ds ./ginkgo %s ./e2e_node.test -- --logtostderr --v 2 --build-services=false --stop-services=%t --node-name=%s --report-dir=%s/results --junit-file-number=%d", *testTimeoutSeconds, *ginkgoFlags, cleanup, host, tmp, junitFileNumber),
fmt.Sprintf("timeout -k 30s %ds ./ginkgo %s ./e2e_node.test -- --logtostderr --v 2 --build-services=false --stop-services=%t --node-name=%s --report-dir=%s/results --junit-file-number=%d %s", *testTimeoutSeconds, *ginkgoFlags, cleanup, host, tmp, junitFileNumber, testArgs),
)
aggErrs := []error{}

View File

@ -46,6 +46,7 @@ type e2eService struct {
kubeletStaticPodDir string
nodeName string
logFiles map[string]logFileData
cgroupsPerQOS bool
}
type logFileData struct {
@ -58,14 +59,18 @@ const (
LOG_VERBOSITY_LEVEL = "4"
)
func newE2eService(nodeName string) *e2eService {
func newE2eService(nodeName string, cgroupsPerQOS bool) *e2eService {
// Special log files that need to be collected for additional debugging.
var logFiles = map[string]logFileData{
"kern.log": {[]string{"/var/log/kern.log"}, []string{"-k"}},
"docker.log": {[]string{"/var/log/docker.log", "/var/log/upstart/docker.log"}, []string{"-u", "docker"}},
}
return &e2eService{nodeName: nodeName, logFiles: logFiles}
return &e2eService{
nodeName: nodeName,
logFiles: logFiles,
cgroupsPerQOS: cgroupsPerQOS,
}
}
func (es *e2eService) start() error {
@ -236,6 +241,12 @@ func (es *e2eService) startKubeletServer() (*killCmd, error) {
"--v", LOG_VERBOSITY_LEVEL, "--logtostderr",
"--pod-cidr=10.180.0.0/24", // Assign a fixed CIDR to the node because there is no node controller.
)
if es.cgroupsPerQOS {
cmdArgs = append(cmdArgs,
"--cgroups-per-qos", "true",
"--cgroup-root", "/",
)
}
if !*disableKubenet {
cwd, err := os.Getwd()
if err != nil {
@ -245,6 +256,7 @@ func (es *e2eService) startKubeletServer() (*killCmd, error) {
"--network-plugin=kubenet",
"--network-plugin-dir", filepath.Join(cwd, CNIDirectory, "bin")) // Enable kubenet
}
cmd := exec.Command("sudo", cmdArgs...)
hcc := newHealthCheckCommand(
"http://127.0.0.1:10255/healthz",

View File

@ -39,4 +39,4 @@ go run test/e2e_node/runner/run_e2e.go --logtostderr --vmodule=*=2 --ssh-env="g
--zone="$GCE_ZONE" --project="$GCE_PROJECT" --hosts="$GCE_HOSTS" \
--image-config-file="$GCE_IMAGE_CONFIG_PATH" --cleanup="$CLEANUP" \
--results-dir="$ARTIFACTS" --ginkgo-flags="$GINKGO_FLAGS" \
--setup-node="$SETUP_NODE" --instance-metadata="$GCE_INSTANCE_METADATA"
--setup-node="$SETUP_NODE" --test_args="$TEST_ARGS" --instance-metadata="$GCE_INSTANCE_METADATA"

View File

@ -5,4 +5,4 @@ GCE_PROJECT=kubernetes-jenkins
CLEANUP=true
GINKGO_FLAGS=--skip=FLAKY
SETUP_NODE=false
TEST_ARGS=--cgroups-per-qos=true

View File

@ -5,3 +5,4 @@ GCE_PROJECT=kubernetes-jenkins-pull
CLEANUP=true
GINKGO_FLAGS=--skip=FLAKY
SETUP_NODE=false
TEST_ARGS=--cgroups-per-qos=true

View File

@ -17,3 +17,5 @@ GCE_IMAGE_PROJECT=
CLEANUP=true
# If true, current user will be added to the docker group on test node
SETUP_NODE=false
# If true, the QoS cgroup hierarchy is created and tests specific to the cgroup hierarchy are run
TEST_ARGS=--cgroups-per-qos=true

View File

@ -41,6 +41,7 @@ import (
"google.golang.org/api/compute/v1"
)
var testArgs = flag.String("test_args", "", "Space-separated list of arguments to pass to Ginkgo test runner.")
var instanceNamePrefix = flag.String("instance-name-prefix", "", "prefix for instance names")
var zone = flag.String("zone", "", "gce zone the hosts live in")
var project = flag.String("project", "", "gce project the hosts live in")
@ -254,7 +255,7 @@ func testHost(host string, deleteFiles bool, junitFileNum int, setupNode bool) *
}
}
output, exitOk, err := e2e_node.RunRemote(path, host, deleteFiles, junitFileNum, setupNode)
output, exitOk, err := e2e_node.RunRemote(path, host, deleteFiles, junitFileNum, setupNode, *testArgs)
return &TestResult{
output: output,
err: err,