From cc656ae6acf92bf7e45ea52dc7d854fddd4301e3 Mon Sep 17 00:00:00 2001 From: Yifan Gu Date: Fri, 8 Jan 2016 13:19:49 -0800 Subject: [PATCH 1/2] rkt: Refactor setIsolators. Replace manually creating isolators with isolator constructors. Also add support for supplementary group IDs. --- pkg/kubelet/rkt/cap.go | 26 +---- pkg/kubelet/rkt/rkt.go | 195 ++++++++++++++++++-------------- pkg/securitycontext/provider.go | 6 +- 3 files changed, 117 insertions(+), 110 deletions(-) diff --git a/pkg/kubelet/rkt/cap.go b/pkg/kubelet/rkt/cap.go index 376215fb08f..a00057f9e1c 100644 --- a/pkg/kubelet/rkt/cap.go +++ b/pkg/kubelet/rkt/cap.go @@ -16,13 +16,6 @@ limitations under the License. package rkt -import ( - "fmt" - "strings" - - "k8s.io/kubernetes/pkg/api" -) - // TODO(yifan): Export this to higher level package. const ( CAP_CHOWN = iota @@ -107,22 +100,11 @@ var capabilityList = map[int]string{ CAP_AUDIT_READ: "CAP_AUDIT_READ", } -// getAllCapabilities returns the capability list with all capabilities. -func getAllCapabilities() string { +// allCapabilities returns the capability list with all capabilities. +func allCapabilities() []string { var capabilities []string for _, cap := range capabilityList { - capabilities = append(capabilities, fmt.Sprintf("%q", cap)) + capabilities = append(capabilities, cap) } - return strings.Join(capabilities, ",") -} - -// TODO(yifan): This assumes that api.Capability has the form of -// "CAP_SYS_ADMIN". We need to have a formal definition for -// capabilities. -func getCapabilities(caps []api.Capability) string { - var capList []string - for _, cap := range caps { - capList = append(capList, fmt.Sprintf("%q", cap)) - } - return strings.Join(capList, ",") + return capabilities } diff --git a/pkg/kubelet/rkt/rkt.go b/pkg/kubelet/rkt/rkt.go index e5af5aaa3b7..764ec709bc8 100644 --- a/pkg/kubelet/rkt/rkt.go +++ b/pkg/kubelet/rkt/rkt.go @@ -225,76 +225,42 @@ func makePodServiceFileName(uid types.UID) string { return fmt.Sprintf("%s_%s.service", kubernetesUnitPrefix, uid) } -type resource struct { - limit string - request string -} +// setIsolators sets the apps' isolators according to the security context and resource spec. +func setIsolators(app *appctypes.App, c *api.Container, ctx *api.SecurityContext) error { + var isolators []appctypes.Isolator -// rawValue converts a string to *json.RawMessage -func rawValue(value string) *json.RawMessage { - msg := json.RawMessage(value) - return &msg -} + // Capabilities isolators. + if ctx != nil { + var addCaps, dropCaps []string -// rawValue converts the request, limit to *json.RawMessage -func rawRequestLimit(request, limit string) *json.RawMessage { - if request == "" { - request = limit - } - if limit == "" { - limit = request - } - return rawValue(fmt.Sprintf(`{"request":%q,"limit":%q}`, request, limit)) -} - -// setIsolators overrides the isolators of the pod manifest if necessary. -// TODO need an apply config in security context for rkt -func setIsolators(app *appctypes.App, c *api.Container) error { - hasCapRequests := securitycontext.HasCapabilitiesRequest(c) - if hasCapRequests || len(c.Resources.Limits) > 0 || len(c.Resources.Requests) > 0 { - app.Isolators = []appctypes.Isolator{} - } - - // Retained capabilities/privileged. - privileged := false - if c.SecurityContext != nil && c.SecurityContext.Privileged != nil { - privileged = *c.SecurityContext.Privileged - } - - var addCaps string - if privileged { - addCaps = getAllCapabilities() - } else { - if hasCapRequests { - addCaps = getCapabilities(c.SecurityContext.Capabilities.Add) + if ctx.Capabilities != nil { + addCaps, dropCaps = securitycontext.MakeCapabilities(ctx.Capabilities.Add, ctx.Capabilities.Drop) + } + if ctx.Privileged != nil && *ctx.Privileged { + addCaps, dropCaps = allCapabilities(), []string{} + } + if len(addCaps) > 0 { + set, err := appctypes.NewLinuxCapabilitiesRetainSet(addCaps...) + if err != nil { + return err + } + isolators = append(isolators, set.AsIsolator()) + } + if len(dropCaps) > 0 { + set, err := appctypes.NewLinuxCapabilitiesRevokeSet(dropCaps...) + if err != nil { + return err + } + isolators = append(isolators, set.AsIsolator()) } } - if len(addCaps) > 0 { - // TODO(yifan): Replace with constructor, see: - // https://github.com/appc/spec/issues/268 - isolator := appctypes.Isolator{ - Name: "os/linux/capabilities-retain-set", - ValueRaw: rawValue(fmt.Sprintf(`{"set":[%s]}`, addCaps)), - } - app.Isolators = append(app.Isolators, isolator) + + // Resources isolators. + type resource struct { + limit string + request string } - // Removed capabilities. - var dropCaps string - if hasCapRequests { - dropCaps = getCapabilities(c.SecurityContext.Capabilities.Drop) - } - if len(dropCaps) > 0 { - // TODO(yifan): Replace with constructor, see: - // https://github.com/appc/spec/issues/268 - isolator := appctypes.Isolator{ - Name: "os/linux/capabilities-remove-set", - ValueRaw: rawValue(fmt.Sprintf(`{"set":[%s]}`, dropCaps)), - } - app.Isolators = append(app.Isolators, isolator) - } - - // Resources. resources := make(map[api.ResourceName]resource) for name, quantity := range c.Resources.Limits { resources[name] = resource{limit: quantity.String()} @@ -307,27 +273,59 @@ func setIsolators(app *appctypes.App, c *api.Container) error { r.request = quantity.String() resources[name] = r } - var acName appctypes.ACIdentifier + for name, res := range resources { switch name { case api.ResourceCPU: - acName = "resource/cpu" + cpu, err := appctypes.NewResourceCPUIsolator(res.request, res.limit) + if err != nil { + return err + } + isolators = append(isolators, cpu.AsIsolator()) case api.ResourceMemory: - acName = "resource/memory" + memory, err := appctypes.NewResourceMemoryIsolator(res.request, res.limit) + if err != nil { + return err + } + isolators = append(isolators, memory.AsIsolator()) default: return fmt.Errorf("resource type not supported: %v", name) } - // TODO(yifan): Replace with constructor, see: - // https://github.com/appc/spec/issues/268 - isolator := appctypes.Isolator{ - Name: acName, - ValueRaw: rawRequestLimit(res.request, res.limit), - } - app.Isolators = append(app.Isolators, isolator) } + + mergeIsolators(app, isolators) return nil } +// mergeIsolators replaces the app.Isolators with isolators. +func mergeIsolators(app *appctypes.App, isolators []appctypes.Isolator) { + for _, is := range isolators { + found := false + for j, js := range app.Isolators { + if is.Name.Equals(js.Name) { + switch is.Name { + case appctypes.LinuxCapabilitiesRetainSetName: + // TODO(yifan): More fine grain merge for capability set instead of override. + fallthrough + case appctypes.LinuxCapabilitiesRevokeSetName: + fallthrough + case appctypes.ResourceCPUName: + fallthrough + case appctypes.ResourceMemoryName: + app.Isolators[j] = is + default: + panic(fmt.Sprintf("unexpected isolator name: %v", is.Name)) + } + found = true + break + } + } + if !found { + app.Isolators = append(app.Isolators, is) + } + } +} + // mergeEnv merges the optEnv with the image's environments. // The environments defined in the image will be overridden by // the ones with the same name in optEnv. @@ -392,11 +390,33 @@ func mergePortMappings(app *appctypes.App, optPortMappings []kubecontainer.PortM } } -// setApp overrides the app's fields if any of them are specified in the -// container's spec. -func setApp(app *appctypes.App, c *api.Container, opts *kubecontainer.RunContainerOptions) error { - // Override the exec. +func verifyNonRoot(app *appctypes.App, ctx *api.SecurityContext) error { + if ctx != nil && ctx.RunAsNonRoot != nil && *ctx.RunAsNonRoot { + if ctx.RunAsUser != nil && *ctx.RunAsUser == 0 { + return fmt.Errorf("container's runAsUser breaks non-root policy") + } + if ctx.RunAsUser == nil && app.User == "0" { + return fmt.Errorf("container has no runAsUser and image will run as root") + } + } + return nil +} +func setSupplementaryGIDs(app *appctypes.App, podCtx *api.PodSecurityContext) { + if podCtx != nil { + app.SupplementaryGIDs = app.SupplementaryGIDs[:0] + for _, v := range podCtx.SupplementalGroups { + app.SupplementaryGIDs = append(app.SupplementaryGIDs, int(v)) + } + if podCtx.FSGroup != nil { + app.SupplementaryGIDs = append(app.SupplementaryGIDs, int(*podCtx.FSGroup)) + } + } +} + +// setApp merges the container spec with the image's manifest. +func setApp(app *appctypes.App, c *api.Container, opts *kubecontainer.RunContainerOptions, ctx *api.SecurityContext, podCtx *api.PodSecurityContext) error { + // Override the exec. if len(c.Command) > 0 { app.Exec = c.Command } @@ -404,11 +424,16 @@ func setApp(app *appctypes.App, c *api.Container, opts *kubecontainer.RunContain app.Exec = append(app.Exec, c.Args...) } - // TODO(yifan): Use non-root user in the future, see: - // https://github.com/coreos/rkt/issues/820 - app.User, app.Group = "0", "0" + // Set UID and GIDs. + if err := verifyNonRoot(app, ctx); err != nil { + return err + } + if ctx != nil && ctx.RunAsUser != nil { + app.User = strconv.Itoa(int(*ctx.RunAsUser)) + } + setSupplementaryGIDs(app, podCtx) - // Override the working directory. + // Set working directory. if len(c.WorkingDir) > 0 { app.WorkingDirectory = c.WorkingDir } @@ -419,8 +444,7 @@ func setApp(app *appctypes.App, c *api.Container, opts *kubecontainer.RunContain mergeEnv(app, opts.Envs) mergePortMappings(app, opts.PortMappings) - // Override isolators. - return setIsolators(app, c) + return setIsolators(app, c, ctx) } // makePodManifest transforms a kubelet pod spec to the rkt pod manifest. @@ -525,7 +549,8 @@ func (r *Runtime) newAppcRuntimeApp(pod *api.Pod, c api.Container, pullSecrets [ return nil, nil, err } - if err := setApp(imgManifest.App, &c, opts); err != nil { + ctx := securitycontext.DetermineEffectiveSecurityContext(pod, &c) + if err := setApp(imgManifest.App, &c, opts, ctx, pod.Spec.SecurityContext); err != nil { return nil, nil, err } diff --git a/pkg/securitycontext/provider.go b/pkg/securitycontext/provider.go index 028db9646c6..9bd5b16b62c 100644 --- a/pkg/securitycontext/provider.go +++ b/pkg/securitycontext/provider.go @@ -83,7 +83,7 @@ func (p SimpleSecurityContextProvider) ModifyHostConfig(pod *api.Pod, container } if effectiveSC.Capabilities != nil { - add, drop := makeCapabilites(effectiveSC.Capabilities.Add, effectiveSC.Capabilities.Drop) + add, drop := MakeCapabilities(effectiveSC.Capabilities.Add, effectiveSC.Capabilities.Drop) hostConfig.CapAdd = add hostConfig.CapDrop = drop } @@ -105,8 +105,8 @@ func modifySecurityOption(config []string, name, value string) []string { return config } -// makeCapabilites creates string slices from Capability slices -func makeCapabilites(capAdd []api.Capability, capDrop []api.Capability) ([]string, []string) { +// MakeCapabilities creates string slices from Capability slices +func MakeCapabilities(capAdd []api.Capability, capDrop []api.Capability) ([]string, []string) { var ( addCaps []string dropCaps []string From dda62129d12da2e510e8e07dc1a93c74ed1e6dc7 Mon Sep 17 00:00:00 2001 From: Yifan Gu Date: Fri, 8 Jan 2016 13:21:17 -0800 Subject: [PATCH 2/2] rkt: Add unit tests for setApp. --- pkg/kubelet/rkt/rkt_test.go | 271 +++++++++++++++++++++++++++++++++++- 1 file changed, 267 insertions(+), 4 deletions(-) diff --git a/pkg/kubelet/rkt/rkt_test.go b/pkg/kubelet/rkt/rkt_test.go index 3eca21e41bc..8822336387d 100644 --- a/pkg/kubelet/rkt/rkt_test.go +++ b/pkg/kubelet/rkt/rkt_test.go @@ -19,6 +19,7 @@ package rkt import ( "encoding/json" "fmt" + "sort" "testing" "time" @@ -26,6 +27,8 @@ import ( appctypes "github.com/appc/spec/schema/types" rktapi "github.com/coreos/rkt/api/v1alpha" "github.com/stretchr/testify/assert" + "k8s.io/kubernetes/pkg/api" + "k8s.io/kubernetes/pkg/api/resource" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" ) @@ -245,7 +248,7 @@ func TestCheckVersion(t *testing.T) { for i, tt := range tests { testCaseHint := fmt.Sprintf("test case #%d", i) err := r.checkVersion(tt.minimumRktBinVersion, tt.recommendedRktBinVersion, tt.minimumAppcVersion, tt.minimumRktApiVersion, tt.minimumSystemdVersion) - assert.Equal(t, err, tt.err, testCaseHint) + assert.Equal(t, tt.err, err, testCaseHint) if tt.calledGetInfo { assert.Equal(t, fr.called, []string{"GetInfo"}, testCaseHint) @@ -254,9 +257,9 @@ func TestCheckVersion(t *testing.T) { assert.Equal(t, fs.called, []string{"Version"}, testCaseHint) } if err == nil { - assert.Equal(t, r.binVersion.String(), fr.info.RktVersion, testCaseHint) - assert.Equal(t, r.appcVersion.String(), fr.info.AppcVersion, testCaseHint) - assert.Equal(t, r.apiVersion.String(), fr.info.ApiVersion, testCaseHint) + assert.Equal(t, fr.info.RktVersion, r.binVersion.String(), testCaseHint) + assert.Equal(t, fr.info.AppcVersion, r.appcVersion.String(), testCaseHint) + assert.Equal(t, fr.info.ApiVersion, r.apiVersion.String(), testCaseHint) } fr.CleanCalls() fs.CleanCalls() @@ -662,3 +665,263 @@ func TestGetPodStatus(t *testing.T) { fr.CleanCalls() } } + +func generateCapRetainIsolator(t *testing.T, caps ...string) appctypes.Isolator { + retain, err := appctypes.NewLinuxCapabilitiesRetainSet(caps...) + if err != nil { + t.Fatalf("Error generating cap retain isolator", err) + } + return retain.AsIsolator() +} + +func generateCapRevokeIsolator(t *testing.T, caps ...string) appctypes.Isolator { + revoke, err := appctypes.NewLinuxCapabilitiesRevokeSet(caps...) + if err != nil { + t.Fatalf("Error generating cap revoke isolator", err) + } + return revoke.AsIsolator() +} + +func generateCPUIsolator(t *testing.T, request, limit string) appctypes.Isolator { + cpu, err := appctypes.NewResourceCPUIsolator(request, limit) + if err != nil { + t.Fatalf("Error generating cpu resource isolator", err) + } + return cpu.AsIsolator() +} + +func generateMemoryIsolator(t *testing.T, request, limit string) appctypes.Isolator { + memory, err := appctypes.NewResourceMemoryIsolator(request, limit) + if err != nil { + t.Fatalf("Error generating memory resource isolator", err) + } + return memory.AsIsolator() +} + +func baseApp(t *testing.T) *appctypes.App { + return &appctypes.App{ + Exec: appctypes.Exec{"/bin/foo"}, + User: "0", + Group: "22", + SupplementaryGIDs: []int{4, 5, 6}, + WorkingDirectory: "/foo", + Environment: []appctypes.EnvironmentVariable{ + {"env-foo", "bar"}, + }, + MountPoints: []appctypes.MountPoint{ + {Name: *appctypes.MustACName("mnt-foo"), Path: "/mnt-foo", ReadOnly: false}, + }, + Ports: []appctypes.Port{ + {Name: *appctypes.MustACName("port-foo"), Protocol: "TCP", Port: 4242}, + }, + Isolators: []appctypes.Isolator{ + generateCapRetainIsolator(t, "CAP_SYS_ADMIN"), + generateCapRevokeIsolator(t, "CAP_NET_ADMIN"), + generateCPUIsolator(t, "100m", "200m"), + generateMemoryIsolator(t, "10M", "20M"), + }, + } +} + +type envByName []appctypes.EnvironmentVariable + +func (s envByName) Len() int { return len(s) } +func (s envByName) Less(i, j int) bool { return s[i].Name < s[j].Name } +func (s envByName) Swap(i, j int) { s[i], s[j] = s[j], s[i] } + +type mountsByName []appctypes.MountPoint + +func (s mountsByName) Len() int { return len(s) } +func (s mountsByName) Less(i, j int) bool { return s[i].Name < s[j].Name } +func (s mountsByName) Swap(i, j int) { s[i], s[j] = s[j], s[i] } + +type portsByName []appctypes.Port + +func (s portsByName) Len() int { return len(s) } +func (s portsByName) Less(i, j int) bool { return s[i].Name < s[j].Name } +func (s portsByName) Swap(i, j int) { s[i], s[j] = s[j], s[i] } + +type isolatorsByName []appctypes.Isolator + +func (s isolatorsByName) Len() int { return len(s) } +func (s isolatorsByName) Less(i, j int) bool { return s[i].Name < s[j].Name } +func (s isolatorsByName) Swap(i, j int) { s[i], s[j] = s[j], s[i] } + +func sortAppFields(app *appctypes.App) { + sort.Sort(envByName(app.Environment)) + sort.Sort(mountsByName(app.MountPoints)) + sort.Sort(portsByName(app.Ports)) + sort.Sort(isolatorsByName(app.Isolators)) +} + +func TestSetApp(t *testing.T) { + rootUser := int64(0) + nonRootUser := int64(42) + runAsNonRootTrue := true + fsgid := int64(3) + + tests := []struct { + container *api.Container + opts *kubecontainer.RunContainerOptions + ctx *api.SecurityContext + podCtx *api.PodSecurityContext + expect *appctypes.App + err error + }{ + // Nothing should change. + { + container: &api.Container{}, + opts: &kubecontainer.RunContainerOptions{}, + ctx: nil, + podCtx: nil, + expect: baseApp(t), + err: nil, + }, + + // error verifying non-root. + { + container: &api.Container{}, + opts: &kubecontainer.RunContainerOptions{}, + ctx: &api.SecurityContext{ + RunAsNonRoot: &runAsNonRootTrue, + RunAsUser: &rootUser, + }, + podCtx: nil, + expect: nil, + err: fmt.Errorf("container has no runAsUser and image will run as root"), + }, + + // app should be changed. + { + container: &api.Container{ + Command: []string{"/bin/bar"}, + Args: []string{"hello", "world"}, + WorkingDir: "/tmp", + Resources: api.ResourceRequirements{ + Limits: api.ResourceList{"cpu": resource.MustParse("50m"), "memory": resource.MustParse("50M")}, + Requests: api.ResourceList{"cpu": resource.MustParse("5m"), "memory": resource.MustParse("5M")}, + }, + }, + opts: &kubecontainer.RunContainerOptions{ + Envs: []kubecontainer.EnvVar{ + {Name: "env-bar", Value: "foo"}, + }, + Mounts: []kubecontainer.Mount{ + {Name: "mnt-bar", ContainerPath: "/mnt-bar", ReadOnly: true}, + }, + PortMappings: []kubecontainer.PortMapping{ + {Name: "port-bar", Protocol: api.ProtocolTCP, ContainerPort: 1234}, + }, + }, + ctx: &api.SecurityContext{ + Capabilities: &api.Capabilities{ + Add: []api.Capability{"CAP_SYS_CHROOT", "CAP_SYS_BOOT"}, + Drop: []api.Capability{"CAP_SETUID", "CAP_SETGID"}, + }, + RunAsUser: &nonRootUser, + RunAsNonRoot: &runAsNonRootTrue, + }, + podCtx: &api.PodSecurityContext{ + SupplementalGroups: []int64{1, 2}, + FSGroup: &fsgid, + }, + expect: &appctypes.App{ + Exec: appctypes.Exec{"/bin/bar", "hello", "world"}, + User: "42", + Group: "22", + SupplementaryGIDs: []int{1, 2, 3}, + WorkingDirectory: "/tmp", + Environment: []appctypes.EnvironmentVariable{ + {"env-foo", "bar"}, + {"env-bar", "foo"}, + }, + MountPoints: []appctypes.MountPoint{ + {Name: *appctypes.MustACName("mnt-foo"), Path: "/mnt-foo", ReadOnly: false}, + {Name: *appctypes.MustACName("mnt-bar"), Path: "/mnt-bar", ReadOnly: true}, + }, + Ports: []appctypes.Port{ + {Name: *appctypes.MustACName("port-foo"), Protocol: "TCP", Port: 4242}, + {Name: *appctypes.MustACName("port-bar"), Protocol: "TCP", Port: 1234}, + }, + Isolators: []appctypes.Isolator{ + generateCapRetainIsolator(t, "CAP_SYS_CHROOT", "CAP_SYS_BOOT"), + generateCapRevokeIsolator(t, "CAP_SETUID", "CAP_SETGID"), + generateCPUIsolator(t, "5m", "50m"), + generateMemoryIsolator(t, "5M", "50M"), + }, + }, + }, + + // app should be changed. (env, mounts, ports, are overrided). + { + container: &api.Container{ + Command: []string{"/bin/bar"}, + Args: []string{"hello", "world"}, + WorkingDir: "/tmp", + Resources: api.ResourceRequirements{ + Limits: api.ResourceList{"cpu": resource.MustParse("50m"), "memory": resource.MustParse("50M")}, + Requests: api.ResourceList{"cpu": resource.MustParse("5m"), "memory": resource.MustParse("5M")}, + }, + }, + opts: &kubecontainer.RunContainerOptions{ + Envs: []kubecontainer.EnvVar{ + {Name: "env-foo", Value: "foo"}, + }, + Mounts: []kubecontainer.Mount{ + {Name: "mnt-foo", ContainerPath: "/mnt-bar", ReadOnly: true}, + }, + PortMappings: []kubecontainer.PortMapping{ + {Name: "port-foo", Protocol: api.ProtocolTCP, ContainerPort: 1234}, + }, + }, + ctx: &api.SecurityContext{ + Capabilities: &api.Capabilities{ + Add: []api.Capability{"CAP_SYS_CHROOT", "CAP_SYS_BOOT"}, + Drop: []api.Capability{"CAP_SETUID", "CAP_SETGID"}, + }, + RunAsUser: &nonRootUser, + RunAsNonRoot: &runAsNonRootTrue, + }, + podCtx: &api.PodSecurityContext{ + SupplementalGroups: []int64{1, 2}, + FSGroup: &fsgid, + }, + expect: &appctypes.App{ + Exec: appctypes.Exec{"/bin/bar", "hello", "world"}, + User: "42", + Group: "22", + SupplementaryGIDs: []int{1, 2, 3}, + WorkingDirectory: "/tmp", + Environment: []appctypes.EnvironmentVariable{ + {"env-foo", "foo"}, + }, + MountPoints: []appctypes.MountPoint{ + {Name: *appctypes.MustACName("mnt-foo"), Path: "/mnt-bar", ReadOnly: true}, + }, + Ports: []appctypes.Port{ + {Name: *appctypes.MustACName("port-foo"), Protocol: "TCP", Port: 1234}, + }, + Isolators: []appctypes.Isolator{ + generateCapRetainIsolator(t, "CAP_SYS_CHROOT", "CAP_SYS_BOOT"), + generateCapRevokeIsolator(t, "CAP_SETUID", "CAP_SETGID"), + generateCPUIsolator(t, "5m", "50m"), + generateMemoryIsolator(t, "5M", "50M"), + }, + }, + }, + } + + for i, tt := range tests { + testCaseHint := fmt.Sprintf("test case #%d", i) + app := baseApp(t) + err := setApp(app, tt.container, tt.opts, tt.ctx, tt.podCtx) + if err == nil && tt.err != nil || err != nil && tt.err == nil { + t.Errorf("%s: expect %v, saw %v", testCaseHint, tt.err, err) + } + if err == nil { + sortAppFields(tt.expect) + sortAppFields(app) + assert.Equal(t, tt.expect, app, testCaseHint) + } + } +}