Merge pull request #19428 from yifan-gu/rkt_isolator

Auto commit by PR queue bot
This commit is contained in:
k8s-merge-robot 2016-01-16 08:48:38 -08:00
commit ffed4e1a3a
4 changed files with 384 additions and 114 deletions

View File

@ -16,13 +16,6 @@ limitations under the License.
package rkt
import (
"fmt"
"strings"
"k8s.io/kubernetes/pkg/api"
)
// TODO(yifan): Export this to higher level package.
const (
CAP_CHOWN = iota
@ -107,22 +100,11 @@ var capabilityList = map[int]string{
CAP_AUDIT_READ: "CAP_AUDIT_READ",
}
// getAllCapabilities returns the capability list with all capabilities.
func getAllCapabilities() string {
// allCapabilities returns the capability list with all capabilities.
func allCapabilities() []string {
var capabilities []string
for _, cap := range capabilityList {
capabilities = append(capabilities, fmt.Sprintf("%q", cap))
capabilities = append(capabilities, cap)
}
return strings.Join(capabilities, ",")
}
// TODO(yifan): This assumes that api.Capability has the form of
// "CAP_SYS_ADMIN". We need to have a formal definition for
// capabilities.
func getCapabilities(caps []api.Capability) string {
var capList []string
for _, cap := range caps {
capList = append(capList, fmt.Sprintf("%q", cap))
}
return strings.Join(capList, ",")
return capabilities
}

View File

@ -226,76 +226,42 @@ func makePodServiceFileName(uid types.UID) string {
return fmt.Sprintf("%s_%s.service", kubernetesUnitPrefix, uid)
}
type resource struct {
limit string
request string
}
// setIsolators sets the apps' isolators according to the security context and resource spec.
func setIsolators(app *appctypes.App, c *api.Container, ctx *api.SecurityContext) error {
var isolators []appctypes.Isolator
// rawValue converts a string to *json.RawMessage
func rawValue(value string) *json.RawMessage {
msg := json.RawMessage(value)
return &msg
}
// Capabilities isolators.
if ctx != nil {
var addCaps, dropCaps []string
// rawValue converts the request, limit to *json.RawMessage
func rawRequestLimit(request, limit string) *json.RawMessage {
if request == "" {
request = limit
}
if limit == "" {
limit = request
}
return rawValue(fmt.Sprintf(`{"request":%q,"limit":%q}`, request, limit))
}
// setIsolators overrides the isolators of the pod manifest if necessary.
// TODO need an apply config in security context for rkt
func setIsolators(app *appctypes.App, c *api.Container) error {
hasCapRequests := securitycontext.HasCapabilitiesRequest(c)
if hasCapRequests || len(c.Resources.Limits) > 0 || len(c.Resources.Requests) > 0 {
app.Isolators = []appctypes.Isolator{}
}
// Retained capabilities/privileged.
privileged := false
if c.SecurityContext != nil && c.SecurityContext.Privileged != nil {
privileged = *c.SecurityContext.Privileged
}
var addCaps string
if privileged {
addCaps = getAllCapabilities()
} else {
if hasCapRequests {
addCaps = getCapabilities(c.SecurityContext.Capabilities.Add)
if ctx.Capabilities != nil {
addCaps, dropCaps = securitycontext.MakeCapabilities(ctx.Capabilities.Add, ctx.Capabilities.Drop)
}
if ctx.Privileged != nil && *ctx.Privileged {
addCaps, dropCaps = allCapabilities(), []string{}
}
if len(addCaps) > 0 {
set, err := appctypes.NewLinuxCapabilitiesRetainSet(addCaps...)
if err != nil {
return err
}
isolators = append(isolators, set.AsIsolator())
}
if len(dropCaps) > 0 {
set, err := appctypes.NewLinuxCapabilitiesRevokeSet(dropCaps...)
if err != nil {
return err
}
isolators = append(isolators, set.AsIsolator())
}
}
if len(addCaps) > 0 {
// TODO(yifan): Replace with constructor, see:
// https://github.com/appc/spec/issues/268
isolator := appctypes.Isolator{
Name: "os/linux/capabilities-retain-set",
ValueRaw: rawValue(fmt.Sprintf(`{"set":[%s]}`, addCaps)),
}
app.Isolators = append(app.Isolators, isolator)
// Resources isolators.
type resource struct {
limit string
request string
}
// Removed capabilities.
var dropCaps string
if hasCapRequests {
dropCaps = getCapabilities(c.SecurityContext.Capabilities.Drop)
}
if len(dropCaps) > 0 {
// TODO(yifan): Replace with constructor, see:
// https://github.com/appc/spec/issues/268
isolator := appctypes.Isolator{
Name: "os/linux/capabilities-remove-set",
ValueRaw: rawValue(fmt.Sprintf(`{"set":[%s]}`, dropCaps)),
}
app.Isolators = append(app.Isolators, isolator)
}
// Resources.
resources := make(map[api.ResourceName]resource)
for name, quantity := range c.Resources.Limits {
resources[name] = resource{limit: quantity.String()}
@ -308,27 +274,59 @@ func setIsolators(app *appctypes.App, c *api.Container) error {
r.request = quantity.String()
resources[name] = r
}
var acName appctypes.ACIdentifier
for name, res := range resources {
switch name {
case api.ResourceCPU:
acName = "resource/cpu"
cpu, err := appctypes.NewResourceCPUIsolator(res.request, res.limit)
if err != nil {
return err
}
isolators = append(isolators, cpu.AsIsolator())
case api.ResourceMemory:
acName = "resource/memory"
memory, err := appctypes.NewResourceMemoryIsolator(res.request, res.limit)
if err != nil {
return err
}
isolators = append(isolators, memory.AsIsolator())
default:
return fmt.Errorf("resource type not supported: %v", name)
}
// TODO(yifan): Replace with constructor, see:
// https://github.com/appc/spec/issues/268
isolator := appctypes.Isolator{
Name: acName,
ValueRaw: rawRequestLimit(res.request, res.limit),
}
app.Isolators = append(app.Isolators, isolator)
}
mergeIsolators(app, isolators)
return nil
}
// mergeIsolators replaces the app.Isolators with isolators.
func mergeIsolators(app *appctypes.App, isolators []appctypes.Isolator) {
for _, is := range isolators {
found := false
for j, js := range app.Isolators {
if is.Name.Equals(js.Name) {
switch is.Name {
case appctypes.LinuxCapabilitiesRetainSetName:
// TODO(yifan): More fine grain merge for capability set instead of override.
fallthrough
case appctypes.LinuxCapabilitiesRevokeSetName:
fallthrough
case appctypes.ResourceCPUName:
fallthrough
case appctypes.ResourceMemoryName:
app.Isolators[j] = is
default:
panic(fmt.Sprintf("unexpected isolator name: %v", is.Name))
}
found = true
break
}
}
if !found {
app.Isolators = append(app.Isolators, is)
}
}
}
// mergeEnv merges the optEnv with the image's environments.
// The environments defined in the image will be overridden by
// the ones with the same name in optEnv.
@ -393,11 +391,33 @@ func mergePortMappings(app *appctypes.App, optPortMappings []kubecontainer.PortM
}
}
// setApp overrides the app's fields if any of them are specified in the
// container's spec.
func setApp(app *appctypes.App, c *api.Container, opts *kubecontainer.RunContainerOptions) error {
// Override the exec.
func verifyNonRoot(app *appctypes.App, ctx *api.SecurityContext) error {
if ctx != nil && ctx.RunAsNonRoot != nil && *ctx.RunAsNonRoot {
if ctx.RunAsUser != nil && *ctx.RunAsUser == 0 {
return fmt.Errorf("container's runAsUser breaks non-root policy")
}
if ctx.RunAsUser == nil && app.User == "0" {
return fmt.Errorf("container has no runAsUser and image will run as root")
}
}
return nil
}
func setSupplementaryGIDs(app *appctypes.App, podCtx *api.PodSecurityContext) {
if podCtx != nil {
app.SupplementaryGIDs = app.SupplementaryGIDs[:0]
for _, v := range podCtx.SupplementalGroups {
app.SupplementaryGIDs = append(app.SupplementaryGIDs, int(v))
}
if podCtx.FSGroup != nil {
app.SupplementaryGIDs = append(app.SupplementaryGIDs, int(*podCtx.FSGroup))
}
}
}
// setApp merges the container spec with the image's manifest.
func setApp(app *appctypes.App, c *api.Container, opts *kubecontainer.RunContainerOptions, ctx *api.SecurityContext, podCtx *api.PodSecurityContext) error {
// Override the exec.
if len(c.Command) > 0 {
app.Exec = c.Command
}
@ -405,11 +425,16 @@ func setApp(app *appctypes.App, c *api.Container, opts *kubecontainer.RunContain
app.Exec = append(app.Exec, c.Args...)
}
// TODO(yifan): Use non-root user in the future, see:
// https://github.com/coreos/rkt/issues/820
app.User, app.Group = "0", "0"
// Set UID and GIDs.
if err := verifyNonRoot(app, ctx); err != nil {
return err
}
if ctx != nil && ctx.RunAsUser != nil {
app.User = strconv.Itoa(int(*ctx.RunAsUser))
}
setSupplementaryGIDs(app, podCtx)
// Override the working directory.
// Set working directory.
if len(c.WorkingDir) > 0 {
app.WorkingDirectory = c.WorkingDir
}
@ -420,8 +445,7 @@ func setApp(app *appctypes.App, c *api.Container, opts *kubecontainer.RunContain
mergeEnv(app, opts.Envs)
mergePortMappings(app, opts.PortMappings)
// Override isolators.
return setIsolators(app, c)
return setIsolators(app, c, ctx)
}
// makePodManifest transforms a kubelet pod spec to the rkt pod manifest.
@ -526,7 +550,8 @@ func (r *Runtime) newAppcRuntimeApp(pod *api.Pod, c api.Container, pullSecrets [
return nil, nil, err
}
if err := setApp(imgManifest.App, &c, opts); err != nil {
ctx := securitycontext.DetermineEffectiveSecurityContext(pod, &c)
if err := setApp(imgManifest.App, &c, opts, ctx, pod.Spec.SecurityContext); err != nil {
return nil, nil, err
}

View File

@ -19,6 +19,7 @@ package rkt
import (
"encoding/json"
"fmt"
"sort"
"testing"
"time"
@ -26,6 +27,8 @@ import (
appctypes "github.com/appc/spec/schema/types"
rktapi "github.com/coreos/rkt/api/v1alpha"
"github.com/stretchr/testify/assert"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/resource"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
)
@ -245,7 +248,7 @@ func TestCheckVersion(t *testing.T) {
for i, tt := range tests {
testCaseHint := fmt.Sprintf("test case #%d", i)
err := r.checkVersion(tt.minimumRktBinVersion, tt.recommendedRktBinVersion, tt.minimumAppcVersion, tt.minimumRktApiVersion, tt.minimumSystemdVersion)
assert.Equal(t, err, tt.err, testCaseHint)
assert.Equal(t, tt.err, err, testCaseHint)
if tt.calledGetInfo {
assert.Equal(t, fr.called, []string{"GetInfo"}, testCaseHint)
@ -254,9 +257,9 @@ func TestCheckVersion(t *testing.T) {
assert.Equal(t, fs.called, []string{"Version"}, testCaseHint)
}
if err == nil {
assert.Equal(t, r.binVersion.String(), fr.info.RktVersion, testCaseHint)
assert.Equal(t, r.appcVersion.String(), fr.info.AppcVersion, testCaseHint)
assert.Equal(t, r.apiVersion.String(), fr.info.ApiVersion, testCaseHint)
assert.Equal(t, fr.info.RktVersion, r.binVersion.String(), testCaseHint)
assert.Equal(t, fr.info.AppcVersion, r.appcVersion.String(), testCaseHint)
assert.Equal(t, fr.info.ApiVersion, r.apiVersion.String(), testCaseHint)
}
fr.CleanCalls()
fs.CleanCalls()
@ -662,3 +665,263 @@ func TestGetPodStatus(t *testing.T) {
fr.CleanCalls()
}
}
func generateCapRetainIsolator(t *testing.T, caps ...string) appctypes.Isolator {
retain, err := appctypes.NewLinuxCapabilitiesRetainSet(caps...)
if err != nil {
t.Fatalf("Error generating cap retain isolator", err)
}
return retain.AsIsolator()
}
func generateCapRevokeIsolator(t *testing.T, caps ...string) appctypes.Isolator {
revoke, err := appctypes.NewLinuxCapabilitiesRevokeSet(caps...)
if err != nil {
t.Fatalf("Error generating cap revoke isolator", err)
}
return revoke.AsIsolator()
}
func generateCPUIsolator(t *testing.T, request, limit string) appctypes.Isolator {
cpu, err := appctypes.NewResourceCPUIsolator(request, limit)
if err != nil {
t.Fatalf("Error generating cpu resource isolator", err)
}
return cpu.AsIsolator()
}
func generateMemoryIsolator(t *testing.T, request, limit string) appctypes.Isolator {
memory, err := appctypes.NewResourceMemoryIsolator(request, limit)
if err != nil {
t.Fatalf("Error generating memory resource isolator", err)
}
return memory.AsIsolator()
}
func baseApp(t *testing.T) *appctypes.App {
return &appctypes.App{
Exec: appctypes.Exec{"/bin/foo"},
User: "0",
Group: "22",
SupplementaryGIDs: []int{4, 5, 6},
WorkingDirectory: "/foo",
Environment: []appctypes.EnvironmentVariable{
{"env-foo", "bar"},
},
MountPoints: []appctypes.MountPoint{
{Name: *appctypes.MustACName("mnt-foo"), Path: "/mnt-foo", ReadOnly: false},
},
Ports: []appctypes.Port{
{Name: *appctypes.MustACName("port-foo"), Protocol: "TCP", Port: 4242},
},
Isolators: []appctypes.Isolator{
generateCapRetainIsolator(t, "CAP_SYS_ADMIN"),
generateCapRevokeIsolator(t, "CAP_NET_ADMIN"),
generateCPUIsolator(t, "100m", "200m"),
generateMemoryIsolator(t, "10M", "20M"),
},
}
}
type envByName []appctypes.EnvironmentVariable
func (s envByName) Len() int { return len(s) }
func (s envByName) Less(i, j int) bool { return s[i].Name < s[j].Name }
func (s envByName) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
type mountsByName []appctypes.MountPoint
func (s mountsByName) Len() int { return len(s) }
func (s mountsByName) Less(i, j int) bool { return s[i].Name < s[j].Name }
func (s mountsByName) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
type portsByName []appctypes.Port
func (s portsByName) Len() int { return len(s) }
func (s portsByName) Less(i, j int) bool { return s[i].Name < s[j].Name }
func (s portsByName) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
type isolatorsByName []appctypes.Isolator
func (s isolatorsByName) Len() int { return len(s) }
func (s isolatorsByName) Less(i, j int) bool { return s[i].Name < s[j].Name }
func (s isolatorsByName) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func sortAppFields(app *appctypes.App) {
sort.Sort(envByName(app.Environment))
sort.Sort(mountsByName(app.MountPoints))
sort.Sort(portsByName(app.Ports))
sort.Sort(isolatorsByName(app.Isolators))
}
func TestSetApp(t *testing.T) {
rootUser := int64(0)
nonRootUser := int64(42)
runAsNonRootTrue := true
fsgid := int64(3)
tests := []struct {
container *api.Container
opts *kubecontainer.RunContainerOptions
ctx *api.SecurityContext
podCtx *api.PodSecurityContext
expect *appctypes.App
err error
}{
// Nothing should change.
{
container: &api.Container{},
opts: &kubecontainer.RunContainerOptions{},
ctx: nil,
podCtx: nil,
expect: baseApp(t),
err: nil,
},
// error verifying non-root.
{
container: &api.Container{},
opts: &kubecontainer.RunContainerOptions{},
ctx: &api.SecurityContext{
RunAsNonRoot: &runAsNonRootTrue,
RunAsUser: &rootUser,
},
podCtx: nil,
expect: nil,
err: fmt.Errorf("container has no runAsUser and image will run as root"),
},
// app should be changed.
{
container: &api.Container{
Command: []string{"/bin/bar"},
Args: []string{"hello", "world"},
WorkingDir: "/tmp",
Resources: api.ResourceRequirements{
Limits: api.ResourceList{"cpu": resource.MustParse("50m"), "memory": resource.MustParse("50M")},
Requests: api.ResourceList{"cpu": resource.MustParse("5m"), "memory": resource.MustParse("5M")},
},
},
opts: &kubecontainer.RunContainerOptions{
Envs: []kubecontainer.EnvVar{
{Name: "env-bar", Value: "foo"},
},
Mounts: []kubecontainer.Mount{
{Name: "mnt-bar", ContainerPath: "/mnt-bar", ReadOnly: true},
},
PortMappings: []kubecontainer.PortMapping{
{Name: "port-bar", Protocol: api.ProtocolTCP, ContainerPort: 1234},
},
},
ctx: &api.SecurityContext{
Capabilities: &api.Capabilities{
Add: []api.Capability{"CAP_SYS_CHROOT", "CAP_SYS_BOOT"},
Drop: []api.Capability{"CAP_SETUID", "CAP_SETGID"},
},
RunAsUser: &nonRootUser,
RunAsNonRoot: &runAsNonRootTrue,
},
podCtx: &api.PodSecurityContext{
SupplementalGroups: []int64{1, 2},
FSGroup: &fsgid,
},
expect: &appctypes.App{
Exec: appctypes.Exec{"/bin/bar", "hello", "world"},
User: "42",
Group: "22",
SupplementaryGIDs: []int{1, 2, 3},
WorkingDirectory: "/tmp",
Environment: []appctypes.EnvironmentVariable{
{"env-foo", "bar"},
{"env-bar", "foo"},
},
MountPoints: []appctypes.MountPoint{
{Name: *appctypes.MustACName("mnt-foo"), Path: "/mnt-foo", ReadOnly: false},
{Name: *appctypes.MustACName("mnt-bar"), Path: "/mnt-bar", ReadOnly: true},
},
Ports: []appctypes.Port{
{Name: *appctypes.MustACName("port-foo"), Protocol: "TCP", Port: 4242},
{Name: *appctypes.MustACName("port-bar"), Protocol: "TCP", Port: 1234},
},
Isolators: []appctypes.Isolator{
generateCapRetainIsolator(t, "CAP_SYS_CHROOT", "CAP_SYS_BOOT"),
generateCapRevokeIsolator(t, "CAP_SETUID", "CAP_SETGID"),
generateCPUIsolator(t, "5m", "50m"),
generateMemoryIsolator(t, "5M", "50M"),
},
},
},
// app should be changed. (env, mounts, ports, are overrided).
{
container: &api.Container{
Command: []string{"/bin/bar"},
Args: []string{"hello", "world"},
WorkingDir: "/tmp",
Resources: api.ResourceRequirements{
Limits: api.ResourceList{"cpu": resource.MustParse("50m"), "memory": resource.MustParse("50M")},
Requests: api.ResourceList{"cpu": resource.MustParse("5m"), "memory": resource.MustParse("5M")},
},
},
opts: &kubecontainer.RunContainerOptions{
Envs: []kubecontainer.EnvVar{
{Name: "env-foo", Value: "foo"},
},
Mounts: []kubecontainer.Mount{
{Name: "mnt-foo", ContainerPath: "/mnt-bar", ReadOnly: true},
},
PortMappings: []kubecontainer.PortMapping{
{Name: "port-foo", Protocol: api.ProtocolTCP, ContainerPort: 1234},
},
},
ctx: &api.SecurityContext{
Capabilities: &api.Capabilities{
Add: []api.Capability{"CAP_SYS_CHROOT", "CAP_SYS_BOOT"},
Drop: []api.Capability{"CAP_SETUID", "CAP_SETGID"},
},
RunAsUser: &nonRootUser,
RunAsNonRoot: &runAsNonRootTrue,
},
podCtx: &api.PodSecurityContext{
SupplementalGroups: []int64{1, 2},
FSGroup: &fsgid,
},
expect: &appctypes.App{
Exec: appctypes.Exec{"/bin/bar", "hello", "world"},
User: "42",
Group: "22",
SupplementaryGIDs: []int{1, 2, 3},
WorkingDirectory: "/tmp",
Environment: []appctypes.EnvironmentVariable{
{"env-foo", "foo"},
},
MountPoints: []appctypes.MountPoint{
{Name: *appctypes.MustACName("mnt-foo"), Path: "/mnt-bar", ReadOnly: true},
},
Ports: []appctypes.Port{
{Name: *appctypes.MustACName("port-foo"), Protocol: "TCP", Port: 1234},
},
Isolators: []appctypes.Isolator{
generateCapRetainIsolator(t, "CAP_SYS_CHROOT", "CAP_SYS_BOOT"),
generateCapRevokeIsolator(t, "CAP_SETUID", "CAP_SETGID"),
generateCPUIsolator(t, "5m", "50m"),
generateMemoryIsolator(t, "5M", "50M"),
},
},
},
}
for i, tt := range tests {
testCaseHint := fmt.Sprintf("test case #%d", i)
app := baseApp(t)
err := setApp(app, tt.container, tt.opts, tt.ctx, tt.podCtx)
if err == nil && tt.err != nil || err != nil && tt.err == nil {
t.Errorf("%s: expect %v, saw %v", testCaseHint, tt.err, err)
}
if err == nil {
sortAppFields(tt.expect)
sortAppFields(app)
assert.Equal(t, tt.expect, app, testCaseHint)
}
}
}

View File

@ -83,7 +83,7 @@ func (p SimpleSecurityContextProvider) ModifyHostConfig(pod *api.Pod, container
}
if effectiveSC.Capabilities != nil {
add, drop := makeCapabilites(effectiveSC.Capabilities.Add, effectiveSC.Capabilities.Drop)
add, drop := MakeCapabilities(effectiveSC.Capabilities.Add, effectiveSC.Capabilities.Drop)
hostConfig.CapAdd = add
hostConfig.CapDrop = drop
}
@ -105,8 +105,8 @@ func modifySecurityOption(config []string, name, value string) []string {
return config
}
// makeCapabilites creates string slices from Capability slices
func makeCapabilites(capAdd []api.Capability, capDrop []api.Capability) ([]string, []string) {
// MakeCapabilities creates string slices from Capability slices
func MakeCapabilities(capAdd []api.Capability, capDrop []api.Capability) ([]string, []string) {
var (
addCaps []string
dropCaps []string