rkt: Support alternate stage1s via annotation

This provides a basic implementation for setting a stage1 on a per-pod
basis via an annotation.

It's possible this feature should be gated behind additional knobs, such
as a kubelet flag to filter allowed stage1s, or a check akin to what
privileged gets in the apiserver.
Currently, it checks `AllowPrivileged`, as a means to let people disable
this feature, though overloading it as stage1 and privileged isn't
ideal.
This commit is contained in:
Euan Kemp 2016-04-26 15:20:06 -07:00 committed by Euan Kemp
parent dc236d926d
commit 136da158c5
3 changed files with 197 additions and 20 deletions

View File

@ -14,7 +14,6 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
// Generated via `mockgen k8s.io/kubernetes/pkg/kubelet/rkt VolumeGetter > mock_rkt/mock_volume_getter.go`
// Edited to include required boilerplate
// Source: k8s.io/kubernetes/pkg/kubelet/rkt (interfaces: VolumeGetter)

View File

@ -41,6 +41,7 @@ import (
"golang.org/x/net/context"
"google.golang.org/grpc"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/capabilities"
"k8s.io/kubernetes/pkg/client/record"
"k8s.io/kubernetes/pkg/credentialprovider"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
@ -89,7 +90,18 @@ const (
k8sRktContainerHashAnno = "rkt.kubernetes.io/container-hash"
k8sRktRestartCountAnno = "rkt.kubernetes.io/restart-count"
k8sRktTerminationMessagePathAnno = "rkt.kubernetes.io/termination-message-path"
dockerPrefix = "docker://"
// TODO(euank): This has significant security concerns as a stage1 image is
// effectively root.
// Furthermore, this (using an annotation) is a hack to pass an extra
// non-portable argument in. It should not be relied on to be stable.
// In the future, this might be subsumed by a first-class api object, or by a
// kitchen-sink params object (#17064).
// See discussion in #23944
// Also, do we want more granularity than path-at-the-kubelet-level and
// image/name-at-the-pod-level?
k8sRktStage1NameAnno = "rkt.alpha.kubernetes.io/stage1-name-override"
dockerPrefix = "docker://"
authDir = "auth.d"
dockerAuthTemplate = `{"rktKind":"dockerAuth","rktVersion":"v1","registries":[%q],"credentials":{"user":%q,"password":%q}}`
@ -130,7 +142,7 @@ type Runtime struct {
runtimeHelper kubecontainer.RuntimeHelper
recorder record.EventRecorder
livenessManager proberesults.Manager
volumeGetter volumeGetter
volumeGetter VolumeGetter
imagePuller kubecontainer.ImagePuller
runner kubecontainer.HandlerRunner
execer utilexec.Interface
@ -154,7 +166,7 @@ type Runtime struct {
var _ kubecontainer.Runtime = &Runtime{}
// TODO(yifan): Remove this when volumeManager is moved to separate package.
type volumeGetter interface {
type VolumeGetter interface {
GetVolumes(podUID types.UID) (kubecontainer.VolumeMap, bool)
}
@ -181,7 +193,7 @@ func New(
containerRefManager *kubecontainer.RefManager,
podGetter podGetter,
livenessManager proberesults.Manager,
volumeGetter volumeGetter,
volumeGetter VolumeGetter,
httpClient kubetypes.HttpGetter,
networkPlugin network.NetworkPlugin,
hairpinMode bool,
@ -264,10 +276,8 @@ func New(
}
func (r *Runtime) buildCommand(args ...string) *exec.Cmd {
cmd := exec.Command(r.config.Path)
cmd.Args = append(cmd.Args, r.config.buildGlobalOptions()...)
cmd.Args = append(cmd.Args, args...)
return cmd
allArgs := append(r.config.buildGlobalOptions(), args...)
return exec.Command(r.config.Path, allArgs...)
}
// convertToACName converts a string into ACName.
@ -285,7 +295,8 @@ func (r *Runtime) RunCommand(args ...string) ([]string, error) {
var stdout, stderr bytes.Buffer
cmd := r.buildCommand(args...)
cmd.Stdout, cmd.Stderr = &stdout, &stderr
cmd.Stdout = &stdout
cmd.Stderr = &stderr
if err := cmd.Run(); err != nil {
return nil, fmt.Errorf("failed to run %v: %v\nstdout: %v\nstderr: %v", args, err, stdout.String(), stderr.String())
}
@ -595,14 +606,19 @@ func (r *Runtime) makePodManifest(pod *api.Pod, pullSecrets []api.Secret) (*appc
}
}
requiresPrivileged := false
manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktKubeletAnno), k8sRktKubeletAnnoValue)
manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktUIDAnno), string(pod.UID))
manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktNameAnno), pod.Name)
manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktNamespaceAnno), pod.Namespace)
manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktRestartCountAnno), strconv.Itoa(restartCount))
if stage1Name, ok := pod.Annotations[k8sRktStage1NameAnno]; ok {
requiresPrivileged = true
manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktStage1NameAnno), stage1Name)
}
for _, c := range pod.Spec.Containers {
err := r.newAppcRuntimeApp(pod, c, pullSecrets, manifest)
err := r.newAppcRuntimeApp(pod, c, requiresPrivileged, pullSecrets, manifest)
if err != nil {
return nil, err
}
@ -707,7 +723,11 @@ func (r *Runtime) makeContainerLogMount(opts *kubecontainer.RunContainerOptions,
return &mnt, nil
}
func (r *Runtime) newAppcRuntimeApp(pod *api.Pod, c api.Container, pullSecrets []api.Secret, manifest *appcschema.PodManifest) error {
func (r *Runtime) newAppcRuntimeApp(pod *api.Pod, c api.Container, requiresPrivileged bool, pullSecrets []api.Secret, manifest *appcschema.PodManifest) error {
if requiresPrivileged && !capabilities.Get().AllowPrivileged {
return fmt.Errorf("cannot make %q: running a custom stage1 requires a privileged security context", format.Pod(pod))
}
if err, _ := r.imagePuller.PullImage(pod, &c, pullSecrets); err != nil {
return nil
}
@ -950,6 +970,27 @@ func (r *Runtime) cleanupPodNetwork(pod *api.Pod) error {
return teardownErr
}
// preparePodArgs assembles the argument list for 'rkt prepare', the command
// that is run to obtain the rkt UUID for the pod described by manifestFileName.
//
// At most one stage1 flag is appended, chosen in this order of precedence:
//  1. the stage1 name annotation on the manifest (--stage1-name)
//  2. the kubelet-configured stage1 image (--stage1-path)
//  3. nothing, leaving rkt to use whatever default it was compiled with
func (r *Runtime) preparePodArgs(manifest *appcschema.PodManifest, manifestFileName string) []string {
	args := []string{"prepare", "--quiet", "--pod-manifest", manifestFileName}

	// The per-pod annotation wins over the kubelet-level configuration.
	if name, found := manifest.Annotations.Get(k8sRktStage1NameAnno); found {
		return append(args, "--stage1-name="+name)
	}
	if path := r.config.Stage1Image; path != "" {
		return append(args, "--stage1-path="+path)
	}
	return args
}
// preparePod will:
//
// 1. Invoke 'rkt prepare' to prepare the pod, and get the rkt pod uuid.
@ -958,7 +999,7 @@ func (r *Runtime) cleanupPodNetwork(pod *api.Pod) error {
// On success, it will return a string that represents name of the unit file
// and the runtime pod.
func (r *Runtime) preparePod(pod *api.Pod, pullSecrets []api.Secret, netnsName string) (string, *kubecontainer.Pod, error) {
// Generate the pod manifest from the pod spec.
// Generate the appc pod manifest from the k8s pod spec.
manifest, err := r.makePodManifest(pod, pullSecrets)
if err != nil {
return "", nil, err
@ -986,12 +1027,8 @@ func (r *Runtime) preparePod(pod *api.Pod, pullSecrets []api.Secret, netnsName s
return "", nil, err
}
// Run 'rkt prepare' to get the rkt UUID.
cmds := []string{"prepare", "--quiet", "--pod-manifest", manifestFile.Name()}
if r.config.Stage1Image != "" {
cmds = append(cmds, "--stage1-path", r.config.Stage1Image)
}
output, err := r.cli.RunCommand(cmds...)
prepareCmd := r.preparePodArgs(manifest, manifestFile.Name())
output, err := r.RunCommand(prepareCmd...)
if err != nil {
return "", nil, err
}
@ -1809,7 +1846,7 @@ func (r *Runtime) ExecInContainer(containerID kubecontainer.ContainerID, cmd []s
if err != nil {
return err
}
args := append([]string{}, "enter", fmt.Sprintf("--app=%s", id.appName), id.uuid)
args := []string{"enter", fmt.Sprintf("--app=%s", id.appName), id.uuid}
args = append(args, cmd...)
command := r.buildCommand(args...)

View File

@ -36,8 +36,10 @@ import (
kubetesting "k8s.io/kubernetes/pkg/kubelet/container/testing"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/kubelet/rkt/mock_os"
"k8s.io/kubernetes/pkg/kubelet/rkt/mock_rkt"
"k8s.io/kubernetes/pkg/types"
"k8s.io/kubernetes/pkg/util/errors"
utilexec "k8s.io/kubernetes/pkg/util/exec"
utiltesting "k8s.io/kubernetes/pkg/util/testing"
)
@ -1194,6 +1196,11 @@ func TestGenerateRunCommand(t *testing.T) {
for i, tt := range tests {
testCaseHint := fmt.Sprintf("test case #%d", i)
rkt.runtimeHelper = &fakeRuntimeHelper{tt.dnsServers, tt.dnsSearches, tt.hostName, "", tt.err}
rkt.execer = &utilexec.FakeExec{CommandScript: []utilexec.FakeCommandAction{func(cmd string, args ...string) utilexec.Cmd {
return utilexec.InitFakeCmd(&utilexec.FakeCmd{}, cmd, args...)
}}}
// a command should be created of this form, but the returned command shouldn't be called (asserted by having no expectations on it)
result, err := rkt.generateRunCommand(tt.pod, tt.uuid, tt.netnsName)
assert.Equal(t, tt.err, err, testCaseHint)
@ -1617,3 +1624,137 @@ func TestGarbageCollect(t *testing.T) {
getter.pods = make(map[types.UID]*api.Pod)
}
}
// annotationsByName implements sort.Interface for a slice of appc annotations,
// ordering the entries by their Name.
type annotationsByName []appctypes.Annotation

// Len reports how many annotations are in the slice.
func (a annotationsByName) Len() int {
	return len(a)
}

// Less reports whether the annotation at index i sorts before the one at j.
func (a annotationsByName) Less(i, j int) bool {
	return a[i].Name < a[j].Name
}

// Swap exchanges the annotations at indices i and j.
func (a annotationsByName) Swap(i, j int) {
	a[j], a[i] = a[i], a[j]
}
// TestMakePodManifestAnnotations verifies that makePodManifest carries the
// stage1 name override annotation from the pod over to the generated appc pod
// manifest, alongside the kubelet's own annotations (kubelet marker, UID, name,
// namespace and restart count).
func TestMakePodManifestAnnotations(t *testing.T) {
	ctrl := gomock.NewController(t)
	defer ctrl.Finish()

	mockVolumeGetter := mock_rkt.NewMockVolumeGetter(ctrl)

	fr := newFakeRktInterface()
	fs := newFakeSystemd()
	r := &Runtime{apisvc: fr, systemd: fs, volumeGetter: mockVolumeGetter}

	testCases := []struct {
		in     *api.Pod
		out    *appcschema.PodManifest
		outerr error
	}{
		{
			// The pod declares no containers, so only pod-level annotations
			// end up being compared.
			in: &api.Pod{
				ObjectMeta: api.ObjectMeta{
					UID:       "uid-1",
					Name:      "name-1",
					Namespace: "namespace-1",
					Annotations: map[string]string{
						k8sRktStage1NameAnno: "stage1-override-img",
					},
				},
			},
			out: &appcschema.PodManifest{
				Annotations: []appctypes.Annotation{
					{
						Name:  appctypes.ACIdentifier(k8sRktStage1NameAnno),
						Value: "stage1-override-img",
					},
					{
						Name:  appctypes.ACIdentifier(k8sRktUIDAnno),
						Value: "uid-1",
					},
					{
						Name:  appctypes.ACIdentifier(k8sRktNameAnno),
						Value: "name-1",
					},
					{
						Name:  appctypes.ACIdentifier(k8sRktKubeletAnno),
						Value: "true",
					},
					{
						Name:  appctypes.ACIdentifier(k8sRktNamespaceAnno),
						Value: "namespace-1",
					},
					{
						Name:  appctypes.ACIdentifier(k8sRktRestartCountAnno),
						Value: "0",
					},
				},
			},
		},
	}

	for i, testCase := range testCases {
		hint := fmt.Sprintf("case #%d", i)
		// Each makePodManifest call is expected to query the volume getter.
		mockVolumeGetter.EXPECT().GetVolumes(gomock.Any()).Return(kubecontainer.VolumeMap{}, true)

		result, err := r.makePodManifest(testCase.in, []api.Secret{})
		// assert.Equal takes (expected, actual); keeping that order makes
		// failure messages label the two values correctly.
		assert.Equal(t, testCase.outerr, err, hint)
		if err == nil {
			// Sort both sides by name so the comparison does not depend on
			// the order in which annotations were set.
			sort.Sort(annotationsByName(result.Annotations))
			sort.Sort(annotationsByName(testCase.out.Annotations))
			assert.Equal(t, testCase.out.Annotations, result.Annotations, hint)
		}
	}
}
func TestPreparePodArgs(t *testing.T) {
r := &Runtime{
config: &Config{},
}
testCases := []struct {
manifest appcschema.PodManifest
stage1Config string
cmd []string
}{
{
appcschema.PodManifest{
Annotations: appctypes.Annotations{
{
Name: k8sRktStage1NameAnno,
Value: "stage1-image",
},
},
},
"",
[]string{"prepare", "--quiet", "--pod-manifest", "file", "--stage1-name=stage1-image"},
},
{
appcschema.PodManifest{
Annotations: appctypes.Annotations{
{
Name: k8sRktStage1NameAnno,
Value: "stage1-image",
},
},
},
"stage1-path",
[]string{"prepare", "--quiet", "--pod-manifest", "file", "--stage1-name=stage1-image"},
},
{
appcschema.PodManifest{
Annotations: appctypes.Annotations{},
},
"stage1-path",
[]string{"prepare", "--quiet", "--pod-manifest", "file", "--stage1-path=stage1-path"},
},
{
appcschema.PodManifest{
Annotations: appctypes.Annotations{},
},
"",
[]string{"prepare", "--quiet", "--pod-manifest", "file"},
},
}
for i, testCase := range testCases {
r.config.Stage1Image = testCase.stage1Config
cmd := r.preparePodArgs(&testCase.manifest, "file")
assert.Equal(t, testCase.cmd, cmd, fmt.Sprintf("Test case #%d", i))
}
}