From 5960d87d2142055cd29ebbce0243652c4adc5742 Mon Sep 17 00:00:00 2001 From: Yu-Ju Hong Date: Mon, 25 Jul 2016 16:25:36 -0700 Subject: [PATCH] dockershim: Implement sandbox methods --- pkg/kubelet/dockershim/convert.go | 23 ++ pkg/kubelet/dockershim/docker_container.go | 28 +-- pkg/kubelet/dockershim/docker_sandbox.go | 222 +++++++++++++++++- pkg/kubelet/dockershim/docker_sandbox_test.go | 58 +++++ pkg/kubelet/dockershim/docker_service.go | 10 + pkg/kubelet/dockershim/docker_service_test.go | 7 +- pkg/kubelet/dockershim/helpers.go | 99 ++++++++ pkg/kubelet/dockertools/fake_docker_client.go | 13 + 8 files changed, 434 insertions(+), 26 deletions(-) create mode 100644 pkg/kubelet/dockershim/docker_sandbox_test.go diff --git a/pkg/kubelet/dockershim/convert.go b/pkg/kubelet/dockershim/convert.go index 156171bcb93..0672e78fba0 100644 --- a/pkg/kubelet/dockershim/convert.go +++ b/pkg/kubelet/dockershim/convert.go @@ -21,6 +21,7 @@ import ( "strings" dockertypes "github.com/docker/engine-api/types" + runtimeApi "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" ) @@ -89,3 +90,25 @@ func toRuntimeAPIContainerState(state string) runtimeApi.ContainerState { return runtimeApi.ContainerState_UNKNOWN } } + +func toRuntimeAPISandboxState(state string) runtimeApi.PodSandBoxState { + // Parse the state string in dockertypes.Container. This could break when + // we upgrade docker. + switch { + case strings.HasPrefix(state, statusRunningPrefix): + return runtimeApi.PodSandBoxState_READY + default: + return runtimeApi.PodSandBoxState_NOTREADY + } +} + +func toRuntimeAPISandbox(c *dockertypes.Container) *runtimeApi.PodSandbox { + state := toRuntimeAPISandboxState(c.Status) + return &runtimeApi.PodSandbox{ + Id: &c.ID, + Name: &c.Names[0], + State: &state, + CreatedAt: &c.Created, // TODO: Why do we need CreateAt timestamp for sandboxes? + Labels: c.Labels, // TODO: Need to disthinguish annotaions and labels. + } +} diff --git a/pkg/kubelet/dockershim/docker_container.go b/pkg/kubelet/dockershim/docker_container.go index aef66ff157b..4acd4c280d7 100644 --- a/pkg/kubelet/dockershim/docker_container.go +++ b/pkg/kubelet/dockershim/docker_container.go @@ -35,15 +35,17 @@ func (ds *dockerService) ListContainers(filter *runtimeApi.ContainerFilter) ([]* opts := dockertypes.ContainerListOptions{All: true} opts.Filter = dockerfilters.NewArgs() + f := newDockerFilter(&opts.Filter) + if filter != nil { if filter.Name != nil { - opts.Filter.Add("name", filter.GetName()) + f.Add("name", filter.GetName()) } if filter.Id != nil { - opts.Filter.Add("id", filter.GetId()) + f.Add("id", filter.GetId()) } if filter.State != nil { - opts.Filter.Add("status", toDockerContainerStatus(filter.GetState())) + f.Add("status", toDockerContainerStatus(filter.GetState())) } if filter.PodSandboxId != nil { // TODO: implement this after sandbox functions are implemented. @@ -51,9 +53,11 @@ func (ds *dockerService) ListContainers(filter *runtimeApi.ContainerFilter) ([]* if filter.LabelSelector != nil { for k, v := range filter.LabelSelector { - opts.Filter.Add("label", fmt.Sprintf("%s=%s", k, v)) + f.AddLabel(k, v) } } + // Filter out sandbox containers. + f.AddLabel(containerTypeLabelKey, containerTypeLabelContainer) } containers, err := ds.client.ListContainers(opts) if err != nil { @@ -81,14 +85,9 @@ func (ds *dockerService) CreateContainer(podSandboxID string, config *runtimeApi // Merge annotations and labels because docker supports only labels. // TODO: add a prefix to annotations so that we can distinguish labels and // annotations when reading back them from the docker container. - // TODO: should we apply docker-specific labels? - labels := config.GetLabels() - for k, v := range config.GetAnnotations() { - if _, ok := labels[k]; !ok { - // Only write to labels if the key doesn't exist. - labels[k] = v - } - } + labels := makeLabels(config.GetLabels(), config.GetAnnotations()) + // Apply a the container type label. + labels[containerTypeLabelKey] = containerTypeLabelContainer image := "" if iSpec := config.GetImage(); iSpec != nil { @@ -163,10 +162,7 @@ func (ds *dockerService) CreateContainer(podSandboxID string, config *runtimeApi // Note: ShmSize is handled in kube_docker_client.go } - // TODO: Seccomp support. Need to figure out how to pass seccomp options - // through the runtime API (annotations?).See dockerManager.getSecurityOpts() - // for the details. Always set the default seccomp profile for now. - hc.SecurityOpt = []string{fmt.Sprintf("%s=%s", "seccomp", defaultSeccompProfile)} + hc.SecurityOpt = []string{getSeccompOpts()} // TODO: Add or drop capabilities. createConfig.HostConfig = hc diff --git a/pkg/kubelet/dockershim/docker_sandbox.go b/pkg/kubelet/dockershim/docker_sandbox.go index 7825a9909a8..0c75be26ba5 100644 --- a/pkg/kubelet/dockershim/docker_sandbox.go +++ b/pkg/kubelet/dockershim/docker_sandbox.go @@ -19,33 +19,243 @@ package dockershim import ( "fmt" + dockertypes "github.com/docker/engine-api/types" + dockercontainer "github.com/docker/engine-api/types/container" + dockerfilters "github.com/docker/engine-api/types/filters" + runtimeApi "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" ) +const ( + defaultSandboxImage = "gcr.io/google_containers/pause-amd64:3.0" + + // Various default sandbox resources requests/limits. + defaultSandboxCPUshares int64 = 2 + defaultSandboxOOMScore int = -999 + + // Termination grace period + defaultSandboxGracePeriod int = 10 +) + // CreatePodSandbox creates a pod-level sandbox. // The definition of PodSandbox is at https://github.com/kubernetes/kubernetes/pull/25899 +// For docker, PodSandbox is implemented by a container holding the network +// namespace for the pod. +// Note: docker doesn't use LogDirectory (yet). func (ds *dockerService) CreatePodSandbox(config *runtimeApi.PodSandboxConfig) (string, error) { - return "", fmt.Errorf("Not implemented") + // Step 1: Pull the image for the sandbox. + // TODO: How should we handle pulling custom pod infra container image + // (with credentials)? + image := defaultSandboxImage + if err := ds.client.PullImage(image, dockertypes.AuthConfig{}, dockertypes.ImagePullOptions{}); err != nil { + return "", fmt.Errorf("unable to pull image for the sandbox container: %v", err) + } + + // Step 2: Create the sandbox container. + createConfig := makeSandboxDockerConfig(config, image) + createResp, err := ds.client.CreateContainer(*createConfig) + if err != nil || createResp == nil { + return "", fmt.Errorf("failed to create a sandbox for pod %q: %v", config.GetName(), err) + } + + // Step 3: Start the sandbox container. + // Assume kubelet's garbage collector would remove the sandbox later, if + // startContainer failed. + err = ds.StartContainer(createResp.ID) + return createResp.ID, err } // StopPodSandbox stops the sandbox. If there are any running containers in the // sandbox, they should be force terminated. func (ds *dockerService) StopPodSandbox(podSandboxID string) error { - return fmt.Errorf("Not implemented") + return ds.client.StopContainer(podSandboxID, defaultSandboxGracePeriod) + // TODO: Stop all running containers in the sandbox. } // DeletePodSandbox deletes the sandbox. If there are running containers in the // sandbox, they should be forcibly deleted. func (ds *dockerService) DeletePodSandbox(podSandboxID string) error { - return fmt.Errorf("Not implemented") + return ds.client.RemoveContainer(podSandboxID, dockertypes.ContainerRemoveOptions{RemoveVolumes: true}) + // TODO: remove all containers in the sandbox. } -// PodSandboxStatus returns the Status of the PodSandbox. +// PodSandboxStatus returns the status of the PodSandbox. func (ds *dockerService) PodSandboxStatus(podSandboxID string) (*runtimeApi.PodSandboxStatus, error) { - return nil, fmt.Errorf("Not implemented") + // Inspect the container. + r, err := ds.client.InspectContainer(podSandboxID) + if err != nil { + return nil, err + } + + // Parse the timstamps. + createdAt, _, _, err := getContainerTimestamps(r) + if err != nil { + return nil, fmt.Errorf("failed to parse timestamp for container %q: %v", podSandboxID, err) + } + ct := createdAt.Unix() + + // Translate container to sandbox state. + state := runtimeApi.PodSandBoxState_NOTREADY + if r.State.Running { + state = runtimeApi.PodSandBoxState_READY + } + + // TODO: We can't really get the IP address from the network plugin, which + // is handled by kubelet as of now. Should we amend the interface? How is + // this handled in the new remote runtime integration? + // See DockerManager.determineContainerIP() for more details. + // For now, just assume that there is no network plugin. + // Related issue: https://github.com/kubernetes/kubernetes/issues/28667 + var IP string + if r.NetworkSettings != nil { + IP = r.NetworkSettings.IPAddress + // Fall back to IPv6 address if no IPv4 address is present + if IP == "" { + IP = r.NetworkSettings.GlobalIPv6Address + } + } + network := &runtimeApi.PodSandboxNetworkStatus{Ip: &IP} + netNS := getNetworkNamespace(r) + + return &runtimeApi.PodSandboxStatus{ + Id: &r.ID, + Name: &r.Name, + State: &state, + CreatedAt: &ct, + // TODO: We write annotations as labels on the docker containers. All + // these annotations will be read back as labels. Need to fix this. + // Also filter out labels only relevant to this shim. + Labels: r.Config.Labels, + Network: network, + Linux: &runtimeApi.LinuxPodSandboxStatus{Namespaces: &runtimeApi.Namespace{Network: &netNS}}, + }, nil } // ListPodSandbox returns a list of Sandbox. func (ds *dockerService) ListPodSandbox(filter *runtimeApi.PodSandboxFilter) ([]*runtimeApi.PodSandbox, error) { - return nil, fmt.Errorf("Not implemented") + // By default, list all containers whether they are running or not. + opts := dockertypes.ContainerListOptions{All: true} + filterOutReadySandboxes := false + + opts.Filter = dockerfilters.NewArgs() + f := newDockerFilter(&opts.Filter) + if filter != nil { + if filter.Name != nil { + f.Add("name", filter.GetName()) + } + if filter.Id != nil { + f.Add("id", filter.GetId()) + } + if filter.State != nil { + if filter.GetState() == runtimeApi.PodSandBoxState_READY { + // Only list running containers. + opts.All = false + } else { + // runtimeApi.PodSandBoxState_NOTREADY can mean the + // container is in any of the non-running state (e.g., created, + // exited). We can't tell docker to filter out running + // containers directly, so we'll need to filter them out + // ourselves after getting the results. + filterOutReadySandboxes = true + } + } + + if filter.LabelSelector != nil { + for k, v := range filter.LabelSelector { + f.AddLabel(k, v) + } + } + // Filter out sandbox containers. + f.AddLabel(containerTypeLabelKey, containerTypeLabelSandbox) + } + containers, err := ds.client.ListContainers(opts) + if err != nil { + return nil, err + } + + // Convert docker containers to runtime api sandboxes. + result := []*runtimeApi.PodSandbox{} + for _, c := range containers { + s := toRuntimeAPISandbox(&c) + if filterOutReadySandboxes && s.GetState() == runtimeApi.PodSandBoxState_READY { + continue + } + result = append(result, s) + } + return result, nil +} + +func makeSandboxDockerConfig(c *runtimeApi.PodSandboxConfig, image string) *dockertypes.ContainerCreateConfig { + // Merge annotations and labels because docker supports only labels. + labels := makeLabels(c.GetLabels(), c.GetAnnotations()) + // Apply a label to distinguish sandboxes from regular containers. + labels[containerTypeLabelKey] = containerTypeLabelSandbox + + hc := &dockercontainer.HostConfig{} + createConfig := &dockertypes.ContainerCreateConfig{ + Name: c.GetName(), + Config: &dockercontainer.Config{ + Hostname: c.GetHostname(), + // TODO: Handle environment variables. + Image: image, + Labels: labels, + }, + HostConfig: hc, + } + + // Apply linux-specific options. + if lc := c.GetLinux(); lc != nil { + // Apply Cgroup options. + // TODO: Check if this works with per-pod cgroups. + hc.CgroupParent = lc.GetCgroupParent() + + // Apply namespace options. + hc.NetworkMode, hc.UTSMode, hc.PidMode = "", "", "" + nsOpts := lc.GetNamespaceOptions() + if nsOpts != nil { + if nsOpts.GetHostNetwork() { + hc.NetworkMode = namespaceModeHost + } else { + // Assume kubelet uses either the cni or the kubenet plugin. + // TODO: support docker networking. + hc.NetworkMode = "none" + } + if nsOpts.GetHostIpc() { + hc.IpcMode = namespaceModeHost + } + if nsOpts.GetHostPid() { + hc.PidMode = namespaceModeHost + } + } + } + // Set port mappings. + exposedPorts, portBindings := makePortsAndBindings(c.GetPortMappings()) + createConfig.Config.ExposedPorts = exposedPorts + hc.PortBindings = portBindings + + // Set DNS options. + if dnsOpts := c.GetDnsOptions(); dnsOpts != nil { + hc.DNS = dnsOpts.GetServers() + hc.DNSSearch = dnsOpts.GetSearches() + } + + // Apply resource options. + setSandboxResources(c.GetResources(), hc) + + // Set security options. + hc.SecurityOpt = []string{getSeccompOpts()} + + return createConfig +} + +func setSandboxResources(_ *runtimeApi.PodSandboxResources, hc *dockercontainer.HostConfig) { + // Ignore the resource requests and limits for now and just use the docker + // defaults. + // TODO: apply resource limits based on the configuration. + hc.Resources = dockercontainer.Resources{ + MemorySwap: -1, // Always disable memory swap. + CPUShares: defaultSandboxCPUshares, + // Use docker's default cpu quota/period. + } + hc.OomScoreAdj = defaultSandboxOOMScore } diff --git a/pkg/kubelet/dockershim/docker_sandbox_test.go b/pkg/kubelet/dockershim/docker_sandbox_test.go new file mode 100644 index 00000000000..b5066bf6d9e --- /dev/null +++ b/pkg/kubelet/dockershim/docker_sandbox_test.go @@ -0,0 +1,58 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package dockershim + +import ( + "testing" + + dockertypes "github.com/docker/engine-api/types" + + runtimeApi "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" +) + +func TestCreateSandbox(t *testing.T) { + ds, fakeDocker := newTestDockerSevice() + name := "FOO" + // We don't really want to test k8s name format and parsing, + // but FakeDockerClient parses the name internally during AssertCreated(). + // TODO: fix this. + fullName := "k8s_" + name + ".abcde_foo_new_12345678_0" + config := &runtimeApi.PodSandboxConfig{Name: &fullName} + id, err := ds.CreatePodSandbox(config) + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + if err := fakeDocker.AssertCreated([]string{name}); err != nil { + t.Errorf("%v", err) + } + if err := fakeDocker.AssertStarted([]string{id}); err != nil { + t.Errorf("%v", err) + } + + // List running containers and verify that there is one (and only one) + // running container that we just created. + containers, err := fakeDocker.ListContainers(dockertypes.ContainerListOptions{All: false}) + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + if len(containers) != 1 { + t.Errorf("More than one running containers: %+v", containers) + } + if containers[0].ID != id { + t.Errorf("Expected id %q, got %v", id, containers[0].ID) + } +} diff --git a/pkg/kubelet/dockershim/docker_service.go b/pkg/kubelet/dockershim/docker_service.go index e499158eacd..408cc460f08 100644 --- a/pkg/kubelet/dockershim/docker_service.go +++ b/pkg/kubelet/dockershim/docker_service.go @@ -36,7 +36,17 @@ const ( // '{{.HostConfig.NetworkMode}}'. namespaceModeHost = "host" + dockerNetNSFmt = "/proc/%v/ns/net" + defaultSeccompProfile = "unconfined" + + // Internal docker labels used to identify whether a container is a sandbox + // or a regular container. + // TODO: This is not backward compatible with older containers. We will + // need to add filtering based on names. + containerTypeLabelKey = "io.kubernetes.docker.type" + containerTypeLabelSandbox = "podsandbox" + containerTypeLabelContainer = "container" ) func NewDockerSevice(client dockertools.DockerInterface) DockerLegacyService { diff --git a/pkg/kubelet/dockershim/docker_service_test.go b/pkg/kubelet/dockershim/docker_service_test.go index 5d33bc2297c..e1890851710 100644 --- a/pkg/kubelet/dockershim/docker_service_test.go +++ b/pkg/kubelet/dockershim/docker_service_test.go @@ -20,8 +20,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/dockertools" ) -func newFakeDockerSevice() *dockerService { - return &dockerService{ - client: dockertools.NewFakeDockerClient(), - } +func newTestDockerSevice() (*dockerService, *dockertools.FakeDockerClient) { + c := dockertools.NewFakeDockerClient() + return &dockerService{client: c}, c } diff --git a/pkg/kubelet/dockershim/helpers.go b/pkg/kubelet/dockershim/helpers.go index eb438216c64..a8aa51d3cbd 100644 --- a/pkg/kubelet/dockershim/helpers.go +++ b/pkg/kubelet/dockershim/helpers.go @@ -18,8 +18,15 @@ package dockershim import ( "fmt" + "strconv" + "strings" + dockertypes "github.com/docker/engine-api/types" + dockerfilters "github.com/docker/engine-api/types/filters" dockerapiversion "github.com/docker/engine-api/types/versions" + dockernat "github.com/docker/go-connections/nat" + "github.com/golang/glog" + runtimeApi "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" ) @@ -50,6 +57,23 @@ func generateEnvList(envs []*runtimeApi.KeyValue) (result []string) { return } +// Merge annotations and labels because docker supports only labels. +// TODO: Need to be able to distinguish annotations from labels; otherwise, we +// couldn't restore the information when reading the labels back from docker. +func makeLabels(labels, annotations map[string]string) map[string]string { + merged := make(map[string]string) + for k, v := range labels { + merged[k] = v + } + for k, v := range annotations { + if _, ok := merged[k]; !ok { + // Don't overwrite the key if it already exists. + merged[k] = v + } + } + return merged +} + // generateMountBindings converts the mount list to a list of strings that // can be understood by docker. // Each element in the string is in the form of: @@ -76,3 +100,78 @@ func generateMountBindings(mounts []*runtimeApi.Mount) (result []string) { } return } + +func makePortsAndBindings(pm []*runtimeApi.PortMapping) (map[dockernat.Port]struct{}, map[dockernat.Port][]dockernat.PortBinding) { + exposedPorts := map[dockernat.Port]struct{}{} + portBindings := map[dockernat.Port][]dockernat.PortBinding{} + for _, port := range pm { + exteriorPort := port.GetHostPort() + if exteriorPort == 0 { + // No need to do port binding when HostPort is not specified + continue + } + interiorPort := port.GetContainerPort() + // Some of this port stuff is under-documented voodoo. + // See http://stackoverflow.com/questions/20428302/binding-a-port-to-a-host-interface-using-the-rest-api + var protocol string + switch strings.ToUpper(string(port.GetProtocol())) { + case "UDP": + protocol = "/udp" + case "TCP": + protocol = "/tcp" + default: + glog.Warningf("Unknown protocol %q: defaulting to TCP", port.Protocol) + protocol = "/tcp" + } + + dockerPort := dockernat.Port(strconv.Itoa(int(interiorPort)) + protocol) + exposedPorts[dockerPort] = struct{}{} + + hostBinding := dockernat.PortBinding{ + HostPort: strconv.Itoa(int(exteriorPort)), + HostIP: port.GetHostIp(), + } + + // Allow multiple host ports bind to same docker port + if existedBindings, ok := portBindings[dockerPort]; ok { + // If a docker port already map to a host port, just append the host ports + portBindings[dockerPort] = append(existedBindings, hostBinding) + } else { + // Otherwise, it's fresh new port binding + portBindings[dockerPort] = []dockernat.PortBinding{ + hostBinding, + } + } + } + return exposedPorts, portBindings +} + +// TODO: Seccomp support. Need to figure out how to pass seccomp options +// through the runtime API (annotations?).See dockerManager.getSecurityOpts() +// for the details. Always set the default seccomp profile for now. +// Also need to support syntax for different docker versions. +func getSeccompOpts() string { + return fmt.Sprintf("%s=%s", "seccomp", defaultSeccompProfile) +} + +func getNetworkNamespace(c *dockertypes.ContainerJSON) string { + return fmt.Sprintf(dockerNetNSFmt, c.State.Pid) +} + +// dockerFilter wraps around dockerfilters.Args and provides methods to modify +// the filter easily. +type dockerFilter struct { + f *dockerfilters.Args +} + +func newDockerFilter(args *dockerfilters.Args) *dockerFilter { + return &dockerFilter{f: args} +} + +func (f *dockerFilter) Add(key, value string) { + f.Add(key, value) +} + +func (f *dockerFilter) AddLabel(key, value string) { + f.Add("label", fmt.Sprintf("%s=%s", key, value)) +} diff --git a/pkg/kubelet/dockertools/fake_docker_client.go b/pkg/kubelet/dockertools/fake_docker_client.go index 822c570dc1c..b0d5ce88987 100644 --- a/pkg/kubelet/dockertools/fake_docker_client.go +++ b/pkg/kubelet/dockertools/fake_docker_client.go @@ -51,6 +51,7 @@ type FakeDockerClient struct { // Created, Stopped and Removed all container docker ID Created []string + Started []string Stopped []string Removed []string VersionInfo dockertypes.Version @@ -229,6 +230,17 @@ func (f *FakeDockerClient) AssertCreated(created []string) error { return nil } +func (f *FakeDockerClient) AssertStarted(started []string) error { + f.Lock() + defer f.Unlock() + sort.StringSlice(started).Sort() + sort.StringSlice(f.Started).Sort() + if !reflect.DeepEqual(started, f.Started) { + return fmt.Errorf("expected %#v, got %#v", started, f.Started) + } + return nil +} + func (f *FakeDockerClient) AssertStopped(stopped []string) error { f.Lock() defer f.Unlock() @@ -339,6 +351,7 @@ func (f *FakeDockerClient) StartContainer(id string) error { if err := f.popError("start"); err != nil { return err } + f.Started = append(f.Started, id) container, ok := f.ContainerMap[id] if !ok { container = convertFakeContainer(&FakeContainer{ID: id, Name: id})