diff --git a/cluster/gce/config-default.sh b/cluster/gce/config-default.sh index 840d2a3ac76..09595df99f9 100755 --- a/cluster/gce/config-default.sh +++ b/cluster/gce/config-default.sh @@ -28,6 +28,6 @@ MASTER_TAG="${INSTANCE_PREFIX}-master" MINION_TAG="${INSTANCE_PREFIX}-minion" MINION_NAMES=($(eval echo ${INSTANCE_PREFIX}-minion-{1..${NUM_MINIONS}})) MINION_IP_RANGES=($(eval echo "10.244.{1..${NUM_MINIONS}}.0/24")) -MINION_SCOPES="" +MINION_SCOPES="compute-rw" # Increase the sleep interval value if concerned about API rate limits. 3, in seconds, is the default. POLL_SLEEP_INTERVAL=3 diff --git a/pkg/api/types.go b/pkg/api/types.go index a4491e3fb9b..e293ee353a9 100644 --- a/pkg/api/types.go +++ b/pkg/api/types.go @@ -92,6 +92,9 @@ type VolumeSource struct { HostDir *HostDir `yaml:"hostDir" json:"hostDir"` // EmptyDir represents a temporary directory that shares a pod's lifetime. EmptyDir *EmptyDir `yaml:"emptyDir" json:"emptyDir"` + // GCEPersistentDisk represents a GCE Disk resource that is attached to a + // kubelet's host machine and then exposed to the pod. + GCEPersistentDisk *GCEPersistentDisk `yaml:"persistentDisk" json:"persistentDisk"` } // HostDir represents bare host directory volume. @@ -111,7 +114,28 @@ const ( ProtocolUDP Protocol = "UDP" ) -// Port represents a network port in a single container. +// GCEPersistent Disk resource. +// A GCE PD must exist and be formatted before mounting to a container. +// The disk must also be in the same GCE project and zone as the kubelet. +// A GCE PD can only be mounted as read/write once. +type GCEPersistentDisk struct { + // Unique name of the PD resource. Used to identify the disk in GCE + PDName string `yaml:"pdName" json:"pdName"` + // Required: Filesystem type to mount. + // Must be a filesystem type supported by the host operating system. + // Ex. "ext4", "xfs", "ntfs" + // TODO: how do we prevent errors in the filesystem from compromising the machine + FSType string `yaml:"fsType,omitempty" json:"fsType,omitempty"` + // Optional: Partition on the disk to mount. + // If omitted, kubelet will attempt to mount the device name. + // Ex. For /dev/sda1, this field is "1", for /dev/sda, this field is 0 or empty. + Partition int `yaml:"partition,omitempty" json:"partition,omitempty"` + // Optional: Defaults to false (read/write). ReadOnly here will force + // the ReadOnly setting in VolumeMounts. + ReadOnly bool `yaml:"readOnly,omitempty" json:"readOnly,omitempty"` +} + +// Port represents a network port in a single container type Port struct { // Optional: If specified, this must be a DNS_LABEL. Each named port // in a pod must have a unique name. diff --git a/pkg/api/v1beta1/types.go b/pkg/api/v1beta1/types.go index b0180d3ddb5..835961fc9c7 100644 --- a/pkg/api/v1beta1/types.go +++ b/pkg/api/v1beta1/types.go @@ -90,6 +90,9 @@ type VolumeSource struct { HostDir *HostDir `yaml:"hostDir" json:"hostDir"` // EmptyDir represents a temporary directory that shares a pod's lifetime. EmptyDir *EmptyDir `yaml:"emptyDir" json:"emptyDir"` + // GCEPersistentDisk represents a GCE Disk resource that is attached to a + // kubelet's host machine and then exposed to the pod. + GCEPersistentDisk *GCEPersistentDisk `yaml:"persistentDisk" json:"persistentDisk"` } // HostDir represents bare host directory volume. @@ -109,7 +112,28 @@ const ( ProtocolUDP Protocol = "UDP" ) -// Port represents a network port in a single container. +// GCEPersistent Disk resource. +// A GCE PD must exist before mounting to a container. The disk must +// also be in the same GCE project and zone as the kubelet. +// A GCE PD can only be mounted as read/write once. +type GCEPersistentDisk struct { + // Unique name of the PD resource. Used to identify the disk in GCE + PDName string `yaml:"pdName" json:"pdName"` + // Required: Filesystem type to mount. + // Must be a filesystem type supported by the host operating system. + // Ex. "ext4", "xfs", "ntfs" + // TODO: how do we prevent errors in the filesystem from compromising the machine + FSType string `yaml:"fsType,omitempty" json:"fsType,omitempty"` + // Optional: Partition on the disk to mount. + // If omitted, kubelet will attempt to mount the device name. + // Ex. For /dev/sda1, this field is "1", for /dev/sda, this field 0 or empty. + Partition int `yaml:"partition,omitempty" json:"partition,omitempty"` + // Optional: Defaults to false (read/write). ReadOnly here will force + // the ReadOnly setting in VolumeMounts. + ReadOnly bool `yaml:"readOnly,omitempty" json:"readOnly,omitempty"` +} + +// Port represents a network port in a single container type Port struct { // Optional: If specified, this must be a DNS_LABEL. Each named port // in a pod must have a unique name. diff --git a/pkg/api/v1beta2/types.go b/pkg/api/v1beta2/types.go index 0d45d527943..77c9a1d5b6f 100644 --- a/pkg/api/v1beta2/types.go +++ b/pkg/api/v1beta2/types.go @@ -90,6 +90,9 @@ type VolumeSource struct { HostDir *HostDir `yaml:"hostDir" json:"hostDir"` // EmptyDir represents a temporary directory that shares a pod's lifetime. EmptyDir *EmptyDir `yaml:"emptyDir" json:"emptyDir"` + // A persistent disk that is mounted to the + // kubelet's host machine and then exposed to the pod. + GCEPersistentDisk *GCEPersistentDisk `yaml:"persistentDisk" json:"persistentDisk"` } // HostDir represents bare host directory volume. @@ -124,6 +127,27 @@ type Port struct { HostIP string `yaml:"hostIP,omitempty" json:"hostIP,omitempty"` } +// GCEPersistent Disk resource. +// A GCE PD must exist before mounting to a container. The disk must +// also be in the same GCE project and zone as the kubelet. +// A GCE PD can only be mounted as read/write once. +type GCEPersistentDisk struct { + // Unique name of the PD resource. Used to identify the disk in GCE + PDName string `yaml:"pdName" json:"pdName"` + // Required: Filesystem type to mount. + // Must be a filesystem type supported by the host operating system. + // Ex. "ext4", "xfs", "ntfs" + // TODO: how do we prevent errors in the filesystem from compromising the machine + FSType string `yaml:"fsType,omitempty" json:"fsType,omitempty"` + // Optional: Partition on the disk to mount. + // If omitted, kubelet will attempt to mount the device name. + // Ex. For /dev/sda1, this field is "1", for /dev/sda, this field 0 or empty. + Partition int `yaml:"partition,omitempty" json:"partition,omitempty"` + // Optional: Defaults to false (read/write). ReadOnly here will force + // the ReadOnly setting in VolumeMounts. + ReadOnly bool `yaml:"readOnly,omitempty" json:"readOnly,omitempty"` +} + // VolumeMount describes a mounting of a Volume within a container. type VolumeMount struct { // Required: This must match the Name of a Volume [above]. diff --git a/pkg/api/v1beta3/types.go b/pkg/api/v1beta3/types.go index 7d59fc90a73..f2b89fbd2c2 100644 --- a/pkg/api/v1beta3/types.go +++ b/pkg/api/v1beta3/types.go @@ -175,6 +175,9 @@ type VolumeSource struct { HostDir *HostDir `json:"hostDir" yaml:"hostDir"` // EmptyDir represents a temporary directory that shares a pod's lifetime. EmptyDir *EmptyDir `json:"emptyDir" yaml:"emptyDir"` + // GCEPersistentDisk represents a GCE Disk resource that is attached to a + // kubelet's host machine and then exposed to the pod. + GCEPersistentDisk *GCEPersistentDisk `yaml:"persistentDisk" json:"persistentDisk"` } // HostDir represents bare host directory volume. @@ -194,6 +197,27 @@ const ( ProtocolUDP Protocol = "UDP" ) +// GCEPersistent Disk resource. +// A GCE PD must exist and be formatted before mounting to a container. +// The disk must also be in the same GCE project and zone as the kubelet. +// A GCE PD can only be mounted as read/write once. +type GCEPersistentDisk struct { + // Unique name of the PD resource. Used to identify the disk in GCE + PDName string `yaml:"pdName" json:"pdName"` + // Required: Filesystem type to mount. + // Must be a filesystem type supported by the host operating system. + // Ex. "ext4", "xfs", "ntfs" + // TODO: how do we prevent errors in the filesystem from compromising the machine + FSType string `yaml:"fsType,omitempty" json:"fsType,omitempty"` + // Optional: Partition on the disk to mount. + // If omitted, kubelet will attempt to mount the device name. + // Ex. For /dev/sda1, this field is "1", for /dev/sda, this field is 0 or empty. + Partition int `yaml:"partition,omitempty" json:"partition,omitempty"` + // Optional: Defaults to false (read/write). ReadOnly here will force + // the ReadOnly setting in VolumeMounts. + ReadOnly bool `yaml:"readOnly,omitempty" json:"readOnly,omitempty"` +} + // Port represents a network port in a single container. type Port struct { // Optional: If specified, this must be a DNS_LABEL. Each named port diff --git a/pkg/api/validation/validation.go b/pkg/api/validation/validation.go index a260d3a1f5e..3b1e597518c 100644 --- a/pkg/api/validation/validation.go +++ b/pkg/api/validation/validation.go @@ -64,6 +64,10 @@ func validateSource(source *api.VolumeSource) errs.ErrorList { numVolumes++ //EmptyDirs have nothing to validate } + if source.GCEPersistentDisk != nil { + numVolumes++ + allErrs = append(allErrs, validateGCEPersistentDisk(source.GCEPersistentDisk)...) + } if numVolumes != 1 { allErrs = append(allErrs, errs.NewFieldInvalid("", source)) } @@ -80,6 +84,20 @@ func validateHostDir(hostDir *api.HostDir) errs.ErrorList { var supportedPortProtocols = util.NewStringSet(string(api.ProtocolTCP), string(api.ProtocolUDP)) +func validateGCEPersistentDisk(PD *api.GCEPersistentDisk) errs.ErrorList { + allErrs := errs.ErrorList{} + if PD.PDName == "" { + allErrs = append(allErrs, errs.NewFieldInvalid("PD.PDName", PD.PDName)) + } + if PD.FSType == "" { + allErrs = append(allErrs, errs.NewFieldInvalid("PD.FSType", PD.FSType)) + } + if PD.Partition < 0 || PD.Partition > 255 { + allErrs = append(allErrs, errs.NewFieldInvalid("PD.Partition", PD.Partition)) + } + return allErrs +} + func validatePorts(ports []api.Port) errs.ErrorList { allErrs := errs.ErrorList{} @@ -373,5 +391,17 @@ func ValidateReplicationControllerState(state *api.ReplicationControllerState) e allErrs = append(allErrs, errs.NewFieldInvalid("replicas", state.Replicas)) } allErrs = append(allErrs, ValidateManifest(&state.PodTemplate.DesiredState.Manifest).Prefix("podTemplate.desiredState.manifest")...) + allErrs = append(allErrs, ValidateReadOnlyPersistentDisks(state.PodTemplate.DesiredState.Manifest.Volumes).Prefix("podTemplate.desiredState.manifest")...) + return allErrs +} +func ValidateReadOnlyPersistentDisks(volumes []api.Volume) errs.ErrorList { + allErrs := errs.ErrorList{} + for _, vol := range volumes { + if vol.Source.GCEPersistentDisk != nil { + if vol.Source.GCEPersistentDisk.ReadOnly == false { + allErrs = append(allErrs, errs.NewFieldInvalid("GCEPersistentDisk.ReadOnly", false)) + } + } + } return allErrs } diff --git a/pkg/api/validation/validation_test.go b/pkg/api/validation/validation_test.go index ca2ea7de104..4e802b5444c 100644 --- a/pkg/api/validation/validation_test.go +++ b/pkg/api/validation/validation_test.go @@ -40,12 +40,13 @@ func TestValidateVolumes(t *testing.T) { {Name: "123", Source: &api.VolumeSource{HostDir: &api.HostDir{"/mnt/path2"}}}, {Name: "abc-123", Source: &api.VolumeSource{HostDir: &api.HostDir{"/mnt/path3"}}}, {Name: "empty", Source: &api.VolumeSource{EmptyDir: &api.EmptyDir{}}}, + {Name: "gcepd", Source: &api.VolumeSource{GCEPersistentDisk: &api.GCEPersistentDisk{"my-PD", "ext4", 1, false}}}, } names, errs := validateVolumes(successCase) if len(errs) != 0 { t.Errorf("expected success: %v", errs) } - if len(names) != 4 || !names.HasAll("abc", "123", "abc-123", "empty") { + if len(names) != 5 || !names.HasAll("abc", "123", "abc-123", "empty", "gcepd") { t.Errorf("wrong names result: %v", names) } @@ -552,7 +553,14 @@ func TestValidateReplicationController(t *testing.T) { }, Labels: validSelector, } - + invalidVolumePodTemplate := api.PodTemplate{ + DesiredState: api.PodState{ + Manifest: api.ContainerManifest{ + Version: "v1beta1", + Volumes: []api.Volume{{Name: "gcepd", Source: &api.VolumeSource{GCEPersistentDisk: &api.GCEPersistentDisk{"my-PD", "ext4", 1, false}}}}, + }, + }, + } successCases := []api.ReplicationController{ { TypeMeta: api.TypeMeta{ID: "abc", Namespace: api.NamespaceDefault}, @@ -609,6 +617,13 @@ func TestValidateReplicationController(t *testing.T) { ReplicaSelector: validSelector, }, }, + "read-write presistent disk": { + TypeMeta: api.TypeMeta{ID: "abc"}, + DesiredState: api.ReplicationControllerState{ + ReplicaSelector: validSelector, + PodTemplate: invalidVolumePodTemplate, + }, + }, "negative_replicas": { TypeMeta: api.TypeMeta{ID: "abc", Namespace: api.NamespaceDefault}, DesiredState: api.ReplicationControllerState{ @@ -628,6 +643,7 @@ func TestValidateReplicationController(t *testing.T) { field != "id" && field != "namespace" && field != "desiredState.replicaSelector" && + field != "GCEPersistentDisk.ReadOnly" && field != "desiredState.replicas" { t.Errorf("%s: missing prefix for: %v", k, errs[i]) } diff --git a/pkg/cloudprovider/gce/gce.go b/pkg/cloudprovider/gce/gce.go index 4012b17e002..2a90a4dfbd5 100644 --- a/pkg/cloudprovider/gce/gce.go +++ b/pkg/cloudprovider/gce/gce.go @@ -23,6 +23,7 @@ import ( "net" "net/http" "os/exec" + "path" "strconv" "strings" "time" @@ -41,43 +42,71 @@ type GCECloud struct { service *compute.Service projectID string zone string - instanceRE string + instanceID string } func init() { cloudprovider.RegisterCloudProvider("gce", func(config io.Reader) (cloudprovider.Interface, error) { return newGCECloud() }) } -func getProjectAndZone() (string, string, error) { +func getMetadata(url string) (string, error) { client := http.Client{} - url := "http://metadata/computeMetadata/v1/instance/zone" req, err := http.NewRequest("GET", url, nil) if err != nil { - return "", "", err + return "", err } req.Header.Add("X-Google-Metadata-Request", "True") res, err := client.Do(req) if err != nil { - return "", "", err + return "", err } defer res.Body.Close() data, err := ioutil.ReadAll(res.Body) + if err != nil { + return "", err + } + return string(data), nil +} + +func getProjectAndZone() (string, string, error) { + url := "http://metadata/computeMetadata/v1/instance/zone" + result, err := getMetadata(url) if err != nil { return "", "", err } - parts := strings.Split(string(data), "/") + parts := strings.Split(result, "/") if len(parts) != 4 { - return "", "", fmt.Errorf("Unexpected response: %s", string(data)) + return "", "", fmt.Errorf("Unexpected response: %s", result) } return parts[1], parts[3], nil } +func getInstanceID() (string, error) { + url := "http://metadata/computeMetadata/v1/instance/hostname" + result, err := getMetadata(url) + if err != nil { + return "", err + } + parts := strings.Split(result, ".") + if len(parts) == 0 { + return "", fmt.Errorf("Unexpected response: %s", result) + } + return parts[0], nil +} + // newGCECloud creates a new instance of GCECloud. func newGCECloud() (*GCECloud, error) { projectID, zone, err := getProjectAndZone() if err != nil { return nil, err } + // TODO: if we want to use this on a machine that doesn't have the http://metadata server + // e.g. on a user's machine (not VM) somewhere, we need to have an alternative for + // instance id lookup. + instanceID, err := getInstanceID() + if err != nil { + return nil, err + } client, err := serviceaccount.NewClient(&serviceaccount.Options{}) if err != nil { return nil, err @@ -87,9 +116,10 @@ func newGCECloud() (*GCECloud, error) { return nil, err } return &GCECloud{ - service: svc, - projectID: projectID, - zone: zone, + service: svc, + projectID: projectID, + zone: zone, + instanceID: instanceID, }, nil } @@ -310,6 +340,29 @@ func (gce *GCECloud) GetZone() (cloudprovider.Zone, error) { }, nil } +func (gce *GCECloud) AttachDisk(diskName string, readOnly bool) error { + disk, err := gce.getDisk(diskName) + if err != nil { + return err + } + readWrite := "READ_WRITE" + if readOnly { + readWrite = "READ_ONLY" + } + attachedDisk := gce.convertDiskToAttachedDisk(disk, readWrite) + _, err = gce.service.Instances.AttachDisk(gce.projectID, gce.zone, gce.instanceID, attachedDisk).Do() + return err +} + +func (gce *GCECloud) DetachDisk(devicePath string) error { + _, err := gce.service.Instances.DetachDisk(gce.projectID, gce.zone, gce.instanceID, devicePath).Do() + return err +} + +func (gce *GCECloud) getDisk(diskName string) (*compute.Disk, error) { + return gce.service.Disks.Get(gce.projectID, gce.zone, diskName).Do() +} + // getGceRegion returns region of the gce zone. Zone names // are of the form: ${region-name}-${ix}. // For example "us-central1-b" has a region of "us-central1". @@ -321,3 +374,14 @@ func getGceRegion(zone string) (string, error) { } return zone[:ix], nil } + +// Converts a Disk resource to an AttachedDisk resource. +func (gce *GCECloud) convertDiskToAttachedDisk(disk *compute.Disk, readWrite string) *compute.AttachedDisk { + return &compute.AttachedDisk{ + DeviceName: disk.Name, + Kind: disk.Kind, + Mode: readWrite, + Source: "https://" + path.Join("www.googleapis.com/compute/v1/projects/", gce.projectID, "zones", gce.zone, "disks", disk.Name), + Type: "PERSISTENT", + } +} diff --git a/pkg/volume/gce_util.go b/pkg/volume/gce_util.go new file mode 100644 index 00000000000..b01b7320da0 --- /dev/null +++ b/pkg/volume/gce_util.go @@ -0,0 +1,136 @@ +/* +Copyright 2014 Google Inc. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package volume + +import ( + "errors" + "fmt" + "os" + "path" + "path/filepath" + "regexp" + "strings" + "time" + + "github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider" + "github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider/gce" +) + +const partitionRegex = "[a-z][a-z]*(?P[0-9][0-9]*)?" + +var regexMatcher = regexp.MustCompile(partitionRegex) + +type GCEDiskUtil struct{} + +// Attaches a disk specified by a volume.GCEPersistentDisk to the current kubelet. +// Mounts the disk to it's global path. +func (util *GCEDiskUtil) AttachDisk(GCEPD *GCEPersistentDisk) error { + gce, err := cloudprovider.GetCloudProvider("gce", nil) + if err != nil { + return err + } + flags := uintptr(0) + if GCEPD.ReadOnly { + flags = MOUNT_MS_RDONLY + } + if err := gce.(*gce_cloud.GCECloud).AttachDisk(GCEPD.PDName, GCEPD.ReadOnly); err != nil { + return err + } + devicePath := path.Join("/dev/disk/by-id/", "google-"+GCEPD.PDName) + if GCEPD.Partition != "" { + devicePath = devicePath + "-part" + GCEPD.Partition + } + //TODO(jonesdl) There should probably be better method than busy-waiting here. + numTries := 0 + for { + _, err := os.Stat(devicePath) + if err == nil { + break + } + if err != nil && !os.IsNotExist(err) { + return err + } + numTries++ + if numTries == 10 { + return errors.New("Could not attach disk: Timeout after 10s") + } + time.Sleep(time.Second) + } + globalPDPath := makeGlobalPDName(GCEPD.RootDir, GCEPD.PDName, GCEPD.ReadOnly) + // Only mount the PD globally once. + _, err = os.Stat(globalPDPath) + if os.IsNotExist(err) { + err = os.MkdirAll(globalPDPath, 0750) + if err != nil { + return err + } + err = GCEPD.mounter.Mount(devicePath, globalPDPath, GCEPD.FSType, flags, "") + if err != nil { + os.RemoveAll(globalPDPath) + return err + } + } else if err != nil { + return err + } + return nil +} + +func getDeviceName(devicePath, canonicalDevicePath string) (string, error) { + isMatch := regexMatcher.MatchString(path.Base(canonicalDevicePath)) + if !isMatch { + return "", fmt.Errorf("unexpected device: %s", canonicalDevicePath) + } + if isMatch { + result := make(map[string]string) + substrings := regexMatcher.FindStringSubmatch(path.Base(canonicalDevicePath)) + for i, label := range regexMatcher.SubexpNames() { + result[label] = substrings[i] + } + partition := result["partition"] + devicePath = strings.TrimSuffix(devicePath, "-part"+partition) + } + return strings.TrimPrefix(path.Base(devicePath), "google-"), nil +} + +// Unmounts the device and detaches the disk from the kubelet's host machine. +// Expects a GCE device path symlink. Ex: /dev/disk/by-id/google-mydisk-part1 +func (util *GCEDiskUtil) DetachDisk(GCEPD *GCEPersistentDisk, devicePath string) error { + // Follow the symlink to the actual device path. + canonicalDevicePath, err := filepath.EvalSymlinks(devicePath) + if err != nil { + return err + } + deviceName, err := getDeviceName(devicePath, canonicalDevicePath) + if err != nil { + return err + } + globalPDPath := makeGlobalPDName(GCEPD.RootDir, deviceName, GCEPD.ReadOnly) + if err := GCEPD.mounter.Unmount(globalPDPath, 0); err != nil { + return err + } + if err := os.RemoveAll(globalPDPath); err != nil { + return err + } + gce, err := cloudprovider.GetCloudProvider("gce", nil) + if err != nil { + return err + } + if err := gce.(*gce_cloud.GCECloud).DetachDisk(deviceName); err != nil { + return err + } + return nil +} diff --git a/pkg/volume/gce_util_test.go b/pkg/volume/gce_util_test.go new file mode 100644 index 00000000000..089e87fb561 --- /dev/null +++ b/pkg/volume/gce_util_test.go @@ -0,0 +1,55 @@ +/* +Copyright 2014 Google Inc. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package volume + +import ( + "testing" +) + +func TestGetDeviceName(t *testing.T) { + tests := []struct { + deviceName string + canonicalName string + expectedName string + expectError bool + }{ + { + deviceName: "/dev/google-sd0-part0", + canonicalName: "/dev/google/sd0P1", + expectedName: "sd0", + }, + { + canonicalName: "0123456", + expectError: true, + }, + } + for _, test := range tests { + name, err := getDeviceName(test.deviceName, test.canonicalName) + if test.expectError { + if err == nil { + t.Error("unexpected non-error") + } + continue + } + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if name != test.expectedName { + t.Errorf("expected: %s, got %s", test.expectedName, name) + } + } +} diff --git a/pkg/volume/mounter_linux.go b/pkg/volume/mounter_linux.go new file mode 100644 index 00000000000..fc7e6f28ded --- /dev/null +++ b/pkg/volume/mounter_linux.go @@ -0,0 +1,86 @@ +/* +Copyright 2014 Google Inc. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package volume + +import ( + "bufio" + "io" + "os" + "regexp" + "strings" + "syscall" +) + +const MOUNT_MS_BIND = syscall.MS_BIND +const MOUNT_MS_RDONLY = syscall.MS_RDONLY + +type DiskMounter struct{} + +// Wraps syscall.Mount() +func (mounter *DiskMounter) Mount(source string, target string, fstype string, flags uintptr, data string) error { + return syscall.Mount(source, target, fstype, flags, data) +} + +// Wraps syscall.Unmount() +func (mounter *DiskMounter) Unmount(target string, flags int) error { + return syscall.Unmount(target, flags) +} + +// Examines /proc/mounts to find the source device of the PD resource and the +// number of references to that device. Returns both the full device path under +// the /dev tree and the number of references. +func (mounter *DiskMounter) RefCount(mount Interface) (string, int, error) { + // TODO(jonesdl) This can be split up into two procedures, finding the device path + // and finding the number of references. The parsing could also be separated and another + // utility could determine if a volume's path is an active mount point. + file, err := os.Open("/proc/mounts") + if err != nil { + return "", -1, err + } + defer file.Close() + scanner := bufio.NewReader(file) + refCount := 0 + var deviceName string + // Find the actual device path. + for { + line, err := scanner.ReadString('\n') + if err == io.EOF { + break + } + success, err := regexp.MatchString(mount.GetPath(), line) + if err != nil { + return "", -1, err + } + if success { + deviceName = strings.Split(line, " ")[0] + } + } + file.Close() + file, err = os.Open("/proc/mounts") + scanner.Reset(bufio.NewReader(file)) + // Find the number of references to the device. + for { + line, err := scanner.ReadString('\n') + if err == io.EOF { + break + } + if strings.Split(line, " ")[0] == deviceName { + refCount++ + } + } + return deviceName, refCount, nil +} diff --git a/pkg/volume/mounter_unsupported.go b/pkg/volume/mounter_unsupported.go new file mode 100644 index 00000000000..d861045fc8b --- /dev/null +++ b/pkg/volume/mounter_unsupported.go @@ -0,0 +1,36 @@ +// +build !linux + +/* +Copyright 2014 Google Inc. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package volume + +const MOUNT_MS_BIND = 0 +const MOUNT_MS_RDONLY = 0 + +type DiskMounter struct{} + +func (mounter *DiskMounter) Mount(source string, target string, fstype string, flags uintptr, data string) error { + return nil +} + +func (mounter *DiskMounter) Unmount(target string, flags int) error { + return nil +} + +func (mounter *DiskMounter) RefCount(PD Interface) (string, int, error) { + return "", 0, nil +} diff --git a/pkg/volume/volume.go b/pkg/volume/volume.go index a34accfddfe..99d5adc3e37 100644 --- a/pkg/volume/volume.go +++ b/pkg/volume/volume.go @@ -21,6 +21,7 @@ import ( "io/ioutil" "os" "path" + "strconv" "github.com/GoogleCloudPlatform/kubernetes/pkg/api" "github.com/golang/glog" @@ -49,6 +50,22 @@ type Cleaner interface { TearDown() error } +type gcePersistentDiskUtil interface { + // Attaches the disk to the kubelet's host machine. + AttachDisk(PD *GCEPersistentDisk) error + // Detaches the disk from the kubelet's host machine. + DetachDisk(PD *GCEPersistentDisk, devicePath string) error +} + +// Mounters wrap os/system specific calls to perform mounts. +type mounter interface { + Mount(source string, target string, fstype string, flags uintptr, data string) error + Unmount(target string, flags int) error + // RefCount returns the device path for the source disk of a volume, and + // the number of references to that target disk. + RefCount(vol Interface) (string, int, error) +} + // HostDir volumes represent a bare host directory mount. // The directory in Path will be directly exposed to the container. type HostDir struct { @@ -118,11 +135,128 @@ func createHostDir(volume *api.Volume) *HostDir { return &HostDir{volume.Source.HostDir.Path} } +// GCEPersistentDisk volumes are disk resources provided by Google Compute Engine +// that are attached to the kubelet's host machine and exposed to the pod. +type GCEPersistentDisk struct { + Name string + PodID string + RootDir string + // Unique identifier of the PD, used to find the disk resource in the provider. + PDName string + // Filesystem type, optional. + FSType string + // Specifies the partition to mount + Partition string + // Specifies whether the disk will be attached as ReadOnly. + ReadOnly bool + // Utility interface that provides API calls to the provider to attach/detach disks. + util gcePersistentDiskUtil + // Mounter interface that provides system calls to mount the disks. + mounter mounter +} + +func (PD *GCEPersistentDisk) GetPath() string { + return path.Join(PD.RootDir, PD.PodID, "volumes", "gce-pd", PD.Name) +} + +// Attaches the disk and bind mounts to the volume path. +func (PD *GCEPersistentDisk) SetUp() error { + // TODO: handle failed mounts here. + if _, err := os.Stat(PD.GetPath()); !os.IsNotExist(err) { + return nil + } + err := PD.util.AttachDisk(PD) + if err != nil { + return err + } + flags := uintptr(0) + if PD.ReadOnly { + flags = MOUNT_MS_RDONLY + } + //Perform a bind mount to the full path to allow duplicate mounts of the same PD. + if _, err = os.Stat(PD.GetPath()); os.IsNotExist(err) { + err = os.MkdirAll(PD.GetPath(), 0750) + if err != nil { + return err + } + globalPDPath := makeGlobalPDName(PD.RootDir, PD.PDName, PD.ReadOnly) + err = PD.mounter.Mount(globalPDPath, PD.GetPath(), "", MOUNT_MS_BIND|flags, "") + if err != nil { + os.RemoveAll(PD.GetPath()) + return err + } + } + return nil +} + +// Unmounts the bind mount, and detaches the disk only if the PD +// resource was the last reference to that disk on the kubelet. +func (PD *GCEPersistentDisk) TearDown() error { + devicePath, refCount, err := PD.mounter.RefCount(PD) + if err != nil { + return err + } + if err := PD.mounter.Unmount(PD.GetPath(), 0); err != nil { + return err + } + refCount-- + if err := os.RemoveAll(PD.GetPath()); err != nil { + return err + } + if err != nil { + return err + } + // If refCount is 1, then all bind mounts have been removed, and the + // remaining reference is the global mount. It is safe to detach. + if refCount == 1 { + if err := PD.util.DetachDisk(PD, devicePath); err != nil { + return err + } + } + return nil +} + +//TODO(jonesdl) prevent name collisions by using designated pod space as well. +// Ex. (ROOT_DIR)/pods/... +func makeGlobalPDName(rootDir, devName string, readOnly bool) string { + var mode string + if readOnly { + mode = "ro" + } else { + mode = "rw" + } + return path.Join(rootDir, "global", "pd", mode, devName) +} + // createEmptyDir interprets API volume as an EmptyDir. func createEmptyDir(volume *api.Volume, podID string, rootDir string) *EmptyDir { return &EmptyDir{volume.Name, podID, rootDir} } +// Interprets API volume as a PersistentDisk +func createGCEPersistentDisk(volume *api.Volume, podID string, rootDir string) (*GCEPersistentDisk, error) { + PDName := volume.Source.GCEPersistentDisk.PDName + FSType := volume.Source.GCEPersistentDisk.FSType + partition := strconv.Itoa(volume.Source.GCEPersistentDisk.Partition) + if partition == "0" { + partition = "" + } + readOnly := volume.Source.GCEPersistentDisk.ReadOnly + // TODO: move these up into the Kubelet. + util := &GCEDiskUtil{} + mounter := &DiskMounter{} + return &GCEPersistentDisk{ + Name: volume.Name, + PodID: podID, + RootDir: rootDir, + PDName: PDName, + FSType: FSType, + Partition: partition, + ReadOnly: readOnly, + util: util, + mounter: mounter}, nil +} + // CreateVolumeBuilder returns a Builder capable of mounting a volume described by an // *api.Volume, or an error. func CreateVolumeBuilder(volume *api.Volume, podID string, rootDir string) (Builder, error) { @@ -133,12 +267,18 @@ func CreateVolumeBuilder(volume *api.Volume, podID string, rootDir string) (Buil return nil, nil } var vol Builder + var err error // TODO(jonesdl) We should probably not check every pointer and directly // resolve these types instead. if source.HostDir != nil { vol = createHostDir(volume) } else if source.EmptyDir != nil { vol = createEmptyDir(volume, podID, rootDir) + } else if source.GCEPersistentDisk != nil { + vol, err = createGCEPersistentDisk(volume, podID, rootDir) + if err != nil { + return nil, err + } } else { return nil, ErrUnsupportedVolumeType } @@ -150,6 +290,13 @@ func CreateVolumeCleaner(kind string, name string, podID string, rootDir string) switch kind { case "empty": return &EmptyDir{name, podID, rootDir}, nil + case "gce-pd": + return &GCEPersistentDisk{ + Name: name, + PodID: podID, + RootDir: rootDir, + util: &GCEDiskUtil{}, + mounter: &DiskMounter{}}, nil default: return nil, ErrUnsupportedVolumeType } @@ -159,10 +306,9 @@ func CreateVolumeCleaner(kind string, name string, podID string, rootDir string) // presently active and mounted. Returns a map of Cleaner types. func GetCurrentVolumes(rootDirectory string) map[string]Cleaner { currentVolumes := make(map[string]Cleaner) - mountPath := rootDirectory - podIDDirs, err := ioutil.ReadDir(mountPath) + podIDDirs, err := ioutil.ReadDir(rootDirectory) if err != nil { - glog.Errorf("Could not read directory: %s, (%s)", mountPath, err) + glog.Errorf("Could not read directory: %s, (%s)", rootDirectory, err) } // Volume information is extracted from the directory structure: // (ROOT_DIR)/(POD_ID)/volumes/(VOLUME_KIND)/(VOLUME_NAME) @@ -171,7 +317,10 @@ func GetCurrentVolumes(rootDirectory string) map[string]Cleaner { continue } podID := podIDDir.Name() - podIDPath := path.Join(mountPath, podID, "volumes") + podIDPath := path.Join(rootDirectory, podID, "volumes") + if _, err := os.Stat(podIDPath); os.IsNotExist(err) { + continue + } volumeKindDirs, err := ioutil.ReadDir(podIDPath) if err != nil { glog.Errorf("Could not read directory: %s, (%s)", podIDPath, err) @@ -189,7 +338,7 @@ func GetCurrentVolumes(rootDirectory string) map[string]Cleaner { // TODO(thockin) This should instead return a reference to an extant volume object cleaner, err := CreateVolumeCleaner(volumeKind, volumeName, podID, rootDirectory) if err != nil { - glog.Errorf("Could not create volume cleaner: %s, (%s)", volumeNameDirs, err) + glog.Errorf("Could not create volume cleaner: %s, (%s)", volumeNameDir.Name(), err) continue } currentVolumes[identifier] = cleaner diff --git a/pkg/volume/volume_test.go b/pkg/volume/volume_test.go index c2eda876af1..a62bda30db1 100644 --- a/pkg/volume/volume_test.go +++ b/pkg/volume/volume_test.go @@ -20,22 +20,52 @@ import ( "io/ioutil" "os" "path" + "reflect" "testing" "github.com/GoogleCloudPlatform/kubernetes/pkg/api" ) -func TestCreateVolumeBuilders(t *testing.T) { - tempDir, err := ioutil.TempDir("", "CreateVolumes") +type MockDiskUtil struct{} + +// TODO(jonesdl) To fully test this, we could create a loopback device +// and mount that instead. +func (util *MockDiskUtil) AttachDisk(PD *GCEPersistentDisk) error { + err := os.MkdirAll(path.Join(PD.RootDir, "global", "pd", PD.PDName), 0750) if err != nil { - t.Errorf("Unexpected error: %v", err) + return err } - defer os.RemoveAll(tempDir) + return nil +} + +func (util *MockDiskUtil) DetachDisk(PD *GCEPersistentDisk, devicePath string) error { + err := os.RemoveAll(path.Join(PD.RootDir, "global", "pd", PD.PDName)) + if err != nil { + return err + } + return nil +} + +type MockMounter struct{} + +func (mounter *MockMounter) Mount(source string, target string, fstype string, flags uintptr, data string) error { + return nil +} + +func (mounter *MockMounter) Unmount(target string, flags int) error { + return nil +} + +func (mounter *MockMounter) RefCount(vol Interface) (string, int, error) { + return "", 0, nil +} + +func TestCreateVolumeBuilders(t *testing.T) { + tempDir := "CreateVolumes" createVolumesTests := []struct { volume api.Volume path string podID string - kind string }{ { api.Volume{ @@ -45,7 +75,6 @@ func TestCreateVolumeBuilders(t *testing.T) { }, }, "/dir/path", - "my-id", "", }, { @@ -57,9 +86,18 @@ func TestCreateVolumeBuilders(t *testing.T) { }, path.Join(tempDir, "/my-id/volumes/empty/empty-dir"), "my-id", - "empty", }, - {api.Volume{}, "", "", ""}, + { + api.Volume{ + Name: "gce-pd", + Source: &api.VolumeSource{ + GCEPersistentDisk: &api.GCEPersistentDisk{"my-disk", "ext4", 0, false}, + }, + }, + path.Join(tempDir, "/my-id/volumes/gce-pd/gce-pd"), + "my-id", + }, + {api.Volume{}, "", ""}, { api.Volume{ Name: "empty-dir", @@ -67,7 +105,6 @@ func TestCreateVolumeBuilders(t *testing.T) { }, "", "", - "", }, } for _, createVolumesTest := range createVolumesTests { @@ -79,7 +116,7 @@ func TestCreateVolumeBuilders(t *testing.T) { } continue } - if tt.volume.Source.HostDir == nil && tt.volume.Source.EmptyDir == nil { + if tt.volume.Source.HostDir == nil && tt.volume.Source.EmptyDir == nil && tt.volume.Source.GCEPersistentDisk == nil { if err != ErrUnsupportedVolumeType { t.Errorf("Unexpected error: %v", err) } @@ -88,22 +125,68 @@ func TestCreateVolumeBuilders(t *testing.T) { if err != nil { t.Errorf("Unexpected error: %v", err) } - err = vb.SetUp() - if err != nil { - t.Errorf("Unexpected error: %v", err) - } path := vb.GetPath() if path != tt.path { t.Errorf("Unexpected bind path. Expected %v, got %v", tt.path, path) } - vc, err := CreateVolumeCleaner(tt.kind, tt.volume.Name, tt.podID, tempDir) - if tt.kind == "" { - if err != ErrUnsupportedVolumeType { - t.Errorf("Unexpected error: %v", err) - } + } +} + +func TestCreateVolumeCleaners(t *testing.T) { + tempDir := "CreateVolumeCleaners" + createVolumeCleanerTests := []struct { + kind string + name string + podID string + }{ + {"empty", "empty-vol", "my-id"}, + {"", "", ""}, + {"gce-pd", "gce-pd-vol", "my-id"}, + } + for _, tt := range createVolumeCleanerTests { + vol, err := CreateVolumeCleaner(tt.kind, tt.name, tt.podID, tempDir) + if tt.kind == "" && err != nil && vol == nil { continue } - err = vc.TearDown() + if err != nil { + t.Errorf("Unexpected error occured: %s", err) + } + actualKind := reflect.TypeOf(vol).Elem().Name() + if tt.kind == "empty" && actualKind != "EmptyDir" { + t.Errorf("CreateVolumeCleaner returned invalid type. Expected EmptyDirectory, got %v, %v", tt.kind, actualKind) + } + if tt.kind == "gce-pd" && actualKind != "GCEPersistentDisk" { + t.Errorf("CreateVolumeCleaner returned invalid type. Expected PersistentDisk, got %v, %v", tt.kind, actualKind) + } + } +} + +func TestSetUpAndTearDown(t *testing.T) { + tempDir, err := ioutil.TempDir("", "CreateVolumes") + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + defer os.RemoveAll(tempDir) + fakeID := "my-id" + type VolumeTester interface { + Builder + Cleaner + } + volumes := []VolumeTester{ + &EmptyDir{"empty", fakeID, tempDir}, + &GCEPersistentDisk{"pd", fakeID, tempDir, "pd-disk", "ext4", "", false, &MockDiskUtil{}, &MockMounter{}}, + } + + for _, vol := range volumes { + err = vol.SetUp() + path := vol.GetPath() + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + if _, err := os.Stat(path); os.IsNotExist(err) { + t.Errorf("SetUp() failed, volume path not created: %v", path) + } + err = vol.TearDown() if err != nil { t.Errorf("Unexpected error: %v", err) }