From 41bbaf9cfb8e9fa485d7699b4828a1cc93d33968 Mon Sep 17 00:00:00 2001 From: haruband <haruband@gmail.com> Date: Thu, 4 May 2023 12:20:38 +0900 Subject: [PATCH] Fix multiple connections problems to one same nqn in a single node (#23) * fix: make multiple connections to one same nqn possible in a single node Multiple connections to one same nqn in a single node return -EALREADY error. So, we should use a hostnqn to make every connections seperate. It is appropriate to use a target path from a volume request as a hostnqn. * fix: make ro and rw mounts for one same nqn possible in a single node ReadOnly and ReadWrite mounts for one same nqn in a single node return -EBUSY error. So, we should remove readonly and readwrite options when mounting a block device in a csi driver. A cri runtime makes a mount as readonly inside containers if needed. * fix: add fallback supports for no hostnqn sysfs file supports Directories per each nqn will be created in /run/nvmf and files per each hostnqn will be created in /run/nvmf/{nqn}. If linux kernel has no hostnqn sysfs file supports, we will disconnect all connections for a nqn at once when a directory for the nqn is empty. * refactor: rename sysfs_nqn_path to sysfs_subsysnqn_path * refactor: use filepath.Join instead of strings.Join * fix: add explicitly UnsupportedHostnqnError for fallback supports If linux kernel has no hostnqn sysfs file supports, return UnsupportedHostnqnError and switch to fallback mode which will disconnect all connections at once when a last controller in a nqn is disconnected. * fix: return directly after fallback mode * fix: mount the host's /run/nvmf directory to csi-node-driver --- deploy/kubernetes/csi-nvmf-node.yaml | 8 +- pkg/nvmf/const.go | 1 + pkg/nvmf/errors.go | 36 +++++++ pkg/nvmf/fabrics.go | 149 ++++++++++++++++++++++++--- pkg/nvmf/nodeserver.go | 1 - pkg/nvmf/nvmf.go | 18 +--- 6 files changed, 178 insertions(+), 35 deletions(-) create mode 100644 pkg/nvmf/errors.go diff --git a/deploy/kubernetes/csi-nvmf-node.yaml b/deploy/kubernetes/csi-nvmf-node.yaml index 011feb4..56c8ada 100644 --- a/deploy/kubernetes/csi-nvmf-node.yaml +++ b/deploy/kubernetes/csi-nvmf-node.yaml @@ -73,6 +73,8 @@ spec: - name: pods-mount-dir mountPath: /var/lib/kubelet/pods mountPropagation: "Bidirectional" + - name: run-nvmf-dir + mountPath: /run/nvmf - name: host-dev mountPath: /dev mountPropagation: "HostToContainer" @@ -94,6 +96,10 @@ spec: hostPath: path: /var/lib/kubelet/pods type: Directory + - name: run-nvmf-dir + hostPath: + path: /run/nvmf + type: DirectoryOrCreate - name: host-dev hostPath: path: /dev @@ -102,4 +108,4 @@ spec: path: /sys - name: lib-modules hostPath: - path: /lib/modules \ No newline at end of file + path: /lib/modules diff --git a/pkg/nvmf/const.go b/pkg/nvmf/const.go index 3ed73cb..6a5c3fc 100644 --- a/pkg/nvmf/const.go +++ b/pkg/nvmf/const.go @@ -18,6 +18,7 @@ package nvmf const ( NVMF_NQN_SIZE = 223 SYS_NVMF = "/sys/class/nvme" + RUN_NVMF = "/run/nvmf" ) // Here erron diff --git a/pkg/nvmf/errors.go b/pkg/nvmf/errors.go new file mode 100644 index 0000000..736a417 --- /dev/null +++ b/pkg/nvmf/errors.go @@ -0,0 +1,36 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package nvmf + +import "fmt" + +type NoControllerError struct { + Nqn string + Hostnqn string +} + +func (e *NoControllerError) Error() string { + return fmt.Sprintf("not found controller: nqn=%s, hostnqn=%s", e.Nqn, e.Hostnqn) +} + +type UnsupportedHostnqnError struct { + Target string +} + +func (e *UnsupportedHostnqnError) Error() string { + return fmt.Sprintf("unsupported hostnqn sysfs file: target=%s", e.Target) +} diff --git a/pkg/nvmf/fabrics.go b/pkg/nvmf/fabrics.go index 7b0672f..4fe4a80 100644 --- a/pkg/nvmf/fabrics.go +++ b/pkg/nvmf/fabrics.go @@ -17,10 +17,12 @@ limitations under the License. package nvmf import ( + b64 "encoding/base64" "encoding/json" "fmt" "io/ioutil" "os" + "path/filepath" "strings" "github.com/kubernetes-csi/csi-driver-nvmf/pkg/utils" @@ -34,11 +36,12 @@ type Connector struct { TargetAddr string TargetPort string Transport string + HostNqn string RetryCount int32 CheckInterval int32 } -func getNvmfConnector(nvmfInfo *nvmfDiskInfo) *Connector { +func getNvmfConnector(nvmfInfo *nvmfDiskInfo, hostnqn string) *Connector { return &Connector{ VolumeID: nvmfInfo.VolName, DeviceUUID: nvmfInfo.DeviceUUID, @@ -46,6 +49,7 @@ func getNvmfConnector(nvmfInfo *nvmfDiskInfo) *Connector { TargetAddr: nvmfInfo.Addr, TargetPort: nvmfInfo.Port, Transport: nvmfInfo.Transport, + HostNqn: hostnqn, } } @@ -84,44 +88,109 @@ func _disconnect(sysfs_path string) error { return nil } -func disconnectSubsys(nqn, ctrl string) (res bool) { - sysfs_nqn_path := fmt.Sprintf("%s/%s/subsysnqn", SYS_NVMF, ctrl) +func disconnectSubsysWithHostNqn(nqn, hostnqn, ctrl string) error { + sysfs_subsysnqn_path := fmt.Sprintf("%s/%s/subsysnqn", SYS_NVMF, ctrl) + sysfs_hostnqn_path := fmt.Sprintf("%s/%s/hostnqn", SYS_NVMF, ctrl) sysfs_del_path := fmt.Sprintf("%s/%s/delete_controller", SYS_NVMF, ctrl) - file, err := os.Open(sysfs_nqn_path) + file, err := os.Open(sysfs_subsysnqn_path) if err != nil { - klog.Errorf("Disconnect: open file %s err: %v", file.Name(), err) - return false + klog.Errorf("Disconnect: open file %s err: %v", sysfs_subsysnqn_path, err) + return &NoControllerError{Nqn: nqn, Hostnqn: hostnqn} } defer file.Close() lines, err := utils.ReadLinesFromFile(file) if err != nil { klog.Errorf("Disconnect: read file %s err: %v", file.Name(), err) - return false + return &NoControllerError{Nqn: nqn, Hostnqn: hostnqn} } if lines[0] != nqn { klog.Warningf("Disconnect: not this subsystem, skip") - return false + return &NoControllerError{Nqn: nqn, Hostnqn: hostnqn} + } + + file, err = os.Open(sysfs_hostnqn_path) + if err != nil { + klog.Errorf("Disconnect: open file %s err: %v", sysfs_hostnqn_path, err) + return &UnsupportedHostnqnError{Target: sysfs_hostnqn_path} + } + defer file.Close() + + lines, err = utils.ReadLinesFromFile(file) + if err != nil { + klog.Errorf("Disconnect: read file %s err: %v", file.Name(), err) + return &NoControllerError{Nqn: nqn, Hostnqn: hostnqn} + } + + if lines[0] != hostnqn { + klog.Warningf("Disconnect: not this subsystem, skip") + return &NoControllerError{Nqn: nqn, Hostnqn: hostnqn} } err = _disconnect(sysfs_del_path) if err != nil { klog.Errorf("Disconnect: disconnect error: %s", err) - return false + return &NoControllerError{Nqn: nqn, Hostnqn: hostnqn} } - return true + return nil } -func disconnectByNqn(nqn string) int { +func disconnectSubsys(nqn, ctrl string) error { + sysfs_subsysnqn_path := fmt.Sprintf("%s/%s/subsysnqn", SYS_NVMF, ctrl) + sysfs_del_path := fmt.Sprintf("%s/%s/delete_controller", SYS_NVMF, ctrl) + + file, err := os.Open(sysfs_subsysnqn_path) + if err != nil { + klog.Errorf("Disconnect: open file %s err: %v", sysfs_subsysnqn_path, err) + return &NoControllerError{Nqn: nqn, Hostnqn: ""} + } + defer file.Close() + + lines, err := utils.ReadLinesFromFile(file) + if err != nil { + klog.Errorf("Disconnect: read file %s err: %v", file.Name(), err) + return &NoControllerError{Nqn: nqn, Hostnqn: ""} + } + + if lines[0] != nqn { + klog.Warningf("Disconnect: not this subsystem, skip") + return &NoControllerError{Nqn: nqn, Hostnqn: ""} + } + + err = _disconnect(sysfs_del_path) + if err != nil { + klog.Errorf("Disconnect: disconnect error: %s", err) + return &NoControllerError{Nqn: nqn, Hostnqn: ""} + } + + return nil +} + +func disconnectByNqn(nqn, hostnqn string) int { ret := 0 if len(nqn) > NVMF_NQN_SIZE { klog.Errorf("Disconnect: nqn %s is too long ", nqn) return -EINVAL } + // delete hostnqn file + hostnqnPath := filepath.Join(RUN_NVMF, nqn, b64.StdEncoding.EncodeToString([]byte(hostnqn))) + os.Remove(hostnqnPath) + + // delete nqn directory if has no hostnqn files + nqnPath := filepath.Join(RUN_NVMF, nqn) + hostnqns, err := ioutil.ReadDir(nqnPath) + if err != nil { + klog.Errorf("Disconnect: readdir %s err: %v", nqnPath, err) + return -ENOENT + } + if len(hostnqns) <= 0 { + os.RemoveAll(nqnPath) + } + devices, err := ioutil.ReadDir(SYS_NVMF) if err != nil { klog.Errorf("Disconnect: readdir %s err: %s", SYS_NVMF, err) @@ -129,10 +198,32 @@ func disconnectByNqn(nqn string) int { } for _, device := range devices { - if disconnectSubsys(nqn, device.Name()) { + if err := disconnectSubsysWithHostNqn(nqn, hostnqn, device.Name()); err != nil { + if _, ok := err.(*UnsupportedHostnqnError); ok { + klog.Infof("Fallback because you have no hostnqn supports!") + + // disconnect all controllers if has no hostnqn files + if len(hostnqns) <= 0 { + devices, err := ioutil.ReadDir(SYS_NVMF) + if err != nil { + klog.Errorf("Disconnect: readdir %s err: %s", SYS_NVMF, err) + return -ENOENT + } + + for _, device := range devices { + if err := disconnectSubsys(nqn, device.Name()); err == nil { + ret++ + } + } + } + + return ret + } + } else { ret++ } } + return ret } @@ -154,7 +245,7 @@ func (c *Connector) Connect() (string, error) { return "", fmt.Errorf("csi transport only support tcp/rdma ") } - baseString := fmt.Sprintf("nqn=%s,transport=%s,traddr=%s,trsvcid=%s", c.TargetNqn, c.Transport, c.TargetAddr, c.TargetPort) + baseString := fmt.Sprintf("nqn=%s,transport=%s,traddr=%s,trsvcid=%s,hostnqn=%s", c.TargetNqn, c.Transport, c.TargetAddr, c.TargetPort, c.HostNqn) devicePath := strings.Join([]string{"/dev/disk/by-id/nvme-uuid", c.DeviceUUID}, ".") // connect to nvmf disk @@ -162,25 +253,49 @@ func (c *Connector) Connect() (string, error) { if err != nil { return "", err } - klog.Infof("Connect Volume %s success nqn: %s", c.VolumeID, c.TargetNqn) + klog.Infof("Connect Volume %s success nqn: %s, hostnqn: %s", c.VolumeID, c.TargetNqn, c.HostNqn) retries := int(c.RetryCount / c.CheckInterval) if exists, err := waitForPathToExist(devicePath, retries, int(c.CheckInterval), c.Transport); !exists { klog.Errorf("connect nqn %s error %v, rollback!!!", c.TargetNqn, err) - ret := disconnectByNqn(c.TargetNqn) + ret := disconnectByNqn(c.TargetNqn, c.HostNqn) if ret < 0 { klog.Errorf("rollback error !!!") } return "", err } + // create nqn directory + nqnPath := filepath.Join(RUN_NVMF, c.TargetNqn) + if err := os.MkdirAll(nqnPath, 0750); err != nil { + klog.Errorf("create nqn directory %s error %v, rollback!!!", c.TargetNqn, err) + ret := disconnectByNqn(c.TargetNqn, c.HostNqn) + if ret < 0 { + klog.Errorf("rollback error !!!") + } + return "", err + } + + // create hostnqn file + hostnqnPath := filepath.Join(RUN_NVMF, c.TargetNqn, b64.StdEncoding.EncodeToString([]byte(c.HostNqn))) + file, err := os.Create(hostnqnPath) + if err != nil { + klog.Errorf("create hostnqn file %s:%s error %v, rollback!!!", c.TargetNqn, c.HostNqn, err) + ret := disconnectByNqn(c.TargetNqn, c.HostNqn) + if ret < 0 { + klog.Errorf("rollback error !!!") + } + return "", err + } + defer file.Close() + klog.Infof("After connect we're returning devicePath: %s", devicePath) return devicePath, nil } // we disconnect only by nqn func (c *Connector) Disconnect() error { - ret := disconnectByNqn(c.TargetNqn) - if ret == 0 { + ret := disconnectByNqn(c.TargetNqn, c.HostNqn) + if ret < 0 { return fmt.Errorf("Disconnect: failed to disconnect by nqn: %s ", c.TargetNqn) } diff --git a/pkg/nvmf/nodeserver.go b/pkg/nvmf/nodeserver.go index d104ce3..97cf460 100644 --- a/pkg/nvmf/nodeserver.go +++ b/pkg/nvmf/nodeserver.go @@ -53,7 +53,6 @@ func (n *NodeServer) NodeGetCapabilities(ctx context.Context, req *csi.NodeGetCa } func (n *NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublishVolumeRequest) (*csi.NodePublishVolumeResponse, error) { - // 1. check parameters if req.GetVolumeCapability() == nil { return nil, status.Errorf(codes.InvalidArgument, "NodePublishVolume missing Volume Capability in req.") diff --git a/pkg/nvmf/nvmf.go b/pkg/nvmf/nvmf.go index 18950de..491f7db 100644 --- a/pkg/nvmf/nvmf.go +++ b/pkg/nvmf/nvmf.go @@ -88,7 +88,7 @@ func getNVMfDiskMounter(nvmfInfo *nvmfDiskInfo, req *csi.NodePublishVolumeReques mounter: &mount.SafeFormatAndMount{Interface: mount.New(""), Exec: exec.New()}, exec: exec.New(), targetPath: req.GetTargetPath(), - connector: getNvmfConnector(nvmfInfo), + connector: getNvmfConnector(nvmfInfo, req.GetTargetPath()), } } @@ -142,11 +142,6 @@ func AttachDisk(req *csi.NodePublishVolumeRequest, nm nvmfDiskMounter) (string, // Tips: use k8s mounter to mount fs and only support "ext4" var options []string - if nm.readOnly { - options = append(options, "ro") - } else { - options = append(options, "rw") - } options = append(options, nm.mountOptions...) err = nm.mounter.FormatAndMount(devicePath, mntPath, nm.fsType, options) if err != nil { @@ -161,25 +156,16 @@ func AttachDisk(req *csi.NodePublishVolumeRequest, nm nvmfDiskMounter) (string, } func DetachDisk(volumeID string, num *nvmfDiskUnMounter, targetPath string) error { - _, cnt, err := mount.GetDeviceNameFromMount(num.mounter, targetPath) - if err != nil { - klog.Errorf("nvmf detach disk: failed to get device from mnt: %s\nError: %v", targetPath, err) - return err - } if pathExists, pathErr := mount.PathExists(targetPath); pathErr != nil { return fmt.Errorf("Error checking if path exists: %v", pathErr) } else if !pathExists { klog.Warningf("Warning: Unmount skipped because path does not exist: %v", targetPath) return nil } - if err = num.mounter.Unmount(targetPath); err != nil { + if err := num.mounter.Unmount(targetPath); err != nil { klog.Errorf("iscsi detach disk: failed to unmount: %s\nError: %v", targetPath, err) return err } - cnt-- - if cnt != 0 { - return nil - } connector, err := GetConnectorFromFile(targetPath + ".json") if err != nil {