From 41bbaf9cfb8e9fa485d7699b4828a1cc93d33968 Mon Sep 17 00:00:00 2001
From: haruband <haruband@gmail.com>
Date: Thu, 4 May 2023 12:20:38 +0900
Subject: [PATCH] Fix multiple connections problems to one same nqn in a single
 node (#23)

* fix: make multiple connections to one same nqn possible in a single node

Multiple connections to one same nqn in a single node return -EALREADY error.
So, we should use a hostnqn to make every connections seperate. It is appropriate
to use a target path from a volume request as a hostnqn.

* fix: make ro and rw mounts for one same nqn possible in a single node

ReadOnly and ReadWrite mounts for one same nqn in a single node return -EBUSY error.
So, we should remove readonly and readwrite options when mounting a block device in a
csi driver. A cri runtime makes a mount as readonly inside containers if needed.

* fix: add fallback supports for no hostnqn sysfs file supports

Directories per each nqn will be created in /run/nvmf and files per each hostnqn
will be created in /run/nvmf/{nqn}. If linux kernel has no hostnqn sysfs file
supports, we will disconnect all connections for a nqn at once when a directory for
the nqn is empty.

* refactor: rename sysfs_nqn_path to sysfs_subsysnqn_path

* refactor: use filepath.Join instead of strings.Join

* fix: add explicitly UnsupportedHostnqnError for fallback supports

If linux kernel has no hostnqn sysfs file supports, return UnsupportedHostnqnError
and switch to fallback mode which will disconnect all connections at once when
a last controller in a nqn is disconnected.

* fix: return directly after fallback mode

* fix: mount the host's /run/nvmf directory to csi-node-driver
---
 deploy/kubernetes/csi-nvmf-node.yaml |   8 +-
 pkg/nvmf/const.go                    |   1 +
 pkg/nvmf/errors.go                   |  36 +++++++
 pkg/nvmf/fabrics.go                  | 149 ++++++++++++++++++++++++---
 pkg/nvmf/nodeserver.go               |   1 -
 pkg/nvmf/nvmf.go                     |  18 +---
 6 files changed, 178 insertions(+), 35 deletions(-)
 create mode 100644 pkg/nvmf/errors.go

diff --git a/deploy/kubernetes/csi-nvmf-node.yaml b/deploy/kubernetes/csi-nvmf-node.yaml
index 011feb4..56c8ada 100644
--- a/deploy/kubernetes/csi-nvmf-node.yaml
+++ b/deploy/kubernetes/csi-nvmf-node.yaml
@@ -73,6 +73,8 @@ spec:
             - name: pods-mount-dir
               mountPath: /var/lib/kubelet/pods
               mountPropagation: "Bidirectional"
+            - name: run-nvmf-dir
+              mountPath: /run/nvmf
             - name: host-dev
               mountPath: /dev
               mountPropagation: "HostToContainer"
@@ -94,6 +96,10 @@ spec:
           hostPath:
             path: /var/lib/kubelet/pods
             type: Directory
+        - name: run-nvmf-dir
+          hostPath:
+            path: /run/nvmf
+            type: DirectoryOrCreate
         - name: host-dev
           hostPath:
             path: /dev
@@ -102,4 +108,4 @@ spec:
             path: /sys
         - name: lib-modules
           hostPath:
-            path: /lib/modules
\ No newline at end of file
+            path: /lib/modules
diff --git a/pkg/nvmf/const.go b/pkg/nvmf/const.go
index 3ed73cb..6a5c3fc 100644
--- a/pkg/nvmf/const.go
+++ b/pkg/nvmf/const.go
@@ -18,6 +18,7 @@ package nvmf
 const (
 	NVMF_NQN_SIZE = 223
 	SYS_NVMF      = "/sys/class/nvme"
+	RUN_NVMF      = "/run/nvmf"
 )
 
 // Here erron
diff --git a/pkg/nvmf/errors.go b/pkg/nvmf/errors.go
new file mode 100644
index 0000000..736a417
--- /dev/null
+++ b/pkg/nvmf/errors.go
@@ -0,0 +1,36 @@
+/*
+Copyright 2021 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package nvmf
+
+import "fmt"
+
+type NoControllerError struct {
+	Nqn     string
+	Hostnqn string
+}
+
+func (e *NoControllerError) Error() string {
+	return fmt.Sprintf("not found controller: nqn=%s, hostnqn=%s", e.Nqn, e.Hostnqn)
+}
+
+type UnsupportedHostnqnError struct {
+	Target string
+}
+
+func (e *UnsupportedHostnqnError) Error() string {
+	return fmt.Sprintf("unsupported hostnqn sysfs file: target=%s", e.Target)
+}
diff --git a/pkg/nvmf/fabrics.go b/pkg/nvmf/fabrics.go
index 7b0672f..4fe4a80 100644
--- a/pkg/nvmf/fabrics.go
+++ b/pkg/nvmf/fabrics.go
@@ -17,10 +17,12 @@ limitations under the License.
 package nvmf
 
 import (
+	b64 "encoding/base64"
 	"encoding/json"
 	"fmt"
 	"io/ioutil"
 	"os"
+	"path/filepath"
 	"strings"
 
 	"github.com/kubernetes-csi/csi-driver-nvmf/pkg/utils"
@@ -34,11 +36,12 @@ type Connector struct {
 	TargetAddr    string
 	TargetPort    string
 	Transport     string
+	HostNqn       string
 	RetryCount    int32
 	CheckInterval int32
 }
 
-func getNvmfConnector(nvmfInfo *nvmfDiskInfo) *Connector {
+func getNvmfConnector(nvmfInfo *nvmfDiskInfo, hostnqn string) *Connector {
 	return &Connector{
 		VolumeID:   nvmfInfo.VolName,
 		DeviceUUID: nvmfInfo.DeviceUUID,
@@ -46,6 +49,7 @@ func getNvmfConnector(nvmfInfo *nvmfDiskInfo) *Connector {
 		TargetAddr: nvmfInfo.Addr,
 		TargetPort: nvmfInfo.Port,
 		Transport:  nvmfInfo.Transport,
+		HostNqn:    hostnqn,
 	}
 }
 
@@ -84,44 +88,109 @@ func _disconnect(sysfs_path string) error {
 	return nil
 }
 
-func disconnectSubsys(nqn, ctrl string) (res bool) {
-	sysfs_nqn_path := fmt.Sprintf("%s/%s/subsysnqn", SYS_NVMF, ctrl)
+func disconnectSubsysWithHostNqn(nqn, hostnqn, ctrl string) error {
+	sysfs_subsysnqn_path := fmt.Sprintf("%s/%s/subsysnqn", SYS_NVMF, ctrl)
+	sysfs_hostnqn_path := fmt.Sprintf("%s/%s/hostnqn", SYS_NVMF, ctrl)
 	sysfs_del_path := fmt.Sprintf("%s/%s/delete_controller", SYS_NVMF, ctrl)
 
-	file, err := os.Open(sysfs_nqn_path)
+	file, err := os.Open(sysfs_subsysnqn_path)
 	if err != nil {
-		klog.Errorf("Disconnect: open file %s err: %v", file.Name(), err)
-		return false
+		klog.Errorf("Disconnect: open file %s err: %v", sysfs_subsysnqn_path, err)
+		return &NoControllerError{Nqn: nqn, Hostnqn: hostnqn}
 	}
 	defer file.Close()
 
 	lines, err := utils.ReadLinesFromFile(file)
 	if err != nil {
 		klog.Errorf("Disconnect: read file %s err: %v", file.Name(), err)
-		return false
+		return &NoControllerError{Nqn: nqn, Hostnqn: hostnqn}
 	}
 
 	if lines[0] != nqn {
 		klog.Warningf("Disconnect: not this subsystem, skip")
-		return false
+		return &NoControllerError{Nqn: nqn, Hostnqn: hostnqn}
+	}
+
+	file, err = os.Open(sysfs_hostnqn_path)
+	if err != nil {
+		klog.Errorf("Disconnect: open file %s err: %v", sysfs_hostnqn_path, err)
+		return &UnsupportedHostnqnError{Target: sysfs_hostnqn_path}
+	}
+	defer file.Close()
+
+	lines, err = utils.ReadLinesFromFile(file)
+	if err != nil {
+		klog.Errorf("Disconnect: read file %s err: %v", file.Name(), err)
+		return &NoControllerError{Nqn: nqn, Hostnqn: hostnqn}
+	}
+
+	if lines[0] != hostnqn {
+		klog.Warningf("Disconnect: not this subsystem, skip")
+		return &NoControllerError{Nqn: nqn, Hostnqn: hostnqn}
 	}
 
 	err = _disconnect(sysfs_del_path)
 	if err != nil {
 		klog.Errorf("Disconnect: disconnect error: %s", err)
-		return false
+		return &NoControllerError{Nqn: nqn, Hostnqn: hostnqn}
 	}
 
-	return true
+	return nil
 }
 
-func disconnectByNqn(nqn string) int {
+func disconnectSubsys(nqn, ctrl string) error {
+	sysfs_subsysnqn_path := fmt.Sprintf("%s/%s/subsysnqn", SYS_NVMF, ctrl)
+	sysfs_del_path := fmt.Sprintf("%s/%s/delete_controller", SYS_NVMF, ctrl)
+
+	file, err := os.Open(sysfs_subsysnqn_path)
+	if err != nil {
+		klog.Errorf("Disconnect: open file %s err: %v", sysfs_subsysnqn_path, err)
+		return &NoControllerError{Nqn: nqn, Hostnqn: ""}
+	}
+	defer file.Close()
+
+	lines, err := utils.ReadLinesFromFile(file)
+	if err != nil {
+		klog.Errorf("Disconnect: read file %s err: %v", file.Name(), err)
+		return &NoControllerError{Nqn: nqn, Hostnqn: ""}
+	}
+
+	if lines[0] != nqn {
+		klog.Warningf("Disconnect: not this subsystem, skip")
+		return &NoControllerError{Nqn: nqn, Hostnqn: ""}
+	}
+
+	err = _disconnect(sysfs_del_path)
+	if err != nil {
+		klog.Errorf("Disconnect: disconnect error: %s", err)
+		return &NoControllerError{Nqn: nqn, Hostnqn: ""}
+	}
+
+	return nil
+}
+
+func disconnectByNqn(nqn, hostnqn string) int {
 	ret := 0
 	if len(nqn) > NVMF_NQN_SIZE {
 		klog.Errorf("Disconnect: nqn %s is too long ", nqn)
 		return -EINVAL
 	}
 
+	// delete hostnqn file
+	hostnqnPath := filepath.Join(RUN_NVMF, nqn, b64.StdEncoding.EncodeToString([]byte(hostnqn)))
+	os.Remove(hostnqnPath)
+
+	// delete nqn directory if has no hostnqn files
+	nqnPath := filepath.Join(RUN_NVMF, nqn)
+	hostnqns, err := ioutil.ReadDir(nqnPath)
+	if err != nil {
+		klog.Errorf("Disconnect: readdir %s err: %v", nqnPath, err)
+		return -ENOENT
+	}
+	if len(hostnqns) <= 0 {
+		os.RemoveAll(nqnPath)
+	}
+
 	devices, err := ioutil.ReadDir(SYS_NVMF)
 	if err != nil {
 		klog.Errorf("Disconnect: readdir %s err: %s", SYS_NVMF, err)
@@ -129,10 +198,32 @@ func disconnectByNqn(nqn string) int {
 	}
 
 	for _, device := range devices {
-		if disconnectSubsys(nqn, device.Name()) {
+		if err := disconnectSubsysWithHostNqn(nqn, hostnqn, device.Name()); err != nil {
+			if _, ok := err.(*UnsupportedHostnqnError); ok {
+				klog.Infof("Fallback because you have no hostnqn supports!")
+
+				// disconnect all controllers if has no hostnqn files
+				if len(hostnqns) <= 0 {
+					devices, err := ioutil.ReadDir(SYS_NVMF)
+					if err != nil {
+						klog.Errorf("Disconnect: readdir %s err: %s", SYS_NVMF, err)
+						return -ENOENT
+					}
+
+					for _, device := range devices {
+						if err := disconnectSubsys(nqn, device.Name()); err == nil {
+							ret++
+						}
+					}
+				}
+
+				return ret
+			}
+		} else {
 			ret++
 		}
 	}
+
 	return ret
 }
 
@@ -154,7 +245,7 @@ func (c *Connector) Connect() (string, error) {
 		return "", fmt.Errorf("csi transport only support tcp/rdma ")
 	}
 
-	baseString := fmt.Sprintf("nqn=%s,transport=%s,traddr=%s,trsvcid=%s", c.TargetNqn, c.Transport, c.TargetAddr, c.TargetPort)
+	baseString := fmt.Sprintf("nqn=%s,transport=%s,traddr=%s,trsvcid=%s,hostnqn=%s", c.TargetNqn, c.Transport, c.TargetAddr, c.TargetPort, c.HostNqn)
 	devicePath := strings.Join([]string{"/dev/disk/by-id/nvme-uuid", c.DeviceUUID}, ".")
 
 	// connect to nvmf disk
@@ -162,25 +253,49 @@ func (c *Connector) Connect() (string, error) {
 	if err != nil {
 		return "", err
 	}
-	klog.Infof("Connect Volume %s success nqn: %s", c.VolumeID, c.TargetNqn)
+	klog.Infof("Connect Volume %s success nqn: %s, hostnqn: %s", c.VolumeID, c.TargetNqn, c.HostNqn)
 	retries := int(c.RetryCount / c.CheckInterval)
 	if exists, err := waitForPathToExist(devicePath, retries, int(c.CheckInterval), c.Transport); !exists {
 		klog.Errorf("connect nqn %s error %v, rollback!!!", c.TargetNqn, err)
-		ret := disconnectByNqn(c.TargetNqn)
+		ret := disconnectByNqn(c.TargetNqn, c.HostNqn)
 		if ret < 0 {
 			klog.Errorf("rollback error !!!")
 		}
 		return "", err
 	}
 
+	// create nqn directory
+	nqnPath := filepath.Join(RUN_NVMF, c.TargetNqn)
+	if err := os.MkdirAll(nqnPath, 0750); err != nil {
+		klog.Errorf("create nqn directory %s error %v, rollback!!!", c.TargetNqn, err)
+		ret := disconnectByNqn(c.TargetNqn, c.HostNqn)
+		if ret < 0 {
+			klog.Errorf("rollback error !!!")
+		}
+		return "", err
+	}
+
+	// create hostnqn file
+	hostnqnPath := filepath.Join(RUN_NVMF, c.TargetNqn, b64.StdEncoding.EncodeToString([]byte(c.HostNqn)))
+	file, err := os.Create(hostnqnPath)
+	if err != nil {
+		klog.Errorf("create hostnqn file %s:%s error %v, rollback!!!", c.TargetNqn, c.HostNqn, err)
+		ret := disconnectByNqn(c.TargetNqn, c.HostNqn)
+		if ret < 0 {
+			klog.Errorf("rollback error !!!")
+		}
+		return "", err
+	}
+	defer file.Close()
+
 	klog.Infof("After connect we're returning devicePath: %s", devicePath)
 	return devicePath, nil
 }
 
 // we disconnect only by nqn
 func (c *Connector) Disconnect() error {
-	ret := disconnectByNqn(c.TargetNqn)
-	if ret == 0 {
+	ret := disconnectByNqn(c.TargetNqn, c.HostNqn)
+	if ret < 0 {
 		return fmt.Errorf("Disconnect: failed to disconnect by nqn: %s ", c.TargetNqn)
 	}
 
diff --git a/pkg/nvmf/nodeserver.go b/pkg/nvmf/nodeserver.go
index d104ce3..97cf460 100644
--- a/pkg/nvmf/nodeserver.go
+++ b/pkg/nvmf/nodeserver.go
@@ -53,7 +53,6 @@ func (n *NodeServer) NodeGetCapabilities(ctx context.Context, req *csi.NodeGetCa
 }
 
 func (n *NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublishVolumeRequest) (*csi.NodePublishVolumeResponse, error) {
-
 	// 1. check parameters
 	if req.GetVolumeCapability() == nil {
 		return nil, status.Errorf(codes.InvalidArgument, "NodePublishVolume missing Volume Capability in req.")
diff --git a/pkg/nvmf/nvmf.go b/pkg/nvmf/nvmf.go
index 18950de..491f7db 100644
--- a/pkg/nvmf/nvmf.go
+++ b/pkg/nvmf/nvmf.go
@@ -88,7 +88,7 @@ func getNVMfDiskMounter(nvmfInfo *nvmfDiskInfo, req *csi.NodePublishVolumeReques
 		mounter:      &mount.SafeFormatAndMount{Interface: mount.New(""), Exec: exec.New()},
 		exec:         exec.New(),
 		targetPath:   req.GetTargetPath(),
-		connector:    getNvmfConnector(nvmfInfo),
+		connector:    getNvmfConnector(nvmfInfo, req.GetTargetPath()),
 	}
 }
 
@@ -142,11 +142,6 @@ func AttachDisk(req *csi.NodePublishVolumeRequest, nm nvmfDiskMounter) (string,
 
 	// Tips: use k8s mounter to mount fs and only support "ext4"
 	var options []string
-	if nm.readOnly {
-		options = append(options, "ro")
-	} else {
-		options = append(options, "rw")
-	}
 	options = append(options, nm.mountOptions...)
 	err = nm.mounter.FormatAndMount(devicePath, mntPath, nm.fsType, options)
 	if err != nil {
@@ -161,25 +156,16 @@ func AttachDisk(req *csi.NodePublishVolumeRequest, nm nvmfDiskMounter) (string,
 }
 
 func DetachDisk(volumeID string, num *nvmfDiskUnMounter, targetPath string) error {
-	_, cnt, err := mount.GetDeviceNameFromMount(num.mounter, targetPath)
-	if err != nil {
-		klog.Errorf("nvmf detach disk: failed to get device from mnt: %s\nError: %v", targetPath, err)
-		return err
-	}
 	if pathExists, pathErr := mount.PathExists(targetPath); pathErr != nil {
 		return fmt.Errorf("Error checking if path exists: %v", pathErr)
 	} else if !pathExists {
 		klog.Warningf("Warning: Unmount skipped because path does not exist: %v", targetPath)
 		return nil
 	}
-	if err = num.mounter.Unmount(targetPath); err != nil {
+	if err := num.mounter.Unmount(targetPath); err != nil {
 		klog.Errorf("iscsi detach disk: failed to unmount: %s\nError: %v", targetPath, err)
 		return err
 	}
-	cnt--
-	if cnt != 0 {
-		return nil
-	}
 
 	connector, err := GetConnectorFromFile(targetPath + ".json")
 	if err != nil {