mirror of
https://github.com/kubernetes-csi/csi-driver-nvmf.git
synced 2025-08-31 12:48:35 +00:00
fix: improve device discovery by using NQN as primary identifier
- Add device path discovery functionality using only NQN
- Implement tracking file system to maintain connection state by NQN and host NQN
- Support more dynamic provisioning patterns where only target endpoints need to be specified

Signed-off-by: cheolho.kang <cheolho.kang@samsung.com>
This commit is contained in:
@@ -15,5 +15,4 @@ spec:
|
||||
targetTrAddr: "192.168.122.18"
|
||||
targetTrPort: "49153"
|
||||
targetTrType: "tcp"
|
||||
deviceUUID: "58668891-c3e4-45d0-b90e-824525c16080"
|
||||
nqn: "nqn.2022-08.org.test-nvmf.example"
|
@@ -30,7 +30,6 @@ import (
|
||||
|
||||
type Connector struct {
|
||||
VolumeID string
|
||||
DeviceUUID string
|
||||
TargetNqn string
|
||||
TargetAddr string
|
||||
TargetPort string
|
||||
@@ -43,7 +42,6 @@ type Connector struct {
|
||||
func getNvmfConnector(nvmfInfo *nvmfDiskInfo, hostnqn string) *Connector {
|
||||
return &Connector{
|
||||
VolumeID: nvmfInfo.VolName,
|
||||
DeviceUUID: nvmfInfo.DeviceUUID,
|
||||
TargetNqn: nvmfInfo.Nqn,
|
||||
TargetAddr: nvmfInfo.Addr,
|
||||
TargetPort: nvmfInfo.Port,
|
||||
@@ -240,7 +238,6 @@ func (c *Connector) Connect() (string, error) {
|
||||
}
|
||||
|
||||
baseString := fmt.Sprintf("nqn=%s,transport=%s,traddr=%s,trsvcid=%s,hostnqn=%s", c.TargetNqn, c.Transport, c.TargetAddr, c.TargetPort, c.HostNqn)
|
||||
devicePath := strings.Join([]string{"/dev/disk/by-id/nvme-uuid", c.DeviceUUID}, ".")
|
||||
|
||||
// connect to nvmf disk
|
||||
err := _connect(baseString)
|
||||
@@ -248,8 +245,10 @@ func (c *Connector) Connect() (string, error) {
|
||||
return "", err
|
||||
}
|
||||
klog.Infof("Connect Volume %s success nqn: %s, hostnqn: %s", c.VolumeID, c.TargetNqn, c.HostNqn)
|
||||
retries := int(c.RetryCount / c.CheckInterval)
|
||||
if exists, err := waitForPathToExist(devicePath, retries, int(c.CheckInterval), c.Transport); !exists {
|
||||
|
||||
// Wait for device to be ready (find UUID and check path)
|
||||
devicePath, err := findPathWithRetry(c.TargetNqn, c.RetryCount, c.CheckInterval)
|
||||
if err != nil {
|
||||
klog.Errorf("connect nqn %s error %v, rollback!!!", c.TargetNqn, err)
|
||||
ret := disconnectByNqn(c.TargetNqn, c.HostNqn)
|
||||
if ret < 0 {
|
||||
|
@@ -35,12 +35,11 @@ const (
|
||||
)
|
||||
|
||||
type nvmfDiskInfo struct {
|
||||
VolName string
|
||||
Nqn string `json:"subnqn"`
|
||||
Addr string `json:"traddr"`
|
||||
Port string `json:"trsvcid"`
|
||||
DeviceUUID string
|
||||
Transport string `json:"trtype"`
|
||||
VolName string
|
||||
Nqn string `json:"subnqn"`
|
||||
Addr string `json:"traddr"`
|
||||
Port string `json:"trsvcid"`
|
||||
Transport string `json:"trtype"`
|
||||
}
|
||||
|
||||
type nvmfDiskMounter struct {
|
||||
@@ -68,20 +67,18 @@ func getNVMfDiskInfo(volID string, params map[string]string) (*nvmfDiskInfo, err
|
||||
targetTrAddr := params[paramAddr]
|
||||
targetTrPort := params[paramPort]
|
||||
targetTrType := params[paramType]
|
||||
deviceUUID := params["deviceUUID"]
|
||||
nqn := volID
|
||||
|
||||
if targetTrAddr == "" || nqn == "" || targetTrPort == "" || targetTrType == "" || deviceUUID == "" {
|
||||
if targetTrAddr == "" || nqn == "" || targetTrPort == "" || targetTrType == "" {
|
||||
return nil, fmt.Errorf("some nvme target info is missing, volID: %s ", volID)
|
||||
}
|
||||
|
||||
return &nvmfDiskInfo{
|
||||
VolName: volID,
|
||||
Addr: targetTrAddr,
|
||||
Port: targetTrPort,
|
||||
Nqn: nqn,
|
||||
DeviceUUID: deviceUUID,
|
||||
Transport: targetTrType,
|
||||
VolName: volID,
|
||||
Addr: targetTrAddr,
|
||||
Port: targetTrPort,
|
||||
Nqn: nqn,
|
||||
Transport: targetTrType,
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
@@ -91,3 +91,137 @@ func logGRPC(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, h
|
||||
}
|
||||
return resp, err
|
||||
}
|
||||
|
||||
// findPathWithRetry waits until the NVMe device with the specified NQN is fully connected
|
||||
// and returns its device path. It retries up to maxRetries times with intervalSeconds between attempts.
|
||||
func findPathWithRetry(targetNqn string, maxRetries, intervalSeconds int32) (string, error) {
|
||||
for i := int32(0); i < maxRetries; i++ {
|
||||
time.Sleep(time.Second * time.Duration(intervalSeconds))
|
||||
|
||||
// Step 1: Find the device name
|
||||
deviceName := getDeviceNameBySubNqn(targetNqn)
|
||||
if deviceName == "" {
|
||||
if i == maxRetries-1 {
|
||||
klog.Infof("Failed to find device name for target NQN %s after %d attempts", targetNqn, maxRetries)
|
||||
break
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Step 2: Find the UUID
|
||||
uuid := getDeviceUUID(deviceName)
|
||||
if uuid == "" {
|
||||
if i == maxRetries-1 {
|
||||
klog.Infof("Failed to find UUID for device %s after %d attempts", deviceName, maxRetries)
|
||||
break
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Step 3: Check if device path exists
|
||||
devicePath := strings.Join([]string{"/dev/disk/by-id/nvme-uuid", uuid}, ".")
|
||||
if exists := utils.IsFileExisting(devicePath); !exists {
|
||||
if i == maxRetries-1 {
|
||||
klog.Infof("Device path %s does not exist after %d attempts", devicePath, maxRetries)
|
||||
break
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// All steps successful
|
||||
klog.Infof("Found device path %s for target NQN %s", devicePath, targetNqn)
|
||||
return devicePath, nil
|
||||
}
|
||||
|
||||
return "", fmt.Errorf("device for target NQN %s not ready after %d attempts",
|
||||
targetNqn, maxRetries)
|
||||
}
|
||||
|
||||
// getDeviceNameBySubNqn finds a device's name based on its subsystem NQN
|
||||
func getDeviceNameBySubNqn(targetNqn string) string {
|
||||
devices, err := os.ReadDir(SYS_NVMF)
|
||||
if err != nil {
|
||||
klog.Errorf("Failed to read NVMe devices directory: %v", err)
|
||||
return ""
|
||||
}
|
||||
|
||||
for _, device := range devices {
|
||||
subsysNqnPath := fmt.Sprintf("%s/%s/subsysnqn", SYS_NVMF, device.Name())
|
||||
|
||||
file, err := os.Open(subsysNqnPath)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
lines, err := utils.ReadLinesFromFile(file)
|
||||
if err != nil || len(lines) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
if lines[0] == targetNqn {
|
||||
return device.Name()
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
// getDeviceUUID returns the UUID for the given device name
|
||||
func getDeviceUUID(deviceName string) string {
|
||||
// Try uuid first, then nguid
|
||||
identifierTypes := []string{"uuid", "nguid"}
|
||||
|
||||
for _, idType := range identifierTypes {
|
||||
identifier, err := getDeviceIdentifierFromSysfs(deviceName, idType)
|
||||
if err == nil {
|
||||
return identifier
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
// getDeviceIdentifierFromSysfs extracts device identifiers from sysfs
|
||||
func getDeviceIdentifierFromSysfs(deviceName string, identifierType string) (string, error) {
|
||||
// Find namespaces - supports both standard (nvme0n1) and controller-based (nvme2c2n1) namespaces
|
||||
namespacePattern := filepath.Join(SYS_NVMF, deviceName, "nvme*n*")
|
||||
namespaces, err := filepath.Glob(namespacePattern)
|
||||
if err != nil || len(namespaces) == 0 {
|
||||
return "", fmt.Errorf("no namespace found for device %s: %v", deviceName, err)
|
||||
}
|
||||
|
||||
nsDir := filepath.Base(namespaces[0])
|
||||
identifierPath := filepath.Join(SYS_NVMF, deviceName, nsDir, identifierType)
|
||||
|
||||
if _, err := os.Stat(identifierPath); os.IsNotExist(err) {
|
||||
return "", fmt.Errorf("%s file does not exist for device %s", identifierType, deviceName)
|
||||
}
|
||||
|
||||
data, err := os.ReadFile(identifierPath)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to read %s from sysfs: %v", identifierType, err)
|
||||
}
|
||||
|
||||
identifier := strings.TrimSpace(string(data))
|
||||
if identifier == "" {
|
||||
return "", fmt.Errorf("empty %s for device %s", identifierType, deviceName)
|
||||
}
|
||||
|
||||
// Convert NGUID to UUID if applicable
|
||||
if identifierType == "nguid" {
|
||||
identifier := strings.ReplaceAll(identifier, " ", "")
|
||||
if len(identifier) != 32 {
|
||||
return "", fmt.Errorf("invalid NGUID length: got %d, expected 32", len(identifier))
|
||||
}
|
||||
|
||||
return fmt.Sprintf("%s-%s-%s-%s-%s",
|
||||
identifier[0:8],
|
||||
identifier[8:12],
|
||||
identifier[12:16],
|
||||
identifier[16:20],
|
||||
identifier[20:]), nil
|
||||
}
|
||||
|
||||
return identifier, nil
|
||||
}
|
||||
|
Reference in New Issue
Block a user