fix: improve device discovery by using NQN as primary identifier

- Add device path discovery functionality using only NQN
- Implement tracking file system to maintain connection state by NQN and host NQN
- Support more dynamic provisioning patterns where only target endpoints need to be specified

Signed-off-by: cheolho.kang <cheolho.kang@samsung.com>
This commit is contained in:
cheolho.kang
2025-03-24 10:03:43 +09:00
parent 1a63f5b908
commit 879ba7b890
4 changed files with 149 additions and 20 deletions

View File

@@ -15,5 +15,4 @@ spec:
targetTrAddr: "192.168.122.18"
targetTrPort: "49153"
targetTrType: "tcp"
deviceUUID: "58668891-c3e4-45d0-b90e-824525c16080"
nqn: "nqn.2022-08.org.test-nvmf.example"

View File

@@ -30,7 +30,6 @@ import (
type Connector struct {
VolumeID string
DeviceUUID string
TargetNqn string
TargetAddr string
TargetPort string
@@ -43,7 +42,6 @@ type Connector struct {
func getNvmfConnector(nvmfInfo *nvmfDiskInfo, hostnqn string) *Connector {
return &Connector{
VolumeID: nvmfInfo.VolName,
DeviceUUID: nvmfInfo.DeviceUUID,
TargetNqn: nvmfInfo.Nqn,
TargetAddr: nvmfInfo.Addr,
TargetPort: nvmfInfo.Port,
@@ -240,7 +238,6 @@ func (c *Connector) Connect() (string, error) {
}
baseString := fmt.Sprintf("nqn=%s,transport=%s,traddr=%s,trsvcid=%s,hostnqn=%s", c.TargetNqn, c.Transport, c.TargetAddr, c.TargetPort, c.HostNqn)
devicePath := strings.Join([]string{"/dev/disk/by-id/nvme-uuid", c.DeviceUUID}, ".")
// connect to nvmf disk
err := _connect(baseString)
@@ -248,8 +245,10 @@ func (c *Connector) Connect() (string, error) {
return "", err
}
klog.Infof("Connect Volume %s success nqn: %s, hostnqn: %s", c.VolumeID, c.TargetNqn, c.HostNqn)
retries := int(c.RetryCount / c.CheckInterval)
if exists, err := waitForPathToExist(devicePath, retries, int(c.CheckInterval), c.Transport); !exists {
// Wait for device to be ready (find UUID and check path)
devicePath, err := findPathWithRetry(c.TargetNqn, c.RetryCount, c.CheckInterval)
if err != nil {
klog.Errorf("connect nqn %s error %v, rollback!!!", c.TargetNqn, err)
ret := disconnectByNqn(c.TargetNqn, c.HostNqn)
if ret < 0 {

View File

@@ -35,12 +35,11 @@ const (
)
type nvmfDiskInfo struct {
VolName string
Nqn string `json:"subnqn"`
Addr string `json:"traddr"`
Port string `json:"trsvcid"`
DeviceUUID string
Transport string `json:"trtype"`
VolName string
Nqn string `json:"subnqn"`
Addr string `json:"traddr"`
Port string `json:"trsvcid"`
Transport string `json:"trtype"`
}
type nvmfDiskMounter struct {
@@ -68,20 +67,18 @@ func getNVMfDiskInfo(volID string, params map[string]string) (*nvmfDiskInfo, err
targetTrAddr := params[paramAddr]
targetTrPort := params[paramPort]
targetTrType := params[paramType]
deviceUUID := params["deviceUUID"]
nqn := volID
if targetTrAddr == "" || nqn == "" || targetTrPort == "" || targetTrType == "" || deviceUUID == "" {
if targetTrAddr == "" || nqn == "" || targetTrPort == "" || targetTrType == "" {
return nil, fmt.Errorf("some nvme target info is missing, volID: %s ", volID)
}
return &nvmfDiskInfo{
VolName: volID,
Addr: targetTrAddr,
Port: targetTrPort,
Nqn: nqn,
DeviceUUID: deviceUUID,
Transport: targetTrType,
VolName: volID,
Addr: targetTrAddr,
Port: targetTrPort,
Nqn: nqn,
Transport: targetTrType,
}, nil
}

View File

@@ -91,3 +91,137 @@ func logGRPC(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, h
}
return resp, err
}
// findPathWithRetry polls for the device that belongs to the NVMe subsystem
// targetNqn and returns its /dev/disk/by-id/nvme-uuid.<uuid> path.
//
// At most maxRetries attempts are made, and intervalSeconds seconds are slept
// before every attempt, the first one included. An attempt succeeds only when
// all three lookups succeed: getDeviceNameBySubNqn yields a device name,
// getDeviceUUID yields an identifier for it, and utils.IsFileExisting reports
// true for the resulting by-id path. When no attempt succeeds, or maxRetries
// is not positive, an empty path and an error are returned.
func findPathWithRetry(targetNqn string, maxRetries, intervalSeconds int32) (string, error) {
	pause := time.Duration(intervalSeconds) * time.Second

	for remaining := maxRetries; remaining > 0; remaining-- {
		time.Sleep(pause)

		// The reason for a failure is only logged once the retry budget is spent.
		final := remaining == 1

		// Step 1: resolve the device name from the subsystem NQN.
		name := getDeviceNameBySubNqn(targetNqn)
		if name == "" {
			if final {
				klog.Infof("Failed to find device name for target NQN %s after %d attempts", targetNqn, maxRetries)
			}
			continue
		}

		// Step 2: resolve the identifier used in the by-id link name.
		id := getDeviceUUID(name)
		if id == "" {
			if final {
				klog.Infof("Failed to find UUID for device %s after %d attempts", name, maxRetries)
			}
			continue
		}

		// Step 3: the by-id path itself has to be present.
		path := "/dev/disk/by-id/nvme-uuid" + "." + id
		if !utils.IsFileExisting(path) {
			if final {
				klog.Infof("Device path %s does not exist after %d attempts", path, maxRetries)
			}
			continue
		}

		klog.Infof("Found device path %s for target NQN %s", path, targetNqn)
		return path, nil
	}

	return "", fmt.Errorf("device for target NQN %s not ready after %d attempts",
		targetNqn, maxRetries)
}
// getDeviceNameBySubNqn returns the name of the first entry under SYS_NVMF
// (in os.ReadDir order) whose subsysnqn file has targetNqn as its first line.
//
// It returns "" when SYS_NVMF cannot be read (the error is logged) or when no
// entry matches. Entries whose subsysnqn file cannot be opened, cannot be
// read, or is empty are skipped silently.
func getDeviceNameBySubNqn(targetNqn string) string {
	devices, err := os.ReadDir(SYS_NVMF)
	if err != nil {
		klog.Errorf("Failed to read NVMe devices directory: %v", err)
		return ""
	}

	for _, device := range devices {
		file, err := os.Open(filepath.Join(SYS_NVMF, device.Name(), "subsysnqn"))
		if err != nil {
			continue
		}

		lines, err := utils.ReadLinesFromFile(file)
		// Close immediately rather than with defer: a defer inside this loop
		// would keep every opened descriptor alive until the function returns.
		// The file was only read, so a Close error is not actionable here.
		_ = file.Close()
		if err != nil || len(lines) == 0 {
			continue
		}

		if lines[0] == targetNqn {
			return device.Name()
		}
	}

	return ""
}
// getDeviceUUID looks up an identifier for deviceName through
// getDeviceIdentifierFromSysfs, asking for "uuid" first and falling back to
// "nguid". The first lookup that returns no error wins; "" is returned when
// both lookups fail.
func getDeviceUUID(deviceName string) string {
	for _, kind := range [...]string{"uuid", "nguid"} {
		if id, err := getDeviceIdentifierFromSysfs(deviceName, kind); err == nil {
			return id
		}
	}
	return ""
}
// getDeviceIdentifierFromSysfs reads the identifierType attribute file of the
// first namespace directory found under SYS_NVMF/deviceName and returns its
// whitespace-trimmed content.
//
// identifierType is used verbatim as the file name; the only value that gets
// special treatment is "nguid", whose content is re-formatted as an
// 8-4-4-4-12 UUID string (see nguidToUUID).
//
// An error is returned when no namespace directory matches, when the
// attribute file is missing or unreadable, when it is empty, or when an NGUID
// does not contain exactly 32 characters after separators are removed.
func getDeviceIdentifierFromSysfs(deviceName string, identifierType string) (string, error) {
	// Namespace directories are matched by glob so that both standard
	// (nvme0n1) and controller-based (nvme2c2n1) names are found.
	namespaces, err := filepath.Glob(filepath.Join(SYS_NVMF, deviceName, "nvme*n*"))
	if err != nil {
		return "", fmt.Errorf("no namespace found for device %s: %w", deviceName, err)
	}
	if len(namespaces) == 0 {
		return "", fmt.Errorf("no namespace found for device %s", deviceName)
	}

	// Only the first match is consulted.
	identifierPath := filepath.Join(SYS_NVMF, deviceName, filepath.Base(namespaces[0]), identifierType)

	// Read directly instead of Stat-then-read; a missing file is still
	// reported with its own message.
	data, err := os.ReadFile(identifierPath)
	if err != nil {
		if os.IsNotExist(err) {
			return "", fmt.Errorf("%s file does not exist for device %s", identifierType, deviceName)
		}
		return "", fmt.Errorf("failed to read %s from sysfs: %w", identifierType, err)
	}

	identifier := strings.TrimSpace(string(data))
	if identifier == "" {
		return "", fmt.Errorf("empty %s for device %s", identifierType, deviceName)
	}

	if identifierType == "nguid" {
		return nguidToUUID(identifier)
	}
	return identifier, nil
}

// nguidToUUID re-formats an NGUID as an 8-4-4-4-12 UUID string.
//
// Spaces and hyphens are removed first, so both a bare 32-character value and
// an already dashed value are accepted.
// NOTE(review): the kernel appears to emit the nguid attribute in dashed form
// already; confirm against the target kernel version.
func nguidToUUID(nguid string) (string, error) {
	compact := strings.NewReplacer(" ", "", "-", "").Replace(nguid)
	if len(compact) != 32 {
		return "", fmt.Errorf("invalid NGUID length: got %d, expected 32", len(compact))
	}
	return fmt.Sprintf("%s-%s-%s-%s-%s",
		compact[0:8],
		compact[8:12],
		compact[12:16],
		compact[16:20],
		compact[20:]), nil
}