parse kubelet checkpoint file for pod devices

Enable kubelet checkpoint file parsing to get Pod device info
so that this device information can be passed to CNI plugins
that need specific device information to work.

Change-Id: I6630f56adc0a8307f575fc09ce9090c1ffca0337
This commit is contained in:
Abdul Halim
2018-09-10 16:27:07 +01:00
committed by Kuralamudhan Ramakrishnan
parent 46a0f7590c
commit e3d14b2732
7 changed files with 218 additions and 9 deletions

76
checkpoint/checkpoint.go Normal file
View File

@@ -0,0 +1,76 @@
package checkpoint
import (
"encoding/json"
"fmt"
"io/ioutil"
"github.com/intel/multus-cni/types"
)
const (
// checkPointfile is the absolute path of the kubelet device-plugin checkpoint
// file that getPodEntries reads and JSON-decodes.
checkPointfile = "/var/lib/kubelet/device-plugins/kubelet_internal_checkpoint"
)
// PodDevicesEntry is one device allocation record decoded from the
// PodDeviceEntries array of the kubelet checkpoint file.
type PodDevicesEntry struct {
// PodUID is compared against the requested pod ID in GetComputeDeviceMap.
PodUID string
// ContainerName is decoded but not read elsewhere in this file.
ContainerName string
// ResourceName is used as the key of the map built by GetComputeDeviceMap.
ResourceName string
// DeviceIDs are the device IDs GetComputeDeviceMap collects per resource.
DeviceIDs []string
// AllocResp is decoded but not read elsewhere in this file; being a []byte,
// encoding/json expects it as a base64-encoded JSON string.
AllocResp []byte
}
// checkpointData mirrors the object stored under the "Data" key of the
// checkpoint file.
type checkpointData struct {
// PodDeviceEntries is the list returned by getPodEntries.
PodDeviceEntries []PodDevicesEntry
// RegisteredDevices is decoded but not read elsewhere in this file.
// NOTE(review): presumably resource name -> registered device IDs; confirm
// against the kubelet version in use.
RegisteredDevices map[string][]string
}
// Data is the top-level JSON object of the kubelet checkpoint file as decoded
// by getPodEntries.
type Data struct {
// Data holds the checkpoint payload.
Data checkpointData
// Checksum is decoded from the file but is not verified by this package.
Checksum uint64
}
// getPodEntries reads the kubelet checkpoint file at checkPointfile and returns
// every Pod device allocation entry recorded in it.
//
// It returns a nil slice and a descriptive error when the file cannot be read
// or its contents are not valid checkpoint JSON. The Checksum stored in the
// file is decoded but not verified here.
func getPodEntries() ([]PodDevicesEntry, error) {
	rawBytes, err := ioutil.ReadFile(checkPointfile)
	if err != nil {
		// Keep the message on a single line: callers wrap and log it, and
		// embedded newlines break the log entry apart.
		return nil, fmt.Errorf("getPodEntries(): error reading file %s: %v", checkPointfile, err)
	}

	cpd := &Data{}
	if err = json.Unmarshal(rawBytes, cpd); err != nil {
		return nil, fmt.Errorf("getPodEntries(): error unmarshalling raw bytes: %v", err)
	}
	return cpd.Data.PodDeviceEntries, nil
}
// GetComputeDeviceMap collects the devices recorded in the kubelet checkpoint
// for the Pod whose UID equals podID.
//
// The result is keyed by resource name; each value accumulates the device IDs
// of every checkpoint entry of that Pod carrying that resource name. If the
// checkpoint cannot be loaded, the error from getPodEntries is returned
// unchanged together with a nil map.
func GetComputeDeviceMap(podID string) (map[string]*types.ResourceInfo, error) {
	entries, err := getPodEntries()
	if err != nil {
		return nil, err
	}

	devicesByResource := make(map[string]*types.ResourceInfo)
	for i := range entries {
		e := &entries[i]
		if e.PodUID != podID {
			// Entry belongs to a different Pod.
			continue
		}
		if info, found := devicesByResource[e.ResourceName]; found {
			// Resource already seen for this Pod: extend its device list.
			info.DeviceIDs = append(info.DeviceIDs, e.DeviceIDs...)
			continue
		}
		// First entry for this resource name.
		devicesByResource[e.ResourceName] = &types.ResourceInfo{DeviceIDs: e.DeviceIDs}
	}
	return devicesByResource, nil
}

View File

@@ -60,3 +60,35 @@ A sample `cni-configuration.conf` is provided, typically this file is placed in
## Other considerations ## Other considerations
Primarily in this setup one thing that one should consider are the aspects of the `macvlan-conf.yml`, which is likely specific to the configuration of the node on which this resides. Primarily in this setup one thing that one should consider are the aspects of the `macvlan-conf.yml`, which is likely specific to the configuration of the node on which this resides.
## Passing down device information
Some CNI plugins require specific device information, which may be pre-allocated by a K8s device plugin. This can be indicated by providing the `k8s.v1.cni.cncf.io/resourceName` annotation in the plugin's network attachment definition CRD. The file [`examples/sriov-net.yaml`](./sriov-net.yaml) shows an example of how to define a network attachment definition with specific device allocation information. Multus will get the allocated device information and make it available for the CNI plugin to work on.
In this example (shown below), it is expected that an [SRIOV Device Plugin](https://github.com/intel/sriov-network-device-plugin/tree/dev/k8s-deviceid-model) makes a pool of SRIOV VFs available to K8s with `intel.com/sriov` as their resourceName. Any device allocated from this resource pool will be passed down by Multus to the [sriov-cni](https://github.com/intel/sriov-cni/tree/dev/k8s-deviceid-model) plugin in the `deviceID` field. It is up to the sriov-cni plugin to capture this information and work with this specific device.
```yaml
apiVersion: "k8s.cni.cncf.io/v1"
kind: NetworkAttachmentDefinition
metadata:
name: sriov-net-a
annotations:
k8s.v1.cni.cncf.io/resourceName: intel.com/sriov
spec:
config: '{
"type": "sriov",
"vlan": 1000,
"ipam": {
"type": "host-local",
"subnet": "10.56.217.0/24",
"rangeStart": "10.56.217.171",
"rangeEnd": "10.56.217.181",
"routes": [{
"dst": "0.0.0.0/0"
}],
"gateway": "10.56.217.1"
}
}'
```
The [net-resource-sample-pod.yaml](./net-resource-sample-pod.yaml) is an example Pod manifest file that requests an SRIOV device from a host, which is then configured using the above network attachment definition.
>For further information on how to configure SRIOV Device Plugin and SRIOV-CNI please refer to the links given above.

View File

@@ -0,0 +1,21 @@
apiVersion: v1
kind: Pod
metadata:
name: testpod1
labels:
env: test
annotations:
k8s.v1.cni.cncf.io/networks: sriov-net-a
spec:
containers:
- name: appcntr1
image: centos/tools
imagePullPolicy: IfNotPresent
command: [ "/bin/bash", "-c", "--" ]
args: [ "while true; do sleep 300000; done;" ]
resources:
requests:
intel.com/sriov: '1'
limits:
intel.com/sriov: '1'
restartPolicy: "Never"

21
examples/sriov-net.yaml Normal file
View File

@@ -0,0 +1,21 @@
apiVersion: "k8s.cni.cncf.io/v1"
kind: NetworkAttachmentDefinition
metadata:
name: sriov-net-a
annotations:
k8s.v1.cni.cncf.io/resourceName: intel.com/sriov
spec:
config: '{
"type": "sriov",
"vlan": 1000,
"ipam": {
"type": "host-local",
"subnet": "10.56.217.0/24",
"rangeStart": "10.56.217.171",
"rangeEnd": "10.56.217.181",
"routes": [{
"dst": "0.0.0.0/0"
}],
"gateway": "10.56.217.1"
}
}'

View File

@@ -31,10 +31,15 @@ import (
"github.com/containernetworking/cni/libcni" "github.com/containernetworking/cni/libcni"
"github.com/containernetworking/cni/pkg/skel" "github.com/containernetworking/cni/pkg/skel"
cnitypes "github.com/containernetworking/cni/pkg/types" cnitypes "github.com/containernetworking/cni/pkg/types"
"github.com/intel/multus-cni/checkpoint"
"github.com/intel/multus-cni/logging" "github.com/intel/multus-cni/logging"
"github.com/intel/multus-cni/types" "github.com/intel/multus-cni/types"
) )
const (
resourceNameAnnot = "k8s.v1.cni.cncf.io/resourceName"
)
// NoK8sNetworkError indicates error, no network in kubernetes // NoK8sNetworkError indicates error, no network in kubernetes
type NoK8sNetworkError struct { type NoK8sNetworkError struct {
message string message string
@@ -131,16 +136,16 @@ func setPodNetworkAnnotation(client KubeClient, namespace string, pod *v1.Pod, n
return pod, nil return pod, nil
} }
func getPodNetworkAnnotation(client KubeClient, k8sArgs *types.K8sArgs) (string, string, error) { func getPodNetworkAnnotation(client KubeClient, k8sArgs *types.K8sArgs) (string, string, string, error) {
var err error var err error
logging.Debugf("getPodNetworkAnnotation: %v, %v", client, k8sArgs) logging.Debugf("getPodNetworkAnnotation: %v, %v", client, k8sArgs)
pod, err := client.GetPod(string(k8sArgs.K8S_POD_NAMESPACE), string(k8sArgs.K8S_POD_NAME)) pod, err := client.GetPod(string(k8sArgs.K8S_POD_NAMESPACE), string(k8sArgs.K8S_POD_NAME))
if err != nil { if err != nil {
return "", "", logging.Errorf("getPodNetworkAnnotation: failed to query the pod %v in out of cluster comm: %v", string(k8sArgs.K8S_POD_NAME), err) return "", "", "", logging.Errorf("getPodNetworkAnnotation: failed to query the pod %v in out of cluster comm: %v", string(k8sArgs.K8S_POD_NAME), err)
} }
return pod.Annotations["k8s.v1.cni.cncf.io/networks"], pod.ObjectMeta.Namespace, nil return pod.Annotations["k8s.v1.cni.cncf.io/networks"], pod.ObjectMeta.Namespace, string(pod.UID), nil
} }
func parsePodNetworkObjectName(podnetwork string) (string, string, string, error) { func parsePodNetworkObjectName(podnetwork string) (string, string, string, error) {
@@ -326,7 +331,8 @@ func cniConfigFromNetworkResource(customResource *types.NetworkAttachmentDefinit
return config, nil return config, nil
} }
func getKubernetesDelegate(client KubeClient, net *types.NetworkSelectionElement, confdir string) (*types.DelegateNetConf, error) { func getKubernetesDelegate(client KubeClient, net *types.NetworkSelectionElement, confdir string, resourceMap map[string]*types.ResourceInfo) (*types.DelegateNetConf, error) {
logging.Debugf("getKubernetesDelegate: %v, %v, %s", client, net, confdir) logging.Debugf("getKubernetesDelegate: %v, %v, %s", client, net, confdir)
rawPath := fmt.Sprintf("/apis/k8s.cni.cncf.io/v1/namespaces/%s/network-attachment-definitions/%s", net.Namespace, net.Name) rawPath := fmt.Sprintf("/apis/k8s.cni.cncf.io/v1/namespaces/%s/network-attachment-definitions/%s", net.Namespace, net.Name)
netData, err := client.GetRawWithPath(rawPath) netData, err := client.GetRawWithPath(rawPath)
@@ -339,12 +345,26 @@ func getKubernetesDelegate(client KubeClient, net *types.NetworkSelectionElement
return nil, logging.Errorf("getKubernetesDelegate: failed to get the netplugin data: %v", err) return nil, logging.Errorf("getKubernetesDelegate: failed to get the netplugin data: %v", err)
} }
// Get resourceName annotation from NetDefinition
deviceID := ""
resourceName, ok := customResource.Metadata.Annotations[resourceNameAnnot]
if ok {
// ResourceName annotation is found; try to get device info from resourceMap
entry, ok := resourceMap[resourceName]
if ok {
if idCount := len(entry.DeviceIDs); idCount > 0 && idCount > entry.Index {
deviceID = entry.DeviceIDs[entry.Index]
entry.Index++ // increment Index for next delegate
}
}
}
configBytes, err := cniConfigFromNetworkResource(customResource, confdir) configBytes, err := cniConfigFromNetworkResource(customResource, confdir)
if err != nil { if err != nil {
return nil, err return nil, err
} }
delegate, err := types.LoadDelegateNetConf(configBytes, net.InterfaceRequest) delegate, err := types.LoadDelegateNetConf(configBytes, net.InterfaceRequest, deviceID)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -447,11 +467,17 @@ func GetK8sClient(kubeconfig string, kubeClient KubeClient) (KubeClient, error)
func GetK8sNetwork(k8sclient KubeClient, k8sArgs *types.K8sArgs, confdir string) ([]*types.DelegateNetConf, error) { func GetK8sNetwork(k8sclient KubeClient, k8sArgs *types.K8sArgs, confdir string) ([]*types.DelegateNetConf, error) {
logging.Debugf("GetK8sNetwork: %v, %v, %v", k8sclient, k8sArgs, confdir) logging.Debugf("GetK8sNetwork: %v, %v, %v", k8sclient, k8sArgs, confdir)
netAnnot, defaultNamespace, err := getPodNetworkAnnotation(k8sclient, k8sArgs) netAnnot, defaultNamespace, podID, err := getPodNetworkAnnotation(k8sclient, k8sArgs)
if err != nil { if err != nil {
return nil, err return nil, err
} }
// Get Pod ComputeDevices info
resourceMap, err := checkpoint.GetComputeDeviceMap(podID)
if err != nil {
return nil, logging.Errorf("GetK8sNetwork: failed to get resourceMap for PodUID: %v %v", podID, err)
}
if len(netAnnot) == 0 { if len(netAnnot) == 0 {
return nil, &NoK8sNetworkError{"no kubernetes network found"} return nil, &NoK8sNetworkError{"no kubernetes network found"}
} }
@@ -464,7 +490,7 @@ func GetK8sNetwork(k8sclient KubeClient, k8sArgs *types.K8sArgs, confdir string)
// Read all network objects referenced by 'networks' // Read all network objects referenced by 'networks'
var delegates []*types.DelegateNetConf var delegates []*types.DelegateNetConf
for _, net := range networks { for _, net := range networks {
delegate, err := getKubernetesDelegate(k8sclient, net, confdir) delegate, err := getKubernetesDelegate(k8sclient, net, confdir, resourceMap)
if err != nil { if err != nil {
return nil, logging.Errorf("GetK8sNetwork: failed getting the delegate: %v", err) return nil, logging.Errorf("GetK8sNetwork: failed getting the delegate: %v", err)
} }

View File

@@ -50,7 +50,16 @@ func LoadDelegateNetConfList(bytes []byte, delegateConf *DelegateNetConf) error
} }
// Convert raw CNI JSON into a DelegateNetConf structure // Convert raw CNI JSON into a DelegateNetConf structure
func LoadDelegateNetConf(bytes []byte, ifnameRequest string) (*DelegateNetConf, error) { func LoadDelegateNetConf(bytes []byte, ifnameRequest, deviceID string) (*DelegateNetConf, error) {
// If deviceID is present, inject this into delegate config
if deviceID != "" {
if updatedBytes, err := delegateAddDeviceID(bytes, deviceID); err != nil {
return nil, logging.Errorf("error in LoadDelegateNetConf - delegateAddDeviceID unable to update delegate config: %v", err)
} else {
bytes = updatedBytes
}
}
delegateConf := &DelegateNetConf{} delegateConf := &DelegateNetConf{}
logging.Debugf("LoadDelegateNetConf: %s, %s", string(bytes), ifnameRequest) logging.Debugf("LoadDelegateNetConf: %s, %s", string(bytes), ifnameRequest)
if err := json.Unmarshal(bytes, &delegateConf.Conf); err != nil { if err := json.Unmarshal(bytes, &delegateConf.Conf); err != nil {
@@ -196,7 +205,7 @@ func LoadNetConf(bytes []byte) (*NetConf, error) {
if err != nil { if err != nil {
return nil, logging.Errorf("error marshalling delegate %d config: %v", idx, err) return nil, logging.Errorf("error marshalling delegate %d config: %v", idx, err)
} }
delegateConf, err := LoadDelegateNetConf(bytes, "") delegateConf, err := LoadDelegateNetConf(bytes, "", "")
if err != nil { if err != nil {
return nil, logging.Errorf("failed to load delegate %d config: %v", idx, err) return nil, logging.Errorf("failed to load delegate %d config: %v", idx, err)
} }
@@ -216,3 +225,21 @@ func (n *NetConf) AddDelegates(newDelegates []*DelegateNetConf) error {
n.Delegates = append(n.Delegates, newDelegates...) n.Delegates = append(n.Delegates, newDelegates...)
return nil return nil
} }
// delegateAddDeviceID returns a copy of the delegate CNI config in inBytes with
// its top-level "deviceID" key set to deviceID, replacing any existing value.
//
// inBytes must hold a JSON object. Only the top level is decoded; every
// existing value is carried over as raw JSON, so numbers are re-emitted exactly
// as written instead of being rounded through float64. An error is returned
// when inBytes is not a JSON object (including a literal null) or when the
// updated config cannot be re-encoded.
func delegateAddDeviceID(inBytes []byte, deviceID string) ([]byte, error) {
	var rawConfig map[string]json.RawMessage
	if err := json.Unmarshal(inBytes, &rawConfig); err != nil {
		return nil, logging.Errorf("delegateAddDeviceID: failed to unmarshal inBytes: %v", err)
	}
	// A JSON null decodes without error but leaves the map nil; assigning into
	// a nil map would panic, so reject it explicitly.
	if rawConfig == nil {
		return nil, logging.Errorf("delegateAddDeviceID: failed to unmarshal inBytes: %v", "config is null, expected a JSON object")
	}

	idBytes, err := json.Marshal(deviceID)
	if err != nil {
		return nil, logging.Errorf("delegateAddDeviceID: failed to marshal deviceID: %v", err)
	}
	// Inject deviceID
	rawConfig["deviceID"] = idBytes

	configBytes, err := json.Marshal(rawConfig)
	if err != nil {
		return nil, logging.Errorf("delegateAddDeviceID: failed to re-marshal Spec.Config: %v", err)
	}
	return configBytes, nil
}

View File

@@ -131,3 +131,9 @@ type K8sArgs struct {
K8S_POD_NAMESPACE types.UnmarshallableString K8S_POD_NAMESPACE types.UnmarshallableString
K8S_POD_INFRA_CONTAINER_ID types.UnmarshallableString K8S_POD_INFRA_CONTAINER_ID types.UnmarshallableString
} }
// ResourceInfo holds the devices allocated to a Pod for a single resource name,
// together with a cursor into that list.
type ResourceInfo struct {
// Index is the position in DeviceIDs of the next device to hand out; callers
// that consume a device are expected to increment it.
Index int
// DeviceIDs lists the IDs of the devices allocated for the resource.
DeviceIDs []string
}