Merge pull request #128299 from SergeyKanzhelev/updateDHS

Update Device Health fields description for KEP-4680
This commit is contained in:
Kubernetes Prow Robot 2024-10-28 22:19:01 +00:00 committed by GitHub
commit 86b99869cb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 176 additions and 41 deletions

View File

@ -10626,7 +10626,7 @@
"x-kubernetes-map-type": "atomic"
},
"io.k8s.api.core.v1.ResourceHealth": {
"description": "ResourceHealth represents the health of a resource. It has the latest device health information. This is a part of KEP https://kep.k8s.io/4680 and historical health changes are planned to be added in future iterations of a KEP.",
"description": "ResourceHealth represents the health of a resource. It has the latest device health information. This is a part of KEP https://kep.k8s.io/4680.",
"properties": {
"health": {
"description": "Health of the resource. can be one of:\n - Healthy: operates as normal\n - Unhealthy: reported unhealthy. We consider this a temporary health issue\n since we do not have a mechanism today to distinguish\n temporary and permanent issues.\n - Unknown: The status cannot be determined.\n For example, Device Plugin got unregistered and hasn't been re-registered since.\n\nIn future we may want to introduce the PermanentlyUnhealthy Status.",
@ -10789,11 +10789,11 @@
"io.k8s.api.core.v1.ResourceStatus": {
"properties": {
"name": {
"description": "Name of the resource. Must be unique within the pod and match one of the resources from the pod spec.",
"description": "Name of the resource. Must be unique within the pod and in case of non-DRA resource, match one of the resources from the pod spec. For DRA resources, the value must be \"claim:<claim_name>/<request>\". When this status is reported about a container, the \"claim_name\" and \"request\" must match one of the claims of this container.",
"type": "string"
},
"resources": {
"description": "List of unique Resources health. Each element in the list contains an unique resource ID and resource health. At a minimum, ResourceID must uniquely identify the Resource allocated to the Pod on the Node for the lifetime of a Pod. See ResourceID type for it's definition.",
"description": "List of unique resources health. Each element in the list contains an unique resource ID and its health. At a minimum, for the lifetime of a Pod, resource ID must uniquely identify the resource allocated to the Pod on the Node. If other Pod on the same Node reports the status with the same resource ID, it must be the same resource they share. See ResourceID type definition for a specific format it has in various use cases.",
"items": {
"$ref": "#/definitions/io.k8s.api.core.v1.ResourceHealth"
},

View File

@ -6617,7 +6617,7 @@
"x-kubernetes-map-type": "atomic"
},
"io.k8s.api.core.v1.ResourceHealth": {
"description": "ResourceHealth represents the health of a resource. It has the latest device health information. This is a part of KEP https://kep.k8s.io/4680 and historical health changes are planned to be added in future iterations of a KEP.",
"description": "ResourceHealth represents the health of a resource. It has the latest device health information. This is a part of KEP https://kep.k8s.io/4680.",
"properties": {
"health": {
"description": "Health of the resource. can be one of:\n - Healthy: operates as normal\n - Unhealthy: reported unhealthy. We consider this a temporary health issue\n since we do not have a mechanism today to distinguish\n temporary and permanent issues.\n - Unknown: The status cannot be determined.\n For example, Device Plugin got unregistered and hasn't been re-registered since.\n\nIn future we may want to introduce the PermanentlyUnhealthy Status.",
@ -6817,11 +6817,11 @@
"properties": {
"name": {
"default": "",
"description": "Name of the resource. Must be unique within the pod and match one of the resources from the pod spec.",
"description": "Name of the resource. Must be unique within the pod and in case of non-DRA resource, match one of the resources from the pod spec. For DRA resources, the value must be \"claim:<claim_name>/<request>\". When this status is reported about a container, the \"claim_name\" and \"request\" must match one of the claims of this container.",
"type": "string"
},
"resources": {
"description": "List of unique Resources health. Each element in the list contains an unique resource ID and resource health. At a minimum, ResourceID must uniquely identify the Resource allocated to the Pod on the Node for the lifetime of a Pod. See ResourceID type for it's definition.",
"description": "List of unique resources health. Each element in the list contains an unique resource ID and its health. At a minimum, for the lifetime of a Pod, resource ID must uniquely identify the resource allocated to the Pod on the Node. If other Pod on the same Node reports the status with the same resource ID, it must be the same resource they share. See ResourceID type definition for a specific format it has in various use cases.",
"items": {
"allOf": [
{

View File

@ -2788,11 +2788,17 @@ type ContainerStatus struct {
}
type ResourceStatus struct {
// Name of the resource. Must be unique within the pod and in case of non-DRA resource, match one of the resources from the pod spec.
// For DRA resources, the value must be "claim:<claim_name>/<request>", or "claim:<claim_name>" when the matching container claim specifies no request.
// When this status is reported about a container, the "claim_name" and "request" must match one of the claims of this container.
// +required
Name ResourceName
// List of unique Resources health. Each element in the list contains a unique resource ID and resource health.
// At a minimum, ResourceID must uniquely identify the Resource
// allocated to the Pod on the Node for the lifetime of a Pod.
// See ResourceID type for it's definition.
// List of unique resources health. Each element in the list contains a unique resource ID and its health.
// At a minimum, for the lifetime of a Pod, the resource ID must uniquely identify the resource allocated to the Pod on the Node.
// If another Pod on the same Node reports the status with the same resource ID, it must be the same resource they share.
// See the ResourceID type definition for the specific format it has in various use cases.
// +listType=map
// +listMapKey=resourceID
Resources []ResourceHealth
// allow to extend this struct in future with the overall health fields or things like Device Plugin version
@ -2801,12 +2807,13 @@ type ResourceStatus struct {
// ResourceID is calculated based on the source of this resource health information.
// For DevicePlugin:
//
// deviceplugin:DeviceID, where DeviceID is from the Device structure of DevicePlugin's ListAndWatchResponse type: https://github.com/kubernetes/kubernetes/blob/eda1c780543a27c078450e2f17d674471e00f494/staging/src/k8s.io/kubelet/pkg/apis/deviceplugin/v1alpha/api.proto#L61-L73
// DeviceID, where DeviceID is how device plugin identifies the device. The same DeviceID can be found in PodResources API.
//
// DevicePlugin ID is usually a constant for the lifetime of a Node and typically can be used to uniquely identify the device on the node.
//
// For DRA:
//
// dra:<driver name>/<pool name>/<device name>: such a device can be looked up in the information published by that DRA driver to learn more about it. It is designed to be globally unique in a cluster.
// <driver name>/<pool name>/<device name>: such a device can be looked up in the information published by that DRA driver to learn more about it. It is designed to be globally unique in a cluster.
type ResourceID string
type ResourceHealthStatus string
@ -2818,7 +2825,7 @@ const (
)
// ResourceHealth represents the health of a resource. It has the latest device health information.
// This is a part of KEP https://kep.k8s.io/4680 and historical health changes are planned to be added in future iterations of a KEP.
// This is a part of KEP https://kep.k8s.io/4680.
type ResourceHealth struct {
// ResourceID is the unique identifier of the resource. See the ResourceID type for more information.
ResourceID ResourceID

View File

@ -8260,17 +8260,39 @@ func validateContainerStatusAllocatedResourcesStatus(containerStatuses []core.Co
// ignore missing container, see https://github.com/kubernetes/kubernetes/issues/124915
if containerFound {
found := false
var errorStr string
// get container resources from the spec
containerResources := container.Resources
for resourceName := range containerResources.Requests {
if resourceName == allocatedResource.Name {
found = true
break
if strings.HasPrefix(string(allocatedResource.Name), "claim:") {
// assume it is a claim name
errorStr = "must match one of the container's resource claims in a format 'claim:<claimName>/<request>' or 'claim:<claimName>' if request is empty"
for _, c := range container.Resources.Claims {
name := "claim:" + c.Name
if c.Request != "" {
name += "/" + c.Request
}
if name == string(allocatedResource.Name) {
found = true
break
}
}
} else {
// assume it is a resource name
errorStr = "must match one of the container's resource requests"
for resourceName := range container.Resources.Requests {
if resourceName == allocatedResource.Name {
found = true
break
}
}
}
if !found {
allErrors = append(allErrors, field.Invalid(fldPath.Index(i).Child("allocatedResourcesStatus").Index(j).Child("name"), allocatedResource.Name, "must match one of the container's resource requirements"))
allErrors = append(allErrors, field.Invalid(fldPath.Index(i).Child("allocatedResourcesStatus").Index(j).Child("name"), allocatedResource.Name, errorStr))
}
}

View File

@ -24787,7 +24787,109 @@ func TestValidateContainerStatusAllocatedResourcesStatus(t *testing.T) {
},
},
wantFieldErrors: field.ErrorList{
field.Invalid(fldPath.Index(0).Child("allocatedResourcesStatus").Index(1).Child("name"), core.ResourceName("test.device/test2"), "must match one of the container's resource requirements"),
field.Invalid(fldPath.Index(0).Child("allocatedResourcesStatus").Index(1).Child("name"), core.ResourceName("test.device/test2"), "must match one of the container's resource requests"),
},
},
"allow claims and request that are in spec": {
containers: []core.Container{
{
Name: "container-1",
Resources: core.ResourceRequirements{
Claims: []core.ResourceClaim{
{
Name: "claim.name",
Request: "request.name",
},
},
},
},
},
containerStatuses: []core.ContainerStatus{
{
Name: "container-1",
AllocatedResourcesStatus: []core.ResourceStatus{
{
Name: "claim:claim.name/request.name",
Resources: []core.ResourceHealth{
{
ResourceID: "driver/pool/device-name",
Health: core.ResourceHealthStatusHealthy,
},
},
},
},
},
},
wantFieldErrors: field.ErrorList{},
},
"allow claims that are in spec without the request": {
containers: []core.Container{
{
Name: "container-1",
Resources: core.ResourceRequirements{
Claims: []core.ResourceClaim{
{
Name: "claim.name",
},
},
},
},
},
containerStatuses: []core.ContainerStatus{
{
Name: "container-1",
AllocatedResourcesStatus: []core.ResourceStatus{
{
Name: "claim:claim.name",
Resources: []core.ResourceHealth{
{
ResourceID: "driver/pool/device-name",
Health: core.ResourceHealthStatusHealthy,
},
},
},
},
},
},
wantFieldErrors: field.ErrorList{},
},
"don't allow claims that are not in spec": {
containers: []core.Container{
{
Name: "container-1",
Resources: core.ResourceRequirements{
Claims: []core.ResourceClaim{
{
Name: "other-claim.name",
},
},
Requests: core.ResourceList{
"claim.name": resource.MustParse("1"),
},
},
},
},
containerStatuses: []core.ContainerStatus{
{
Name: "container-1",
AllocatedResourcesStatus: []core.ResourceStatus{
{
Name: "claim:claim.name",
Resources: []core.ResourceHealth{
{
ResourceID: "driver/pool/device-name",
Health: core.ResourceHealthStatusHealthy,
},
},
},
},
},
},
wantFieldErrors: field.ErrorList{
field.Invalid(fldPath.Index(0).Child("allocatedResourcesStatus").Index(0).Child("name"), core.ResourceName("claim:claim.name"), "must match one of the container's resource claims in a format 'claim:<claimName>/<request>' or 'claim:<claimName>' if request is empty"),
},
},

View File

@ -29483,7 +29483,7 @@ func schema_k8sio_api_core_v1_ResourceHealth(ref common.ReferenceCallback) commo
return common.OpenAPIDefinition{
Schema: spec.Schema{
SchemaProps: spec.SchemaProps{
Description: "ResourceHealth represents the health of a resource. It has the latest device health information. This is a part of KEP https://kep.k8s.io/4680 and historical health changes are planned to be added in future iterations of a KEP.",
Description: "ResourceHealth represents the health of a resource. It has the latest device health information. This is a part of KEP https://kep.k8s.io/4680.",
Type: []string{"object"},
Properties: map[string]spec.Schema{
"resourceID": {
@ -29781,7 +29781,7 @@ func schema_k8sio_api_core_v1_ResourceStatus(ref common.ReferenceCallback) commo
Properties: map[string]spec.Schema{
"name": {
SchemaProps: spec.SchemaProps{
Description: "Name of the resource. Must be unique within the pod and match one of the resources from the pod spec.",
Description: "Name of the resource. Must be unique within the pod and in case of non-DRA resource, match one of the resources from the pod spec. For DRA resources, the value must be \"claim:<claim_name>/<request>\". When this status is reported about a container, the \"claim_name\" and \"request\" must match one of the claims of this container.",
Default: "",
Type: []string{"string"},
Format: "",
@ -29797,7 +29797,7 @@ func schema_k8sio_api_core_v1_ResourceStatus(ref common.ReferenceCallback) commo
},
},
SchemaProps: spec.SchemaProps{
Description: "List of unique Resources health. Each element in the list contains an unique resource ID and resource health. At a minimum, ResourceID must uniquely identify the Resource allocated to the Pod on the Node for the lifetime of a Pod. See ResourceID type for it's definition.",
Description: "List of unique resources health. Each element in the list contains an unique resource ID and its health. At a minimum, for the lifetime of a Pod, resource ID must uniquely identify the resource allocated to the Pod on the Node. If other Pod on the same Node reports the status with the same resource ID, it must be the same resource they share. See ResourceID type definition for a specific format it has in various use cases.",
Type: []string{"array"},
Items: &spec.SchemaOrArray{
Schema: &spec.Schema{

View File

@ -5045,7 +5045,7 @@ message ResourceFieldSelector {
}
// ResourceHealth represents the health of a resource. It has the latest device health information.
// This is a part of KEP https://kep.k8s.io/4680 and historical health changes are planned to be added in future iterations of a KEP.
// This is a part of KEP https://kep.k8s.io/4680.
message ResourceHealth {
// ResourceID is the unique identifier of the resource. See the ResourceID type for more information.
optional string resourceID = 1;
@ -5155,14 +5155,16 @@ message ResourceRequirements {
}
message ResourceStatus {
// Name of the resource. Must be unique within the pod and match one of the resources from the pod spec.
// Name of the resource. Must be unique within the pod and in case of non-DRA resource, match one of the resources from the pod spec.
// For DRA resources, the value must be "claim:<claim_name>/<request>", or "claim:<claim_name>" when the matching container claim specifies no request.
// When this status is reported about a container, the "claim_name" and "request" must match one of the claims of this container.
// +required
optional string name = 1;
// List of unique Resources health. Each element in the list contains an unique resource ID and resource health.
// At a minimum, ResourceID must uniquely identify the Resource
// allocated to the Pod on the Node for the lifetime of a Pod.
// See ResourceID type for it's definition.
// List of unique resources health. Each element in the list contains a unique resource ID and its health.
// At a minimum, for the lifetime of a Pod, the resource ID must uniquely identify the resource allocated to the Pod on the Node.
// If another Pod on the same Node reports the status with the same resource ID, it must be the same resource they share.
// See the ResourceID type definition for the specific format it has in various use cases.
// +listType=map
// +listMapKey=resourceID
repeated ResourceHealth resources = 2;

View File

@ -3103,13 +3103,15 @@ type ContainerStatus struct {
}
type ResourceStatus struct {
// Name of the resource. Must be unique within the pod and match one of the resources from the pod spec.
// Name of the resource. Must be unique within the pod and in case of non-DRA resource, match one of the resources from the pod spec.
// For DRA resources, the value must be "claim:<claim_name>/<request>", or "claim:<claim_name>" when the matching container claim specifies no request.
// When this status is reported about a container, the "claim_name" and "request" must match one of the claims of this container.
// +required
Name ResourceName `json:"name" protobuf:"bytes,1,opt,name=name"`
// List of unique Resources health. Each element in the list contains an unique resource ID and resource health.
// At a minimum, ResourceID must uniquely identify the Resource
// allocated to the Pod on the Node for the lifetime of a Pod.
// See ResourceID type for it's definition.
// List of unique resources health. Each element in the list contains a unique resource ID and its health.
// At a minimum, for the lifetime of a Pod, the resource ID must uniquely identify the resource allocated to the Pod on the Node.
// If another Pod on the same Node reports the status with the same resource ID, it must be the same resource they share.
// See the ResourceID type definition for the specific format it has in various use cases.
// +listType=map
// +listMapKey=resourceID
Resources []ResourceHealth `json:"resources,omitempty" protobuf:"bytes,2,rep,name=resources"`
@ -3126,16 +3128,16 @@ const (
// ResourceID is calculated based on the source of this resource health information.
// For DevicePlugin:
//
// deviceplugin:DeviceID, where DeviceID is from the Device structure of DevicePlugin's ListAndWatchResponse type: https://github.com/kubernetes/kubernetes/blob/eda1c780543a27c078450e2f17d674471e00f494/staging/src/k8s.io/kubelet/pkg/apis/deviceplugin/v1alpha/api.proto#L61-L73
// DeviceID, where DeviceID is from the Device structure of DevicePlugin's ListAndWatchResponse type: https://github.com/kubernetes/kubernetes/blob/eda1c780543a27c078450e2f17d674471e00f494/staging/src/k8s.io/kubelet/pkg/apis/deviceplugin/v1alpha/api.proto#L61-L73
//
// DevicePlugin ID is usually a constant for the lifetime of a Node and typically can be used to uniquely identify the device on the node.
// For DRA:
//
// dra:<driver name>/<pool name>/<device name>: such a device can be looked up in the information published by that DRA driver to learn more about it. It is designed to be globally unique in a cluster.
// <driver name>/<pool name>/<device name>: such a device can be looked up in the information published by that DRA driver to learn more about it. It is designed to be globally unique in a cluster.
type ResourceID string
// ResourceHealth represents the health of a resource. It has the latest device health information.
// This is a part of KEP https://kep.k8s.io/4680 and historical health changes are planned to be added in future iterations of a KEP.
// This is a part of KEP https://kep.k8s.io/4680.
type ResourceHealth struct {
// ResourceID is the unique identifier of the resource. See the ResourceID type for more information.
ResourceID ResourceID `json:"resourceID" protobuf:"bytes,1,opt,name=resourceID"`

View File

@ -2125,7 +2125,7 @@ func (ResourceFieldSelector) SwaggerDoc() map[string]string {
}
var map_ResourceHealth = map[string]string{
"": "ResourceHealth represents the health of a resource. It has the latest device health information. This is a part of KEP https://kep.k8s.io/4680 and historical health changes are planned to be added in future iterations of a KEP.",
"": "ResourceHealth represents the health of a resource. It has the latest device health information. This is a part of KEP https://kep.k8s.io/4680.",
"resourceID": "ResourceID is the unique identifier of the resource. See the ResourceID type for more information.",
"health": "Health of the resource. can be one of:\n - Healthy: operates as normal\n - Unhealthy: reported unhealthy. We consider this a temporary health issue\n since we do not have a mechanism today to distinguish\n temporary and permanent issues.\n - Unknown: The status cannot be determined.\n For example, Device Plugin got unregistered and hasn't been re-registered since.\n\nIn future we may want to introduce the PermanentlyUnhealthy Status.",
}
@ -2188,8 +2188,8 @@ func (ResourceRequirements) SwaggerDoc() map[string]string {
}
var map_ResourceStatus = map[string]string{
"name": "Name of the resource. Must be unique within the pod and match one of the resources from the pod spec.",
"resources": "List of unique Resources health. Each element in the list contains an unique resource ID and resource health. At a minimum, ResourceID must uniquely identify the Resource allocated to the Pod on the Node for the lifetime of a Pod. See ResourceID type for it's definition.",
"name": "Name of the resource. Must be unique within the pod and in case of non-DRA resource, match one of the resources from the pod spec. For DRA resources, the value must be \"claim:<claim_name>/<request>\". When this status is reported about a container, the \"claim_name\" and \"request\" must match one of the claims of this container.",
"resources": "List of unique resources health. Each element in the list contains an unique resource ID and its health. At a minimum, for the lifetime of a Pod, resource ID must uniquely identify the resource allocated to the Pod on the Node. If other Pod on the same Node reports the status with the same resource ID, it must be the same resource they share. See ResourceID type definition for a specific format it has in various use cases.",
}
func (ResourceStatus) SwaggerDoc() map[string]string {