Merge pull request #128299 from SergeyKanzhelev/updateDHS

Update Device Health fields description for KEP-4680
This commit is contained in:
Kubernetes Prow Robot 2024-10-28 22:19:01 +00:00 committed by GitHub
commit 86b99869cb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 176 additions and 41 deletions

View File

@ -10626,7 +10626,7 @@
"x-kubernetes-map-type": "atomic" "x-kubernetes-map-type": "atomic"
}, },
"io.k8s.api.core.v1.ResourceHealth": { "io.k8s.api.core.v1.ResourceHealth": {
"description": "ResourceHealth represents the health of a resource. It has the latest device health information. This is a part of KEP https://kep.k8s.io/4680 and historical health changes are planned to be added in future iterations of a KEP.", "description": "ResourceHealth represents the health of a resource. It has the latest device health information. This is a part of KEP https://kep.k8s.io/4680.",
"properties": { "properties": {
"health": { "health": {
"description": "Health of the resource. can be one of:\n - Healthy: operates as normal\n - Unhealthy: reported unhealthy. We consider this a temporary health issue\n since we do not have a mechanism today to distinguish\n temporary and permanent issues.\n - Unknown: The status cannot be determined.\n For example, Device Plugin got unregistered and hasn't been re-registered since.\n\nIn future we may want to introduce the PermanentlyUnhealthy Status.", "description": "Health of the resource. can be one of:\n - Healthy: operates as normal\n - Unhealthy: reported unhealthy. We consider this a temporary health issue\n since we do not have a mechanism today to distinguish\n temporary and permanent issues.\n - Unknown: The status cannot be determined.\n For example, Device Plugin got unregistered and hasn't been re-registered since.\n\nIn future we may want to introduce the PermanentlyUnhealthy Status.",
@ -10789,11 +10789,11 @@
"io.k8s.api.core.v1.ResourceStatus": { "io.k8s.api.core.v1.ResourceStatus": {
"properties": { "properties": {
"name": { "name": {
"description": "Name of the resource. Must be unique within the pod and match one of the resources from the pod spec.", "description": "Name of the resource. Must be unique within the pod and in case of non-DRA resource, match one of the resources from the pod spec. For DRA resources, the value must be \"claim:<claim_name>/<request>\". When this status is reported about a container, the \"claim_name\" and \"request\" must match one of the claims of this container.",
"type": "string" "type": "string"
}, },
"resources": { "resources": {
"description": "List of unique Resources health. Each element in the list contains an unique resource ID and resource health. At a minimum, ResourceID must uniquely identify the Resource allocated to the Pod on the Node for the lifetime of a Pod. See ResourceID type for it's definition.", "description": "List of unique resources health. Each element in the list contains an unique resource ID and its health. At a minimum, for the lifetime of a Pod, resource ID must uniquely identify the resource allocated to the Pod on the Node. If other Pod on the same Node reports the status with the same resource ID, it must be the same resource they share. See ResourceID type definition for a specific format it has in various use cases.",
"items": { "items": {
"$ref": "#/definitions/io.k8s.api.core.v1.ResourceHealth" "$ref": "#/definitions/io.k8s.api.core.v1.ResourceHealth"
}, },

View File

@ -6617,7 +6617,7 @@
"x-kubernetes-map-type": "atomic" "x-kubernetes-map-type": "atomic"
}, },
"io.k8s.api.core.v1.ResourceHealth": { "io.k8s.api.core.v1.ResourceHealth": {
"description": "ResourceHealth represents the health of a resource. It has the latest device health information. This is a part of KEP https://kep.k8s.io/4680 and historical health changes are planned to be added in future iterations of a KEP.", "description": "ResourceHealth represents the health of a resource. It has the latest device health information. This is a part of KEP https://kep.k8s.io/4680.",
"properties": { "properties": {
"health": { "health": {
"description": "Health of the resource. can be one of:\n - Healthy: operates as normal\n - Unhealthy: reported unhealthy. We consider this a temporary health issue\n since we do not have a mechanism today to distinguish\n temporary and permanent issues.\n - Unknown: The status cannot be determined.\n For example, Device Plugin got unregistered and hasn't been re-registered since.\n\nIn future we may want to introduce the PermanentlyUnhealthy Status.", "description": "Health of the resource. can be one of:\n - Healthy: operates as normal\n - Unhealthy: reported unhealthy. We consider this a temporary health issue\n since we do not have a mechanism today to distinguish\n temporary and permanent issues.\n - Unknown: The status cannot be determined.\n For example, Device Plugin got unregistered and hasn't been re-registered since.\n\nIn future we may want to introduce the PermanentlyUnhealthy Status.",
@ -6817,11 +6817,11 @@
"properties": { "properties": {
"name": { "name": {
"default": "", "default": "",
"description": "Name of the resource. Must be unique within the pod and match one of the resources from the pod spec.", "description": "Name of the resource. Must be unique within the pod and in case of non-DRA resource, match one of the resources from the pod spec. For DRA resources, the value must be \"claim:<claim_name>/<request>\". When this status is reported about a container, the \"claim_name\" and \"request\" must match one of the claims of this container.",
"type": "string" "type": "string"
}, },
"resources": { "resources": {
"description": "List of unique Resources health. Each element in the list contains an unique resource ID and resource health. At a minimum, ResourceID must uniquely identify the Resource allocated to the Pod on the Node for the lifetime of a Pod. See ResourceID type for it's definition.", "description": "List of unique resources health. Each element in the list contains an unique resource ID and its health. At a minimum, for the lifetime of a Pod, resource ID must uniquely identify the resource allocated to the Pod on the Node. If other Pod on the same Node reports the status with the same resource ID, it must be the same resource they share. See ResourceID type definition for a specific format it has in various use cases.",
"items": { "items": {
"allOf": [ "allOf": [
{ {

View File

@ -2788,11 +2788,17 @@ type ContainerStatus struct {
} }
type ResourceStatus struct { type ResourceStatus struct {
// Name of the resource. Must be unique within the pod and in case of non-DRA resource, match one of the resources from the pod spec.
// For DRA resources, the value must be "claim:<claim_name>/<request>", or "claim:<claim_name>" when the claim has no request.
// When this status is reported about a container, the "claim_name" and "request" must match one of the claims of this container.
// +required
Name ResourceName Name ResourceName
// List of unique Resources health. Each element in the list contains a unique resource ID and resource health. // List of unique resources health. Each element in the list contains a unique resource ID and its health.
// At a minimum, ResourceID must uniquely identify the Resource // At a minimum, for the lifetime of a Pod, resource ID must uniquely identify the resource allocated to the Pod on the Node.
// allocated to the Pod on the Node for the lifetime of a Pod. // If another Pod on the same Node reports the status with the same resource ID, it must be the same resource they share.
// See ResourceID type for it's definition. // See ResourceID type definition for a specific format it has in various use cases.
// +listType=map
// +listMapKey=resourceID
Resources []ResourceHealth Resources []ResourceHealth
// allow to extend this struct in future with the overall health fields or things like Device Plugin version // allow to extend this struct in future with the overall health fields or things like Device Plugin version
@ -2801,12 +2807,13 @@ type ResourceStatus struct {
// ResourceID is calculated based on the source of this resource health information. // ResourceID is calculated based on the source of this resource health information.
// For DevicePlugin: // For DevicePlugin:
// //
// deviceplugin:DeviceID, where DeviceID is from the Device structure of DevicePlugin's ListAndWatchResponse type: https://github.com/kubernetes/kubernetes/blob/eda1c780543a27c078450e2f17d674471e00f494/staging/src/k8s.io/kubelet/pkg/apis/deviceplugin/v1alpha/api.proto#L61-L73 // DeviceID, where DeviceID is how device plugin identifies the device. The same DeviceID can be found in PodResources API.
// //
// DevicePlugin ID is usually a constant for the lifetime of a Node and typically can be used to uniquely identify the device on the node. // DevicePlugin ID is usually a constant for the lifetime of a Node and typically can be used to uniquely identify the device on the node.
//
// For DRA: // For DRA:
// //
// dra:<driver name>/<pool name>/<device name>: such a device can be looked up in the information published by that DRA driver to learn more about it. It is designed to be globally unique in a cluster. // <driver name>/<pool name>/<device name>: such a device can be looked up in the information published by that DRA driver to learn more about it. It is designed to be globally unique in a cluster.
type ResourceID string type ResourceID string
type ResourceHealthStatus string type ResourceHealthStatus string
@ -2818,7 +2825,7 @@ const (
) )
// ResourceHealth represents the health of a resource. It has the latest device health information. // ResourceHealth represents the health of a resource. It has the latest device health information.
// This is a part of KEP https://kep.k8s.io/4680 and historical health changes are planned to be added in future iterations of a KEP. // This is a part of KEP https://kep.k8s.io/4680.
type ResourceHealth struct { type ResourceHealth struct {
// ResourceID is the unique identifier of the resource. See the ResourceID type for more information. // ResourceID is the unique identifier of the resource. See the ResourceID type for more information.
ResourceID ResourceID ResourceID ResourceID

View File

@ -8260,17 +8260,39 @@ func validateContainerStatusAllocatedResourcesStatus(containerStatuses []core.Co
// ignore missing container, see https://github.com/kubernetes/kubernetes/issues/124915 // ignore missing container, see https://github.com/kubernetes/kubernetes/issues/124915
if containerFound { if containerFound {
found := false found := false
var errorStr string
// get container resources from the spec if strings.HasPrefix(string(allocatedResource.Name), "claim:") {
containerResources := container.Resources // assume it is a claim name
for resourceName := range containerResources.Requests {
if resourceName == allocatedResource.Name { errorStr = "must match one of the container's resource claims in a format 'claim:<claimName>/<request>' or 'claim:<claimName>' if request is empty"
found = true
break for _, c := range container.Resources.Claims {
name := "claim:" + c.Name
if c.Request != "" {
name += "/" + c.Request
}
if name == string(allocatedResource.Name) {
found = true
break
}
}
} else {
// assume it is a resource name
errorStr = "must match one of the container's resource requests"
for resourceName := range container.Resources.Requests {
if resourceName == allocatedResource.Name {
found = true
break
}
} }
} }
if !found { if !found {
allErrors = append(allErrors, field.Invalid(fldPath.Index(i).Child("allocatedResourcesStatus").Index(j).Child("name"), allocatedResource.Name, "must match one of the container's resource requirements")) allErrors = append(allErrors, field.Invalid(fldPath.Index(i).Child("allocatedResourcesStatus").Index(j).Child("name"), allocatedResource.Name, errorStr))
} }
} }

View File

@ -24787,7 +24787,109 @@ func TestValidateContainerStatusAllocatedResourcesStatus(t *testing.T) {
}, },
}, },
wantFieldErrors: field.ErrorList{ wantFieldErrors: field.ErrorList{
field.Invalid(fldPath.Index(0).Child("allocatedResourcesStatus").Index(1).Child("name"), core.ResourceName("test.device/test2"), "must match one of the container's resource requirements"), field.Invalid(fldPath.Index(0).Child("allocatedResourcesStatus").Index(1).Child("name"), core.ResourceName("test.device/test2"), "must match one of the container's resource requests"),
},
},
"allow claims and request that are in spec": {
containers: []core.Container{
{
Name: "container-1",
Resources: core.ResourceRequirements{
Claims: []core.ResourceClaim{
{
Name: "claim.name",
Request: "request.name",
},
},
},
},
},
containerStatuses: []core.ContainerStatus{
{
Name: "container-1",
AllocatedResourcesStatus: []core.ResourceStatus{
{
Name: "claim:claim.name/request.name",
Resources: []core.ResourceHealth{
{
ResourceID: "driver/pool/device-name",
Health: core.ResourceHealthStatusHealthy,
},
},
},
},
},
},
wantFieldErrors: field.ErrorList{},
},
"allow claims that are in spec without the request": {
containers: []core.Container{
{
Name: "container-1",
Resources: core.ResourceRequirements{
Claims: []core.ResourceClaim{
{
Name: "claim.name",
},
},
},
},
},
containerStatuses: []core.ContainerStatus{
{
Name: "container-1",
AllocatedResourcesStatus: []core.ResourceStatus{
{
Name: "claim:claim.name",
Resources: []core.ResourceHealth{
{
ResourceID: "driver/pool/device-name",
Health: core.ResourceHealthStatusHealthy,
},
},
},
},
},
},
wantFieldErrors: field.ErrorList{},
},
"don't allow claims that are not in spec": {
containers: []core.Container{
{
Name: "container-1",
Resources: core.ResourceRequirements{
Claims: []core.ResourceClaim{
{
Name: "other-claim.name",
},
},
Requests: core.ResourceList{
"claim.name": resource.MustParse("1"),
},
},
},
},
containerStatuses: []core.ContainerStatus{
{
Name: "container-1",
AllocatedResourcesStatus: []core.ResourceStatus{
{
Name: "claim:claim.name",
Resources: []core.ResourceHealth{
{
ResourceID: "driver/pool/device-name",
Health: core.ResourceHealthStatusHealthy,
},
},
},
},
},
},
wantFieldErrors: field.ErrorList{
field.Invalid(fldPath.Index(0).Child("allocatedResourcesStatus").Index(0).Child("name"), core.ResourceName("claim:claim.name"), "must match one of the container's resource claims in a format 'claim:<claimName>/<request>' or 'claim:<claimName>' if request is empty"),
}, },
}, },

View File

@ -29483,7 +29483,7 @@ func schema_k8sio_api_core_v1_ResourceHealth(ref common.ReferenceCallback) commo
return common.OpenAPIDefinition{ return common.OpenAPIDefinition{
Schema: spec.Schema{ Schema: spec.Schema{
SchemaProps: spec.SchemaProps{ SchemaProps: spec.SchemaProps{
Description: "ResourceHealth represents the health of a resource. It has the latest device health information. This is a part of KEP https://kep.k8s.io/4680 and historical health changes are planned to be added in future iterations of a KEP.", Description: "ResourceHealth represents the health of a resource. It has the latest device health information. This is a part of KEP https://kep.k8s.io/4680.",
Type: []string{"object"}, Type: []string{"object"},
Properties: map[string]spec.Schema{ Properties: map[string]spec.Schema{
"resourceID": { "resourceID": {
@ -29781,7 +29781,7 @@ func schema_k8sio_api_core_v1_ResourceStatus(ref common.ReferenceCallback) commo
Properties: map[string]spec.Schema{ Properties: map[string]spec.Schema{
"name": { "name": {
SchemaProps: spec.SchemaProps{ SchemaProps: spec.SchemaProps{
Description: "Name of the resource. Must be unique within the pod and match one of the resources from the pod spec.", Description: "Name of the resource. Must be unique within the pod and in case of non-DRA resource, match one of the resources from the pod spec. For DRA resources, the value must be \"claim:<claim_name>/<request>\". When this status is reported about a container, the \"claim_name\" and \"request\" must match one of the claims of this container.",
Default: "", Default: "",
Type: []string{"string"}, Type: []string{"string"},
Format: "", Format: "",
@ -29797,7 +29797,7 @@ func schema_k8sio_api_core_v1_ResourceStatus(ref common.ReferenceCallback) commo
}, },
}, },
SchemaProps: spec.SchemaProps{ SchemaProps: spec.SchemaProps{
Description: "List of unique Resources health. Each element in the list contains an unique resource ID and resource health. At a minimum, ResourceID must uniquely identify the Resource allocated to the Pod on the Node for the lifetime of a Pod. See ResourceID type for it's definition.", Description: "List of unique resources health. Each element in the list contains an unique resource ID and its health. At a minimum, for the lifetime of a Pod, resource ID must uniquely identify the resource allocated to the Pod on the Node. If other Pod on the same Node reports the status with the same resource ID, it must be the same resource they share. See ResourceID type definition for a specific format it has in various use cases.",
Type: []string{"array"}, Type: []string{"array"},
Items: &spec.SchemaOrArray{ Items: &spec.SchemaOrArray{
Schema: &spec.Schema{ Schema: &spec.Schema{

View File

@ -5045,7 +5045,7 @@ message ResourceFieldSelector {
} }
// ResourceHealth represents the health of a resource. It has the latest device health information. // ResourceHealth represents the health of a resource. It has the latest device health information.
// This is a part of KEP https://kep.k8s.io/4680 and historical health changes are planned to be added in future iterations of a KEP. // This is a part of KEP https://kep.k8s.io/4680.
message ResourceHealth { message ResourceHealth {
// ResourceID is the unique identifier of the resource. See the ResourceID type for more information. // ResourceID is the unique identifier of the resource. See the ResourceID type for more information.
optional string resourceID = 1; optional string resourceID = 1;
@ -5155,14 +5155,16 @@ message ResourceRequirements {
} }
message ResourceStatus { message ResourceStatus {
// Name of the resource. Must be unique within the pod and match one of the resources from the pod spec. // Name of the resource. Must be unique within the pod and in case of non-DRA resource, match one of the resources from the pod spec.
// For DRA resources, the value must be "claim:<claim_name>/<request>".
// When this status is reported about a container, the "claim_name" and "request" must match one of the claims of this container.
// +required // +required
optional string name = 1; optional string name = 1;
// List of unique Resources health. Each element in the list contains an unique resource ID and resource health. // List of unique resources health. Each element in the list contains an unique resource ID and its health.
// At a minimum, ResourceID must uniquely identify the Resource // At a minimum, for the lifetime of a Pod, resource ID must uniquely identify the resource allocated to the Pod on the Node.
// allocated to the Pod on the Node for the lifetime of a Pod. // If other Pod on the same Node reports the status with the same resource ID, it must be the same resource they share.
// See ResourceID type for it's definition. // See ResourceID type definition for a specific format it has in various use cases.
// +listType=map // +listType=map
// +listMapKey=resourceID // +listMapKey=resourceID
repeated ResourceHealth resources = 2; repeated ResourceHealth resources = 2;

View File

@ -3103,13 +3103,15 @@ type ContainerStatus struct {
} }
type ResourceStatus struct { type ResourceStatus struct {
// Name of the resource. Must be unique within the pod and match one of the resources from the pod spec. // Name of the resource. Must be unique within the pod and in case of non-DRA resource, match one of the resources from the pod spec.
// For DRA resources, the value must be "claim:<claim_name>/<request>", or "claim:<claim_name>" when the claim has no request.
// When this status is reported about a container, the "claim_name" and "request" must match one of the claims of this container.
// +required // +required
Name ResourceName `json:"name" protobuf:"bytes,1,opt,name=name"` Name ResourceName `json:"name" protobuf:"bytes,1,opt,name=name"`
// List of unique Resources health. Each element in the list contains an unique resource ID and resource health. // List of unique resources health. Each element in the list contains a unique resource ID and its health.
// At a minimum, ResourceID must uniquely identify the Resource // At a minimum, for the lifetime of a Pod, resource ID must uniquely identify the resource allocated to the Pod on the Node.
// allocated to the Pod on the Node for the lifetime of a Pod. // If another Pod on the same Node reports the status with the same resource ID, it must be the same resource they share.
// See ResourceID type for it's definition. // See ResourceID type definition for a specific format it has in various use cases.
// +listType=map // +listType=map
// +listMapKey=resourceID // +listMapKey=resourceID
Resources []ResourceHealth `json:"resources,omitempty" protobuf:"bytes,2,rep,name=resources"` Resources []ResourceHealth `json:"resources,omitempty" protobuf:"bytes,2,rep,name=resources"`
@ -3126,16 +3128,16 @@ const (
// ResourceID is calculated based on the source of this resource health information. // ResourceID is calculated based on the source of this resource health information.
// For DevicePlugin: // For DevicePlugin:
// //
// deviceplugin:DeviceID, where DeviceID is from the Device structure of DevicePlugin's ListAndWatchResponse type: https://github.com/kubernetes/kubernetes/blob/eda1c780543a27c078450e2f17d674471e00f494/staging/src/k8s.io/kubelet/pkg/apis/deviceplugin/v1alpha/api.proto#L61-L73 // DeviceID, where DeviceID is from the Device structure of DevicePlugin's ListAndWatchResponse type: https://github.com/kubernetes/kubernetes/blob/eda1c780543a27c078450e2f17d674471e00f494/staging/src/k8s.io/kubelet/pkg/apis/deviceplugin/v1alpha/api.proto#L61-L73
// //
// DevicePlugin ID is usually a constant for the lifetime of a Node and typically can be used to uniquely identify the device on the node. // DevicePlugin ID is usually a constant for the lifetime of a Node and typically can be used to uniquely identify the device on the node.
// For DRA: // For DRA:
// //
// dra:<driver name>/<pool name>/<device name>: such a device can be looked up in the information published by that DRA driver to learn more about it. It is designed to be globally unique in a cluster. // <driver name>/<pool name>/<device name>: such a device can be looked up in the information published by that DRA driver to learn more about it. It is designed to be globally unique in a cluster.
type ResourceID string type ResourceID string
// ResourceHealth represents the health of a resource. It has the latest device health information. // ResourceHealth represents the health of a resource. It has the latest device health information.
// This is a part of KEP https://kep.k8s.io/4680 and historical health changes are planned to be added in future iterations of a KEP. // This is a part of KEP https://kep.k8s.io/4680.
type ResourceHealth struct { type ResourceHealth struct {
// ResourceID is the unique identifier of the resource. See the ResourceID type for more information. // ResourceID is the unique identifier of the resource. See the ResourceID type for more information.
ResourceID ResourceID `json:"resourceID" protobuf:"bytes,1,opt,name=resourceID"` ResourceID ResourceID `json:"resourceID" protobuf:"bytes,1,opt,name=resourceID"`

View File

@ -2125,7 +2125,7 @@ func (ResourceFieldSelector) SwaggerDoc() map[string]string {
} }
var map_ResourceHealth = map[string]string{ var map_ResourceHealth = map[string]string{
"": "ResourceHealth represents the health of a resource. It has the latest device health information. This is a part of KEP https://kep.k8s.io/4680 and historical health changes are planned to be added in future iterations of a KEP.", "": "ResourceHealth represents the health of a resource. It has the latest device health information. This is a part of KEP https://kep.k8s.io/4680.",
"resourceID": "ResourceID is the unique identifier of the resource. See the ResourceID type for more information.", "resourceID": "ResourceID is the unique identifier of the resource. See the ResourceID type for more information.",
"health": "Health of the resource. can be one of:\n - Healthy: operates as normal\n - Unhealthy: reported unhealthy. We consider this a temporary health issue\n since we do not have a mechanism today to distinguish\n temporary and permanent issues.\n - Unknown: The status cannot be determined.\n For example, Device Plugin got unregistered and hasn't been re-registered since.\n\nIn future we may want to introduce the PermanentlyUnhealthy Status.", "health": "Health of the resource. can be one of:\n - Healthy: operates as normal\n - Unhealthy: reported unhealthy. We consider this a temporary health issue\n since we do not have a mechanism today to distinguish\n temporary and permanent issues.\n - Unknown: The status cannot be determined.\n For example, Device Plugin got unregistered and hasn't been re-registered since.\n\nIn future we may want to introduce the PermanentlyUnhealthy Status.",
} }
@ -2188,8 +2188,8 @@ func (ResourceRequirements) SwaggerDoc() map[string]string {
} }
var map_ResourceStatus = map[string]string{ var map_ResourceStatus = map[string]string{
"name": "Name of the resource. Must be unique within the pod and match one of the resources from the pod spec.", "name": "Name of the resource. Must be unique within the pod and in case of non-DRA resource, match one of the resources from the pod spec. For DRA resources, the value must be \"claim:<claim_name>/<request>\". When this status is reported about a container, the \"claim_name\" and \"request\" must match one of the claims of this container.",
"resources": "List of unique Resources health. Each element in the list contains an unique resource ID and resource health. At a minimum, ResourceID must uniquely identify the Resource allocated to the Pod on the Node for the lifetime of a Pod. See ResourceID type for it's definition.", "resources": "List of unique resources health. Each element in the list contains an unique resource ID and its health. At a minimum, for the lifetime of a Pod, resource ID must uniquely identify the resource allocated to the Pod on the Node. If other Pod on the same Node reports the status with the same resource ID, it must be the same resource they share. See ResourceID type definition for a specific format it has in various use cases.",
} }
func (ResourceStatus) SwaggerDoc() map[string]string { func (ResourceStatus) SwaggerDoc() map[string]string {