test: added missing tests for the Pod analyzer (#1021)

- Fixed a small bug where failures were appended multiple times for the
  CrashLoopBackOff and ContainerCreating container status reasons (the
  pattern behind the fix is sketched below).

- Added missing test cases to properly exercise the Pod analyzer,
  raising its code coverage to 98%.

- Added checks for init containers in a pod.

Partially addresses: https://github.com/k8sgpt-ai/k8sgpt/issues/889

Signed-off-by: VaibhavMalik4187 <vaibhavmalik2018@gmail.com>
Vaibhav Malik authored on 2024-04-14 01:38:33 +05:30, committed by GitHub
commit 9dfcce842e (parent 6df0169491)
2 changed files with 381 additions and 163 deletions
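For context on the first bullet: in the old code below, a waiting reason such as CrashLoopBackOff satisfied both the generic isErrorReason branch and the reason-specific branch, so the same container status was appended to failures twice. The refactored analyzeContainerStatusFailures helper chains the checks as mutually exclusive else-if branches instead. The following is a minimal, self-contained sketch of the two shapes, with hypothetical helper names and messages rather than the analyzer's actual code:

package main

import "fmt"

// failure is a stand-in for common.Failure in this sketch.
type failure struct{ Text string }

// isErrorReason mirrors the idea of a generic error-reason check.
func isErrorReason(reason string) bool {
	return reason == "CrashLoopBackOff" || reason == "ImagePullBackOff"
}

// collectBefore: independent if statements, so CrashLoopBackOff matches both
// branches and the same container status is appended twice.
func collectBefore(reason, message string) []failure {
	var failures []failure
	if isErrorReason(reason) && message != "" {
		failures = append(failures, failure{Text: message})
	}
	if reason == "CrashLoopBackOff" {
		failures = append(failures, failure{Text: "the last termination reason is ..."})
	}
	return failures
}

// collectAfter: mutually exclusive else-if branches, most specific first, so
// each container status contributes at most one failure.
func collectAfter(reason, message string) []failure {
	var failures []failure
	if reason == "CrashLoopBackOff" {
		failures = append(failures, failure{Text: "the last termination reason is ..."})
	} else if isErrorReason(reason) && message != "" {
		failures = append(failures, failure{Text: message})
	}
	return failures
}

func main() {
	fmt.Println(len(collectBefore("CrashLoopBackOff", "back-off restarting failed container"))) // 2
	fmt.Println(len(collectAfter("CrashLoopBackOff", "back-off restarting failed container")))  // 1
}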

pkg/analyzer/pod.go

@@ -18,6 +18,7 @@ import (
 	"github.com/k8sgpt-ai/k8sgpt/pkg/common"
 	"github.com/k8sgpt-ai/k8sgpt/pkg/util"
+	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
@@ -41,12 +42,12 @@ func (PodAnalyzer) Analyze(a common.Analyzer) ([]common.Result, error) {
 	for _, pod := range list.Items {
 		var failures []common.Failure
 		// Check for pending pods
 		if pod.Status.Phase == "Pending" {
 			// Check through container status to check for crashes
 			for _, containerStatus := range pod.Status.Conditions {
-				if containerStatus.Type == "PodScheduled" && containerStatus.Reason == "Unschedulable" {
+				if containerStatus.Type == v1.PodScheduled && containerStatus.Reason == "Unschedulable" {
 					if containerStatus.Message != "" {
 						failures = append(failures, common.Failure{
 							Text: containerStatus.Message,
@@ -57,60 +58,12 @@ func (PodAnalyzer) Analyze(a common.Analyzer) ([]common.Result, error) {
 			}
 		}
-		// Check through container status to check for crashes or unready
-		for _, containerStatus := range pod.Status.ContainerStatuses {
-			if containerStatus.State.Waiting != nil {
-				if isErrorReason(containerStatus.State.Waiting.Reason) && containerStatus.State.Waiting.Message != "" {
-					failures = append(failures, common.Failure{
-						Text:      containerStatus.State.Waiting.Message,
-						Sensitive: []common.Sensitive{},
-					})
-				}
-				// This represents a container that is still being created or blocked due to conditions such as OOMKilled
-				if containerStatus.State.Waiting.Reason == "ContainerCreating" && pod.Status.Phase == "Pending" {
-					// parse the event log and append details
-					evt, err := FetchLatestEvent(a.Context, a.Client, pod.Namespace, pod.Name)
-					if err != nil || evt == nil {
-						continue
-					}
-					if isEvtErrorReason(evt.Reason) && evt.Message != "" {
-						failures = append(failures, common.Failure{
-							Text:      evt.Message,
-							Sensitive: []common.Sensitive{},
-						})
-					}
-				}
-				// This represents container that is in CrashLoopBackOff state due to conditions such as OOMKilled
-				if containerStatus.State.Waiting.Reason == "CrashLoopBackOff" {
-					failures = append(failures, common.Failure{
-						Text:      fmt.Sprintf("the last termination reason is %s container=%s pod=%s", containerStatus.LastTerminationState.Terminated.Reason, containerStatus.Name, pod.Name),
-						Sensitive: []common.Sensitive{},
-					})
-				}
-			} else {
-				// when pod is Running but its ReadinessProbe fails
-				if !containerStatus.Ready && pod.Status.Phase == "Running" {
-					// parse the event log and append details
-					evt, err := FetchLatestEvent(a.Context, a.Client, pod.Namespace, pod.Name)
-					if err != nil || evt == nil {
-						continue
-					}
-					if evt.Reason == "Unhealthy" && evt.Message != "" {
-						failures = append(failures, common.Failure{
-							Text:      evt.Message,
-							Sensitive: []common.Sensitive{},
-						})
-					}
-				}
-			}
-		}
+		// Check for errors in the init containers.
+		failures = append(failures, analyzeContainerStatusFailures(a, pod.Status.InitContainerStatuses, pod.Name, pod.Namespace, string(pod.Status.Phase))...)
+		// Check for errors in containers.
+		failures = append(failures, analyzeContainerStatusFailures(a, pod.Status.ContainerStatuses, pod.Name, pod.Namespace, string(pod.Status.Phase))...)
 		if len(failures) > 0 {
 			preAnalysis[fmt.Sprintf("%s/%s", pod.Namespace, pod.Name)] = common.PreAnalysis{
 				Pod: pod,
@@ -135,6 +88,58 @@ func (PodAnalyzer) Analyze(a common.Analyzer) ([]common.Result, error) {
 	return a.Results, nil
 }
 
+func analyzeContainerStatusFailures(a common.Analyzer, statuses []v1.ContainerStatus, name string, namespace string, statusPhase string) []common.Failure {
+	var failures []common.Failure
+	// Check through container status to check for crashes or unready
+	for _, containerStatus := range statuses {
+		if containerStatus.State.Waiting != nil {
+			if containerStatus.State.Waiting.Reason == "ContainerCreating" && statusPhase == "Pending" {
+				// This represents a container that is still being created or blocked due to conditions such as OOMKilled
+				// parse the event log and append details
+				evt, err := FetchLatestEvent(a.Context, a.Client, namespace, name)
+				if err != nil || evt == nil {
+					continue
+				}
+				if isEvtErrorReason(evt.Reason) && evt.Message != "" {
+					failures = append(failures, common.Failure{
+						Text:      evt.Message,
+						Sensitive: []common.Sensitive{},
+					})
+				}
+			} else if containerStatus.State.Waiting.Reason == "CrashLoopBackOff" && containerStatus.LastTerminationState.Terminated != nil {
+				// This represents container that is in CrashLoopBackOff state due to conditions such as OOMKilled
+				failures = append(failures, common.Failure{
+					Text:      fmt.Sprintf("the last termination reason is %s container=%s pod=%s", containerStatus.LastTerminationState.Terminated.Reason, containerStatus.Name, name),
+					Sensitive: []common.Sensitive{},
+				})
+			} else if isErrorReason(containerStatus.State.Waiting.Reason) && containerStatus.State.Waiting.Message != "" {
+				failures = append(failures, common.Failure{
+					Text:      containerStatus.State.Waiting.Message,
+					Sensitive: []common.Sensitive{},
+				})
+			}
+		} else {
+			// when pod is Running but its ReadinessProbe fails
+			if !containerStatus.Ready && statusPhase == "Running" {
+				// parse the event log and append details
+				evt, err := FetchLatestEvent(a.Context, a.Client, namespace, name)
+				if err != nil || evt == nil {
+					continue
+				}
+				if evt.Reason == "Unhealthy" && evt.Message != "" {
+					failures = append(failures, common.Failure{
+						Text:      evt.Message,
+						Sensitive: []common.Sensitive{},
+					})
+				}
+			}
+		}
+	}
+	return failures
+}
+
 func isErrorReason(reason string) bool {
 	failureReasons := []string{
 		"CrashLoopBackOff", "ImagePullBackOff", "CreateContainerConfigError", "PreCreateHookError", "CreateContainerError",

pkg/analyzer/pod_test.go

@@ -15,144 +15,357 @@ package analyzer
 import (
 	"context"
+	"sort"
 	"testing"
 
 	"github.com/k8sgpt-ai/k8sgpt/pkg/common"
 	"github.com/k8sgpt-ai/k8sgpt/pkg/kubernetes"
-	"github.com/magiconair/properties/assert"
+	"github.com/stretchr/testify/require"
 	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/client-go/kubernetes/fake"
 )
 
 func TestPodAnalyzer(t *testing.T) {
-	clientset := fake.NewSimpleClientset(
-		&v1.Pod{
-			ObjectMeta: metav1.ObjectMeta{
-				Name:        "example",
-				Namespace:   "default",
-				Annotations: map[string]string{},
-			},
-			Status: v1.PodStatus{
-				Phase: v1.PodPending,
-				Conditions: []v1.PodCondition{
-					{
-						Type:    v1.PodScheduled,
-						Reason:  "Unschedulable",
-						Message: "0/1 nodes are available: 1 node(s) had taint {node-role.kubernetes.io/master: }, that the pod didn't tolerate.",
-					},
-				},
-			},
-		},
-		&v1.Pod{
-			ObjectMeta: metav1.ObjectMeta{
-				Name:      "example2",
-				Namespace: "default",
-			},
-			Status: v1.PodStatus{
-				Phase: v1.PodRunning,
-				ContainerStatuses: []v1.ContainerStatus{
-					{
-						Name:  "example2",
-						Ready: false,
-					},
-				},
-				Conditions: []v1.PodCondition{
-					{
-						Type:    v1.ContainersReady,
-						Reason:  "ContainersNotReady",
-						Message: "containers with unready status: [example2]",
-					},
-				},
-			},
-		},
-		// simulate event: 30s Warning Unhealthy pod/my-nginx-7fb4dbcf47-4ch4w Readiness probe failed: bash: xxxx: command not found
-		&v1.Event{
-			ObjectMeta: metav1.ObjectMeta{
-				Name:      "foo",
-				Namespace: "default",
-			},
-			InvolvedObject: v1.ObjectReference{
-				Kind:       "Pod",
-				Name:       "example2",
-				Namespace:  "default",
-				UID:        "differentUid",
-				APIVersion: "v1",
-			},
-			Reason:  "Unhealthy",
-			Message: "readiness probe failed: the detail reason here ...",
-			Source:  v1.EventSource{Component: "eventTest"},
-			Count:   1,
-			Type:    v1.EventTypeWarning,
-		})
-	config := common.Analyzer{
-		Client: &kubernetes.Client{
-			Client: clientset,
-		},
-		Context:   context.Background(),
-		Namespace: "default",
-	}
-	podAnalyzer := PodAnalyzer{}
-	var analysisResults []common.Result
-	analysisResults, err := podAnalyzer.Analyze(config)
-	if err != nil {
-		t.Error(err)
-	}
-	assert.Equal(t, len(analysisResults), 2)
-}
-
-func TestPodAnalyzerNamespaceFiltering(t *testing.T) {
-	clientset := fake.NewSimpleClientset(
-		&v1.Pod{
-			ObjectMeta: metav1.ObjectMeta{
-				Name:        "example",
-				Namespace:   "default",
-				Annotations: map[string]string{},
-			},
-			Status: v1.PodStatus{
-				Phase: v1.PodPending,
-				Conditions: []v1.PodCondition{
-					{
-						Type:    v1.PodScheduled,
-						Reason:  "Unschedulable",
-						Message: "0/1 nodes are available: 1 node(s) had taint {node-role.kubernetes.io/master: }, that the pod didn't tolerate.",
-					},
-				},
-			},
-		},
-		&v1.Pod{
-			ObjectMeta: metav1.ObjectMeta{
-				Name:        "example",
-				Namespace:   "other-namespace",
-				Annotations: map[string]string{},
-			},
-			Status: v1.PodStatus{
-				Phase: v1.PodPending,
-				Conditions: []v1.PodCondition{
-					{
-						Type:    v1.PodScheduled,
-						Reason:  "Unschedulable",
-						Message: "0/1 nodes are available: 1 node(s) had taint {node-role.kubernetes.io/master: }, that the pod didn't tolerate.",
-					},
-				},
-			},
-		})
-	config := common.Analyzer{
-		Client: &kubernetes.Client{
-			Client: clientset,
-		},
-		Context:   context.Background(),
-		Namespace: "default",
-	}
-	podAnalyzer := PodAnalyzer{}
-	var analysisResults []common.Result
-	analysisResults, err := podAnalyzer.Analyze(config)
-	if err != nil {
-		t.Error(err)
-	}
-	assert.Equal(t, len(analysisResults), 1)
-}
+	tests := []struct {
+		name         string
+		config       common.Analyzer
+		expectations []struct {
+			name          string
+			failuresCount int
+		}
+	}{
+		{
+			name: "Pending pods, namespace filtering and readiness probe failure",
+			config: common.Analyzer{
+				Client: &kubernetes.Client{
+					Client: fake.NewSimpleClientset(
+						&v1.Pod{
+							ObjectMeta: metav1.ObjectMeta{
+								Name:      "Pod1",
+								Namespace: "default",
+							},
+							Status: v1.PodStatus{
+								Phase: v1.PodPending,
+								Conditions: []v1.PodCondition{
+									{
+										// This condition will contribute to failures.
+										Type:    v1.PodScheduled,
+										Reason:  "Unschedulable",
+										Message: "0/1 nodes are available: 1 node(s) had taint {node-role.kubernetes.io/master: }, that the pod didn't tolerate.",
+									},
+									{
+										// This condition won't contribute to failures.
+										Type:   v1.PodScheduled,
+										Reason: "Unexpected failure",
+									},
+								},
+							},
+						},
+						&v1.Pod{
+							// This pod won't be selected because of namespace filtering.
+							ObjectMeta: metav1.ObjectMeta{
+								Name:      "Pod2",
+								Namespace: "test",
+							},
+						},
+						&v1.Pod{
+							ObjectMeta: metav1.ObjectMeta{
+								Name:      "Pod3",
+								Namespace: "default",
+							},
+							Status: v1.PodStatus{
+								// When pod is Running but its ReadinessProbe fails
+								Phase: v1.PodRunning,
+								ContainerStatuses: []v1.ContainerStatus{
+									{
+										Ready: false,
+									},
+								},
+							},
+						},
+						&v1.Event{
+							ObjectMeta: metav1.ObjectMeta{
+								Name:      "Event1",
+								Namespace: "default",
+							},
+							InvolvedObject: v1.ObjectReference{
+								Kind:      "Pod",
+								Name:      "Pod3",
+								Namespace: "default",
+							},
+							Reason:  "Unhealthy",
+							Message: "readiness probe failed: the detail reason here ...",
+							Source:  v1.EventSource{Component: "eventTest"},
+							Count:   1,
+							Type:    v1.EventTypeWarning,
+						},
+					),
+				},
+				Context:   context.Background(),
+				Namespace: "default",
+			},
+			expectations: []struct {
+				name          string
+				failuresCount int
+			}{
+				{
+					name:          "default/Pod1",
+					failuresCount: 1,
+				},
+				{
+					name:          "default/Pod3",
+					failuresCount: 1,
+				},
+			},
+		},
+		{
+			name: "readiness probe failure without any event",
+			config: common.Analyzer{
+				Client: &kubernetes.Client{
+					Client: fake.NewSimpleClientset(
+						&v1.Pod{
+							ObjectMeta: metav1.ObjectMeta{
+								Name:      "Pod1",
+								Namespace: "default",
+							},
+							Status: v1.PodStatus{
+								// When pod is Running but its ReadinessProbe fails
+								// It won't contribute to any failures because
+								// there's no event present.
+								Phase: v1.PodRunning,
+								ContainerStatuses: []v1.ContainerStatus{
+									{
+										Ready: false,
+									},
+								},
+							},
+						},
+					),
+				},
+				Context:   context.Background(),
+				Namespace: "default",
+			},
+		},
+		{
+			name: "Init container status state waiting",
+			config: common.Analyzer{
+				Client: &kubernetes.Client{
+					Client: fake.NewSimpleClientset(
+						&v1.Pod{
+							ObjectMeta: metav1.ObjectMeta{
+								Name:      "Pod1",
+								Namespace: "default",
+							},
+							Status: v1.PodStatus{
+								Phase: v1.PodPending,
+								InitContainerStatuses: []v1.ContainerStatus{
+									{
+										Ready: true,
+										State: v1.ContainerState{
+											Running: &v1.ContainerStateRunning{
+												StartedAt: metav1.Now(),
+											},
+										},
+									},
+									{
+										Ready: false,
+										State: v1.ContainerState{
+											Waiting: &v1.ContainerStateWaiting{
+												// This represents a container that is still being created or blocked due to conditions such as OOMKilled
+												Reason: "ContainerCreating",
+											},
+										},
+									},
+								},
+							},
+						},
+						&v1.Event{
+							ObjectMeta: metav1.ObjectMeta{
+								Name:      "Event1",
+								Namespace: "default",
+							},
+							InvolvedObject: v1.ObjectReference{
+								Kind:      "Pod",
+								Name:      "Pod1",
+								Namespace: "default",
+							},
+							Reason:  "FailedCreatePodSandBox",
+							Message: "failed to create the pod sandbox ...",
+							Type:    v1.EventTypeWarning,
+						},
+					),
+				},
+				Context:   context.Background(),
+				Namespace: "default",
+			},
+			expectations: []struct {
+				name          string
+				failuresCount int
+			}{
+				{
+					name:          "default/Pod1",
+					failuresCount: 1,
+				},
+			},
+		},
+		{
+			name: "Container status state waiting but no event reported",
+			config: common.Analyzer{
+				Client: &kubernetes.Client{
+					Client: fake.NewSimpleClientset(
+						&v1.Pod{
+							ObjectMeta: metav1.ObjectMeta{
+								Name:      "Pod1",
+								Namespace: "default",
+							},
+							Status: v1.PodStatus{
+								Phase: v1.PodPending,
+								ContainerStatuses: []v1.ContainerStatus{
+									{
+										Ready: false,
+										State: v1.ContainerState{
+											Waiting: &v1.ContainerStateWaiting{
+												// This represents a container that is still being created or blocked due to conditions such as OOMKilled
+												Reason: "ContainerCreating",
+											},
+										},
+									},
+								},
+							},
+						},
+					),
+				},
+				Context:   context.Background(),
+				Namespace: "default",
+			},
+		},
+		{
+			name: "Container status state waiting",
+			config: common.Analyzer{
+				Client: &kubernetes.Client{
+					Client: fake.NewSimpleClientset(
+						&v1.Pod{
+							ObjectMeta: metav1.ObjectMeta{
+								Name:      "Pod1",
+								Namespace: "default",
+							},
+							Status: v1.PodStatus{
+								Phase: v1.PodPending,
+								ContainerStatuses: []v1.ContainerStatus{
+									{
+										Name:  "Container1",
+										Ready: false,
+										State: v1.ContainerState{
+											Waiting: &v1.ContainerStateWaiting{
+												// This represents a container that is still being created or blocked due to conditions such as OOMKilled
+												Reason: "ContainerCreating",
+											},
+										},
+									},
+									{
+										Name:  "Container2",
+										Ready: false,
+										State: v1.ContainerState{
+											Waiting: &v1.ContainerStateWaiting{
+												// This represents container that is in CrashLoopBackOff state due to conditions such as OOMKilled
+												Reason: "CrashLoopBackOff",
+											},
+										},
+										LastTerminationState: v1.ContainerState{
+											Terminated: &v1.ContainerStateTerminated{
+												Reason: "test reason",
+											},
+										},
+									},
+									{
+										Name:  "Container3",
+										Ready: false,
+										State: v1.ContainerState{
+											Waiting: &v1.ContainerStateWaiting{
+												// This won't contribute to failures.
+												Reason:  "RandomReason",
+												Message: "This container won't be present in the failures",
+											},
+										},
+									},
+									{
+										Name:  "Container4",
+										Ready: false,
+										State: v1.ContainerState{
+											Waiting: &v1.ContainerStateWaiting{
+												// Valid error reason.
+												Reason:  "PreStartHookError",
+												Message: "Container4 encountered PreStartHookError",
+											},
+										},
+									},
+									{
+										Name:  "Container5",
+										Ready: false,
+										State: v1.ContainerState{
+											Waiting: &v1.ContainerStateWaiting{
+												// Valid error reason.
+												Reason:  "CrashLoopBackOff",
+												Message: "Container4 encountered CrashLoopBackOff",
+											},
+										},
+									},
+								},
+							},
+						},
+						&v1.Event{
+							ObjectMeta: metav1.ObjectMeta{
+								Name:      "Event1",
+								Namespace: "default",
+							},
+							InvolvedObject: v1.ObjectReference{
+								Kind:      "Pod",
+								Name:      "Pod1",
+								Namespace: "default",
+							},
+							// This reason won't contribute to failures.
+							Reason: "RandomEvent",
+							Type:   v1.EventTypeWarning,
+						},
+					),
+				},
+				Context:   context.Background(),
+				Namespace: "default",
+			},
+			expectations: []struct {
+				name          string
+				failuresCount int
+			}{
+				{
+					name:          "default/Pod1",
+					failuresCount: 3,
+				},
+			},
+		},
+	}
+
+	podAnalyzer := PodAnalyzer{}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			results, err := podAnalyzer.Analyze(tt.config)
+			require.NoError(t, err)
+			if tt.expectations == nil {
+				require.Equal(t, 0, len(results))
+			} else {
+				sort.Slice(results, func(i, j int) bool {
+					return results[i].Name < results[j].Name
+				})
+				require.Equal(t, len(tt.expectations), len(results))
+				for i, result := range results {
+					require.Equal(t, tt.expectations[i].name, result.Name)
+					require.Equal(t, tt.expectations[i].failuresCount, len(result.Error))
+				}
+			}
+		})
+	}
+}
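The 98% coverage figure quoted in the commit message should be reproducible with the standard Go toolchain; the package path below is assumed from the import paths in the diff rather than stated in the commit:

go test -run TestPodAnalyzer -cover ./pkg/analyzer/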