From 0b7ddf5e3b93e56ea92dfb6447e97c067cad9e54 Mon Sep 17 00:00:00 2001
From: Kay Yan
Date: Wed, 14 May 2025 16:22:05 +0800
Subject: [PATCH] feat: new job analyzer (#1506)

Signed-off-by: Kay Yan
---
 README.md                |   1 +
 pkg/analyzer/analyzer.go |   1 +
 pkg/analyzer/job.go      | 107 +++++++++++++++++++
 pkg/analyzer/job_test.go | 215 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 324 insertions(+)
 create mode 100644 pkg/analyzer/job.go
 create mode 100644 pkg/analyzer/job_test.go

diff --git a/README.md b/README.md
index 0817017..c96d6fc 100644
--- a/README.md
+++ b/README.md
@@ -252,6 +252,7 @@ you will be able to write your own analyzers.
 - [x] ingressAnalyzer
 - [x] statefulSetAnalyzer
 - [x] deploymentAnalyzer
+- [x] jobAnalyzer
 - [x] cronJobAnalyzer
 - [x] nodeAnalyzer
 - [x] mutatingWebhookAnalyzer
diff --git a/pkg/analyzer/analyzer.go b/pkg/analyzer/analyzer.go
index e6ec2c2..12c5424 100644
--- a/pkg/analyzer/analyzer.go
+++ b/pkg/analyzer/analyzer.go
@@ -39,6 +39,7 @@ var coreAnalyzerMap = map[string]common.IAnalyzer{
 	"Service":                        ServiceAnalyzer{},
 	"Ingress":                        IngressAnalyzer{},
 	"StatefulSet":                    StatefulSetAnalyzer{},
+	"Job":                            JobAnalyzer{},
 	"CronJob":                        CronJobAnalyzer{},
 	"Node":                           NodeAnalyzer{},
 	"ValidatingWebhookConfiguration": ValidatingWebhookAnalyzer{},
diff --git a/pkg/analyzer/job.go b/pkg/analyzer/job.go
new file mode 100644
index 0000000..42a650c
--- /dev/null
+++ b/pkg/analyzer/job.go
@@ -0,0 +1,107 @@
+/*
+Copyright 2025 The K8sGPT Authors.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package analyzer
+
+import (
+	"fmt"
+
+	"github.com/k8sgpt-ai/k8sgpt/pkg/common"
+	"github.com/k8sgpt-ai/k8sgpt/pkg/kubernetes"
+	"github.com/k8sgpt-ai/k8sgpt/pkg/util"
+	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+)
+
+type JobAnalyzer struct{}
+
+func (analyzer JobAnalyzer) Analyze(a common.Analyzer) ([]common.Result, error) {
+
+	kind := "Job"
+	apiDoc := kubernetes.K8sApiReference{
+		Kind: kind,
+		ApiVersion: schema.GroupVersion{
+			Group:   "batch",
+			Version: "v1",
+		},
+		OpenapiSchema: a.OpenapiSchema,
+	}
+
+	AnalyzerErrorsMetric.DeletePartialMatch(map[string]string{
+		"analyzer_name": kind,
+	})
+
+	jobList, err := a.Client.GetClient().BatchV1().Jobs(a.Namespace).List(a.Context, v1.ListOptions{LabelSelector: a.LabelSelector})
+	if err != nil {
+		return nil, err
+	}
+
+	var preAnalysis = map[string]common.PreAnalysis{}
+
+	for _, job := range jobList.Items {
+		var failures []common.Failure
+		if job.Spec.Suspend != nil && *job.Spec.Suspend {
+			doc := apiDoc.GetApiDocV2("spec.suspend")
+
+			failures = append(failures, common.Failure{
+				Text:          fmt.Sprintf("Job %s is suspended", job.Name),
+				KubernetesDoc: doc,
+				Sensitive: []common.Sensitive{
+					{
+						Unmasked: job.Namespace,
+						Masked:   util.MaskString(job.Namespace),
+					},
+					{
+						Unmasked: job.Name,
+						Masked:   util.MaskString(job.Name),
+					},
+				},
+			})
+		}
+		if job.Status.Failed > 0 {
+			doc := apiDoc.GetApiDocV2("status.failed")
+			failures = append(failures, common.Failure{
+				Text:          fmt.Sprintf("Job %s has failed", job.Name),
+				KubernetesDoc: doc,
+				Sensitive: []common.Sensitive{
+					{
+						Unmasked: job.Namespace,
+						Masked:   util.MaskString(job.Namespace),
+					},
+					{
+						Unmasked: job.Name,
+						Masked:   util.MaskString(job.Name),
+					},
+				},
+			})
+		}
+
+		if len(failures) > 0 {
+			preAnalysis[fmt.Sprintf("%s/%s", job.Namespace, job.Name)] = common.PreAnalysis{
+				FailureDetails: failures,
+			}
+			AnalyzerErrorsMetric.WithLabelValues(kind, job.Name, job.Namespace).Set(float64(len(failures)))
+		}
+	}
+
+	for key, value := range preAnalysis {
+		currentAnalysis := common.Result{
+			Kind:  kind,
+			Name:  key,
+			Error: value.FailureDetails,
+		}
+		a.Results = append(a.Results, currentAnalysis)
+	}
+
+	return a.Results, nil
+}
diff --git a/pkg/analyzer/job_test.go b/pkg/analyzer/job_test.go
new file mode 100644
index 0000000..5db2ecd
--- /dev/null
+++ b/pkg/analyzer/job_test.go
@@ -0,0 +1,215 @@
+/*
+Copyright 2025 The K8sGPT Authors.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package analyzer
+
+import (
+	"context"
+	"sort"
+	"testing"
+
+	"github.com/k8sgpt-ai/k8sgpt/pkg/common"
+	"github.com/k8sgpt-ai/k8sgpt/pkg/kubernetes"
+	"github.com/stretchr/testify/require"
+	batchv1 "k8s.io/api/batch/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/kubernetes/fake"
+)
+
+func TestJobAnalyzer(t *testing.T) {
+	tests := []struct {
+		name         string
+		config       common.Analyzer
+		expectations []struct {
+			name          string
+			failuresCount int
+		}
+	}{
+		{
+			name: "Suspended Job",
+			config: common.Analyzer{
+				Client: &kubernetes.Client{
+					Client: fake.NewSimpleClientset(
+						&batchv1.Job{
+							ObjectMeta: metav1.ObjectMeta{
+								Name:      "suspended-job",
+								Namespace: "default",
+							},
+							Spec: batchv1.JobSpec{
+								Suspend: boolPtr(true),
+							},
+						},
+					),
+				},
+				Context:   context.Background(),
+				Namespace: "default",
+			},
+			expectations: []struct {
+				name          string
+				failuresCount int
+			}{
+				{
+					name:          "default/suspended-job",
+					failuresCount: 1, // One failure for being suspended
+				},
+			},
+		},
+
+		{
+			name: "Failed Job",
+			config: common.Analyzer{
+				Client: &kubernetes.Client{
+					Client: fake.NewSimpleClientset(
+						&batchv1.Job{
+							ObjectMeta: metav1.ObjectMeta{
+								Name:      "failed-job",
+								Namespace: "default",
+							},
+							Spec: batchv1.JobSpec{},
+							Status: batchv1.JobStatus{
+								Failed: 1,
+							},
+						},
+					),
+				},
+				Context:   context.Background(),
+				Namespace: "default",
+			},
+			expectations: []struct {
+				name          string
+				failuresCount int
+			}{
+				{
+					name:          "default/failed-job",
+					failuresCount: 1, // One failure for the failed Job
+				},
+			},
+		},
+		{
+			name: "Valid Job",
+			config: common.Analyzer{
+				Client: &kubernetes.Client{
+					Client: fake.NewSimpleClientset(
+						&batchv1.Job{
+							ObjectMeta: metav1.ObjectMeta{
+								Name:      "valid-job",
+								Namespace: "default",
+							},
+							Spec: batchv1.JobSpec{},
+						},
+					),
+				},
+				Context:   context.Background(),
+				Namespace: "default",
+			},
+			expectations: []struct {
+				name          string
+				failuresCount int
+			}{
+				// No expectations for a valid Job
+			},
+		},
+		{
+			name: "Multiple issues",
+			config: common.Analyzer{
+				Client: &kubernetes.Client{
+					Client: fake.NewSimpleClientset(
+						&batchv1.Job{
+							ObjectMeta: metav1.ObjectMeta{
+								Name:      "multiple-issues",
+								Namespace: "default",
+							},
+							Spec: batchv1.JobSpec{
+								Suspend: boolPtr(true),
+							},
+							Status: batchv1.JobStatus{
+								Failed: 1,
+							},
+						},
+					),
+				},
+				Context:   context.Background(),
+				Namespace: "default",
+			},
+			expectations: []struct {
+				name          string
+				failuresCount int
+			}{
+				{
+					name:          "default/multiple-issues",
+					failuresCount: 2, // Two failures: suspended and failed
+				},
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			analyzer := JobAnalyzer{}
+			results, err := analyzer.Analyze(tt.config)
+			require.NoError(t, err)
+			require.Len(t, results, len(tt.expectations))
+
+			// Sort results by name for consistent comparison
+			sort.Slice(results, func(i, j int) bool {
+				return results[i].Name < results[j].Name
+			})
+
+			for i, expectation := range tt.expectations {
+				require.Equal(t, expectation.name, results[i].Name)
+				require.Len(t, results[i].Error, expectation.failuresCount)
+			}
+		})
+	}
+}
+
+func TestJobAnalyzerLabelSelector(t *testing.T) {
+	clientSet := fake.NewSimpleClientset(
+		&batchv1.Job{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      "job-with-label",
+				Namespace: "default",
+				Labels: map[string]string{
+					"app": "test",
+				},
+			},
+			Spec: batchv1.JobSpec{},
+			Status: batchv1.JobStatus{
+				Failed: 1,
+			},
+		},
+		&batchv1.Job{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      "job-without-label",
+				Namespace: "default",
+			},
+			Spec: batchv1.JobSpec{},
+		},
+	)
+
+	// Test with label selector
+	config := common.Analyzer{
+		Client: &kubernetes.Client{
+			Client: clientSet,
+		},
+		Context:       context.Background(),
+		Namespace:     "default",
+		LabelSelector: "app=test",
+	}
+
+	analyzer := JobAnalyzer{}
+	results, err := analyzer.Analyze(config)
+	require.NoError(t, err)
+	require.Equal(t, 1, len(results))
+	require.Equal(t, "default/job-with-label", results[0].Name)
+}