diff --git a/pkg/kubelet/cm/cpumanager/cpu_assignment.go b/pkg/kubelet/cm/cpumanager/cpu_assignment.go
index 1a844495450..b23d021cfeb 100644
--- a/pkg/kubelet/cm/cpumanager/cpu_assignment.go
+++ b/pkg/kubelet/cm/cpumanager/cpu_assignment.go
@@ -482,7 +482,13 @@ func takeByTopologyNUMAPacked(topo *topology.CPUTopology, availableCPUs cpuset.C
 // evenly distributed and remainder CPUs are allocated. The subset with the
 // lowest "balance score" will receive the CPUs in order to keep the overall
 // allocation of CPUs as "balanced" as possible.
-func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int) (cpuset.CPUSet, error) {
+//
+// NOTE: This algorithm has been generalized to take an additional
+// 'cpuGroupSize' parameter to ensure that CPUs are always allocated in groups
+// of size 'cpuGroupSize' according to the algorithm described above. This is
+// important, for example, to ensure that all CPUs (i.e. all hyperthreads) from
+// a single core are allocated together.
+func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuGroupSize int) (cpuset.CPUSet, error) {
 	acc := newCPUAccumulator(topo, availableCPUs, numCPUs)
 	if acc.isSatisfied() {
 		return acc.result, nil
@@ -519,9 +525,20 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu
 			return
 		}
 
-		// Check that each NUMA node in this combination can provide
-		// (at least) numCPUs/len(combo) of the total cpus required.
-		distribution := numCPUs / len(combo)
+		// Check that CPUs can be handed out in groups of size
+		// 'cpuGroupSize' across the NUMA nodes in this combo.
+		numCPUGroups := 0
+		for _, numa := range combo {
+			numCPUGroups += (acc.details.CPUsInNUMANodes(numa).Size() / cpuGroupSize)
+		}
+		if (numCPUGroups * cpuGroupSize) < numCPUs {
+			return
+		}
+
+		// Check that each NUMA node in this combination can allocate an
+		// even distribution of CPUs in groups of size 'cpuGroupSize',
+		// modulo some remainder.
+		distribution := (numCPUs / len(combo) / cpuGroupSize) * cpuGroupSize
 		for _, numa := range combo {
			cpus := acc.details.CPUsInNUMANodes(numa)
 			if cpus.Size() < distribution {
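
The rounded-down distribution above is easy to sanity-check by hand. Below is a minimal standalone sketch of the integer arithmetic (values borrowed from the new 26-CPU, cpuGroupSize=2 test case added later in this patch; the sketch is illustrative and not part of the change):

```go
package main

import "fmt"

func main() {
	// 26 CPUs requested across a combo of 2 NUMA nodes, handed out in
	// groups of 2 (i.e. whole cores on a 2-way SMT machine).
	numCPUs, comboSize, cpuGroupSize := 26, 2, 2

	// Integer division rounds the even distribution down to a multiple of
	// cpuGroupSize: (26/2/2)*2 = 12 CPUs per NUMA node.
	distribution := (numCPUs / comboSize / cpuGroupSize) * cpuGroupSize

	// The leftover is also a multiple of cpuGroupSize (26-24 = 2), so the
	// remainder can be handed out as whole groups as well.
	remainder := numCPUs - (distribution * comboSize)

	fmt.Println(distribution, remainder) // Output: 12 2
}
```
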
@@ -529,17 +546,19 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu
 				return
 			}
 		}
 
-		// Calculate how many CPUs will be available on each NUMA node
-		// in 'combo' ater allocating an even distribution of CPUs from
-		// them. This will be used to calculate a "balance" score for the
-		// combo to help decide which combo should ultimately be chosen.
+		// Calculate how many CPUs will be available on each NUMA node in
+		// 'combo' after allocating an even distribution of CPU groups of
+		// size 'cpuGroupSize' from them. This will be used in the "balance
+		// score" calculation to help decide if this combo should
+		// ultimately be chosen.
 		availableAfterAllocation := make(mapIntInt, len(combo))
 		for _, numa := range combo {
 			availableAfterAllocation[numa] = acc.details.CPUsInNUMANodes(numa).Size() - distribution
 		}
 		// Check if there are any remaining CPUs to distribute across the
-		// NUMA nodes once CPUs have been evenly distributed.
+		// NUMA nodes once CPUs have been evenly distributed in groups of
+		// size 'cpuGroupSize'.
 		remainder := numCPUs - (distribution * len(combo))
 
 		// Declare a set of local variables to help track the "balance
@@ -558,19 +577,22 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu
 
 		// Otherwise, find the best "balance score" when allocating the
 		// remainder CPUs across different subsets of NUMA nodes in 'combo'.
-		acc.iterateCombinations(combo, remainder, func(subset []int) {
+		// These remainder CPUs are handed out in groups of size 'cpuGroupSize'.
+		acc.iterateCombinations(combo, remainder/cpuGroupSize, func(subset []int) {
 			// Make a local copy of 'availableAfterAllocation'.
 			availableAfterAllocation := availableAfterAllocation.Clone()
 
-			// For all NUMA nodes in 'subset', remove 1 more CPU (to account
-			// for any remainder CPUs that will be allocated on them.
+			// For all NUMA nodes in 'subset', remove another
+			// 'cpuGroupSize' number of CPUs (to account for any remainder
+			// CPUs that will be allocated on them).
 			for _, numa := range subset {
-				availableAfterAllocation[numa] -= 1
+				availableAfterAllocation[numa] -= cpuGroupSize
 			}
 
 			// Calculate the "balance score" as the standard deviation of
 			// the number of CPUs available on all NUMA nodes in 'combo'
-			// assuming the remainder CPUs are spread across 'subset'.
+			// after the remainder CPUs have been allocated across 'subset'
+			// in groups of size 'cpuGroupSize'.
 			balance := standardDeviation(availableAfterAllocation.Values())
 			if balance < bestLocalBalance {
 				bestLocalBalance = balance
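
The "balance score" continues to rely on the standardDeviation helper that already lives in cpu_assignment.go. For reference, a plausible shape of that helper, consistent with how it is called above (a sketch, not a verbatim copy of the file):

```go
package cpumanager

import "math"

// mean returns the arithmetic mean of the input values.
func mean(xs []int) float64 {
	var sum float64
	for _, x := range xs {
		sum += float64(x)
	}
	return sum / float64(len(xs))
}

// standardDeviation returns the population standard deviation of the input
// values. A lower value over the per-NUMA free-CPU counts means a more
// balanced allocation.
func standardDeviation(xs []int) float64 {
	m := mean(xs)
	var sum float64
	for _, x := range xs {
		sum += (float64(x) - m) * (float64(x) - m)
	}
	return math.Sqrt(sum / float64(len(xs)))
}
```
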
@@ -596,16 +618,18 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu
 	}
 
 	// Otherwise, start allocating CPUs from the NUMA node combination
-	// chosen. First allocate numCPUs / len(bestCombo) CPUs from each node.
-	distribution := numCPUs / len(bestCombo)
+	// chosen. First allocate an even distribution of CPUs in groups of
+	// size 'cpuGroupSize' from 'bestCombo'.
+	distribution := (numCPUs / len(bestCombo) / cpuGroupSize) * cpuGroupSize
 	for _, numa := range bestCombo {
 		cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), distribution)
 		acc.take(cpus)
 	}
 
-	// Then allocate any remaining CPUs from each NUMA node in the remainder set.
+	// Then allocate any remaining CPUs in groups of size 'cpuGroupSize'
+	// from each NUMA node in the remainder set.
 	for _, numa := range bestRemainder {
-		cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), 1)
+		cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), cpuGroupSize)
 		acc.take(cpus)
 	}
 
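
With the allocation loop generalized, a caller that wants to keep hyperthread siblings together would presumably pass the topology's threads-per-core count as 'cpuGroupSize'. A hypothetical call site (takeFullCoresDistributed is an invented name for illustration; topo.CPUsPerCore() is the existing accessor on topology.CPUTopology):

```go
// takeFullCoresDistributed is a hypothetical helper in the same package
// (not part of this patch). It distributes a CPU request across NUMA nodes
// while keeping all hyperthreads of each physical core together by
// allocating in groups of topo.CPUsPerCore() (2 on a typical SMT host).
func takeFullCoresDistributed(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int) (cpuset.CPUSet, error) {
	return takeByTopologyNUMADistributed(topo, availableCPUs, numCPUs, topo.CPUsPerCore())
}
```
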
diff --git a/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go b/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go
index 4432be5aa8c..59059feb0cc 100644
--- a/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go
+++ b/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go
@@ -666,14 +666,39 @@ func TestTakeByTopologyNUMAPacked(t *testing.T) {
 	}
 }
 
-func TestTakeByTopologyNUMADistributed(t *testing.T) {
+type takeByTopologyExtendedTestCase struct {
+	description   string
+	topo          *topology.CPUTopology
+	availableCPUs cpuset.CPUSet
+	numCPUs       int
+	cpuGroupSize  int
+	expErr        string
+	expResult     cpuset.CPUSet
+}
+
+func commonTakeByTopologyExtendedTestCases(t *testing.T) []takeByTopologyExtendedTestCase {
+	var extendedTestCases []takeByTopologyExtendedTestCase
+
 	testCases := commonTakeByTopologyTestCases(t)
-	testCases = append(testCases, []takeByTopologyTestCase{
+	for _, tc := range testCases {
+		extendedTestCases = append(extendedTestCases, takeByTopologyExtendedTestCase{
+			tc.description,
+			tc.topo,
+			tc.availableCPUs,
+			tc.numCPUs,
+			1,
+			tc.expErr,
+			tc.expResult,
+		})
+	}
+
+	extendedTestCases = append(extendedTestCases, []takeByTopologyExtendedTestCase{
 		{
 			"allocate 4 full cores with 2 distributed across each NUMA node",
 			topoDualSocketHT,
 			mustParseCPUSet(t, "0-11"),
 			8,
+			1,
 			"",
 			mustParseCPUSet(t, "0,6,2,8,1,7,3,9"),
 		},
@@ -682,6 +707,7 @@ func TestTakeByTopologyNUMADistributed(t *testing.T) {
 			topoDualSocketMultiNumaPerSocketHT,
 			mustParseCPUSet(t, "0-79"),
 			64,
+			1,
 			"",
 			mustParseCPUSet(t, "0-7,10-17,20-27,30-37,40-47,50-57,60-67,70-77"),
 		},
@@ -690,6 +716,7 @@ func TestTakeByTopologyNUMADistributed(t *testing.T) {
 			topoDualSocketMultiNumaPerSocketHT,
 			mustParseCPUSet(t, "0-79"),
 			48,
+			1,
 			"",
 			mustParseCPUSet(t, "0-7,10-17,20-27,40-47,50-57,60-67"),
 		},
@@ -698,6 +725,7 @@ func TestTakeByTopologyNUMADistributed(t *testing.T) {
 			topoDualSocketMultiNumaPerSocketHT,
 			mustParseCPUSet(t, "2-39,42-79"),
 			48,
+			1,
 			"",
 			mustParseCPUSet(t, "2-9,10-17,20-27,42-49,50-57,60-67"),
 		},
@@ -706,6 +734,7 @@ func TestTakeByTopologyNUMADistributed(t *testing.T) {
 			topoDualSocketMultiNumaPerSocketHT,
 			mustParseCPUSet(t, "3-39,43-79"),
 			48,
+			1,
 			"",
 			mustParseCPUSet(t, "10-17,20-27,30-37,50-57,60-67,70-77"),
 		},
@@ -714,6 +743,7 @@ func TestTakeByTopologyNUMADistributed(t *testing.T) {
 			topoDualSocketMultiNumaPerSocketHT,
 			mustParseCPUSet(t, "0-2,10-12,20-22,30-32,40-41,50-51,60-61,70-71"),
 			16,
+			1,
 			"",
 			mustParseCPUSet(t, "0-1,10-11,20-21,30-31,40-41,50-51,60-61,70-71"),
 		},
@@ -722,38 +752,77 @@ func TestTakeByTopologyNUMADistributed(t *testing.T) {
 			topoDualSocketMultiNumaPerSocketHT,
 			mustParseCPUSet(t, "0-2,10-12,20-22,30-32,40-41,50-51,60-61,70-71"),
 			16,
+			1,
 			"",
 			mustParseCPUSet(t, "0-1,10-11,20-21,30-31,40-41,50-51,60-61,70-71"),
 		},
+	}...)
+
+	return extendedTestCases
+}
+
+func TestTakeByTopologyNUMADistributed(t *testing.T) {
+	testCases := commonTakeByTopologyExtendedTestCases(t)
+	testCases = append(testCases, []takeByTopologyExtendedTestCase{
 		{
 			"allocate 13 full cores distributed across the first 2 NUMA nodes",
 			topoDualSocketMultiNumaPerSocketHT,
 			mustParseCPUSet(t, "0-79"),
 			26,
+			1,
 			"",
 			mustParseCPUSet(t, "0-6,10-16,40-45,50-55"),
 		},
+		{
+			"allocate 13 full cores distributed across the first 2 NUMA nodes (cpuGroupSize 2)",
+			topoDualSocketMultiNumaPerSocketHT,
+			mustParseCPUSet(t, "0-79"),
+			26,
+			2,
+			"",
+			mustParseCPUSet(t, "0-6,10-15,40-46,50-55"),
+		},
 		{
 			"allocate 31 full cores with 15 CPUs distributed across each NUMA node and 1 CPU spilling over to each of NUMA 0, 1",
 			topoDualSocketMultiNumaPerSocketHT,
 			mustParseCPUSet(t, "0-79"),
 			62,
+			1,
 			"",
 			mustParseCPUSet(t, "0-7,10-17,20-27,30-37,40-47,50-57,60-66,70-76"),
 		},
+		{
+			"allocate 31 full cores with 14 CPUs distributed across each NUMA node and 2 CPUs spilling over to each of NUMA 0, 1, 2 (cpuGroupSize 2)",
+			topoDualSocketMultiNumaPerSocketHT,
+			mustParseCPUSet(t, "0-79"),
+			62,
+			2,
+			"",
+			mustParseCPUSet(t, "0-7,10-17,20-27,30-36,40-47,50-57,60-67,70-76"),
+		},
 		{
 			"allocate 31 full cores with 15 CPUs distributed across each NUMA node and 1 CPU spilling over to each of NUMA 2, 3 (to keep balance)",
 			topoDualSocketMultiNumaPerSocketHT,
 			mustParseCPUSet(t, "0-8,10-18,20-39,40-48,50-58,60-79"),
 			62,
+			1,
 			"",
 			mustParseCPUSet(t, "0-7,10-17,20-27,30-37,40-46,50-56,60-67,70-77"),
 		},
+		{
+			"allocate 31 full cores with 14 CPUs distributed across each NUMA node and 2 CPUs spilling over to each of NUMA 0, 2, 3 (to keep balance with cpuGroupSize 2)",
+			topoDualSocketMultiNumaPerSocketHT,
+			mustParseCPUSet(t, "0-8,10-18,20-39,40-48,50-58,60-79"),
+			62,
+			2,
+			"",
+			mustParseCPUSet(t, "0-7,10-16,20-27,30-37,40-47,50-56,60-67,70-77"),
+		},
 	}...)
 
 	for _, tc := range testCases {
 		t.Run(tc.description, func(t *testing.T) {
-			result, err := takeByTopologyNUMADistributed(tc.topo, tc.availableCPUs, tc.numCPUs)
+			result, err := takeByTopologyNUMADistributed(tc.topo, tc.availableCPUs, tc.numCPUs, tc.cpuGroupSize)
 			if tc.expErr != "" && err.Error() != tc.expErr {
 				t.Errorf("expected error to be [%v] but it was [%v]", tc.expErr, err)
 			}
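
The "(to keep balance with cpuGroupSize 2)" case above exercises the balance score directly: after 14 CPUs are taken from each NUMA node, the free counts are {NUMA0: 4, NUMA1: 4, NUMA2: 6, NUMA3: 6}, and the three remainder groups of 2 CPUs go to the 3-node subset with the lowest standard deviation. A standalone sketch of that arithmetic (values taken from the test case; stddev mirrors the assumed helper sketched earlier):

```go
package main

import (
	"fmt"
	"math"
)

// stddev computes the population standard deviation of the free-CPU counts,
// i.e. the "balance score" used to compare candidate remainder placements.
func stddev(xs []int) float64 {
	var sum, sq float64
	for _, x := range xs {
		sum += float64(x)
	}
	m := sum / float64(len(xs))
	for _, x := range xs {
		sq += (float64(x) - m) * (float64(x) - m)
	}
	return math.Sqrt(sq / float64(len(xs)))
}

func main() {
	// Free CPUs per NUMA node start at {4, 4, 6, 6}; each candidate subset
	// of 3 nodes loses one more group of 2 CPUs per node.
	fmt.Println(stddev([]int{2, 2, 4, 6})) // remainder on NUMA {0, 1, 2}: ~1.66
	fmt.Println(stddev([]int{2, 4, 4, 4})) // remainder on NUMA {0, 2, 3}: ~0.87 (chosen)
}
```

This matches the expected cpuset in that test case: NUMA nodes 0, 2, and 3 each give up one extra core's worth of CPUs.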