Mirror of https://github.com/linuxkit/linuxkit.git
kernel: Remove 4.19.x-rt patches and configs
Signed-off-by: Rolf Neugebauer <rn@rneugeba.io>
Parent: f98fa5ca41
Commit: 46f96c5572
@@ -256,13 +256,11 @@ $(eval $(call kernel,5.4.28,5.4.x,$(EXTRA),$(DEBUG)))
 $(eval $(call kernel,5.4.28,5.4.x,-rt,))
 $(eval $(call kernel,4.19.113,4.19.x,$(EXTRA),$(DEBUG)))
 $(eval $(call kernel,4.19.113,4.19.x,,-dbg))
-$(eval $(call kernel,4.19.106,4.19.x,-rt,))
 $(eval $(call kernel,4.14.174,4.14.x,$(EXTRA),$(DEBUG)))

 else ifeq ($(ARCH),aarch64)
 $(eval $(call kernel,5.4.28,5.4.x,$(EXTRA),$(DEBUG)))
 $(eval $(call kernel,5.4.28,5.4.x,-rt,))
-$(eval $(call kernel,4.19.106,4.19.x,-rt,))

 else ifeq ($(ARCH),s390x)
 $(eval $(call kernel,5.4.28,5.4.x,$(EXTRA),$(DEBUG)))
File diff suppressed because it is too large.
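The full diff of that file is not reproduced here. To inspect everything this commit removed, one hedged option (assuming a local clone of the linuxkit repository) is to query git for this commit directly:

  # list every file touched by the commit, with per-file change counts
  git show --stat 46f96c5572
  # print the complete diff, including the file suppressed above
  git show 46f96c5572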
@@ -1,20 +0,0 @@
CONFIG_SLUB_DEBUG=y
# CONFIG_SLUB_MEMCG_SYSFS_ON is not set
CONFIG_SLUB=y
# CONFIG_SLAB_FREELIST_HARDENED is not set
CONFIG_HAVE_ALIGNED_STRUCT_PAGE=y
CONFIG_PREEMPT=y
CONFIG_PREEMPT_RT_BASE=y
CONFIG_HAVE_PREEMPT_LAZY=y
CONFIG_PREEMPT_LAZY=y
# CONFIG_PREEMPT_VOLUNTARY is not set
# CONFIG_PREEMPT__LL is not set
# CONFIG_PREEMPT_RTB is not set
CONFIG_PREEMPT_RT_FULL=y
CONFIG_PREEMPT_COUNT=y
# CONFIG_SLUB_DEBUG_ON is not set
# CONFIG_SLUB_STATS is not set
CONFIG_DEBUG_PREEMPT=y
# CONFIG_PREEMPT_TRACER is not set
CONFIG_HZ_1000=y
CONFIG_HZ=1000
@@ -1,22 +0,0 @@
CONFIG_RWSEM_GENERIC_SPINLOCK=y
# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set
CONFIG_PREEMPT_RCU=y
CONFIG_TASKS_RCU=y
CONFIG_SLUB_DEBUG=y
# CONFIG_SLUB_MEMCG_SYSFS_ON is not set
CONFIG_SLUB=y
# CONFIG_SLAB_FREELIST_HARDENED is not set
CONFIG_HAVE_ALIGNED_STRUCT_PAGE=y
CONFIG_PREEMPT=y
CONFIG_PREEMPT_RT_BASE=y
CONFIG_HAVE_PREEMPT_LAZY=y
CONFIG_PREEMPT_LAZY=y
# CONFIG_PREEMPT_VOLUNTARY is not set
# CONFIG_PREEMPT__LL is not set
# CONFIG_PREEMPT_RTB is not set
CONFIG_PREEMPT_RT_FULL=y
CONFIG_PREEMPT_COUNT=y
# CONFIG_SLUB_DEBUG_ON is not set
# CONFIG_SLUB_STATS is not set
CONFIG_DEBUG_PREEMPT=y
# CONFIG_PREEMPT_TRACER is not set
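The two hunks above delete small PREEMPT_RT kernel-config fragments. As a rough sketch of how such a fragment is normally layered on top of a base configuration inside a kernel source tree (the merge script and make target are the standard upstream ones; the fragment filename below is purely illustrative), one could run:

  # merge the -rt fragment into the current .config without invoking make
  scripts/kconfig/merge_config.sh -m .config config-rt-fragment
  # let Kconfig resolve any dependencies the fragment leaves open
  make olddefconfig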
@@ -1,209 +0,0 @@
From d831f2ac120e802a4ff642f48f6b88e543665514 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Thu, 13 Sep 2018 13:30:18 +0200
Subject: [PATCH 001/328] ARM: at91: add TCB registers definitions

Add register and bit definitions for the timer counter blocks found on
Atmel ARM SoCs.

Tested-by: Alexander Dahl <ada@thorsis.com>
Tested-by: Andras Szemzo <szemzo.andras@gmail.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 include/soc/at91/atmel_tcb.h | 183 +++++++++++++++++++++++++++++++++++
 1 file changed, 183 insertions(+)
 create mode 100644 include/soc/at91/atmel_tcb.h

diff --git a/include/soc/at91/atmel_tcb.h b/include/soc/at91/atmel_tcb.h
new file mode 100644
index 000000000000..657e234b1483
--- /dev/null
+++ b/include/soc/at91/atmel_tcb.h
@@ -0,0 +1,183 @@
+//SPDX-License-Identifier: GPL-2.0
|
||||
+/* Copyright (C) 2018 Microchip */
|
||||
+
|
||||
+#ifndef __SOC_ATMEL_TCB_H
|
||||
+#define __SOC_ATMEL_TCB_H
|
||||
+
|
||||
+/* Channel registers */
|
||||
+#define ATMEL_TC_COFFS(c) ((c) * 0x40)
|
||||
+#define ATMEL_TC_CCR(c) ATMEL_TC_COFFS(c)
|
||||
+#define ATMEL_TC_CMR(c) (ATMEL_TC_COFFS(c) + 0x4)
|
||||
+#define ATMEL_TC_SMMR(c) (ATMEL_TC_COFFS(c) + 0x8)
|
||||
+#define ATMEL_TC_RAB(c) (ATMEL_TC_COFFS(c) + 0xc)
|
||||
+#define ATMEL_TC_CV(c) (ATMEL_TC_COFFS(c) + 0x10)
|
||||
+#define ATMEL_TC_RA(c) (ATMEL_TC_COFFS(c) + 0x14)
|
||||
+#define ATMEL_TC_RB(c) (ATMEL_TC_COFFS(c) + 0x18)
|
||||
+#define ATMEL_TC_RC(c) (ATMEL_TC_COFFS(c) + 0x1c)
|
||||
+#define ATMEL_TC_SR(c) (ATMEL_TC_COFFS(c) + 0x20)
|
||||
+#define ATMEL_TC_IER(c) (ATMEL_TC_COFFS(c) + 0x24)
|
||||
+#define ATMEL_TC_IDR(c) (ATMEL_TC_COFFS(c) + 0x28)
|
||||
+#define ATMEL_TC_IMR(c) (ATMEL_TC_COFFS(c) + 0x2c)
|
||||
+#define ATMEL_TC_EMR(c) (ATMEL_TC_COFFS(c) + 0x30)
|
||||
+
|
||||
+/* Block registers */
|
||||
+#define ATMEL_TC_BCR 0xc0
|
||||
+#define ATMEL_TC_BMR 0xc4
|
||||
+#define ATMEL_TC_QIER 0xc8
|
||||
+#define ATMEL_TC_QIDR 0xcc
|
||||
+#define ATMEL_TC_QIMR 0xd0
|
||||
+#define ATMEL_TC_QISR 0xd4
|
||||
+#define ATMEL_TC_FMR 0xd8
|
||||
+#define ATMEL_TC_WPMR 0xe4
|
||||
+
|
||||
+/* CCR fields */
|
||||
+#define ATMEL_TC_CCR_CLKEN BIT(0)
|
||||
+#define ATMEL_TC_CCR_CLKDIS BIT(1)
|
||||
+#define ATMEL_TC_CCR_SWTRG BIT(2)
|
||||
+
|
||||
+/* Common CMR fields */
|
||||
+#define ATMEL_TC_CMR_TCLKS_MSK GENMASK(2, 0)
|
||||
+#define ATMEL_TC_CMR_TCLK(x) (x)
|
||||
+#define ATMEL_TC_CMR_XC(x) ((x) + 5)
|
||||
+#define ATMEL_TC_CMR_CLKI BIT(3)
|
||||
+#define ATMEL_TC_CMR_BURST_MSK GENMASK(5, 4)
|
||||
+#define ATMEL_TC_CMR_BURST_XC(x) (((x) + 1) << 4)
|
||||
+#define ATMEL_TC_CMR_WAVE BIT(15)
|
||||
+
|
||||
+/* Capture mode CMR fields */
|
||||
+#define ATMEL_TC_CMR_LDBSTOP BIT(6)
|
||||
+#define ATMEL_TC_CMR_LDBDIS BIT(7)
|
||||
+#define ATMEL_TC_CMR_ETRGEDG_MSK GENMASK(9, 8)
|
||||
+#define ATMEL_TC_CMR_ETRGEDG_NONE (0 << 8)
|
||||
+#define ATMEL_TC_CMR_ETRGEDG_RISING (1 << 8)
|
||||
+#define ATMEL_TC_CMR_ETRGEDG_FALLING (2 << 8)
|
||||
+#define ATMEL_TC_CMR_ETRGEDG_BOTH (3 << 8)
|
||||
+#define ATMEL_TC_CMR_ABETRG BIT(10)
|
||||
+#define ATMEL_TC_CMR_CPCTRG BIT(14)
|
||||
+#define ATMEL_TC_CMR_LDRA_MSK GENMASK(17, 16)
|
||||
+#define ATMEL_TC_CMR_LDRA_NONE (0 << 16)
|
||||
+#define ATMEL_TC_CMR_LDRA_RISING (1 << 16)
|
||||
+#define ATMEL_TC_CMR_LDRA_FALLING (2 << 16)
|
||||
+#define ATMEL_TC_CMR_LDRA_BOTH (3 << 16)
|
||||
+#define ATMEL_TC_CMR_LDRB_MSK GENMASK(19, 18)
|
||||
+#define ATMEL_TC_CMR_LDRB_NONE (0 << 18)
|
||||
+#define ATMEL_TC_CMR_LDRB_RISING (1 << 18)
|
||||
+#define ATMEL_TC_CMR_LDRB_FALLING (2 << 18)
|
||||
+#define ATMEL_TC_CMR_LDRB_BOTH (3 << 18)
|
||||
+#define ATMEL_TC_CMR_SBSMPLR_MSK GENMASK(22, 20)
|
||||
+#define ATMEL_TC_CMR_SBSMPLR(x) ((x) << 20)
|
||||
+
|
||||
+/* Waveform mode CMR fields */
|
||||
+#define ATMEL_TC_CMR_CPCSTOP BIT(6)
|
||||
+#define ATMEL_TC_CMR_CPCDIS BIT(7)
|
||||
+#define ATMEL_TC_CMR_EEVTEDG_MSK GENMASK(9, 8)
|
||||
+#define ATMEL_TC_CMR_EEVTEDG_NONE (0 << 8)
|
||||
+#define ATMEL_TC_CMR_EEVTEDG_RISING (1 << 8)
|
||||
+#define ATMEL_TC_CMR_EEVTEDG_FALLING (2 << 8)
|
||||
+#define ATMEL_TC_CMR_EEVTEDG_BOTH (3 << 8)
|
||||
+#define ATMEL_TC_CMR_EEVT_MSK GENMASK(11, 10)
|
||||
+#define ATMEL_TC_CMR_EEVT_XC(x) (((x) + 1) << 10)
|
||||
+#define ATMEL_TC_CMR_ENETRG BIT(12)
|
||||
+#define ATMEL_TC_CMR_WAVESEL_MSK GENMASK(14, 13)
|
||||
+#define ATMEL_TC_CMR_WAVESEL_UP (0 << 13)
|
||||
+#define ATMEL_TC_CMR_WAVESEL_UPDOWN (1 << 13)
|
||||
+#define ATMEL_TC_CMR_WAVESEL_UPRC (2 << 13)
|
||||
+#define ATMEL_TC_CMR_WAVESEL_UPDOWNRC (3 << 13)
|
||||
+#define ATMEL_TC_CMR_ACPA_MSK GENMASK(17, 16)
|
||||
+#define ATMEL_TC_CMR_ACPA(a) (ATMEL_TC_CMR_ACTION_##a << 16)
|
||||
+#define ATMEL_TC_CMR_ACPC_MSK GENMASK(19, 18)
|
||||
+#define ATMEL_TC_CMR_ACPC(a) (ATMEL_TC_CMR_ACTION_##a << 18)
|
||||
+#define ATMEL_TC_CMR_AEEVT_MSK GENMASK(21, 20)
|
||||
+#define ATMEL_TC_CMR_AEEVT(a) (ATMEL_TC_CMR_ACTION_##a << 20)
|
||||
+#define ATMEL_TC_CMR_ASWTRG_MSK GENMASK(23, 22)
|
||||
+#define ATMEL_TC_CMR_ASWTRG(a) (ATMEL_TC_CMR_ACTION_##a << 22)
|
||||
+#define ATMEL_TC_CMR_BCPB_MSK GENMASK(25, 24)
|
||||
+#define ATMEL_TC_CMR_BCPB(a) (ATMEL_TC_CMR_ACTION_##a << 24)
|
||||
+#define ATMEL_TC_CMR_BCPC_MSK GENMASK(27, 26)
|
||||
+#define ATMEL_TC_CMR_BCPC(a) (ATMEL_TC_CMR_ACTION_##a << 26)
|
||||
+#define ATMEL_TC_CMR_BEEVT_MSK GENMASK(29, 28)
|
||||
+#define ATMEL_TC_CMR_BEEVT(a) (ATMEL_TC_CMR_ACTION_##a << 28)
|
||||
+#define ATMEL_TC_CMR_BSWTRG_MSK GENMASK(31, 30)
|
||||
+#define ATMEL_TC_CMR_BSWTRG(a) (ATMEL_TC_CMR_ACTION_##a << 30)
|
||||
+#define ATMEL_TC_CMR_ACTION_NONE 0
|
||||
+#define ATMEL_TC_CMR_ACTION_SET 1
|
||||
+#define ATMEL_TC_CMR_ACTION_CLEAR 2
|
||||
+#define ATMEL_TC_CMR_ACTION_TOGGLE 3
|
||||
+
|
||||
+/* SMMR fields */
|
||||
+#define ATMEL_TC_SMMR_GCEN BIT(0)
|
||||
+#define ATMEL_TC_SMMR_DOWN BIT(1)
|
||||
+
|
||||
+/* SR/IER/IDR/IMR fields */
|
||||
+#define ATMEL_TC_COVFS BIT(0)
|
||||
+#define ATMEL_TC_LOVRS BIT(1)
|
||||
+#define ATMEL_TC_CPAS BIT(2)
|
||||
+#define ATMEL_TC_CPBS BIT(3)
|
||||
+#define ATMEL_TC_CPCS BIT(4)
|
||||
+#define ATMEL_TC_LDRAS BIT(5)
|
||||
+#define ATMEL_TC_LDRBS BIT(6)
|
||||
+#define ATMEL_TC_ETRGS BIT(7)
|
||||
+#define ATMEL_TC_CLKSTA BIT(16)
|
||||
+#define ATMEL_TC_MTIOA BIT(17)
|
||||
+#define ATMEL_TC_MTIOB BIT(18)
|
||||
+
|
||||
+/* EMR fields */
|
||||
+#define ATMEL_TC_EMR_TRIGSRCA_MSK GENMASK(1, 0)
|
||||
+#define ATMEL_TC_EMR_TRIGSRCA_TIOA 0
|
||||
+#define ATMEL_TC_EMR_TRIGSRCA_PWMX 1
|
||||
+#define ATMEL_TC_EMR_TRIGSRCB_MSK GENMASK(5, 4)
|
||||
+#define ATMEL_TC_EMR_TRIGSRCB_TIOB (0 << 4)
|
||||
+#define ATMEL_TC_EMR_TRIGSRCB_PWM (1 << 4)
|
||||
+#define ATMEL_TC_EMR_NOCLKDIV BIT(8)
|
||||
+
|
||||
+/* BCR fields */
|
||||
+#define ATMEL_TC_BCR_SYNC BIT(0)
|
||||
+
|
||||
+/* BMR fields */
|
||||
+#define ATMEL_TC_BMR_TCXC_MSK(c) GENMASK(((c) * 2) + 1, (c) * 2)
|
||||
+#define ATMEL_TC_BMR_TCXC(x, c) ((x) << (2 * (c)))
|
||||
+#define ATMEL_TC_BMR_QDEN BIT(8)
|
||||
+#define ATMEL_TC_BMR_POSEN BIT(9)
|
||||
+#define ATMEL_TC_BMR_SPEEDEN BIT(10)
|
||||
+#define ATMEL_TC_BMR_QDTRANS BIT(11)
|
||||
+#define ATMEL_TC_BMR_EDGPHA BIT(12)
|
||||
+#define ATMEL_TC_BMR_INVA BIT(13)
|
||||
+#define ATMEL_TC_BMR_INVB BIT(14)
|
||||
+#define ATMEL_TC_BMR_INVIDX BIT(15)
|
||||
+#define ATMEL_TC_BMR_SWAP BIT(16)
|
||||
+#define ATMEL_TC_BMR_IDXPHB BIT(17)
|
||||
+#define ATMEL_TC_BMR_AUTOC BIT(18)
|
||||
+#define ATMEL_TC_MAXFILT_MSK GENMASK(25, 20)
|
||||
+#define ATMEL_TC_MAXFILT(x) (((x) - 1) << 20)
|
||||
+#define ATMEL_TC_MAXCMP_MSK GENMASK(29, 26)
|
||||
+#define ATMEL_TC_MAXCMP(x) ((x) << 26)
|
||||
+
|
||||
+/* QEDC fields */
|
||||
+#define ATMEL_TC_QEDC_IDX BIT(0)
|
||||
+#define ATMEL_TC_QEDC_DIRCHG BIT(1)
|
||||
+#define ATMEL_TC_QEDC_QERR BIT(2)
|
||||
+#define ATMEL_TC_QEDC_MPE BIT(3)
|
||||
+#define ATMEL_TC_QEDC_DIR BIT(8)
|
||||
+
|
||||
+/* FMR fields */
|
||||
+#define ATMEL_TC_FMR_ENCF(x) BIT(x)
|
||||
+
|
||||
+/* WPMR fields */
|
||||
+#define ATMEL_TC_WPMR_WPKEY (0x54494d << 8)
|
||||
+#define ATMEL_TC_WPMR_WPEN BIT(0)
|
||||
+
|
||||
+static const u8 atmel_tc_divisors[5] = { 2, 8, 32, 128, 0, };
|
||||
+
|
||||
+static const struct of_device_id atmel_tcb_dt_ids[] = {
|
||||
+ {
|
||||
+ .compatible = "atmel,at91rm9200-tcb",
|
||||
+ .data = (void *)16,
|
||||
+ }, {
|
||||
+ .compatible = "atmel,at91sam9x5-tcb",
|
||||
+ .data = (void *)32,
|
||||
+ }, {
|
||||
+ /* sentinel */
|
||||
+ }
|
||||
+};
|
||||
+
|
||||
+#endif /* __SOC_ATMEL_TCB_H */
|
||||
--
|
||||
2.25.1
|
||||
|
@@ -1,484 +0,0 @@
From a8f6e3cf352d669d8b870469ab3bff8fc64c3367 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Thu, 13 Sep 2018 13:30:19 +0200
Subject: [PATCH 002/328] clocksource/drivers: Add a new driver for the Atmel
 ARM TC blocks

Add a driver for the Atmel Timer Counter Blocks. This driver provides a
clocksource and two clockevent devices.

One of the clockevent devices is linked to the clocksource counter and so it
will run at the same frequency. This will be used when there is only one TCB
channel available for timers.

The other clockevent device runs on a separate TCB channel when available.

This driver uses regmap and syscon to be able to probe early in the boot
and avoid having to switch on the TCB clocksource later. Using regmap also
means that unused TCB channels may be used by other drivers (PWM for
example). readl/writel are still used to access channel-specific registers
to avoid the performance impact of regmap (mainly locking).

Tested-by: Alexander Dahl <ada@thorsis.com>
Tested-by: Andras Szemzo <szemzo.andras@gmail.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 drivers/clocksource/Kconfig           |   8 +
 drivers/clocksource/Makefile          |   3 +-
 drivers/clocksource/timer-atmel-tcb.c | 410 ++++++++++++++++++++++++++
 3 files changed, 420 insertions(+), 1 deletion(-)
 create mode 100644 drivers/clocksource/timer-atmel-tcb.c

diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
|
||||
index 4d37f018d846..0ab22e7037f4 100644
|
||||
--- a/drivers/clocksource/Kconfig
|
||||
+++ b/drivers/clocksource/Kconfig
|
||||
@@ -415,6 +415,14 @@ config ATMEL_ST
|
||||
help
|
||||
Support for the Atmel ST timer.
|
||||
|
||||
+config ATMEL_ARM_TCB_CLKSRC
|
||||
+ bool "Microchip ARM TC Block" if COMPILE_TEST
|
||||
+ select REGMAP_MMIO
|
||||
+ depends on GENERIC_CLOCKEVENTS
|
||||
+ help
|
||||
+ This enables build of clocksource and clockevent driver for
|
||||
+ the integrated Timer Counter Blocks in Microchip ARM SoCs.
|
||||
+
|
||||
config CLKSRC_EXYNOS_MCT
|
||||
bool "Exynos multi core timer driver" if COMPILE_TEST
|
||||
depends on ARM || ARM64
|
||||
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
|
||||
index db51b2427e8a..0df9384a1230 100644
|
||||
--- a/drivers/clocksource/Makefile
|
||||
+++ b/drivers/clocksource/Makefile
|
||||
@@ -3,7 +3,8 @@ obj-$(CONFIG_TIMER_OF) += timer-of.o
|
||||
obj-$(CONFIG_TIMER_PROBE) += timer-probe.o
|
||||
obj-$(CONFIG_ATMEL_PIT) += timer-atmel-pit.o
|
||||
obj-$(CONFIG_ATMEL_ST) += timer-atmel-st.o
|
||||
-obj-$(CONFIG_ATMEL_TCB_CLKSRC) += tcb_clksrc.o
|
||||
+obj-$(CONFIG_ATMEL_TCB_CLKSRC) += tcb_clksrc.o
|
||||
+obj-$(CONFIG_ATMEL_ARM_TCB_CLKSRC) += timer-atmel-tcb.o
|
||||
obj-$(CONFIG_X86_PM_TIMER) += acpi_pm.o
|
||||
obj-$(CONFIG_SCx200HR_TIMER) += scx200_hrt.o
|
||||
obj-$(CONFIG_CS5535_CLOCK_EVENT_SRC) += cs5535-clockevt.o
|
||||
diff --git a/drivers/clocksource/timer-atmel-tcb.c b/drivers/clocksource/timer-atmel-tcb.c
|
||||
new file mode 100644
|
||||
index 000000000000..21fbe430f91b
|
||||
--- /dev/null
|
||||
+++ b/drivers/clocksource/timer-atmel-tcb.c
|
||||
@@ -0,0 +1,410 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0
|
||||
+#include <linux/clk.h>
|
||||
+#include <linux/clockchips.h>
|
||||
+#include <linux/clocksource.h>
|
||||
+#include <linux/interrupt.h>
|
||||
+#include <linux/kernel.h>
|
||||
+#include <linux/mfd/syscon.h>
|
||||
+#include <linux/of_address.h>
|
||||
+#include <linux/of_irq.h>
|
||||
+#include <linux/regmap.h>
|
||||
+#include <linux/sched_clock.h>
|
||||
+#include <soc/at91/atmel_tcb.h>
|
||||
+
|
||||
+struct atmel_tcb_clksrc {
|
||||
+ struct clocksource clksrc;
|
||||
+ struct clock_event_device clkevt;
|
||||
+ struct regmap *regmap;
|
||||
+ void __iomem *base;
|
||||
+ struct clk *clk[2];
|
||||
+ char name[20];
|
||||
+ int channels[2];
|
||||
+ int bits;
|
||||
+ int irq;
|
||||
+ struct {
|
||||
+ u32 cmr;
|
||||
+ u32 imr;
|
||||
+ u32 rc;
|
||||
+ bool clken;
|
||||
+ } cache[2];
|
||||
+ u32 bmr_cache;
|
||||
+ bool registered;
|
||||
+ bool clk_enabled;
|
||||
+};
|
||||
+
|
||||
+static struct atmel_tcb_clksrc tc;
|
||||
+
|
||||
+static struct clk *tcb_clk_get(struct device_node *node, int channel)
|
||||
+{
|
||||
+ struct clk *clk;
|
||||
+ char clk_name[] = "t0_clk";
|
||||
+
|
||||
+ clk_name[1] += channel;
|
||||
+ clk = of_clk_get_by_name(node->parent, clk_name);
|
||||
+ if (!IS_ERR(clk))
|
||||
+ return clk;
|
||||
+
|
||||
+ return of_clk_get_by_name(node->parent, "t0_clk");
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * Clocksource and clockevent using the same channel(s)
|
||||
+ */
|
||||
+static u64 tc_get_cycles(struct clocksource *cs)
|
||||
+{
|
||||
+ u32 lower, upper;
|
||||
+
|
||||
+ do {
|
||||
+ upper = readl_relaxed(tc.base + ATMEL_TC_CV(tc.channels[1]));
|
||||
+ lower = readl_relaxed(tc.base + ATMEL_TC_CV(tc.channels[0]));
|
||||
+ } while (upper != readl_relaxed(tc.base + ATMEL_TC_CV(tc.channels[1])));
|
||||
+
|
||||
+ return (upper << 16) | lower;
|
||||
+}
|
||||
+
|
||||
+static u64 tc_get_cycles32(struct clocksource *cs)
|
||||
+{
|
||||
+ return readl_relaxed(tc.base + ATMEL_TC_CV(tc.channels[0]));
|
||||
+}
|
||||
+
|
||||
+static u64 notrace tc_sched_clock_read(void)
|
||||
+{
|
||||
+ return tc_get_cycles(&tc.clksrc);
|
||||
+}
|
||||
+
|
||||
+static u64 notrace tc_sched_clock_read32(void)
|
||||
+{
|
||||
+ return tc_get_cycles32(&tc.clksrc);
|
||||
+}
|
||||
+
|
||||
+static int tcb_clkevt_next_event(unsigned long delta,
|
||||
+ struct clock_event_device *d)
|
||||
+{
|
||||
+ u32 old, next, cur;
|
||||
+
|
||||
+ old = readl(tc.base + ATMEL_TC_CV(tc.channels[0]));
|
||||
+ next = old + delta;
|
||||
+ writel(next, tc.base + ATMEL_TC_RC(tc.channels[0]));
|
||||
+ cur = readl(tc.base + ATMEL_TC_CV(tc.channels[0]));
|
||||
+
|
||||
+ /* check whether the delta elapsed while setting the register */
|
||||
+ if ((next < old && cur < old && cur > next) ||
|
||||
+ (next > old && (cur < old || cur > next))) {
|
||||
+ /*
|
||||
+ * Clear the CPCS bit in the status register to avoid
|
||||
+ * generating a spurious interrupt next time a valid
|
||||
+ * timer event is configured.
|
||||
+ */
|
||||
+ old = readl(tc.base + ATMEL_TC_SR(tc.channels[0]));
|
||||
+ return -ETIME;
|
||||
+ }
|
||||
+
|
||||
+ writel(ATMEL_TC_CPCS, tc.base + ATMEL_TC_IER(tc.channels[0]));
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static irqreturn_t tc_clkevt_irq(int irq, void *handle)
|
||||
+{
|
||||
+ unsigned int sr;
|
||||
+
|
||||
+ sr = readl(tc.base + ATMEL_TC_SR(tc.channels[0]));
|
||||
+ if (sr & ATMEL_TC_CPCS) {
|
||||
+ tc.clkevt.event_handler(&tc.clkevt);
|
||||
+ return IRQ_HANDLED;
|
||||
+ }
|
||||
+
|
||||
+ return IRQ_NONE;
|
||||
+}
|
||||
+
|
||||
+static int tcb_clkevt_oneshot(struct clock_event_device *dev)
|
||||
+{
|
||||
+ if (clockevent_state_oneshot(dev))
|
||||
+ return 0;
|
||||
+
|
||||
+ /*
|
||||
+ * Because both clockevent devices may share the same IRQ, we don't want
|
||||
+ * the less likely one to stay requested
|
||||
+ */
|
||||
+ return request_irq(tc.irq, tc_clkevt_irq, IRQF_TIMER | IRQF_SHARED,
|
||||
+ tc.name, &tc);
|
||||
+}
|
||||
+
|
||||
+static int tcb_clkevt_shutdown(struct clock_event_device *dev)
|
||||
+{
|
||||
+ writel(0xff, tc.base + ATMEL_TC_IDR(tc.channels[0]));
|
||||
+ if (tc.bits == 16)
|
||||
+ writel(0xff, tc.base + ATMEL_TC_IDR(tc.channels[1]));
|
||||
+
|
||||
+ if (!clockevent_state_detached(dev))
|
||||
+ free_irq(tc.irq, &tc);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static void __init tcb_setup_dual_chan(struct atmel_tcb_clksrc *tc,
|
||||
+ int mck_divisor_idx)
|
||||
+{
|
||||
+ /* first channel: waveform mode, input mclk/8, clock TIOA on overflow */
|
||||
+ writel(mck_divisor_idx /* likely divide-by-8 */
|
||||
+ | ATMEL_TC_CMR_WAVE
|
||||
+ | ATMEL_TC_CMR_WAVESEL_UP /* free-run */
|
||||
+ | ATMEL_TC_CMR_ACPA(SET) /* TIOA rises at 0 */
|
||||
+ | ATMEL_TC_CMR_ACPC(CLEAR), /* (duty cycle 50%) */
|
||||
+ tc->base + ATMEL_TC_CMR(tc->channels[0]));
|
||||
+ writel(0x0000, tc->base + ATMEL_TC_RA(tc->channels[0]));
|
||||
+ writel(0x8000, tc->base + ATMEL_TC_RC(tc->channels[0]));
|
||||
+ writel(0xff, tc->base + ATMEL_TC_IDR(tc->channels[0])); /* no irqs */
|
||||
+ writel(ATMEL_TC_CCR_CLKEN, tc->base + ATMEL_TC_CCR(tc->channels[0]));
|
||||
+
|
||||
+ /* second channel: waveform mode, input TIOA */
|
||||
+ writel(ATMEL_TC_CMR_XC(tc->channels[1]) /* input: TIOA */
|
||||
+ | ATMEL_TC_CMR_WAVE
|
||||
+ | ATMEL_TC_CMR_WAVESEL_UP, /* free-run */
|
||||
+ tc->base + ATMEL_TC_CMR(tc->channels[1]));
|
||||
+ writel(0xff, tc->base + ATMEL_TC_IDR(tc->channels[1])); /* no irqs */
|
||||
+ writel(ATMEL_TC_CCR_CLKEN, tc->base + ATMEL_TC_CCR(tc->channels[1]));
|
||||
+
|
||||
+ /* chain both channel, we assume the previous channel */
|
||||
+ regmap_write(tc->regmap, ATMEL_TC_BMR,
|
||||
+ ATMEL_TC_BMR_TCXC(1 + tc->channels[1], tc->channels[1]));
|
||||
+ /* then reset all the timers */
|
||||
+ regmap_write(tc->regmap, ATMEL_TC_BCR, ATMEL_TC_BCR_SYNC);
|
||||
+}
|
||||
+
|
||||
+static void __init tcb_setup_single_chan(struct atmel_tcb_clksrc *tc,
|
||||
+ int mck_divisor_idx)
|
||||
+{
|
||||
+ /* channel 0: waveform mode, input mclk/8 */
|
||||
+ writel(mck_divisor_idx /* likely divide-by-8 */
|
||||
+ | ATMEL_TC_CMR_WAVE
|
||||
+ | ATMEL_TC_CMR_WAVESEL_UP, /* free-run */
|
||||
+ tc->base + ATMEL_TC_CMR(tc->channels[0]));
|
||||
+ writel(0xff, tc->base + ATMEL_TC_IDR(tc->channels[0])); /* no irqs */
|
||||
+ writel(ATMEL_TC_CCR_CLKEN, tc->base + ATMEL_TC_CCR(tc->channels[0]));
|
||||
+
|
||||
+ /* then reset all the timers */
|
||||
+ regmap_write(tc->regmap, ATMEL_TC_BCR, ATMEL_TC_BCR_SYNC);
|
||||
+}
|
||||
+
|
||||
+static void tc_clksrc_suspend(struct clocksource *cs)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ for (i = 0; i < 1 + (tc.bits == 16); i++) {
|
||||
+ tc.cache[i].cmr = readl(tc.base + ATMEL_TC_CMR(tc.channels[i]));
|
||||
+ tc.cache[i].imr = readl(tc.base + ATMEL_TC_IMR(tc.channels[i]));
|
||||
+ tc.cache[i].rc = readl(tc.base + ATMEL_TC_RC(tc.channels[i]));
|
||||
+ tc.cache[i].clken = !!(readl(tc.base +
|
||||
+ ATMEL_TC_SR(tc.channels[i])) &
|
||||
+ ATMEL_TC_CLKSTA);
|
||||
+ }
|
||||
+
|
||||
+ if (tc.bits == 16)
|
||||
+ regmap_read(tc.regmap, ATMEL_TC_BMR, &tc.bmr_cache);
|
||||
+}
|
||||
+
|
||||
+static void tc_clksrc_resume(struct clocksource *cs)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ for (i = 0; i < 1 + (tc.bits == 16); i++) {
|
||||
+ /* Restore registers for the channel, RA and RB are not used */
|
||||
+ writel(tc.cache[i].cmr, tc.base + ATMEL_TC_CMR(tc.channels[i]));
|
||||
+ writel(tc.cache[i].rc, tc.base + ATMEL_TC_RC(tc.channels[i]));
|
||||
+ writel(0, tc.base + ATMEL_TC_RA(tc.channels[i]));
|
||||
+ writel(0, tc.base + ATMEL_TC_RB(tc.channels[i]));
|
||||
+ /* Disable all the interrupts */
|
||||
+ writel(0xff, tc.base + ATMEL_TC_IDR(tc.channels[i]));
|
||||
+ /* Reenable interrupts that were enabled before suspending */
|
||||
+ writel(tc.cache[i].imr, tc.base + ATMEL_TC_IER(tc.channels[i]));
|
||||
+
|
||||
+ /* Start the clock if it was used */
|
||||
+ if (tc.cache[i].clken)
|
||||
+ writel(ATMEL_TC_CCR_CLKEN, tc.base +
|
||||
+ ATMEL_TC_CCR(tc.channels[i]));
|
||||
+ }
|
||||
+
|
||||
+ /* in case of dual channel, chain channels */
|
||||
+ if (tc.bits == 16)
|
||||
+ regmap_write(tc.regmap, ATMEL_TC_BMR, tc.bmr_cache);
|
||||
+ /* Finally, trigger all the channels*/
|
||||
+ regmap_write(tc.regmap, ATMEL_TC_BCR, ATMEL_TC_BCR_SYNC);
|
||||
+}
|
||||
+
|
||||
+static int __init tcb_clksrc_register(struct device_node *node,
|
||||
+ struct regmap *regmap, void __iomem *base,
|
||||
+ int channel, int channel1, int irq,
|
||||
+ int bits)
|
||||
+{
|
||||
+ u32 rate, divided_rate = 0;
|
||||
+ int best_divisor_idx = -1;
|
||||
+ int i, err = -1;
|
||||
+ u64 (*tc_sched_clock)(void);
|
||||
+
|
||||
+ tc.regmap = regmap;
|
||||
+ tc.base = base;
|
||||
+ tc.channels[0] = channel;
|
||||
+ tc.channels[1] = channel1;
|
||||
+ tc.irq = irq;
|
||||
+ tc.bits = bits;
|
||||
+
|
||||
+ tc.clk[0] = tcb_clk_get(node, tc.channels[0]);
|
||||
+ if (IS_ERR(tc.clk[0]))
|
||||
+ return PTR_ERR(tc.clk[0]);
|
||||
+ err = clk_prepare_enable(tc.clk[0]);
|
||||
+ if (err) {
|
||||
+ pr_debug("can't enable T0 clk\n");
|
||||
+ goto err_clk;
|
||||
+ }
|
||||
+
|
||||
+ /* How fast will we be counting? Pick something over 5 MHz. */
|
||||
+ rate = (u32)clk_get_rate(tc.clk[0]);
|
||||
+ for (i = 0; i < 5; i++) {
|
||||
+ unsigned int divisor = atmel_tc_divisors[i];
|
||||
+ unsigned int tmp;
|
||||
+
|
||||
+ if (!divisor)
|
||||
+ continue;
|
||||
+
|
||||
+ tmp = rate / divisor;
|
||||
+ pr_debug("TC: %u / %-3u [%d] --> %u\n", rate, divisor, i, tmp);
|
||||
+ if (best_divisor_idx > 0) {
|
||||
+ if (tmp < 5 * 1000 * 1000)
|
||||
+ continue;
|
||||
+ }
|
||||
+ divided_rate = tmp;
|
||||
+ best_divisor_idx = i;
|
||||
+ }
|
||||
+
|
||||
+ if (tc.bits == 32) {
|
||||
+ tc.clksrc.read = tc_get_cycles32;
|
||||
+ tcb_setup_single_chan(&tc, best_divisor_idx);
|
||||
+ tc_sched_clock = tc_sched_clock_read32;
|
||||
+ snprintf(tc.name, sizeof(tc.name), "%s:%d",
|
||||
+ kbasename(node->parent->full_name), tc.channels[0]);
|
||||
+ } else {
|
||||
+ tc.clk[1] = tcb_clk_get(node, tc.channels[1]);
|
||||
+ if (IS_ERR(tc.clk[1]))
|
||||
+ goto err_disable_t0;
|
||||
+
|
||||
+ err = clk_prepare_enable(tc.clk[1]);
|
||||
+ if (err) {
|
||||
+ pr_debug("can't enable T1 clk\n");
|
||||
+ goto err_clk1;
|
||||
+ }
|
||||
+ tc.clksrc.read = tc_get_cycles,
|
||||
+ tcb_setup_dual_chan(&tc, best_divisor_idx);
|
||||
+ tc_sched_clock = tc_sched_clock_read;
|
||||
+ snprintf(tc.name, sizeof(tc.name), "%s:%d,%d",
|
||||
+ kbasename(node->parent->full_name), tc.channels[0],
|
||||
+ tc.channels[1]);
|
||||
+ }
|
||||
+
|
||||
+ pr_debug("%s at %d.%03d MHz\n", tc.name,
|
||||
+ divided_rate / 1000000,
|
||||
+ ((divided_rate + 500000) % 1000000) / 1000);
|
||||
+
|
||||
+ tc.clksrc.name = tc.name;
|
||||
+ tc.clksrc.suspend = tc_clksrc_suspend;
|
||||
+ tc.clksrc.resume = tc_clksrc_resume;
|
||||
+ tc.clksrc.rating = 200;
|
||||
+ tc.clksrc.mask = CLOCKSOURCE_MASK(32);
|
||||
+ tc.clksrc.flags = CLOCK_SOURCE_IS_CONTINUOUS;
|
||||
+
|
||||
+ err = clocksource_register_hz(&tc.clksrc, divided_rate);
|
||||
+ if (err)
|
||||
+ goto err_disable_t1;
|
||||
+
|
||||
+ sched_clock_register(tc_sched_clock, 32, divided_rate);
|
||||
+
|
||||
+ tc.registered = true;
|
||||
+
|
||||
+ /* Set up and register clockevents */
|
||||
+ tc.clkevt.name = tc.name;
|
||||
+ tc.clkevt.cpumask = cpumask_of(0);
|
||||
+ tc.clkevt.set_next_event = tcb_clkevt_next_event;
|
||||
+ tc.clkevt.set_state_oneshot = tcb_clkevt_oneshot;
|
||||
+ tc.clkevt.set_state_shutdown = tcb_clkevt_shutdown;
|
||||
+ tc.clkevt.features = CLOCK_EVT_FEAT_ONESHOT;
|
||||
+ tc.clkevt.rating = 125;
|
||||
+
|
||||
+ clockevents_config_and_register(&tc.clkevt, divided_rate, 1,
|
||||
+ BIT(tc.bits) - 1);
|
||||
+
|
||||
+ return 0;
|
||||
+
|
||||
+err_disable_t1:
|
||||
+ if (tc.bits == 16)
|
||||
+ clk_disable_unprepare(tc.clk[1]);
|
||||
+
|
||||
+err_clk1:
|
||||
+ if (tc.bits == 16)
|
||||
+ clk_put(tc.clk[1]);
|
||||
+
|
||||
+err_disable_t0:
|
||||
+ clk_disable_unprepare(tc.clk[0]);
|
||||
+
|
||||
+err_clk:
|
||||
+ clk_put(tc.clk[0]);
|
||||
+
|
||||
+ pr_err("%s: unable to register clocksource/clockevent\n",
|
||||
+ tc.clksrc.name);
|
||||
+
|
||||
+ return err;
|
||||
+}
|
||||
+
|
||||
+static int __init tcb_clksrc_init(struct device_node *node)
|
||||
+{
|
||||
+ const struct of_device_id *match;
|
||||
+ struct regmap *regmap;
|
||||
+ void __iomem *tcb_base;
|
||||
+ u32 channel;
|
||||
+ int irq, err, chan1 = -1;
|
||||
+ unsigned bits;
|
||||
+
|
||||
+ if (tc.registered)
|
||||
+ return -ENODEV;
|
||||
+
|
||||
+ /*
|
||||
+ * The regmap has to be used to access registers that are shared
|
||||
+ * between channels on the same TCB but we keep direct IO access for
|
||||
+ * the counters to avoid the impact on performance
|
||||
+ */
|
||||
+ regmap = syscon_node_to_regmap(node->parent);
|
||||
+ if (IS_ERR(regmap))
|
||||
+ return PTR_ERR(regmap);
|
||||
+
|
||||
+ tcb_base = of_iomap(node->parent, 0);
|
||||
+ if (!tcb_base) {
|
||||
+ pr_err("%s +%d %s\n", __FILE__, __LINE__, __func__);
|
||||
+ return -ENXIO;
|
||||
+ }
|
||||
+
|
||||
+ match = of_match_node(atmel_tcb_dt_ids, node->parent);
|
||||
+ bits = (uintptr_t)match->data;
|
||||
+
|
||||
+ err = of_property_read_u32_index(node, "reg", 0, &channel);
|
||||
+ if (err)
|
||||
+ return err;
|
||||
+
|
||||
+ irq = of_irq_get(node->parent, channel);
|
||||
+ if (irq < 0) {
|
||||
+ irq = of_irq_get(node->parent, 0);
|
||||
+ if (irq < 0)
|
||||
+ return irq;
|
||||
+ }
|
||||
+
|
||||
+ if (bits == 16) {
|
||||
+ of_property_read_u32_index(node, "reg", 1, &chan1);
|
||||
+ if (chan1 == -1) {
|
||||
+ pr_err("%s: clocksource needs two channels\n",
|
||||
+ node->parent->full_name);
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return tcb_clksrc_register(node, regmap, tcb_base, channel, chan1, irq,
|
||||
+ bits);
|
||||
+}
|
||||
+TIMER_OF_DECLARE(atmel_tcb_clksrc, "atmel,tcb-timer", tcb_clksrc_init);
|
||||
--
|
||||
2.25.1
|
||||
|
@@ -1,270 +0,0 @@
From f2e0ea85054574af7f632ca36991c5c1a25a7bfd Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Thu, 13 Sep 2018 13:30:20 +0200
Subject: [PATCH 003/328] clocksource/drivers: timer-atmel-tcb: add clockevent
 device on separate channel

Add another clockevent device that uses a separate TCB channel when
available.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 drivers/clocksource/timer-atmel-tcb.c | 217 +++++++++++++++++++++++++-
 1 file changed, 212 insertions(+), 5 deletions(-)

diff --git a/drivers/clocksource/timer-atmel-tcb.c b/drivers/clocksource/timer-atmel-tcb.c
|
||||
index 21fbe430f91b..63ce3b69338a 100644
|
||||
--- a/drivers/clocksource/timer-atmel-tcb.c
|
||||
+++ b/drivers/clocksource/timer-atmel-tcb.c
|
||||
@@ -32,7 +32,7 @@ struct atmel_tcb_clksrc {
|
||||
bool clk_enabled;
|
||||
};
|
||||
|
||||
-static struct atmel_tcb_clksrc tc;
|
||||
+static struct atmel_tcb_clksrc tc, tce;
|
||||
|
||||
static struct clk *tcb_clk_get(struct device_node *node, int channel)
|
||||
{
|
||||
@@ -47,6 +47,203 @@ static struct clk *tcb_clk_get(struct device_node *node, int channel)
|
||||
return of_clk_get_by_name(node->parent, "t0_clk");
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Clockevent device using its own channel
|
||||
+ */
|
||||
+
|
||||
+static void tc_clkevt2_clk_disable(struct clock_event_device *d)
|
||||
+{
|
||||
+ clk_disable(tce.clk[0]);
|
||||
+ tce.clk_enabled = false;
|
||||
+}
|
||||
+
|
||||
+static void tc_clkevt2_clk_enable(struct clock_event_device *d)
|
||||
+{
|
||||
+ if (tce.clk_enabled)
|
||||
+ return;
|
||||
+ clk_enable(tce.clk[0]);
|
||||
+ tce.clk_enabled = true;
|
||||
+}
|
||||
+
|
||||
+static int tc_clkevt2_stop(struct clock_event_device *d)
|
||||
+{
|
||||
+ writel(0xff, tce.base + ATMEL_TC_IDR(tce.channels[0]));
|
||||
+ writel(ATMEL_TC_CCR_CLKDIS, tce.base + ATMEL_TC_CCR(tce.channels[0]));
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int tc_clkevt2_shutdown(struct clock_event_device *d)
|
||||
+{
|
||||
+ tc_clkevt2_stop(d);
|
||||
+ if (!clockevent_state_detached(d))
|
||||
+ tc_clkevt2_clk_disable(d);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* For now, we always use the 32K clock ... this optimizes for NO_HZ,
|
||||
+ * because using one of the divided clocks would usually mean the
|
||||
+ * tick rate can never be less than several dozen Hz (vs 0.5 Hz).
|
||||
+ *
|
||||
+ * A divided clock could be good for high resolution timers, since
|
||||
+ * 30.5 usec resolution can seem "low".
|
||||
+ */
|
||||
+static int tc_clkevt2_set_oneshot(struct clock_event_device *d)
|
||||
+{
|
||||
+ if (clockevent_state_oneshot(d) || clockevent_state_periodic(d))
|
||||
+ tc_clkevt2_stop(d);
|
||||
+
|
||||
+ tc_clkevt2_clk_enable(d);
|
||||
+
|
||||
+ /* slow clock, count up to RC, then irq and stop */
|
||||
+ writel(ATMEL_TC_CMR_TCLK(4) | ATMEL_TC_CMR_CPCSTOP |
|
||||
+ ATMEL_TC_CMR_WAVE | ATMEL_TC_CMR_WAVESEL_UPRC,
|
||||
+ tce.base + ATMEL_TC_CMR(tce.channels[0]));
|
||||
+ writel(ATMEL_TC_CPCS, tce.base + ATMEL_TC_IER(tce.channels[0]));
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int tc_clkevt2_set_periodic(struct clock_event_device *d)
|
||||
+{
|
||||
+ if (clockevent_state_oneshot(d) || clockevent_state_periodic(d))
|
||||
+ tc_clkevt2_stop(d);
|
||||
+
|
||||
+ /* By not making the gentime core emulate periodic mode on top
|
||||
+ * of oneshot, we get lower overhead and improved accuracy.
|
||||
+ */
|
||||
+ tc_clkevt2_clk_enable(d);
|
||||
+
|
||||
+ /* slow clock, count up to RC, then irq and restart */
|
||||
+ writel(ATMEL_TC_CMR_TCLK(4) | ATMEL_TC_CMR_WAVE |
|
||||
+ ATMEL_TC_CMR_WAVESEL_UPRC,
|
||||
+ tce.base + ATMEL_TC_CMR(tce.channels[0]));
|
||||
+ writel((32768 + HZ / 2) / HZ, tce.base + ATMEL_TC_RC(tce.channels[0]));
|
||||
+
|
||||
+ /* Enable clock and interrupts on RC compare */
|
||||
+ writel(ATMEL_TC_CPCS, tce.base + ATMEL_TC_IER(tce.channels[0]));
|
||||
+ writel(ATMEL_TC_CCR_CLKEN | ATMEL_TC_CCR_SWTRG,
|
||||
+ tce.base + ATMEL_TC_CCR(tce.channels[0]));
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int tc_clkevt2_next_event(unsigned long delta,
|
||||
+ struct clock_event_device *d)
|
||||
+{
|
||||
+ writel(delta, tce.base + ATMEL_TC_RC(tce.channels[0]));
|
||||
+ writel(ATMEL_TC_CCR_CLKEN | ATMEL_TC_CCR_SWTRG,
|
||||
+ tce.base + ATMEL_TC_CCR(tce.channels[0]));
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static irqreturn_t tc_clkevt2_irq(int irq, void *handle)
|
||||
+{
|
||||
+ unsigned int sr;
|
||||
+
|
||||
+ sr = readl(tce.base + ATMEL_TC_SR(tce.channels[0]));
|
||||
+ if (sr & ATMEL_TC_CPCS) {
|
||||
+ tce.clkevt.event_handler(&tce.clkevt);
|
||||
+ return IRQ_HANDLED;
|
||||
+ }
|
||||
+
|
||||
+ return IRQ_NONE;
|
||||
+}
|
||||
+
|
||||
+static void tc_clkevt2_suspend(struct clock_event_device *d)
|
||||
+{
|
||||
+ tce.cache[0].cmr = readl(tce.base + ATMEL_TC_CMR(tce.channels[0]));
|
||||
+ tce.cache[0].imr = readl(tce.base + ATMEL_TC_IMR(tce.channels[0]));
|
||||
+ tce.cache[0].rc = readl(tce.base + ATMEL_TC_RC(tce.channels[0]));
|
||||
+ tce.cache[0].clken = !!(readl(tce.base + ATMEL_TC_SR(tce.channels[0])) &
|
||||
+ ATMEL_TC_CLKSTA);
|
||||
+}
|
||||
+
|
||||
+static void tc_clkevt2_resume(struct clock_event_device *d)
|
||||
+{
|
||||
+ /* Restore registers for the channel, RA and RB are not used */
|
||||
+ writel(tce.cache[0].cmr, tc.base + ATMEL_TC_CMR(tce.channels[0]));
|
||||
+ writel(tce.cache[0].rc, tc.base + ATMEL_TC_RC(tce.channels[0]));
|
||||
+ writel(0, tc.base + ATMEL_TC_RA(tce.channels[0]));
|
||||
+ writel(0, tc.base + ATMEL_TC_RB(tce.channels[0]));
|
||||
+ /* Disable all the interrupts */
|
||||
+ writel(0xff, tc.base + ATMEL_TC_IDR(tce.channels[0]));
|
||||
+ /* Reenable interrupts that were enabled before suspending */
|
||||
+ writel(tce.cache[0].imr, tc.base + ATMEL_TC_IER(tce.channels[0]));
|
||||
+
|
||||
+ /* Start the clock if it was used */
|
||||
+ if (tce.cache[0].clken)
|
||||
+ writel(ATMEL_TC_CCR_CLKEN | ATMEL_TC_CCR_SWTRG,
|
||||
+ tc.base + ATMEL_TC_CCR(tce.channels[0]));
|
||||
+}
|
||||
+
|
||||
+static int __init tc_clkevt_register(struct device_node *node,
|
||||
+ struct regmap *regmap, void __iomem *base,
|
||||
+ int channel, int irq, int bits)
|
||||
+{
|
||||
+ int ret;
|
||||
+ struct clk *slow_clk;
|
||||
+
|
||||
+ tce.regmap = regmap;
|
||||
+ tce.base = base;
|
||||
+ tce.channels[0] = channel;
|
||||
+ tce.irq = irq;
|
||||
+
|
||||
+ slow_clk = of_clk_get_by_name(node->parent, "slow_clk");
|
||||
+ if (IS_ERR(slow_clk))
|
||||
+ return PTR_ERR(slow_clk);
|
||||
+
|
||||
+ ret = clk_prepare_enable(slow_clk);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+
|
||||
+ tce.clk[0] = tcb_clk_get(node, tce.channels[0]);
|
||||
+ if (IS_ERR(tce.clk[0])) {
|
||||
+ ret = PTR_ERR(tce.clk[0]);
|
||||
+ goto err_slow;
|
||||
+ }
|
||||
+
|
||||
+ snprintf(tce.name, sizeof(tce.name), "%s:%d",
|
||||
+ kbasename(node->parent->full_name), channel);
|
||||
+ tce.clkevt.cpumask = cpumask_of(0);
|
||||
+ tce.clkevt.name = tce.name;
|
||||
+ tce.clkevt.set_next_event = tc_clkevt2_next_event,
|
||||
+ tce.clkevt.set_state_shutdown = tc_clkevt2_shutdown,
|
||||
+ tce.clkevt.set_state_periodic = tc_clkevt2_set_periodic,
|
||||
+ tce.clkevt.set_state_oneshot = tc_clkevt2_set_oneshot,
|
||||
+ tce.clkevt.suspend = tc_clkevt2_suspend,
|
||||
+ tce.clkevt.resume = tc_clkevt2_resume,
|
||||
+ tce.clkevt.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
|
||||
+ tce.clkevt.rating = 140;
|
||||
+
|
||||
+ /* try to enable clk to avoid future errors in mode change */
|
||||
+ ret = clk_prepare_enable(tce.clk[0]);
|
||||
+ if (ret)
|
||||
+ goto err_slow;
|
||||
+ clk_disable(tce.clk[0]);
|
||||
+
|
||||
+ clockevents_config_and_register(&tce.clkevt, 32768, 1,
|
||||
+ CLOCKSOURCE_MASK(bits));
|
||||
+
|
||||
+ ret = request_irq(tce.irq, tc_clkevt2_irq, IRQF_TIMER | IRQF_SHARED,
|
||||
+ tce.clkevt.name, &tce);
|
||||
+ if (ret)
|
||||
+ goto err_clk;
|
||||
+
|
||||
+ tce.registered = true;
|
||||
+
|
||||
+ return 0;
|
||||
+
|
||||
+err_clk:
|
||||
+ clk_unprepare(tce.clk[0]);
|
||||
+err_slow:
|
||||
+ clk_disable_unprepare(slow_clk);
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Clocksource and clockevent using the same channel(s)
|
||||
*/
|
||||
@@ -363,7 +560,7 @@ static int __init tcb_clksrc_init(struct device_node *node)
|
||||
int irq, err, chan1 = -1;
|
||||
unsigned bits;
|
||||
|
||||
- if (tc.registered)
|
||||
+ if (tc.registered && tce.registered)
|
||||
return -ENODEV;
|
||||
|
||||
/*
|
||||
@@ -395,12 +592,22 @@ static int __init tcb_clksrc_init(struct device_node *node)
|
||||
return irq;
|
||||
}
|
||||
|
||||
+ if (tc.registered)
|
||||
+ return tc_clkevt_register(node, regmap, tcb_base, channel, irq,
|
||||
+ bits);
|
||||
+
|
||||
if (bits == 16) {
|
||||
of_property_read_u32_index(node, "reg", 1, &chan1);
|
||||
if (chan1 == -1) {
|
||||
- pr_err("%s: clocksource needs two channels\n",
|
||||
- node->parent->full_name);
|
||||
- return -EINVAL;
|
||||
+ if (tce.registered) {
|
||||
+ pr_err("%s: clocksource needs two channels\n",
|
||||
+ node->parent->full_name);
|
||||
+ return -EINVAL;
|
||||
+ } else {
|
||||
+ return tc_clkevt_register(node, regmap,
|
||||
+ tcb_base, channel,
|
||||
+ irq, bits);
|
||||
+ }
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.25.1
|
||||
|
@@ -1,35 +0,0 @@
From 23ef2fe8b6933933fb81af9decf35cfae8c14571 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Thu, 13 Sep 2018 13:30:21 +0200
Subject: [PATCH 004/328] clocksource/drivers: atmel-pit: make option silent

To conform with the other option, make the ATMEL_PIT option silent so it
can be selected from the platform.

Tested-by: Alexander Dahl <ada@thorsis.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 drivers/clocksource/Kconfig | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
|
||||
index 0ab22e7037f4..34b07047b91f 100644
|
||||
--- a/drivers/clocksource/Kconfig
|
||||
+++ b/drivers/clocksource/Kconfig
|
||||
@@ -404,8 +404,11 @@ config ARMV7M_SYSTICK
|
||||
This options enables support for the ARMv7M system timer unit
|
||||
|
||||
config ATMEL_PIT
|
||||
+ bool "Microchip ARM Periodic Interval Timer (PIT)" if COMPILE_TEST
|
||||
select TIMER_OF if OF
|
||||
- def_bool SOC_AT91SAM9 || SOC_SAMA5
|
||||
+ help
|
||||
+ This enables build of clocksource and clockevent driver for
|
||||
+ the integrated PIT in Microchip ARM SoCs.
|
||||
|
||||
config ATMEL_ST
|
||||
bool "Atmel ST timer support" if COMPILE_TEST
|
||||
--
|
||||
2.25.1
|
||||
|
@@ -1,54 +0,0 @@
From 56d1624c2b43a84717f237d3c2d58ac52cb37b33 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Thu, 13 Sep 2018 13:30:22 +0200
Subject: [PATCH 005/328] ARM: at91: Implement clocksource selection

Allow selecting and unselecting the PIT clocksource driver so it doesn't
have to be compiled when unused.

Tested-by: Alexander Dahl <ada@thorsis.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 arch/arm/mach-at91/Kconfig | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/arch/arm/mach-at91/Kconfig b/arch/arm/mach-at91/Kconfig
|
||||
index 903f23c309df..fa493a86e2bb 100644
|
||||
--- a/arch/arm/mach-at91/Kconfig
|
||||
+++ b/arch/arm/mach-at91/Kconfig
|
||||
@@ -107,6 +107,31 @@ config SOC_AT91SAM9
|
||||
AT91SAM9X35
|
||||
AT91SAM9XE
|
||||
|
||||
+comment "Clocksource driver selection"
|
||||
+
|
||||
+config ATMEL_CLOCKSOURCE_PIT
|
||||
+ bool "Periodic Interval Timer (PIT) support"
|
||||
+ depends on SOC_AT91SAM9 || SOC_SAMA5
|
||||
+ default SOC_AT91SAM9 || SOC_SAMA5
|
||||
+ select ATMEL_PIT
|
||||
+ help
|
||||
+ Select this to get a clocksource based on the Atmel Periodic Interval
|
||||
+ Timer. It has a relatively low resolution and the TC Block clocksource
|
||||
+ should be preferred.
|
||||
+
|
||||
+config ATMEL_CLOCKSOURCE_TCB
|
||||
+ bool "Timer Counter Blocks (TCB) support"
|
||||
+ depends on SOC_AT91RM9200 || SOC_AT91SAM9 || SOC_SAMA5 || COMPILE_TEST
|
||||
+ default SOC_AT91RM9200 || SOC_AT91SAM9 || SOC_SAMA5
|
||||
+ depends on !ATMEL_TCLIB
|
||||
+ select ATMEL_ARM_TCB_CLKSRC
|
||||
+ help
|
||||
+ Select this to get a high precision clocksource based on a
|
||||
+ TC block with a 5+ MHz base clock rate.
|
||||
+ On platforms with 16-bit counters, two timer channels are combined
|
||||
+ to make a single 32-bit timer.
|
||||
+ It can also be used as a clock event device supporting oneshot mode.
|
||||
+
|
||||
config HAVE_AT91_UTMI
|
||||
bool
|
||||
|
||||
--
|
||||
2.25.1
|
||||
|
@@ -1,42 +0,0 @@
From 9591e618026011c31f7275edd0643d390e185e38 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Thu, 13 Sep 2018 13:30:23 +0200
Subject: [PATCH 006/328] ARM: configs: at91: use new TCB timer driver

Unselecting ATMEL_TCLIB switches the TCB timer driver from tcb_clksrc to
timer-atmel-tcb.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 arch/arm/configs/at91_dt_defconfig | 1 -
 arch/arm/configs/sama5_defconfig   | 1 -
 2 files changed, 2 deletions(-)

diff --git a/arch/arm/configs/at91_dt_defconfig b/arch/arm/configs/at91_dt_defconfig
|
||||
index e4b1be66b3f5..09f262e59fef 100644
|
||||
--- a/arch/arm/configs/at91_dt_defconfig
|
||||
+++ b/arch/arm/configs/at91_dt_defconfig
|
||||
@@ -64,7 +64,6 @@ CONFIG_BLK_DEV_LOOP=y
|
||||
CONFIG_BLK_DEV_RAM=y
|
||||
CONFIG_BLK_DEV_RAM_COUNT=4
|
||||
CONFIG_BLK_DEV_RAM_SIZE=8192
|
||||
-CONFIG_ATMEL_TCLIB=y
|
||||
CONFIG_ATMEL_SSC=y
|
||||
CONFIG_SCSI=y
|
||||
CONFIG_BLK_DEV_SD=y
|
||||
diff --git a/arch/arm/configs/sama5_defconfig b/arch/arm/configs/sama5_defconfig
|
||||
index 2080025556b5..f2bbc6339ca6 100644
|
||||
--- a/arch/arm/configs/sama5_defconfig
|
||||
+++ b/arch/arm/configs/sama5_defconfig
|
||||
@@ -75,7 +75,6 @@ CONFIG_BLK_DEV_LOOP=y
|
||||
CONFIG_BLK_DEV_RAM=y
|
||||
CONFIG_BLK_DEV_RAM_COUNT=4
|
||||
CONFIG_BLK_DEV_RAM_SIZE=8192
|
||||
-CONFIG_ATMEL_TCLIB=y
|
||||
CONFIG_ATMEL_SSC=y
|
||||
CONFIG_EEPROM_AT24=y
|
||||
CONFIG_SCSI=y
|
||||
--
|
||||
2.25.1
|
||||
|
@@ -1,43 +0,0 @@
From f58179ebd23db67a287e5267a5cbc2c1ae5d75d9 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Thu, 13 Sep 2018 13:30:24 +0200
Subject: [PATCH 007/328] ARM: configs: at91: unselect PIT

The PIT is not required anymore to successfully boot and may actually harm
in case preempt-rt is used because the PIT interrupt is shared.
Disable it so the TCB clocksource is used.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 arch/arm/configs/at91_dt_defconfig | 1 +
 arch/arm/configs/sama5_defconfig   | 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/arm/configs/at91_dt_defconfig b/arch/arm/configs/at91_dt_defconfig
|
||||
index 09f262e59fef..f4b253bd05ed 100644
|
||||
--- a/arch/arm/configs/at91_dt_defconfig
|
||||
+++ b/arch/arm/configs/at91_dt_defconfig
|
||||
@@ -19,6 +19,7 @@ CONFIG_ARCH_MULTI_V5=y
|
||||
CONFIG_ARCH_AT91=y
|
||||
CONFIG_SOC_AT91RM9200=y
|
||||
CONFIG_SOC_AT91SAM9=y
|
||||
+# CONFIG_ATMEL_CLOCKSOURCE_PIT is not set
|
||||
CONFIG_AEABI=y
|
||||
CONFIG_UACCESS_WITH_MEMCPY=y
|
||||
CONFIG_ZBOOT_ROM_TEXT=0x0
|
||||
diff --git a/arch/arm/configs/sama5_defconfig b/arch/arm/configs/sama5_defconfig
|
||||
index f2bbc6339ca6..be92871ab155 100644
|
||||
--- a/arch/arm/configs/sama5_defconfig
|
||||
+++ b/arch/arm/configs/sama5_defconfig
|
||||
@@ -20,6 +20,7 @@ CONFIG_ARCH_AT91=y
|
||||
CONFIG_SOC_SAMA5D2=y
|
||||
CONFIG_SOC_SAMA5D3=y
|
||||
CONFIG_SOC_SAMA5D4=y
|
||||
+# CONFIG_ATMEL_CLOCKSOURCE_PIT is not set
|
||||
CONFIG_AEABI=y
|
||||
CONFIG_UACCESS_WITH_MEMCPY=y
|
||||
CONFIG_ZBOOT_ROM_TEXT=0x0
|
||||
--
|
||||
2.25.1
|
||||
|
@@ -1,170 +0,0 @@
From f5fc79f507ee8c22a6f18709552cecbada48d328 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 27 Jul 2018 13:38:54 +0100
Subject: [PATCH 008/328] irqchip/gic-v3-its: Move pending table allocation to
 init time

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 drivers/irqchip/irq-gic-v3-its.c   | 80 +++++++++++++++++++-----------
 include/linux/irqchip/arm-gic-v3.h |  1 +
 2 files changed, 53 insertions(+), 28 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
|
||||
index bf7b69449b43..f93b8cd5eea2 100644
|
||||
--- a/drivers/irqchip/irq-gic-v3-its.c
|
||||
+++ b/drivers/irqchip/irq-gic-v3-its.c
|
||||
@@ -179,6 +179,7 @@ static DEFINE_RAW_SPINLOCK(vmovp_lock);
|
||||
static DEFINE_IDA(its_vpeid_ida);
|
||||
|
||||
#define gic_data_rdist() (raw_cpu_ptr(gic_rdists->rdist))
|
||||
+#define gic_data_rdist_cpu(cpu) (per_cpu_ptr(gic_rdists->rdist, cpu))
|
||||
#define gic_data_rdist_rd_base() (gic_data_rdist()->rd_base)
|
||||
#define gic_data_rdist_vlpi_base() (gic_data_rdist_rd_base() + SZ_128K)
|
||||
|
||||
@@ -1659,7 +1660,7 @@ static void its_free_prop_table(struct page *prop_page)
|
||||
get_order(LPI_PROPBASE_SZ));
|
||||
}
|
||||
|
||||
-static int __init its_alloc_lpi_tables(void)
|
||||
+static int __init its_alloc_lpi_prop_table(void)
|
||||
{
|
||||
phys_addr_t paddr;
|
||||
|
||||
@@ -2007,30 +2008,47 @@ static u64 its_clear_vpend_valid(void __iomem *vlpi_base)
|
||||
return val;
|
||||
}
|
||||
|
||||
-static void its_cpu_init_lpis(void)
|
||||
+static int __init allocate_lpi_tables(void)
|
||||
{
|
||||
- void __iomem *rbase = gic_data_rdist_rd_base();
|
||||
- struct page *pend_page;
|
||||
- u64 val, tmp;
|
||||
+ int err, cpu;
|
||||
|
||||
- /* If we didn't allocate the pending table yet, do it now */
|
||||
- pend_page = gic_data_rdist()->pend_page;
|
||||
- if (!pend_page) {
|
||||
- phys_addr_t paddr;
|
||||
+ err = its_alloc_lpi_prop_table();
|
||||
+ if (err)
|
||||
+ return err;
|
||||
+
|
||||
+ /*
|
||||
+ * We allocate all the pending tables anyway, as we may have a
|
||||
+ * mix of RDs that have had LPIs enabled, and some that
|
||||
+ * don't. We'll free the unused ones as each CPU comes online.
|
||||
+ */
|
||||
+ for_each_possible_cpu(cpu) {
|
||||
+ struct page *pend_page;
|
||||
|
||||
pend_page = its_allocate_pending_table(GFP_NOWAIT);
|
||||
if (!pend_page) {
|
||||
- pr_err("Failed to allocate PENDBASE for CPU%d\n",
|
||||
- smp_processor_id());
|
||||
- return;
|
||||
+ pr_err("Failed to allocate PENDBASE for CPU%d\n", cpu);
|
||||
+ return -ENOMEM;
|
||||
}
|
||||
|
||||
- paddr = page_to_phys(pend_page);
|
||||
- pr_info("CPU%d: using LPI pending table @%pa\n",
|
||||
- smp_processor_id(), &paddr);
|
||||
- gic_data_rdist()->pend_page = pend_page;
|
||||
+ gic_data_rdist_cpu(cpu)->pend_page = pend_page;
|
||||
}
|
||||
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static void its_cpu_init_lpis(void)
|
||||
+{
|
||||
+ void __iomem *rbase = gic_data_rdist_rd_base();
|
||||
+ struct page *pend_page;
|
||||
+ phys_addr_t paddr;
|
||||
+ u64 val, tmp;
|
||||
+
|
||||
+ if (gic_data_rdist()->lpi_enabled)
|
||||
+ return;
|
||||
+
|
||||
+ pend_page = gic_data_rdist()->pend_page;
|
||||
+ paddr = page_to_phys(pend_page);
|
||||
+
|
||||
/* set PROPBASE */
|
||||
val = (page_to_phys(gic_rdists->prop_page) |
|
||||
GICR_PROPBASER_InnerShareable |
|
||||
@@ -2106,6 +2124,10 @@ static void its_cpu_init_lpis(void)
|
||||
|
||||
/* Make sure the GIC has seen the above */
|
||||
dsb(sy);
|
||||
+ gic_data_rdist()->lpi_enabled = true;
|
||||
+ pr_info("GICv3: CPU%d: using LPI pending table @%pa\n",
|
||||
+ smp_processor_id(),
|
||||
+ &paddr);
|
||||
}
|
||||
|
||||
static void its_cpu_init_collection(struct its_node *its)
|
||||
@@ -3585,16 +3607,6 @@ static int redist_disable_lpis(void)
|
||||
u64 timeout = USEC_PER_SEC;
|
||||
u64 val;
|
||||
|
||||
- /*
|
||||
- * If coming via a CPU hotplug event, we don't need to disable
|
||||
- * LPIs before trying to re-enable them. They are already
|
||||
- * configured and all is well in the world. Detect this case
|
||||
- * by checking the allocation of the pending table for the
|
||||
- * current CPU.
|
||||
- */
|
||||
- if (gic_data_rdist()->pend_page)
|
||||
- return 0;
|
||||
-
|
||||
if (!gic_rdists_supports_plpis()) {
|
||||
pr_info("CPU%d: LPIs not supported\n", smp_processor_id());
|
||||
return -ENXIO;
|
||||
@@ -3604,7 +3616,18 @@ static int redist_disable_lpis(void)
|
||||
if (!(val & GICR_CTLR_ENABLE_LPIS))
|
||||
return 0;
|
||||
|
||||
- pr_warn("CPU%d: Booted with LPIs enabled, memory probably corrupted\n",
|
||||
+ /*
|
||||
+ * If coming via a CPU hotplug event, we don't need to disable
|
||||
+ * LPIs before trying to re-enable them. They are already
|
||||
+ * configured and all is well in the world.
|
||||
+ */
|
||||
+ if (gic_data_rdist()->lpi_enabled)
|
||||
+ return 0;
|
||||
+
|
||||
+ /*
|
||||
+ * From that point on, we only try to do some damage control.
|
||||
+ */
|
||||
+ pr_warn("GICv3: CPU%d: Booted with LPIs enabled, memory probably corrupted\n",
|
||||
smp_processor_id());
|
||||
add_taint(TAINT_CRAP, LOCKDEP_STILL_OK);
|
||||
|
||||
@@ -3860,7 +3883,8 @@ int __init its_init(struct fwnode_handle *handle, struct rdists *rdists,
|
||||
}
|
||||
|
||||
gic_rdists = rdists;
|
||||
- err = its_alloc_lpi_tables();
|
||||
+
|
||||
+ err = allocate_lpi_tables();
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
|
||||
index 1d21e98d6854..fdddead7e307 100644
|
||||
--- a/include/linux/irqchip/arm-gic-v3.h
|
||||
+++ b/include/linux/irqchip/arm-gic-v3.h
|
||||
@@ -585,6 +585,7 @@ struct rdists {
|
||||
void __iomem *rd_base;
|
||||
struct page *pend_page;
|
||||
phys_addr_t phys_base;
|
||||
+ bool lpi_enabled;
|
||||
} __percpu *rdist;
|
||||
struct page *prop_page;
|
||||
u64 flags;
|
||||
--
|
||||
2.25.1
|
||||
|
@@ -1,202 +0,0 @@
From 89b3ba99261e5321ba2276305f817b5c0c9817ad Mon Sep 17 00:00:00 2001
From: Julia Cartwright <julia@ni.com>
Date: Fri, 28 Sep 2018 21:03:51 +0000
Subject: [PATCH 009/328] kthread: convert worker lock to raw spinlock

In order to enable the queuing of kthread work items from hardirq
context even when PREEMPT_RT_FULL is enabled, convert the worker
spin_lock to a raw_spin_lock.

This is only acceptable to do because the work performed under the lock
is well-bounded and minimal.

Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Guenter Roeck <linux@roeck-us.net>
Reported-and-tested-by: Steffen Trumtrar <s.trumtrar@pengutronix.de>
Reported-by: Tim Sander <tim@krieglstein.org>
Signed-off-by: Julia Cartwright <julia@ni.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 include/linux/kthread.h |  2 +-
 kernel/kthread.c        | 42 ++++++++++++++++++++---------------------
 2 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
|
||||
index c1961761311d..ad292898f7f2 100644
|
||||
--- a/include/linux/kthread.h
|
||||
+++ b/include/linux/kthread.h
|
||||
@@ -85,7 +85,7 @@ enum {
|
||||
|
||||
struct kthread_worker {
|
||||
unsigned int flags;
|
||||
- spinlock_t lock;
|
||||
+ raw_spinlock_t lock;
|
||||
struct list_head work_list;
|
||||
struct list_head delayed_work_list;
|
||||
struct task_struct *task;
|
||||
diff --git a/kernel/kthread.c b/kernel/kthread.c
|
||||
index 087d18d771b5..5641b55783a6 100644
|
||||
--- a/kernel/kthread.c
|
||||
+++ b/kernel/kthread.c
|
||||
@@ -599,7 +599,7 @@ void __kthread_init_worker(struct kthread_worker *worker,
|
||||
struct lock_class_key *key)
|
||||
{
|
||||
memset(worker, 0, sizeof(struct kthread_worker));
|
||||
- spin_lock_init(&worker->lock);
|
||||
+ raw_spin_lock_init(&worker->lock);
|
||||
lockdep_set_class_and_name(&worker->lock, key, name);
|
||||
INIT_LIST_HEAD(&worker->work_list);
|
||||
INIT_LIST_HEAD(&worker->delayed_work_list);
|
||||
@@ -641,21 +641,21 @@ int kthread_worker_fn(void *worker_ptr)
|
||||
|
||||
if (kthread_should_stop()) {
|
||||
__set_current_state(TASK_RUNNING);
|
||||
- spin_lock_irq(&worker->lock);
|
||||
+ raw_spin_lock_irq(&worker->lock);
|
||||
worker->task = NULL;
|
||||
- spin_unlock_irq(&worker->lock);
|
||||
+ raw_spin_unlock_irq(&worker->lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
work = NULL;
|
||||
- spin_lock_irq(&worker->lock);
|
||||
+ raw_spin_lock_irq(&worker->lock);
|
||||
if (!list_empty(&worker->work_list)) {
|
||||
work = list_first_entry(&worker->work_list,
|
||||
struct kthread_work, node);
|
||||
list_del_init(&work->node);
|
||||
}
|
||||
worker->current_work = work;
|
||||
- spin_unlock_irq(&worker->lock);
|
||||
+ raw_spin_unlock_irq(&worker->lock);
|
||||
|
||||
if (work) {
|
||||
__set_current_state(TASK_RUNNING);
|
||||
@@ -812,12 +812,12 @@ bool kthread_queue_work(struct kthread_worker *worker,
|
||||
bool ret = false;
|
||||
unsigned long flags;
|
||||
|
||||
- spin_lock_irqsave(&worker->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&worker->lock, flags);
|
||||
if (!queuing_blocked(worker, work)) {
|
||||
kthread_insert_work(worker, work, &worker->work_list);
|
||||
ret = true;
|
||||
}
|
||||
- spin_unlock_irqrestore(&worker->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&worker->lock, flags);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kthread_queue_work);
|
||||
@@ -843,7 +843,7 @@ void kthread_delayed_work_timer_fn(struct timer_list *t)
|
||||
if (WARN_ON_ONCE(!worker))
|
||||
return;
|
||||
|
||||
- spin_lock(&worker->lock);
|
||||
+ raw_spin_lock(&worker->lock);
|
||||
/* Work must not be used with >1 worker, see kthread_queue_work(). */
|
||||
WARN_ON_ONCE(work->worker != worker);
|
||||
|
||||
@@ -852,7 +852,7 @@ void kthread_delayed_work_timer_fn(struct timer_list *t)
|
||||
list_del_init(&work->node);
|
||||
kthread_insert_work(worker, work, &worker->work_list);
|
||||
|
||||
- spin_unlock(&worker->lock);
|
||||
+ raw_spin_unlock(&worker->lock);
|
||||
}
|
||||
EXPORT_SYMBOL(kthread_delayed_work_timer_fn);
|
||||
|
||||
@@ -908,14 +908,14 @@ bool kthread_queue_delayed_work(struct kthread_worker *worker,
|
||||
unsigned long flags;
|
||||
bool ret = false;
|
||||
|
||||
- spin_lock_irqsave(&worker->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&worker->lock, flags);
|
||||
|
||||
if (!queuing_blocked(worker, work)) {
|
||||
__kthread_queue_delayed_work(worker, dwork, delay);
|
||||
ret = true;
|
||||
}
|
||||
|
||||
- spin_unlock_irqrestore(&worker->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&worker->lock, flags);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kthread_queue_delayed_work);
|
||||
@@ -951,7 +951,7 @@ void kthread_flush_work(struct kthread_work *work)
|
||||
if (!worker)
|
||||
return;
|
||||
|
||||
- spin_lock_irq(&worker->lock);
|
||||
+ raw_spin_lock_irq(&worker->lock);
|
||||
/* Work must not be used with >1 worker, see kthread_queue_work(). */
|
||||
WARN_ON_ONCE(work->worker != worker);
|
||||
|
||||
@@ -963,7 +963,7 @@ void kthread_flush_work(struct kthread_work *work)
|
||||
else
|
||||
noop = true;
|
||||
|
||||
- spin_unlock_irq(&worker->lock);
|
||||
+ raw_spin_unlock_irq(&worker->lock);
|
||||
|
||||
if (!noop)
|
||||
wait_for_completion(&fwork.done);
|
||||
@@ -996,9 +996,9 @@ static bool __kthread_cancel_work(struct kthread_work *work, bool is_dwork,
|
||||
* any queuing is blocked by setting the canceling counter.
|
||||
*/
|
||||
work->canceling++;
|
||||
- spin_unlock_irqrestore(&worker->lock, *flags);
|
||||
+ raw_spin_unlock_irqrestore(&worker->lock, *flags);
|
||||
del_timer_sync(&dwork->timer);
|
||||
- spin_lock_irqsave(&worker->lock, *flags);
|
||||
+ raw_spin_lock_irqsave(&worker->lock, *flags);
|
||||
work->canceling--;
|
||||
}
|
||||
|
||||
@@ -1045,7 +1045,7 @@ bool kthread_mod_delayed_work(struct kthread_worker *worker,
|
||||
unsigned long flags;
|
||||
int ret = false;
|
||||
|
||||
- spin_lock_irqsave(&worker->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&worker->lock, flags);
|
||||
|
||||
/* Do not bother with canceling when never queued. */
|
||||
if (!work->worker)
|
||||
@@ -1062,7 +1062,7 @@ bool kthread_mod_delayed_work(struct kthread_worker *worker,
|
||||
fast_queue:
|
||||
__kthread_queue_delayed_work(worker, dwork, delay);
|
||||
out:
|
||||
- spin_unlock_irqrestore(&worker->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&worker->lock, flags);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kthread_mod_delayed_work);
|
||||
@@ -1076,7 +1076,7 @@ static bool __kthread_cancel_work_sync(struct kthread_work *work, bool is_dwork)
|
||||
if (!worker)
|
||||
goto out;
|
||||
|
||||
- spin_lock_irqsave(&worker->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&worker->lock, flags);
|
||||
/* Work must not be used with >1 worker, see kthread_queue_work(). */
|
||||
WARN_ON_ONCE(work->worker != worker);
|
||||
|
||||
@@ -1090,13 +1090,13 @@ static bool __kthread_cancel_work_sync(struct kthread_work *work, bool is_dwork)
|
||||
* In the meantime, block any queuing by setting the canceling counter.
|
||||
*/
|
||||
work->canceling++;
|
||||
- spin_unlock_irqrestore(&worker->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&worker->lock, flags);
|
||||
kthread_flush_work(work);
|
||||
- spin_lock_irqsave(&worker->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&worker->lock, flags);
|
||||
work->canceling--;
|
||||
|
||||
out_fast:
|
||||
- spin_unlock_irqrestore(&worker->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&worker->lock, flags);
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
--
|
||||
2.25.1
|
||||
|
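The hunks above convert every spin_lock_*() call on worker->lock into its raw_spin_lock_*() counterpart so the lock keeps spinning, and stays usable from hard-IRQ context such as kthread_delayed_work_timer_fn(), on PREEMPT_RT. A minimal sketch of the same conversion on a hypothetical structure (demo_* names are made up, not taken from the patch set):

#include <linux/spinlock.h>
#include <linux/list.h>

/* Hypothetical queue whose lock is taken from a timer callback, so it must
 * remain a true spinning lock on PREEMPT_RT. */
struct demo_queue {
        raw_spinlock_t lock;            /* was spinlock_t before the conversion */
        struct list_head items;
};

static void demo_queue_init(struct demo_queue *q)
{
        raw_spin_lock_init(&q->lock);
        INIT_LIST_HEAD(&q->items);
}

static void demo_queue_add(struct demo_queue *q, struct list_head *item)
{
        unsigned long flags;

        /* _irqsave works from any context, mirroring kthread_queue_work() */
        raw_spin_lock_irqsave(&q->lock, flags);
        list_add_tail(item, &q->items);
        raw_spin_unlock_irqrestore(&q->lock, flags);
}

Only the lock type changes; the critical sections stay as short as before, which is what makes the raw lock acceptable on RT.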
@ -1,139 +0,0 @@
|
||||
From 1e7f9f15b5cb5088ac28a0919a2fcc74bfc5f5c7 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Horia=20Geant=C4=83?= <horia.geanta@nxp.com>
|
||||
Date: Mon, 8 Oct 2018 14:09:37 +0300
|
||||
Subject: [PATCH 010/328] crypto: caam/qi - simplify CGR allocation, freeing
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
[Upstream commit 29e83c757006fd751966bdc53392bb22d74179c6]
|
||||
|
||||
CGRs (Congestion Groups) have to be freed by the same CPU that
|
||||
initialized them.
|
||||
This is why currently the driver takes special measures; however, using
|
||||
set_cpus_allowed_ptr() is incorrect - as reported by Sebastian.
|
||||
|
||||
Instead of the generic solution of replacing set_cpus_allowed_ptr() with
|
||||
work_on_cpu_safe(), we use the qman_delete_cgr_safe() QBMan API instead
|
||||
of qman_delete_cgr() - which internally takes care of proper CGR
|
||||
deletion.
|
||||
|
||||
Link: https://lkml.kernel.org/r/20181005125443.dfhd2asqktm22ney@linutronix.de
|
||||
Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
---
|
||||
drivers/crypto/caam/qi.c | 43 ++++------------------------------------
|
||||
drivers/crypto/caam/qi.h | 2 +-
|
||||
2 files changed, 5 insertions(+), 40 deletions(-)
|
||||
|
||||
diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c
|
||||
index 67f7f8c42c93..b84e6c8b1e13 100644
|
||||
--- a/drivers/crypto/caam/qi.c
|
||||
+++ b/drivers/crypto/caam/qi.c
|
||||
@@ -83,13 +83,6 @@ EXPORT_SYMBOL(caam_congested);
|
||||
static u64 times_congested;
|
||||
#endif
|
||||
|
||||
-/*
|
||||
- * CPU from where the module initialised. This is required because QMan driver
|
||||
- * requires CGRs to be removed from same CPU from where they were originally
|
||||
- * allocated.
|
||||
- */
|
||||
-static int mod_init_cpu;
|
||||
-
|
||||
/*
|
||||
* This is a a cache of buffers, from which the users of CAAM QI driver
|
||||
* can allocate short (CAAM_QI_MEMCACHE_SIZE) buffers. It's faster than
|
||||
@@ -492,12 +485,11 @@ void caam_drv_ctx_rel(struct caam_drv_ctx *drv_ctx)
|
||||
}
|
||||
EXPORT_SYMBOL(caam_drv_ctx_rel);
|
||||
|
||||
-int caam_qi_shutdown(struct device *qidev)
|
||||
+void caam_qi_shutdown(struct device *qidev)
|
||||
{
|
||||
- int i, ret;
|
||||
+ int i;
|
||||
struct caam_qi_priv *priv = dev_get_drvdata(qidev);
|
||||
const cpumask_t *cpus = qman_affine_cpus();
|
||||
- struct cpumask old_cpumask = current->cpus_allowed;
|
||||
|
||||
for_each_cpu(i, cpus) {
|
||||
struct napi_struct *irqtask;
|
||||
@@ -510,26 +502,12 @@ int caam_qi_shutdown(struct device *qidev)
|
||||
dev_err(qidev, "Rsp FQ kill failed, cpu: %d\n", i);
|
||||
}
|
||||
|
||||
- /*
|
||||
- * QMan driver requires CGRs to be deleted from same CPU from where they
|
||||
- * were instantiated. Hence we get the module removal execute from the
|
||||
- * same CPU from where it was originally inserted.
|
||||
- */
|
||||
- set_cpus_allowed_ptr(current, get_cpu_mask(mod_init_cpu));
|
||||
-
|
||||
- ret = qman_delete_cgr(&priv->cgr);
|
||||
- if (ret)
|
||||
- dev_err(qidev, "Deletion of CGR failed: %d\n", ret);
|
||||
- else
|
||||
- qman_release_cgrid(priv->cgr.cgrid);
|
||||
+ qman_delete_cgr_safe(&priv->cgr);
|
||||
+ qman_release_cgrid(priv->cgr.cgrid);
|
||||
|
||||
kmem_cache_destroy(qi_cache);
|
||||
|
||||
- /* Now that we're done with the CGRs, restore the cpus allowed mask */
|
||||
- set_cpus_allowed_ptr(current, &old_cpumask);
|
||||
-
|
||||
platform_device_unregister(priv->qi_pdev);
|
||||
- return ret;
|
||||
}
|
||||
|
||||
static void cgr_cb(struct qman_portal *qm, struct qman_cgr *cgr, int congested)
|
||||
@@ -718,22 +696,11 @@ int caam_qi_init(struct platform_device *caam_pdev)
|
||||
struct device *ctrldev = &caam_pdev->dev, *qidev;
|
||||
struct caam_drv_private *ctrlpriv;
|
||||
const cpumask_t *cpus = qman_affine_cpus();
|
||||
- struct cpumask old_cpumask = current->cpus_allowed;
|
||||
static struct platform_device_info qi_pdev_info = {
|
||||
.name = "caam_qi",
|
||||
.id = PLATFORM_DEVID_NONE
|
||||
};
|
||||
|
||||
- /*
|
||||
- * QMAN requires CGRs to be removed from same CPU+portal from where it
|
||||
- * was originally allocated. Hence we need to note down the
|
||||
- * initialisation CPU and use the same CPU for module exit.
|
||||
- * We select the first CPU to from the list of portal owning CPUs.
|
||||
- * Then we pin module init to this CPU.
|
||||
- */
|
||||
- mod_init_cpu = cpumask_first(cpus);
|
||||
- set_cpus_allowed_ptr(current, get_cpu_mask(mod_init_cpu));
|
||||
-
|
||||
qi_pdev_info.parent = ctrldev;
|
||||
qi_pdev_info.dma_mask = dma_get_mask(ctrldev);
|
||||
qi_pdev = platform_device_register_full(&qi_pdev_info);
|
||||
@@ -795,8 +762,6 @@ int caam_qi_init(struct platform_device *caam_pdev)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
- /* Done with the CGRs; restore the cpus allowed mask */
|
||||
- set_cpus_allowed_ptr(current, &old_cpumask);
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
debugfs_create_file("qi_congested", 0444, ctrlpriv->ctl,
|
||||
&times_congested, &caam_fops_u64_ro);
|
||||
diff --git a/drivers/crypto/caam/qi.h b/drivers/crypto/caam/qi.h
|
||||
index 357b69f57072..b6c8acc30853 100644
|
||||
--- a/drivers/crypto/caam/qi.h
|
||||
+++ b/drivers/crypto/caam/qi.h
|
||||
@@ -174,7 +174,7 @@ int caam_drv_ctx_update(struct caam_drv_ctx *drv_ctx, u32 *sh_desc);
|
||||
void caam_drv_ctx_rel(struct caam_drv_ctx *drv_ctx);
|
||||
|
||||
int caam_qi_init(struct platform_device *pdev);
|
||||
-int caam_qi_shutdown(struct device *dev);
|
||||
+void caam_qi_shutdown(struct device *dev);
|
||||
|
||||
/**
|
||||
* qi_cache_alloc - Allocate buffers from CAAM-QI cache
|
||||
--
|
||||
2.25.1
|
||||
|
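The commit message above replaces the manual CPU-pinning dance with qman_delete_cgr_safe(), which internally makes sure the deletion runs on the CPU that created the CGR. A hedged sketch of the resulting teardown pattern (assuming the DPAA QMan API from soc/fsl/qman.h; demo_cgr_teardown() itself is hypothetical):

#include <soc/fsl/qman.h>

/* Free a congestion group without touching the caller's CPU affinity:
 * qman_delete_cgr_safe() arranges for the delete to run on the owning CPU. */
static void demo_cgr_teardown(struct qman_cgr *cgr)
{
        qman_delete_cgr_safe(cgr);
        qman_release_cgrid(cgr->cgrid);
}

Compared with qman_delete_cgr(), the _safe variant removes the need to save and restore current's cpus_allowed mask around module exit.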
@ -1,147 +0,0 @@
|
||||
From 2a9fed89a7bea6fbe31e717ab5f277405e20826e Mon Sep 17 00:00:00 2001
|
||||
From: Peter Zijlstra <peterz@infradead.org>
|
||||
Date: Mon, 7 Jan 2019 13:52:31 +0100
|
||||
Subject: [PATCH 011/328] sched/fair: Robustify CFS-bandwidth timer locking
|
||||
|
||||
Traditionally hrtimer callbacks were run with IRQs disabled, but with
|
||||
the introduction of HRTIMER_MODE_SOFT it is possible they run from
|
||||
SoftIRQ context, which does _NOT_ have IRQs disabled.
|
||||
|
||||
Allow for the CFS bandwidth timers (period_timer and slack_timer) to
|
||||
be run from SoftIRQ context; this entails removing the assumption that
|
||||
IRQs are already disabled from the locking.
|
||||
|
||||
While mainline doesn't strictly need this, -RT forces all timers not
|
||||
explicitly marked with MODE_HARD into MODE_SOFT and trips over this.
|
||||
And marking these timers as MODE_HARD doesn't make sense as they're
|
||||
not required for RT operation and can potentially be quite expensive.
|
||||
|
||||
Cc: Ingo Molnar <mingo@redhat.com>
|
||||
Cc: Thomas Gleixner <tglx@linutronix.de>
|
||||
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Reported-by: Tom Putzeys <tom.putzeys@be.atlascopco.com>
|
||||
Tested-by: Mike Galbraith <efault@gmx.de>
|
||||
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
|
||||
Link: https://lkml.kernel.org/r/20190107125231.GE14122@hirez.programming.kicks-ass.net
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/sched/fair.c | 30 ++++++++++++++++--------------
|
||||
1 file changed, 16 insertions(+), 14 deletions(-)
|
||||
|
||||
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
|
||||
index 7f4f4ab5bfef..0f1ba3d72336 100644
|
||||
--- a/kernel/sched/fair.c
|
||||
+++ b/kernel/sched/fair.c
|
||||
@@ -4576,7 +4576,7 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, u64 remaining)
|
||||
struct rq *rq = rq_of(cfs_rq);
|
||||
struct rq_flags rf;
|
||||
|
||||
- rq_lock(rq, &rf);
|
||||
+ rq_lock_irqsave(rq, &rf);
|
||||
if (!cfs_rq_throttled(cfs_rq))
|
||||
goto next;
|
||||
|
||||
@@ -4595,7 +4595,7 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, u64 remaining)
|
||||
unthrottle_cfs_rq(cfs_rq);
|
||||
|
||||
next:
|
||||
- rq_unlock(rq, &rf);
|
||||
+ rq_unlock_irqrestore(rq, &rf);
|
||||
|
||||
if (!remaining)
|
||||
break;
|
||||
@@ -4611,7 +4611,7 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, u64 remaining)
|
||||
* period the timer is deactivated until scheduling resumes; cfs_b->idle is
|
||||
* used to track this state.
|
||||
*/
|
||||
-static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
|
||||
+static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun, unsigned long flags)
|
||||
{
|
||||
u64 runtime;
|
||||
int throttled;
|
||||
@@ -4651,10 +4651,10 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
|
||||
while (throttled && cfs_b->runtime > 0 && !cfs_b->distribute_running) {
|
||||
runtime = cfs_b->runtime;
|
||||
cfs_b->distribute_running = 1;
|
||||
- raw_spin_unlock(&cfs_b->lock);
|
||||
+ raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
|
||||
/* we can't nest cfs_b->lock while distributing bandwidth */
|
||||
runtime = distribute_cfs_runtime(cfs_b, runtime);
|
||||
- raw_spin_lock(&cfs_b->lock);
|
||||
+ raw_spin_lock_irqsave(&cfs_b->lock, flags);
|
||||
|
||||
cfs_b->distribute_running = 0;
|
||||
throttled = !list_empty(&cfs_b->throttled_cfs_rq);
|
||||
@@ -4762,16 +4762,17 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
|
||||
static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
|
||||
{
|
||||
u64 runtime = 0, slice = sched_cfs_bandwidth_slice();
|
||||
+ unsigned long flags;
|
||||
|
||||
/* confirm we're still not at a refresh boundary */
|
||||
- raw_spin_lock(&cfs_b->lock);
|
||||
+ raw_spin_lock_irqsave(&cfs_b->lock, flags);
|
||||
if (cfs_b->distribute_running) {
|
||||
- raw_spin_unlock(&cfs_b->lock);
|
||||
+ raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
|
||||
return;
|
||||
}
|
||||
|
||||
if (runtime_refresh_within(cfs_b, min_bandwidth_expiration)) {
|
||||
- raw_spin_unlock(&cfs_b->lock);
|
||||
+ raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -4781,17 +4782,17 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
|
||||
if (runtime)
|
||||
cfs_b->distribute_running = 1;
|
||||
|
||||
- raw_spin_unlock(&cfs_b->lock);
|
||||
+ raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
|
||||
|
||||
if (!runtime)
|
||||
return;
|
||||
|
||||
runtime = distribute_cfs_runtime(cfs_b, runtime);
|
||||
|
||||
- raw_spin_lock(&cfs_b->lock);
|
||||
+ raw_spin_lock_irqsave(&cfs_b->lock, flags);
|
||||
cfs_b->runtime -= min(runtime, cfs_b->runtime);
|
||||
cfs_b->distribute_running = 0;
|
||||
- raw_spin_unlock(&cfs_b->lock);
|
||||
+ raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -4871,11 +4872,12 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
|
||||
{
|
||||
struct cfs_bandwidth *cfs_b =
|
||||
container_of(timer, struct cfs_bandwidth, period_timer);
|
||||
+ unsigned long flags;
|
||||
int overrun;
|
||||
int idle = 0;
|
||||
int count = 0;
|
||||
|
||||
- raw_spin_lock(&cfs_b->lock);
|
||||
+ raw_spin_lock_irqsave(&cfs_b->lock, flags);
|
||||
for (;;) {
|
||||
overrun = hrtimer_forward_now(timer, cfs_b->period);
|
||||
if (!overrun)
|
||||
@@ -4911,11 +4913,11 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
|
||||
count = 0;
|
||||
}
|
||||
|
||||
- idle = do_sched_cfs_period_timer(cfs_b, overrun);
|
||||
+ idle = do_sched_cfs_period_timer(cfs_b, overrun, flags);
|
||||
}
|
||||
if (idle)
|
||||
cfs_b->period_active = 0;
|
||||
- raw_spin_unlock(&cfs_b->lock);
|
||||
+ raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
|
||||
|
||||
return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
|
||||
}
|
||||
--
|
||||
2.25.1
|
||||
|
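The patch above exists because an hrtimer callback can run in SoftIRQ context with interrupts enabled once -RT forces it into HRTIMER_MODE_SOFT, so the lock acquisitions inside the callback must disable interrupts themselves. A minimal, hypothetical illustration of that rule (demo_* names are invented):

#include <linux/hrtimer.h>
#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(demo_lock);

/* May be invoked from hard-IRQ context (mainline) or SoftIRQ context (RT),
 * so it cannot assume interrupts are already off. */
static enum hrtimer_restart demo_period_timer(struct hrtimer *timer)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&demo_lock, flags);       /* not raw_spin_lock() */
        /* ... refill runtime, decide whether to restart ... */
        raw_spin_unlock_irqrestore(&demo_lock, flags);

        return HRTIMER_NORESTART;
}

Whenever the lock is dropped and re-taken inside the callback, the saved flags value is threaded through the helpers, exactly as done with do_sched_cfs_period_timer() above.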
@ -1,431 +0,0 @@
|
||||
From 7c89d978bdfea369853567288ced4880deddd0b1 Mon Sep 17 00:00:00 2001
|
||||
From: Frank Rowand <frank.rowand@am.sony.com>
|
||||
Date: Mon, 19 Sep 2011 14:51:14 -0700
|
||||
Subject: [PATCH 012/328] arm: Convert arm boot_lock to raw
|
||||
|
||||
The arm boot_lock is used by the secondary processor startup code. The locking
|
||||
task is the idle thread, which has idle->sched_class == &idle_sched_class.
|
||||
idle_sched_class->enqueue_task == NULL, so if the idle task blocks on the
|
||||
lock, the attempt to wake it when the lock becomes available will fail:
|
||||
|
||||
try_to_wake_up()
|
||||
...
|
||||
activate_task()
|
||||
enqueue_task()
|
||||
p->sched_class->enqueue_task(rq, p, flags)
|
||||
|
||||
Fix by converting boot_lock to a raw spin lock.
|
||||
|
||||
Signed-off-by: Frank Rowand <frank.rowand@am.sony.com>
|
||||
Link: http://lkml.kernel.org/r/4E77B952.3010606@am.sony.com
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Tested-by: Tony Lindgren <tony@atomide.com>
|
||||
Acked-by: Krzysztof Kozlowski <krzk@kernel.org>
|
||||
Tested-by: Krzysztof Kozlowski <krzk@kernel.org> [Exynos5422 Linaro PM-QA]
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
arch/arm/mach-exynos/platsmp.c | 12 ++++++------
|
||||
arch/arm/mach-hisi/platmcpm.c | 22 +++++++++++-----------
|
||||
arch/arm/mach-omap2/omap-smp.c | 10 +++++-----
|
||||
arch/arm/mach-prima2/platsmp.c | 10 +++++-----
|
||||
arch/arm/mach-qcom/platsmp.c | 10 +++++-----
|
||||
arch/arm/mach-spear/platsmp.c | 10 +++++-----
|
||||
arch/arm/mach-sti/platsmp.c | 10 +++++-----
|
||||
arch/arm/plat-versatile/platsmp.c | 10 +++++-----
|
||||
8 files changed, 47 insertions(+), 47 deletions(-)
|
||||
|
||||
diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c
|
||||
index 6a1e682371b3..17dca0ff336e 100644
|
||||
--- a/arch/arm/mach-exynos/platsmp.c
|
||||
+++ b/arch/arm/mach-exynos/platsmp.c
|
||||
@@ -239,7 +239,7 @@ static void write_pen_release(int val)
|
||||
sync_cache_w(&pen_release);
|
||||
}
|
||||
|
||||
-static DEFINE_SPINLOCK(boot_lock);
|
||||
+static DEFINE_RAW_SPINLOCK(boot_lock);
|
||||
|
||||
static void exynos_secondary_init(unsigned int cpu)
|
||||
{
|
||||
@@ -252,8 +252,8 @@ static void exynos_secondary_init(unsigned int cpu)
|
||||
/*
|
||||
* Synchronise with the boot thread.
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
}
|
||||
|
||||
int exynos_set_boot_addr(u32 core_id, unsigned long boot_addr)
|
||||
@@ -317,7 +317,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle)
|
||||
* Set synchronisation state between this boot processor
|
||||
* and the secondary one
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
|
||||
/*
|
||||
* The secondary processor is waiting to be released from
|
||||
@@ -344,7 +344,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle)
|
||||
|
||||
if (timeout == 0) {
|
||||
printk(KERN_ERR "cpu1 power enable failed");
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
return -ETIMEDOUT;
|
||||
}
|
||||
}
|
||||
@@ -390,7 +390,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle)
|
||||
* calibrations, then wait for it to finish
|
||||
*/
|
||||
fail:
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
|
||||
return pen_release != -1 ? ret : 0;
|
||||
}
|
||||
diff --git a/arch/arm/mach-hisi/platmcpm.c b/arch/arm/mach-hisi/platmcpm.c
|
||||
index f66815c3dd07..00524abd963f 100644
|
||||
--- a/arch/arm/mach-hisi/platmcpm.c
|
||||
+++ b/arch/arm/mach-hisi/platmcpm.c
|
||||
@@ -61,7 +61,7 @@
|
||||
|
||||
static void __iomem *sysctrl, *fabric;
|
||||
static int hip04_cpu_table[HIP04_MAX_CLUSTERS][HIP04_MAX_CPUS_PER_CLUSTER];
|
||||
-static DEFINE_SPINLOCK(boot_lock);
|
||||
+static DEFINE_RAW_SPINLOCK(boot_lock);
|
||||
static u32 fabric_phys_addr;
|
||||
/*
|
||||
* [0]: bootwrapper physical address
|
||||
@@ -113,7 +113,7 @@ static int hip04_boot_secondary(unsigned int l_cpu, struct task_struct *idle)
|
||||
if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER)
|
||||
return -EINVAL;
|
||||
|
||||
- spin_lock_irq(&boot_lock);
|
||||
+ raw_spin_lock_irq(&boot_lock);
|
||||
|
||||
if (hip04_cpu_table[cluster][cpu])
|
||||
goto out;
|
||||
@@ -147,7 +147,7 @@ static int hip04_boot_secondary(unsigned int l_cpu, struct task_struct *idle)
|
||||
|
||||
out:
|
||||
hip04_cpu_table[cluster][cpu]++;
|
||||
- spin_unlock_irq(&boot_lock);
|
||||
+ raw_spin_unlock_irq(&boot_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -162,11 +162,11 @@ static void hip04_cpu_die(unsigned int l_cpu)
|
||||
cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
|
||||
cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
|
||||
|
||||
- spin_lock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
hip04_cpu_table[cluster][cpu]--;
|
||||
if (hip04_cpu_table[cluster][cpu] == 1) {
|
||||
/* A power_up request went ahead of us. */
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
return;
|
||||
} else if (hip04_cpu_table[cluster][cpu] > 1) {
|
||||
pr_err("Cluster %d CPU%d boots multiple times\n", cluster, cpu);
|
||||
@@ -174,7 +174,7 @@ static void hip04_cpu_die(unsigned int l_cpu)
|
||||
}
|
||||
|
||||
last_man = hip04_cluster_is_down(cluster);
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
if (last_man) {
|
||||
/* Since it's Cortex A15, disable L2 prefetching. */
|
||||
asm volatile(
|
||||
@@ -203,7 +203,7 @@ static int hip04_cpu_kill(unsigned int l_cpu)
|
||||
cpu >= HIP04_MAX_CPUS_PER_CLUSTER);
|
||||
|
||||
count = TIMEOUT_MSEC / POLL_MSEC;
|
||||
- spin_lock_irq(&boot_lock);
|
||||
+ raw_spin_lock_irq(&boot_lock);
|
||||
for (tries = 0; tries < count; tries++) {
|
||||
if (hip04_cpu_table[cluster][cpu])
|
||||
goto err;
|
||||
@@ -211,10 +211,10 @@ static int hip04_cpu_kill(unsigned int l_cpu)
|
||||
data = readl_relaxed(sysctrl + SC_CPU_RESET_STATUS(cluster));
|
||||
if (data & CORE_WFI_STATUS(cpu))
|
||||
break;
|
||||
- spin_unlock_irq(&boot_lock);
|
||||
+ raw_spin_unlock_irq(&boot_lock);
|
||||
/* Wait for clean L2 when the whole cluster is down. */
|
||||
msleep(POLL_MSEC);
|
||||
- spin_lock_irq(&boot_lock);
|
||||
+ raw_spin_lock_irq(&boot_lock);
|
||||
}
|
||||
if (tries >= count)
|
||||
goto err;
|
||||
@@ -231,10 +231,10 @@ static int hip04_cpu_kill(unsigned int l_cpu)
|
||||
goto err;
|
||||
if (hip04_cluster_is_down(cluster))
|
||||
hip04_set_snoop_filter(cluster, 0);
|
||||
- spin_unlock_irq(&boot_lock);
|
||||
+ raw_spin_unlock_irq(&boot_lock);
|
||||
return 1;
|
||||
err:
|
||||
- spin_unlock_irq(&boot_lock);
|
||||
+ raw_spin_unlock_irq(&boot_lock);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c
|
||||
index 1c73694c871a..ac4d2f030b87 100644
|
||||
--- a/arch/arm/mach-omap2/omap-smp.c
|
||||
+++ b/arch/arm/mach-omap2/omap-smp.c
|
||||
@@ -69,7 +69,7 @@ static const struct omap_smp_config omap5_cfg __initconst = {
|
||||
.startup_addr = omap5_secondary_startup,
|
||||
};
|
||||
|
||||
-static DEFINE_SPINLOCK(boot_lock);
|
||||
+static DEFINE_RAW_SPINLOCK(boot_lock);
|
||||
|
||||
void __iomem *omap4_get_scu_base(void)
|
||||
{
|
||||
@@ -177,8 +177,8 @@ static void omap4_secondary_init(unsigned int cpu)
|
||||
/*
|
||||
* Synchronise with the boot thread.
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
}
|
||||
|
||||
static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle)
|
||||
@@ -191,7 +191,7 @@ static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle)
|
||||
* Set synchronisation state between this boot processor
|
||||
* and the secondary one
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
|
||||
/*
|
||||
* Update the AuxCoreBoot0 with boot state for secondary core.
|
||||
@@ -270,7 +270,7 @@ static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle)
|
||||
* Now the secondary core is starting up let it run its
|
||||
* calibrations, then wait for it to finish
|
||||
*/
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
diff --git a/arch/arm/mach-prima2/platsmp.c b/arch/arm/mach-prima2/platsmp.c
|
||||
index 75ef5d4be554..c17c86e5d860 100644
|
||||
--- a/arch/arm/mach-prima2/platsmp.c
|
||||
+++ b/arch/arm/mach-prima2/platsmp.c
|
||||
@@ -22,7 +22,7 @@
|
||||
|
||||
static void __iomem *clk_base;
|
||||
|
||||
-static DEFINE_SPINLOCK(boot_lock);
|
||||
+static DEFINE_RAW_SPINLOCK(boot_lock);
|
||||
|
||||
static void sirfsoc_secondary_init(unsigned int cpu)
|
||||
{
|
||||
@@ -36,8 +36,8 @@ static void sirfsoc_secondary_init(unsigned int cpu)
|
||||
/*
|
||||
* Synchronise with the boot thread.
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
}
|
||||
|
||||
static const struct of_device_id clk_ids[] = {
|
||||
@@ -75,7 +75,7 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle)
|
||||
/* make sure write buffer is drained */
|
||||
mb();
|
||||
|
||||
- spin_lock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
|
||||
/*
|
||||
* The secondary processor is waiting to be released from
|
||||
@@ -107,7 +107,7 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle)
|
||||
* now the secondary core is starting up let it run its
|
||||
* calibrations, then wait for it to finish
|
||||
*/
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
|
||||
return pen_release != -1 ? -ENOSYS : 0;
|
||||
}
|
||||
diff --git a/arch/arm/mach-qcom/platsmp.c b/arch/arm/mach-qcom/platsmp.c
|
||||
index 5494c9e0c909..e8ce157d3548 100644
|
||||
--- a/arch/arm/mach-qcom/platsmp.c
|
||||
+++ b/arch/arm/mach-qcom/platsmp.c
|
||||
@@ -46,7 +46,7 @@
|
||||
|
||||
extern void secondary_startup_arm(void);
|
||||
|
||||
-static DEFINE_SPINLOCK(boot_lock);
|
||||
+static DEFINE_RAW_SPINLOCK(boot_lock);
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
static void qcom_cpu_die(unsigned int cpu)
|
||||
@@ -60,8 +60,8 @@ static void qcom_secondary_init(unsigned int cpu)
|
||||
/*
|
||||
* Synchronise with the boot thread.
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
}
|
||||
|
||||
static int scss_release_secondary(unsigned int cpu)
|
||||
@@ -284,7 +284,7 @@ static int qcom_boot_secondary(unsigned int cpu, int (*func)(unsigned int))
|
||||
* set synchronisation state between this boot processor
|
||||
* and the secondary one
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
|
||||
/*
|
||||
* Send the secondary CPU a soft interrupt, thereby causing
|
||||
@@ -297,7 +297,7 @@ static int qcom_boot_secondary(unsigned int cpu, int (*func)(unsigned int))
|
||||
* now the secondary core is starting up let it run its
|
||||
* calibrations, then wait for it to finish
|
||||
*/
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
diff --git a/arch/arm/mach-spear/platsmp.c b/arch/arm/mach-spear/platsmp.c
|
||||
index 39038a03836a..6da5c93872bf 100644
|
||||
--- a/arch/arm/mach-spear/platsmp.c
|
||||
+++ b/arch/arm/mach-spear/platsmp.c
|
||||
@@ -32,7 +32,7 @@ static void write_pen_release(int val)
|
||||
sync_cache_w(&pen_release);
|
||||
}
|
||||
|
||||
-static DEFINE_SPINLOCK(boot_lock);
|
||||
+static DEFINE_RAW_SPINLOCK(boot_lock);
|
||||
|
||||
static void __iomem *scu_base = IOMEM(VA_SCU_BASE);
|
||||
|
||||
@@ -47,8 +47,8 @@ static void spear13xx_secondary_init(unsigned int cpu)
|
||||
/*
|
||||
* Synchronise with the boot thread.
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
}
|
||||
|
||||
static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle)
|
||||
@@ -59,7 +59,7 @@ static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle)
|
||||
* set synchronisation state between this boot processor
|
||||
* and the secondary one
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
|
||||
/*
|
||||
* The secondary processor is waiting to be released from
|
||||
@@ -84,7 +84,7 @@ static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle)
|
||||
* now the secondary core is starting up let it run its
|
||||
* calibrations, then wait for it to finish
|
||||
*/
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
|
||||
return pen_release != -1 ? -ENOSYS : 0;
|
||||
}
|
||||
diff --git a/arch/arm/mach-sti/platsmp.c b/arch/arm/mach-sti/platsmp.c
|
||||
index 231f19e17436..a3419b7003e6 100644
|
||||
--- a/arch/arm/mach-sti/platsmp.c
|
||||
+++ b/arch/arm/mach-sti/platsmp.c
|
||||
@@ -35,7 +35,7 @@ static void write_pen_release(int val)
|
||||
sync_cache_w(&pen_release);
|
||||
}
|
||||
|
||||
-static DEFINE_SPINLOCK(boot_lock);
|
||||
+static DEFINE_RAW_SPINLOCK(boot_lock);
|
||||
|
||||
static void sti_secondary_init(unsigned int cpu)
|
||||
{
|
||||
@@ -48,8 +48,8 @@ static void sti_secondary_init(unsigned int cpu)
|
||||
/*
|
||||
* Synchronise with the boot thread.
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
}
|
||||
|
||||
static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle)
|
||||
@@ -60,7 +60,7 @@ static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle)
|
||||
* set synchronisation state between this boot processor
|
||||
* and the secondary one
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
|
||||
/*
|
||||
* The secondary processor is waiting to be released from
|
||||
@@ -91,7 +91,7 @@ static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle)
|
||||
* now the secondary core is starting up let it run its
|
||||
* calibrations, then wait for it to finish
|
||||
*/
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
|
||||
return pen_release != -1 ? -ENOSYS : 0;
|
||||
}
|
||||
diff --git a/arch/arm/plat-versatile/platsmp.c b/arch/arm/plat-versatile/platsmp.c
|
||||
index c2366510187a..6b60f582b738 100644
|
||||
--- a/arch/arm/plat-versatile/platsmp.c
|
||||
+++ b/arch/arm/plat-versatile/platsmp.c
|
||||
@@ -32,7 +32,7 @@ static void write_pen_release(int val)
|
||||
sync_cache_w(&pen_release);
|
||||
}
|
||||
|
||||
-static DEFINE_SPINLOCK(boot_lock);
|
||||
+static DEFINE_RAW_SPINLOCK(boot_lock);
|
||||
|
||||
void versatile_secondary_init(unsigned int cpu)
|
||||
{
|
||||
@@ -45,8 +45,8 @@ void versatile_secondary_init(unsigned int cpu)
|
||||
/*
|
||||
* Synchronise with the boot thread.
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
}
|
||||
|
||||
int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle)
|
||||
@@ -57,7 +57,7 @@ int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle)
|
||||
* Set synchronisation state between this boot processor
|
||||
* and the secondary one
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
|
||||
/*
|
||||
* This is really belt and braces; we hold unintended secondary
|
||||
@@ -87,7 +87,7 @@ int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle)
|
||||
* now the secondary core is starting up let it run its
|
||||
* calibrations, then wait for it to finish
|
||||
*/
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
|
||||
return pen_release != -1 ? -ENOSYS : 0;
|
||||
}
|
||||
--
|
||||
2.25.1
|
||||
|
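The reasoning above is that boot_lock is taken by the idle task during secondary bring-up, and the idle task must never block on a sleeping lock because it cannot be woken through the scheduler. The generic shape of the converted code, as a sketch with made-up function names:

#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(demo_boot_lock);     /* raw: the idle task may spin here */

/* Boot CPU: hold the lock while releasing the secondary core. */
static int demo_boot_secondary(unsigned int cpu)
{
        raw_spin_lock(&demo_boot_lock);
        /* ... write pen_release, send the wake-up IPI, wait for the core ... */
        raw_spin_unlock(&demo_boot_lock);
        return 0;
}

/* Secondary CPU: merely synchronise with the boot thread. */
static void demo_secondary_init(unsigned int cpu)
{
        raw_spin_lock(&demo_boot_lock);
        raw_spin_unlock(&demo_boot_lock);
}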
@ -1,100 +0,0 @@
|
||||
From 9ecaf2a8f433399cc3fabcfb9fbce9a88fe6f200 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Tue, 17 Jul 2018 18:25:31 +0200
|
||||
Subject: [PATCH 013/328] x86/ioapic: Don't let setaffinity unmask threaded EOI
|
||||
interrupt too early
|
||||
|
||||
There is an issue with threaded interrupts which are marked ONESHOT
|
||||
and using the fasteoi handler.
|
||||
|
||||
if (IS_ONESHOT())
|
||||
mask_irq();
|
||||
|
||||
....
|
||||
....
|
||||
|
||||
cond_unmask_eoi_irq()
|
||||
chip->irq_eoi();
|
||||
|
||||
So if setaffinity is pending then the interrupt will be moved and then
|
||||
unmasked, which is wrong as it should be kept masked up to the point where
|
||||
the threaded handler finished. It's not a real problem, the interrupt will
|
||||
just be able to fire before the threaded handler has finished, though the irq
|
||||
masked state will be wrong for a bit.
|
||||
|
||||
The patch below should cure the issue. It also renames the horribly
|
||||
misnomed functions so it becomes clear what they are supposed to do.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
[bigeasy: add the body of the patch, use the same functions in both
|
||||
ifdef paths (spotted by Andy Shevchenko)]
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
arch/x86/kernel/apic/io_apic.c | 16 ++++++++--------
|
||||
1 file changed, 8 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
|
||||
index fa3b85b222e3..1bdad61a3ef7 100644
|
||||
--- a/arch/x86/kernel/apic/io_apic.c
|
||||
+++ b/arch/x86/kernel/apic/io_apic.c
|
||||
@@ -1722,7 +1722,7 @@ static bool io_apic_level_ack_pending(struct mp_chip_data *data)
|
||||
return false;
|
||||
}
|
||||
|
||||
-static inline bool ioapic_irqd_mask(struct irq_data *data)
|
||||
+static inline bool ioapic_prepare_move(struct irq_data *data)
|
||||
{
|
||||
/* If we are moving the IRQ we need to mask it */
|
||||
if (unlikely(irqd_is_setaffinity_pending(data))) {
|
||||
@@ -1733,9 +1733,9 @@ static inline bool ioapic_irqd_mask(struct irq_data *data)
|
||||
return false;
|
||||
}
|
||||
|
||||
-static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked)
|
||||
+static inline void ioapic_finish_move(struct irq_data *data, bool moveit)
|
||||
{
|
||||
- if (unlikely(masked)) {
|
||||
+ if (unlikely(moveit)) {
|
||||
/* Only migrate the irq if the ack has been received.
|
||||
*
|
||||
* On rare occasions the broadcast level triggered ack gets
|
||||
@@ -1770,11 +1770,11 @@ static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked)
|
||||
}
|
||||
}
|
||||
#else
|
||||
-static inline bool ioapic_irqd_mask(struct irq_data *data)
|
||||
+static inline bool ioapic_prepare_move(struct irq_data *data)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
-static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked)
|
||||
+static inline void ioapic_finish_move(struct irq_data *data, bool moveit)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
@@ -1783,11 +1783,11 @@ static void ioapic_ack_level(struct irq_data *irq_data)
|
||||
{
|
||||
struct irq_cfg *cfg = irqd_cfg(irq_data);
|
||||
unsigned long v;
|
||||
- bool masked;
|
||||
+ bool moveit;
|
||||
int i;
|
||||
|
||||
irq_complete_move(cfg);
|
||||
- masked = ioapic_irqd_mask(irq_data);
|
||||
+ moveit = ioapic_prepare_move(irq_data);
|
||||
|
||||
/*
|
||||
* It appears there is an erratum which affects at least version 0x11
|
||||
@@ -1842,7 +1842,7 @@ static void ioapic_ack_level(struct irq_data *irq_data)
|
||||
eoi_ioapic_pin(cfg->vector, irq_data->chip_data);
|
||||
}
|
||||
|
||||
- ioapic_irqd_unmask(irq_data, masked);
|
||||
+ ioapic_finish_move(irq_data, moveit);
|
||||
}
|
||||
|
||||
static void ioapic_ir_ack_level(struct irq_data *irq_data)
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,49 +0,0 @@
|
||||
From 759e6d7c318bbcff7507641d5a9fb6b5074b2a87 Mon Sep 17 00:00:00 2001
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Tue, 3 Jul 2018 18:19:48 +0200
|
||||
Subject: [PATCH 014/328] cgroup: use irqsave in cgroup_rstat_flush_locked()
|
||||
|
||||
All callers of cgroup_rstat_flush_locked() acquire cgroup_rstat_lock
|
||||
either with spin_lock_irq() or spin_lock_irqsave().
|
||||
cgroup_rstat_flush_locked() itself acquires cgroup_rstat_cpu_lock which
|
||||
is a raw_spin_lock. This lock is also acquired in cgroup_rstat_updated()
|
||||
in IRQ context and therefore requires _irqsave() locking suffix in
|
||||
cgroup_rstat_flush_locked().
|
||||
Since there is no difference between spin_lock_t and raw_spin_lock_t
|
||||
on !RT lockdep does not complain here. On RT lockdep complains because
|
||||
the interrupts were not disabled here and a deadlock is possible.
|
||||
|
||||
Acquire the raw_spin_lock_t with disabled interrupts.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/cgroup/rstat.c | 5 +++--
|
||||
1 file changed, 3 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
|
||||
index bb95a35e8c2d..3266a9781b4e 100644
|
||||
--- a/kernel/cgroup/rstat.c
|
||||
+++ b/kernel/cgroup/rstat.c
|
||||
@@ -159,8 +159,9 @@ static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep)
|
||||
raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock,
|
||||
cpu);
|
||||
struct cgroup *pos = NULL;
|
||||
+ unsigned long flags;
|
||||
|
||||
- raw_spin_lock(cpu_lock);
|
||||
+ raw_spin_lock_irqsave(cpu_lock, flags);
|
||||
while ((pos = cgroup_rstat_cpu_pop_updated(pos, cgrp, cpu))) {
|
||||
struct cgroup_subsys_state *css;
|
||||
|
||||
@@ -172,7 +173,7 @@ static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep)
|
||||
css->ss->css_rstat_flush(css, cpu);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
- raw_spin_unlock(cpu_lock);
|
||||
+ raw_spin_unlock_irqrestore(cpu_lock, flags);
|
||||
|
||||
/* if @may_sleep, play nice and yield if necessary */
|
||||
if (may_sleep && (need_resched() ||
|
||||
--
|
||||
2.25.1
|
||||
|
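The point of the change above is that cgroup_rstat_cpu_lock is a raw_spinlock_t that is also taken from IRQ context, so the flush path has to disable interrupts itself rather than rely on the caller's spin_lock_irq(), which on RT no longer disables interrupts. A hypothetical per-CPU rendering of the same pattern:

#include <linux/percpu.h>
#include <linux/spinlock.h>

static DEFINE_PER_CPU(raw_spinlock_t, demo_cpu_lock) =
        __RAW_SPIN_LOCK_UNLOCKED(demo_cpu_lock);

static void demo_flush_cpu(int cpu)
{
        raw_spinlock_t *cpu_lock = per_cpu_ptr(&demo_cpu_lock, cpu);
        unsigned long flags;

        /* The lock is also taken in IRQ context, so _irqsave is mandatory even
         * when the caller already holds a lock that sleeps on RT. */
        raw_spin_lock_irqsave(cpu_lock, flags);
        /* ... pop and flush the per-CPU updated list ... */
        raw_spin_unlock_irqrestore(cpu_lock, flags);
}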
@ -1,63 +0,0 @@
|
||||
From 934128f28dd37073d6513a37f0433df6399c7953 Mon Sep 17 00:00:00 2001
|
||||
From: Clark Williams <williams@redhat.com>
|
||||
Date: Tue, 3 Jul 2018 13:34:30 -0500
|
||||
Subject: [PATCH 015/328] fscache: initialize cookie hash table raw spinlocks
|
||||
|
||||
The fscache cookie mechanism uses a hash table of hlist_bl_head structures. The
|
||||
PREEMPT_RT patchset adds a raw spinlock to this structure and so on PREEMPT_RT
|
||||
the structures get used uninitialized, causing warnings about bad magic numbers
|
||||
when spinlock debugging is turned on.
|
||||
|
||||
Use the init function for fscache cookies.
|
||||
|
||||
Signed-off-by: Clark Williams <williams@redhat.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
fs/fscache/cookie.c | 8 ++++++++
|
||||
fs/fscache/main.c | 1 +
|
||||
include/linux/fscache.h | 1 +
|
||||
3 files changed, 10 insertions(+)
|
||||
|
||||
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
|
||||
index c550512ce335..d5d57da32ffa 100644
|
||||
--- a/fs/fscache/cookie.c
|
||||
+++ b/fs/fscache/cookie.c
|
||||
@@ -962,3 +962,11 @@ int __fscache_check_consistency(struct fscache_cookie *cookie,
|
||||
return -ESTALE;
|
||||
}
|
||||
EXPORT_SYMBOL(__fscache_check_consistency);
|
||||
+
|
||||
+void __init fscache_cookie_init(void)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ for (i = 0; i < (1 << fscache_cookie_hash_shift) - 1; i++)
|
||||
+ INIT_HLIST_BL_HEAD(&fscache_cookie_hash[i]);
|
||||
+}
|
||||
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
|
||||
index 30ad89db1efc..1d5f1d679ffa 100644
|
||||
--- a/fs/fscache/main.c
|
||||
+++ b/fs/fscache/main.c
|
||||
@@ -149,6 +149,7 @@ static int __init fscache_init(void)
|
||||
ret = -ENOMEM;
|
||||
goto error_cookie_jar;
|
||||
}
|
||||
+ fscache_cookie_init();
|
||||
|
||||
fscache_root = kobject_create_and_add("fscache", kernel_kobj);
|
||||
if (!fscache_root)
|
||||
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
|
||||
index 84b90a79d75a..87a9330eafa2 100644
|
||||
--- a/include/linux/fscache.h
|
||||
+++ b/include/linux/fscache.h
|
||||
@@ -230,6 +230,7 @@ extern void __fscache_readpages_cancel(struct fscache_cookie *cookie,
|
||||
extern void __fscache_disable_cookie(struct fscache_cookie *, const void *, bool);
|
||||
extern void __fscache_enable_cookie(struct fscache_cookie *, const void *, loff_t,
|
||||
bool (*)(void *), void *);
|
||||
+extern void fscache_cookie_init(void);
|
||||
|
||||
/**
|
||||
* fscache_register_netfs - Register a filesystem as desiring caching services
|
||||
--
|
||||
2.25.1
|
||||
|
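The message above explains that on PREEMPT_RT an hlist_bl_head gains a real spinlock, so zero-filled static storage is no longer a valid initial state and each bucket must be initialised explicitly. A small sketch of such an init pass over a hypothetical table:

#include <linux/kernel.h>
#include <linux/list_bl.h>
#include <linux/init.h>

#define DEMO_HASH_SHIFT 10
static struct hlist_bl_head demo_hash[1 << DEMO_HASH_SHIFT];

static void __init demo_hash_init(void)
{
        unsigned int i;

        /* On RT each bucket carries a lock with debug state, so run the
         * initializer instead of relying on the zeroed .bss section. */
        for (i = 0; i < ARRAY_SIZE(demo_hash); i++)
                INIT_HLIST_BL_HEAD(&demo_hash[i]);
}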
@ -1,39 +0,0 @@
|
||||
From 2a2f1a8c287a6b6fb14a4a1b5583e043d5897df4 Mon Sep 17 00:00:00 2001
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Wed, 29 Aug 2018 21:59:04 +0200
|
||||
Subject: [PATCH 016/328] Drivers: hv: vmbus: include header for get_irq_regs()
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
On !RT the header file get_irq_regs() gets pulled in via other header files. On
|
||||
RT it does not and the build fails:
|
||||
|
||||
drivers/hv/vmbus_drv.c:975 implicit declaration of function ‘get_irq_regs’ [-Werror=implicit-function-declaration]
|
||||
drivers/hv/hv.c:115 implicit declaration of function ‘get_irq_regs’ [-Werror=implicit-function-declaration]
|
||||
|
||||
Add the header file for get_irq_regs() in a common header so it is used by
|
||||
both vmbus_drv.c and hv.c for their get_irq_regs() usage.
|
||||
|
||||
Reported-by: Bernhard Landauer <oberon@manjaro.org>
|
||||
Reported-by: Ralf Ramsauer <ralf.ramsauer@oth-regensburg.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
drivers/hv/hyperv_vmbus.h | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
|
||||
index 87d3d7da78f8..1d2d8a4b837d 100644
|
||||
--- a/drivers/hv/hyperv_vmbus.h
|
||||
+++ b/drivers/hv/hyperv_vmbus.h
|
||||
@@ -31,6 +31,7 @@
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/hyperv.h>
|
||||
#include <linux/interrupt.h>
|
||||
+#include <linux/irq.h>
|
||||
|
||||
#include "hv_trace.h"
|
||||
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,32 +0,0 @@
|
||||
From d487edd01d698abf2b4f3ea4e3f27897b227250c Mon Sep 17 00:00:00 2001
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Thu, 11 Oct 2018 16:39:59 +0200
|
||||
Subject: [PATCH 017/328] percpu: include irqflags.h for raw_local_irq_save()
|
||||
|
||||
The percpu.h header file is using raw_local_irq_save() but does
|
||||
not include irqflags.h for its definition. It compiles because the
|
||||
header file is included via another header file.
|
||||
On -RT the build fails because raw_local_irq_save() is not defined.
|
||||
|
||||
Include irqflags.h in percpu.h.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/asm-generic/percpu.h | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
|
||||
index 1817a8415a5e..942d64c0476e 100644
|
||||
--- a/include/asm-generic/percpu.h
|
||||
+++ b/include/asm-generic/percpu.h
|
||||
@@ -5,6 +5,7 @@
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/threads.h>
|
||||
#include <linux/percpu-defs.h>
|
||||
+#include <linux/irqflags.h>
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,31 +0,0 @@
|
||||
From 5c77a75aaa23c5fc32b5485897d0d14e66fafd37 Mon Sep 17 00:00:00 2001
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Thu, 26 Jul 2018 15:06:10 +0200
|
||||
Subject: [PATCH 018/328] efi: Allow efi=runtime
|
||||
|
||||
In case the option "efi=noruntime" is the default at build time, the user
|
||||
could overwrite its state with `efi=runtime' and allow it again.
|
||||
|
||||
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
drivers/firmware/efi/efi.c | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
|
||||
index d54fca902e64..5db20908aa9c 100644
|
||||
--- a/drivers/firmware/efi/efi.c
|
||||
+++ b/drivers/firmware/efi/efi.c
|
||||
@@ -113,6 +113,9 @@ static int __init parse_efi_cmdline(char *str)
|
||||
if (parse_option_str(str, "noruntime"))
|
||||
disable_runtime = true;
|
||||
|
||||
+ if (parse_option_str(str, "runtime"))
|
||||
+ disable_runtime = false;
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
early_param("efi", parse_efi_cmdline);
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,54 +0,0 @@
|
||||
From af50891c552632469b09b7b97abd197545aec804 Mon Sep 17 00:00:00 2001
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Tue, 24 Jul 2018 14:48:55 +0200
|
||||
Subject: [PATCH 019/328] x86/efi: drop task_lock() from efi_switch_mm()
|
||||
|
||||
efi_switch_mm() is a wrapper around switch_mm() which saves current's
|
||||
->active_mm, sets the requests mm as ->active_mm and invokes
|
||||
switch_mm().
|
||||
I don't think that task_lock() is required during that procedure. It
|
||||
protects ->mm which isn't changed here.
|
||||
|
||||
It needs to be mentioned that during the whole procedure (switch to
|
||||
EFI's mm and back) the preemption needs to be disabled. A context switch
|
||||
at this point would reset the cr3 value based on current->mm. Also, this
|
||||
function may not be invoked at the same time on a different CPU because
|
||||
it would overwrite the efi_scratch.prev_mm information.
|
||||
|
||||
Remove task_lock() and also update the comment to reflect it.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
arch/x86/platform/efi/efi_64.c | 10 ++++------
|
||||
1 file changed, 4 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
|
||||
index 6db8f3598c80..c9ccaef8df57 100644
|
||||
--- a/arch/x86/platform/efi/efi_64.c
|
||||
+++ b/arch/x86/platform/efi/efi_64.c
|
||||
@@ -620,18 +620,16 @@ void __init efi_dump_pagetable(void)
|
||||
|
||||
/*
|
||||
* Makes the calling thread switch to/from efi_mm context. Can be used
|
||||
- * for SetVirtualAddressMap() i.e. current->active_mm == init_mm as well
|
||||
- * as during efi runtime calls i.e current->active_mm == current_mm.
|
||||
- * We are not mm_dropping()/mm_grabbing() any mm, because we are not
|
||||
- * losing/creating any references.
|
||||
+ * in a kernel thread and user context. Preemption needs to remain disabled
|
||||
+ * while the EFI-mm is borrowed. mmgrab()/mmdrop() is not used because the mm
|
||||
+ * can not change under us.
|
||||
+ * It should be ensured that there are no concurent calls to this function.
|
||||
*/
|
||||
void efi_switch_mm(struct mm_struct *mm)
|
||||
{
|
||||
- task_lock(current);
|
||||
efi_scratch.prev_mm = current->active_mm;
|
||||
current->active_mm = mm;
|
||||
switch_mm(efi_scratch.prev_mm, mm, NULL);
|
||||
- task_unlock(current);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_EFI_MIXED
|
||||
--
|
||||
2.25.1
|
||||
|
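The description above shifts the burden from task_lock() to the callers: preemption must stay disabled for the whole time efi_mm is borrowed, and the function must not run concurrently on two CPUs. A hedged sketch of such a caller (efi_switch_mm(), efi_mm and efi_scratch are the x86 EFI symbols visible in the diff; demo_efi_call() itself is hypothetical):

#include <linux/efi.h>
#include <linux/preempt.h>
#include <asm/efi.h>

static void demo_efi_call(void)
{
        /* No task_lock(): a context switch is ruled out instead, so ->active_mm
         * and the loaded page tables cannot change while efi_mm is active. */
        preempt_disable();
        efi_switch_mm(&efi_mm);
        /* ... perform the EFI runtime service call ... */
        efi_switch_mm(efi_scratch.prev_mm);
        preempt_enable();
}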
@ -1,82 +0,0 @@
|
||||
From c96c598b9bc12e2909dcec0a1bf8f4a1b846107e Mon Sep 17 00:00:00 2001
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Thu, 26 Jul 2018 09:13:42 +0200
|
||||
Subject: [PATCH 020/328] arm64: KVM: compute_layout before alternatives are
|
||||
applied
|
||||
|
||||
compute_layout() is invoked as part of an alternative fixup under
|
||||
stop_machine() and needs a sleeping lock as part of get_random_long().
|
||||
|
||||
Invoke compute_layout() before the alternatives are applied.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
arch/arm64/include/asm/alternative.h | 6 ++++++
|
||||
arch/arm64/kernel/alternative.c | 1 +
|
||||
arch/arm64/kvm/va_layout.c | 7 +------
|
||||
3 files changed, 8 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
|
||||
index 887a8512bf10..376561351bae 100644
|
||||
--- a/arch/arm64/include/asm/alternative.h
|
||||
+++ b/arch/arm64/include/asm/alternative.h
|
||||
@@ -35,6 +35,12 @@ void apply_alternatives_module(void *start, size_t length);
|
||||
static inline void apply_alternatives_module(void *start, size_t length) { }
|
||||
#endif
|
||||
|
||||
+#ifdef CONFIG_KVM_ARM_HOST
|
||||
+void kvm_compute_layout(void);
|
||||
+#else
|
||||
+static inline void kvm_compute_layout(void) { }
|
||||
+#endif
|
||||
+
|
||||
#define ALTINSTR_ENTRY(feature) \
|
||||
" .word 661b - .\n" /* label */ \
|
||||
" .word 663f - .\n" /* new instruction */ \
|
||||
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
|
||||
index b5d603992d40..f92815d56d17 100644
|
||||
--- a/arch/arm64/kernel/alternative.c
|
||||
+++ b/arch/arm64/kernel/alternative.c
|
||||
@@ -224,6 +224,7 @@ static int __apply_alternatives_multi_stop(void *unused)
|
||||
void __init apply_alternatives_all(void)
|
||||
{
|
||||
/* better not try code patching on a live SMP system */
|
||||
+ kvm_compute_layout();
|
||||
stop_machine(__apply_alternatives_multi_stop, NULL, cpu_online_mask);
|
||||
}
|
||||
|
||||
diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
|
||||
index c712a7376bc1..792da0e125de 100644
|
||||
--- a/arch/arm64/kvm/va_layout.c
|
||||
+++ b/arch/arm64/kvm/va_layout.c
|
||||
@@ -33,7 +33,7 @@ static u8 tag_lsb;
|
||||
static u64 tag_val;
|
||||
static u64 va_mask;
|
||||
|
||||
-static void compute_layout(void)
|
||||
+__init void kvm_compute_layout(void)
|
||||
{
|
||||
phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start);
|
||||
u64 hyp_va_msb;
|
||||
@@ -121,8 +121,6 @@ void __init kvm_update_va_mask(struct alt_instr *alt,
|
||||
|
||||
BUG_ON(nr_inst != 5);
|
||||
|
||||
- if (!has_vhe() && !va_mask)
|
||||
- compute_layout();
|
||||
|
||||
for (i = 0; i < nr_inst; i++) {
|
||||
u32 rd, rn, insn, oinsn;
|
||||
@@ -167,9 +165,6 @@ void kvm_patch_vector_branch(struct alt_instr *alt,
|
||||
return;
|
||||
}
|
||||
|
||||
- if (!va_mask)
|
||||
- compute_layout();
|
||||
-
|
||||
/*
|
||||
* Compute HYP VA by using the same computation as kern_hyp_va()
|
||||
*/
|
||||
--
|
||||
2.25.1
|
||||
|
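The fix above follows a general rule: anything that may sleep (here get_random_long() inside compute_layout()) has to happen before stop_machine(), because the stopper callback runs on every CPU with preemption and interrupts off. A hypothetical sketch of that ordering (demo_* names are made up; stop_machine() is the real API):

#include <linux/stop_machine.h>
#include <linux/cpumask.h>
#include <linux/init.h>

static int demo_patch_all(void *unused)
{
        /* runs under stop_machine() with IRQs off - must not sleep */
        return 0;
}

static void __init demo_prepare(void)
{
        /* may sleep, e.g. take locks or call get_random_long() */
}

void __init demo_apply_alternatives(void)
{
        demo_prepare();         /* sleeping work done up front */
        stop_machine(demo_patch_all, NULL, cpu_online_mask);
}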
@ -1,102 +0,0 @@
|
||||
From 8779fdd5686d1f9be670c7ee5ea6dfaece9e37d8 Mon Sep 17 00:00:00 2001
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Fri, 31 Aug 2018 14:16:30 +0200
|
||||
Subject: [PATCH 021/328] of: allocate / free phandle cache outside of the
|
||||
devtree_lock
|
||||
|
||||
The phandle cache code allocates memory while holding devtree_lock which
|
||||
is a raw_spinlock_t. Memory allocation (and free()) is not possible on
|
||||
RT while a raw_spinlock_t is held.
|
||||
Invoke the kfree() and kcalloc() while the lock is dropped.
|
||||
|
||||
Cc: Rob Herring <robh+dt@kernel.org>
|
||||
Cc: Frank Rowand <frowand.list@gmail.com>
|
||||
Cc: devicetree@vger.kernel.org
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
drivers/of/base.c | 19 +++++++++++++------
|
||||
1 file changed, 13 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/drivers/of/base.c b/drivers/of/base.c
|
||||
index f0dbb7ad88cf..c59b30bab0e0 100644
|
||||
--- a/drivers/of/base.c
|
||||
+++ b/drivers/of/base.c
|
||||
@@ -130,31 +130,34 @@ static u32 phandle_cache_mask;
|
||||
/*
|
||||
* Caller must hold devtree_lock.
|
||||
*/
|
||||
-static void __of_free_phandle_cache(void)
|
||||
+static struct device_node** __of_free_phandle_cache(void)
|
||||
{
|
||||
u32 cache_entries = phandle_cache_mask + 1;
|
||||
u32 k;
|
||||
+ struct device_node **shadow;
|
||||
|
||||
if (!phandle_cache)
|
||||
- return;
|
||||
+ return NULL;
|
||||
|
||||
for (k = 0; k < cache_entries; k++)
|
||||
of_node_put(phandle_cache[k]);
|
||||
|
||||
- kfree(phandle_cache);
|
||||
+ shadow = phandle_cache;
|
||||
phandle_cache = NULL;
|
||||
+ return shadow;
|
||||
}
|
||||
|
||||
int of_free_phandle_cache(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
+ struct device_node **shadow;
|
||||
|
||||
raw_spin_lock_irqsave(&devtree_lock, flags);
|
||||
|
||||
- __of_free_phandle_cache();
|
||||
+ shadow = __of_free_phandle_cache();
|
||||
|
||||
raw_spin_unlock_irqrestore(&devtree_lock, flags);
|
||||
-
|
||||
+ kfree(shadow);
|
||||
return 0;
|
||||
}
|
||||
#if !defined(CONFIG_MODULES)
|
||||
@@ -189,10 +192,11 @@ void of_populate_phandle_cache(void)
|
||||
u32 cache_entries;
|
||||
struct device_node *np;
|
||||
u32 phandles = 0;
|
||||
+ struct device_node **shadow;
|
||||
|
||||
raw_spin_lock_irqsave(&devtree_lock, flags);
|
||||
|
||||
- __of_free_phandle_cache();
|
||||
+ shadow = __of_free_phandle_cache();
|
||||
|
||||
for_each_of_allnodes(np)
|
||||
if (np->phandle && np->phandle != OF_PHANDLE_ILLEGAL)
|
||||
@@ -200,12 +204,14 @@ void of_populate_phandle_cache(void)
|
||||
|
||||
if (!phandles)
|
||||
goto out;
|
||||
+ raw_spin_unlock_irqrestore(&devtree_lock, flags);
|
||||
|
||||
cache_entries = roundup_pow_of_two(phandles);
|
||||
phandle_cache_mask = cache_entries - 1;
|
||||
|
||||
phandle_cache = kcalloc(cache_entries, sizeof(*phandle_cache),
|
||||
GFP_ATOMIC);
|
||||
+ raw_spin_lock_irqsave(&devtree_lock, flags);
|
||||
if (!phandle_cache)
|
||||
goto out;
|
||||
|
||||
@@ -217,6 +223,7 @@ void of_populate_phandle_cache(void)
|
||||
|
||||
out:
|
||||
raw_spin_unlock_irqrestore(&devtree_lock, flags);
|
||||
+ kfree(shadow);
|
||||
}
|
||||
|
||||
void __init of_core_init(void)
|
||||
--
|
||||
2.25.1
|
||||
|
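The pattern used above - detach the pointer under the raw lock and call kfree()/kcalloc() only after dropping it - is the standard way to keep allocator calls out of raw_spinlock_t sections on RT. A minimal, hypothetical rendering of the free side:

#include <linux/slab.h>
#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(demo_lock);
static void **demo_cache;

static void demo_free_cache(void)
{
        void **shadow;
        unsigned long flags;

        raw_spin_lock_irqsave(&demo_lock, flags);
        shadow = demo_cache;            /* detach while holding the raw lock */
        demo_cache = NULL;
        raw_spin_unlock_irqrestore(&demo_lock, flags);

        kfree(shadow);                  /* may sleep on RT, so call it unlocked */
}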
@ -1,97 +0,0 @@
|
||||
From 7841950d4460ea93ee4ddd6a400ad67cfacee592 Mon Sep 17 00:00:00 2001
|
||||
From: Clark Williams <williams@redhat.com>
|
||||
Date: Tue, 18 Sep 2018 10:29:31 -0500
|
||||
Subject: [PATCH 022/328] mm/kasan: make quarantine_lock a raw_spinlock_t
|
||||
|
||||
The static lock quarantine_lock is used in quarantine.c to protect the
|
||||
quarantine queue datastructures. It is taken inside quarantine queue
|
||||
manipulation routines (quarantine_put(), quarantine_reduce() and
|
||||
quarantine_remove_cache()), with IRQs disabled.
|
||||
This is not a problem on a stock kernel but is problematic on an RT
|
||||
kernel where spin locks are sleeping spinlocks, which can sleep and can
|
||||
not be acquired with disabled interrupts.
|
||||
|
||||
Convert the quarantine_lock to a raw spinlock_t. The usage of
|
||||
quarantine_lock is confined to quarantine.c and the work performed while
|
||||
the lock is held is limited.
|
||||
|
||||
Signed-off-by: Clark Williams <williams@redhat.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
mm/kasan/quarantine.c | 18 +++++++++---------
|
||||
1 file changed, 9 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c
|
||||
index 3a8ddf8baf7d..b209dbaefde8 100644
|
||||
--- a/mm/kasan/quarantine.c
|
||||
+++ b/mm/kasan/quarantine.c
|
||||
@@ -103,7 +103,7 @@ static int quarantine_head;
|
||||
static int quarantine_tail;
|
||||
/* Total size of all objects in global_quarantine across all batches. */
|
||||
static unsigned long quarantine_size;
|
||||
-static DEFINE_SPINLOCK(quarantine_lock);
|
||||
+static DEFINE_RAW_SPINLOCK(quarantine_lock);
|
||||
DEFINE_STATIC_SRCU(remove_cache_srcu);
|
||||
|
||||
/* Maximum size of the global queue. */
|
||||
@@ -190,7 +190,7 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache)
|
||||
if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE)) {
|
||||
qlist_move_all(q, &temp);
|
||||
|
||||
- spin_lock(&quarantine_lock);
|
||||
+ raw_spin_lock(&quarantine_lock);
|
||||
WRITE_ONCE(quarantine_size, quarantine_size + temp.bytes);
|
||||
qlist_move_all(&temp, &global_quarantine[quarantine_tail]);
|
||||
if (global_quarantine[quarantine_tail].bytes >=
|
||||
@@ -203,7 +203,7 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache)
|
||||
if (new_tail != quarantine_head)
|
||||
quarantine_tail = new_tail;
|
||||
}
|
||||
- spin_unlock(&quarantine_lock);
|
||||
+ raw_spin_unlock(&quarantine_lock);
|
||||
}
|
||||
|
||||
local_irq_restore(flags);
|
||||
@@ -230,7 +230,7 @@ void quarantine_reduce(void)
|
||||
* expected case).
|
||||
*/
|
||||
srcu_idx = srcu_read_lock(&remove_cache_srcu);
|
||||
- spin_lock_irqsave(&quarantine_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&quarantine_lock, flags);
|
||||
|
||||
/*
|
||||
* Update quarantine size in case of hotplug. Allocate a fraction of
|
||||
@@ -254,7 +254,7 @@ void quarantine_reduce(void)
|
||||
quarantine_head = 0;
|
||||
}
|
||||
|
||||
- spin_unlock_irqrestore(&quarantine_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&quarantine_lock, flags);
|
||||
|
||||
qlist_free_all(&to_free, NULL);
|
||||
srcu_read_unlock(&remove_cache_srcu, srcu_idx);
|
||||
@@ -310,17 +310,17 @@ void quarantine_remove_cache(struct kmem_cache *cache)
|
||||
*/
|
||||
on_each_cpu(per_cpu_remove_cache, cache, 1);
|
||||
|
||||
- spin_lock_irqsave(&quarantine_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&quarantine_lock, flags);
|
||||
for (i = 0; i < QUARANTINE_BATCHES; i++) {
|
||||
if (qlist_empty(&global_quarantine[i]))
|
||||
continue;
|
||||
qlist_move_cache(&global_quarantine[i], &to_free, cache);
|
||||
/* Scanning whole quarantine can take a while. */
|
||||
- spin_unlock_irqrestore(&quarantine_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&quarantine_lock, flags);
|
||||
cond_resched();
|
||||
- spin_lock_irqsave(&quarantine_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&quarantine_lock, flags);
|
||||
}
|
||||
- spin_unlock_irqrestore(&quarantine_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&quarantine_lock, flags);
|
||||
|
||||
qlist_free_all(&to_free, cache);
|
||||
|
||||
--
|
||||
2.25.1
|
||||
|
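Editorial note on the locking pattern above: on a PREEMPT_RT kernel a spinlock_t is a sleeping lock, so it cannot be taken while interrupts are disabled, whereas a raw_spinlock_t keeps the classic non-sleeping behaviour on both RT and stock kernels. The minimal sketch below is not part of the patch; example_lock and example_count are hypothetical names, used only to show the resulting idiom.

/*
 * Sketch only (hypothetical names): a raw_spinlock_t remains a true
 * spinning lock on PREEMPT_RT and is therefore legal in sections that
 * run with interrupts disabled, unlike spinlock_t.
 */
#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(example_lock);
static unsigned long example_count;

static void example_update(void)
{
        unsigned long flags;

        /* Safe even when the caller already runs with IRQs off on RT. */
        raw_spin_lock_irqsave(&example_lock, flags);
        example_count++;
        raw_spin_unlock_irqrestore(&example_lock, flags);
}

The trade-off is that a raw spinlock section must stay short, which is why the commit message stresses that the work done under quarantine_lock is limited.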
@ -1,50 +0,0 @@
|
||||
From c7753a6fd996fcaa0285c1c8285fde721d519a0a Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.ibm.com>
Date: Mon, 29 Oct 2018 11:53:01 +0100
Subject: [PATCH 023/328] EXP rcu: Revert expedited GP parallelization
 cleverness

(Commit 258ba8e089db23f760139266c232f01bad73f85c from linux-rcu)

This commit reverts a series of commits starting with fcc635436501 ("rcu:
Make expedited GPs handle CPU 0 being offline") and its successors, thus
queueing each rcu_node structure's expedited grace-period initialization
work on the first CPU of that rcu_node structure.

Suggested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 kernel/rcu/tree_exp.h | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
|
||||
index 0b2c2ad69629..a0486414edb4 100644
|
||||
--- a/kernel/rcu/tree_exp.h
|
||||
+++ b/kernel/rcu/tree_exp.h
|
||||
@@ -472,7 +472,6 @@ static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
|
||||
static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
|
||||
smp_call_func_t func)
|
||||
{
|
||||
- int cpu;
|
||||
struct rcu_node *rnp;
|
||||
|
||||
trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("reset"));
|
||||
@@ -494,13 +493,7 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
|
||||
continue;
|
||||
}
|
||||
INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus);
|
||||
- preempt_disable();
|
||||
- cpu = cpumask_next(rnp->grplo - 1, cpu_online_mask);
|
||||
- /* If all offline, queue the work on an unbound CPU. */
|
||||
- if (unlikely(cpu > rnp->grphi))
|
||||
- cpu = WORK_CPU_UNBOUND;
|
||||
- queue_work_on(cpu, rcu_par_gp_wq, &rnp->rew.rew_work);
|
||||
- preempt_enable();
|
||||
+ queue_work_on(rnp->grplo, rcu_par_gp_wq, &rnp->rew.rew_work);
|
||||
rnp->exp_need_flush = true;
|
||||
}
|
||||
|
||||
--
|
||||
2.25.1
|
||||
|
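For readers unfamiliar with the workqueue call the revert falls back to, queue_work_on() queues a work item on one specific CPU's worker pool, which is how each rcu_node's expedited grace-period initialization ends up on that node's first CPU (rnp->grplo). A minimal sketch with hypothetical names (example_work, example_fn) might look like this; it is an illustration, not code from the patch.

/*
 * Illustrative sketch (hypothetical names): queue_work_on() pins a work
 * item to the given CPU, provided that CPU is online.
 */
#include <linux/workqueue.h>

static void example_fn(struct work_struct *work)
{
        /* Runs on the CPU the work was queued on. */
}

static DECLARE_WORK(example_work, example_fn);

static void example_kick(int cpu)
{
        queue_work_on(cpu, system_wq, &example_work);
}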
@ -1,168 +0,0 @@
|
||||
From 4b0c7eda4403c5a7146714857bd1abffd2b080f8 Mon Sep 17 00:00:00 2001
From: He Zhe <zhe.he@windriver.com>
Date: Wed, 19 Dec 2018 16:30:57 +0100
Subject: [PATCH 024/328] kmemleak: Turn kmemleak_lock to raw spinlock on RT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

kmemleak_lock, as an rwlock on RT, can be held in atomic context and
causes the following BUG.

BUG: scheduling while atomic: migration/15/132/0x00000002
|
||||
Preemption disabled at:
|
||||
[<ffffffff8c927c11>] cpu_stopper_thread+0x71/0x100
|
||||
CPU: 15 PID: 132 Comm: migration/15 Not tainted 4.19.0-rt1-preempt-rt #1
|
||||
Call Trace:
|
||||
schedule+0x3d/0xe0
|
||||
__rt_spin_lock+0x26/0x30
|
||||
__write_rt_lock+0x23/0x1a0
|
||||
rt_write_lock+0x2a/0x30
|
||||
find_and_remove_object+0x1e/0x80
|
||||
delete_object_full+0x10/0x20
|
||||
kmemleak_free+0x32/0x50
|
||||
kfree+0x104/0x1f0
|
||||
intel_pmu_cpu_dying+0x67/0x70
|
||||
x86_pmu_dying_cpu+0x1a/0x30
|
||||
cpuhp_invoke_callback+0x92/0x700
|
||||
take_cpu_down+0x70/0xa0
|
||||
multi_cpu_stop+0x62/0xc0
|
||||
cpu_stopper_thread+0x79/0x100
|
||||
smpboot_thread_fn+0x20f/0x2d0
|
||||
kthread+0x121/0x140
|
||||
|
||||
On the v4.18 stable tree the following call trace, caused by grabbing
kmemleak_lock again, is also observed.

kernel BUG at kernel/locking/rtmutex.c:1048!
|
||||
CPU: 5 PID: 689 Comm: mkfs.ext4 Not tainted 4.18.16-rt9-preempt-rt #1
|
||||
Call Trace:
|
||||
rt_write_lock+0x2a/0x30
|
||||
create_object+0x17d/0x2b0
|
||||
kmemleak_alloc+0x34/0x50
|
||||
kmem_cache_alloc+0x146/0x220
|
||||
mempool_alloc_slab+0x15/0x20
|
||||
mempool_alloc+0x65/0x170
|
||||
sg_pool_alloc+0x21/0x60
|
||||
sg_alloc_table_chained+0x8b/0xb0
|
||||
…
|
||||
blk_flush_plug_list+0x204/0x230
|
||||
schedule+0x87/0xe0
|
||||
rt_write_lock+0x2a/0x30
|
||||
create_object+0x17d/0x2b0
|
||||
kmemleak_alloc+0x34/0x50
|
||||
__kmalloc_node+0x1cd/0x340
|
||||
alloc_request_size+0x30/0x70
|
||||
mempool_alloc+0x65/0x170
|
||||
get_request+0x4e3/0x8d0
|
||||
blk_queue_bio+0x153/0x470
|
||||
generic_make_request+0x1dc/0x3f0
|
||||
submit_bio+0x49/0x140
|
||||
…
|
||||
|
||||
kmemleak is an error-detection feature, so we do not expect the same
performance as without it. As there are no helpers for defining a raw
rwlock, we turn kmemleak_lock into a raw spinlock.

Signed-off-by: He Zhe <zhe.he@windriver.com>
|
||||
Cc: catalin.marinas@arm.com
|
||||
Cc: bigeasy@linutronix.de
|
||||
Cc: tglx@linutronix.de
|
||||
Cc: rostedt@goodmis.org
|
||||
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
|
||||
Link: https://lkml.kernel.org/r/1542877459-144382-1-git-send-email-zhe.he@windriver.com
|
||||
Link: https://lkml.kernel.org/r/20181218150744.GB20197@arrakis.emea.arm.com
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
mm/kmemleak.c | 20 ++++++++++----------
|
||||
1 file changed, 10 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
|
||||
index 5eeabece0c17..92ce99b15f2b 100644
|
||||
--- a/mm/kmemleak.c
|
||||
+++ b/mm/kmemleak.c
|
||||
@@ -26,7 +26,7 @@
|
||||
*
|
||||
* The following locks and mutexes are used by kmemleak:
|
||||
*
|
||||
- * - kmemleak_lock (rwlock): protects the object_list modifications and
|
||||
+ * - kmemleak_lock (raw spinlock): protects the object_list modifications and
|
||||
* accesses to the object_tree_root. The object_list is the main list
|
||||
* holding the metadata (struct kmemleak_object) for the allocated memory
|
||||
* blocks. The object_tree_root is a red black tree used to look-up
|
||||
@@ -197,7 +197,7 @@ static LIST_HEAD(gray_list);
|
||||
/* search tree for object boundaries */
|
||||
static struct rb_root object_tree_root = RB_ROOT;
|
||||
/* rw_lock protecting the access to object_list and object_tree_root */
|
||||
-static DEFINE_RWLOCK(kmemleak_lock);
|
||||
+static DEFINE_RAW_SPINLOCK(kmemleak_lock);
|
||||
|
||||
/* allocation caches for kmemleak internal data */
|
||||
static struct kmem_cache *object_cache;
|
||||
@@ -491,9 +491,9 @@ static struct kmemleak_object *find_and_get_object(unsigned long ptr, int alias)
|
||||
struct kmemleak_object *object;
|
||||
|
||||
rcu_read_lock();
|
||||
- read_lock_irqsave(&kmemleak_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&kmemleak_lock, flags);
|
||||
object = lookup_object(ptr, alias);
|
||||
- read_unlock_irqrestore(&kmemleak_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&kmemleak_lock, flags);
|
||||
|
||||
/* check whether the object is still available */
|
||||
if (object && !get_object(object))
|
||||
@@ -513,13 +513,13 @@ static struct kmemleak_object *find_and_remove_object(unsigned long ptr, int ali
|
||||
unsigned long flags;
|
||||
struct kmemleak_object *object;
|
||||
|
||||
- write_lock_irqsave(&kmemleak_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&kmemleak_lock, flags);
|
||||
object = lookup_object(ptr, alias);
|
||||
if (object) {
|
||||
rb_erase(&object->rb_node, &object_tree_root);
|
||||
list_del_rcu(&object->object_list);
|
||||
}
|
||||
- write_unlock_irqrestore(&kmemleak_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&kmemleak_lock, flags);
|
||||
|
||||
return object;
|
||||
}
|
||||
@@ -593,7 +593,7 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
|
||||
/* kernel backtrace */
|
||||
object->trace_len = __save_stack_trace(object->trace);
|
||||
|
||||
- write_lock_irqsave(&kmemleak_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&kmemleak_lock, flags);
|
||||
|
||||
min_addr = min(min_addr, ptr);
|
||||
max_addr = max(max_addr, ptr + size);
|
||||
@@ -624,7 +624,7 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
|
||||
|
||||
list_add_tail_rcu(&object->object_list, &object_list);
|
||||
out:
|
||||
- write_unlock_irqrestore(&kmemleak_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&kmemleak_lock, flags);
|
||||
return object;
|
||||
}
|
||||
|
||||
@@ -1310,7 +1310,7 @@ static void scan_block(void *_start, void *_end,
|
||||
unsigned long *end = _end - (BYTES_PER_POINTER - 1);
|
||||
unsigned long flags;
|
||||
|
||||
- read_lock_irqsave(&kmemleak_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&kmemleak_lock, flags);
|
||||
for (ptr = start; ptr < end; ptr++) {
|
||||
struct kmemleak_object *object;
|
||||
unsigned long pointer;
|
||||
@@ -1367,7 +1367,7 @@ static void scan_block(void *_start, void *_end,
|
||||
spin_unlock(&object->lock);
|
||||
}
|
||||
}
|
||||
- read_unlock_irqrestore(&kmemleak_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&kmemleak_lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
--
|
||||
2.25.1
|
||||
|
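As background for the conversion above: the kernel has no raw rwlock type, so an rwlock_t that may be taken in atomic context on RT is typically replaced with a raw_spinlock_t, giving up reader parallelism. A rough sketch of the resulting pattern, using the hypothetical name example_obj_lock rather than the real kmemleak code, follows.

/*
 * Sketch only (hypothetical name): former read_lock_irqsave() and
 * write_lock_irqsave() callers all take the same raw spinlock, which is
 * an acceptable trade-off for a debugging facility.
 */
#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(example_obj_lock);

static void example_reader(void)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&example_obj_lock, flags);   /* was read_lock_irqsave() */
        /* ... look up an object ... */
        raw_spin_unlock_irqrestore(&example_obj_lock, flags);
}

static void example_writer(void)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&example_obj_lock, flags);   /* was write_lock_irqsave() */
        /* ... insert or remove an object ... */
        raw_spin_unlock_irqrestore(&example_obj_lock, flags);
}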
@ -1,135 +0,0 @@
|
||||
From 7cb617c6dac1356dfe57b1c4a976ec78ead046a0 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 28 Oct 2016 23:05:11 +0200
Subject: [PATCH 025/328] NFSv4: replace seqcount_t with a seqlock_t

The raw_write_seqcount_begin() in nfs4_reclaim_open_state() bugs me
because it maps to preempt_disable() in -RT, which I can't have at this
point. So I took a look at the code.
The lockdep part was removed in commit abbec2da13f0 ("NFS: Use
raw_write_seqcount_begin/end int nfs4_reclaim_open_state") because
lockdep complained. The whole seqcount thing was introduced in commit
c137afabe330 ("NFSv4: Allow the state manager to mark an open_owner as
being recovered").
The recovery thread runs only once.
write_seqlock() does not work on !RT because it disables preemption and
the writer side is preemptible (and has to remain so despite the fact
that it will block readers).

Reported-by: kernel test robot <xiaolong.ye@intel.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 fs/nfs/delegation.c |  4 ++--
 fs/nfs/nfs4_fs.h    |  2 +-
 fs/nfs/nfs4proc.c   |  4 ++--
 fs/nfs/nfs4state.c  | 22 ++++++++++++++++------
 4 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
|
||||
index b0c0c2fc2fba..26565ba05dc1 100644
|
||||
--- a/fs/nfs/delegation.c
|
||||
+++ b/fs/nfs/delegation.c
|
||||
@@ -162,11 +162,11 @@ static int nfs_delegation_claim_opens(struct inode *inode,
|
||||
sp = state->owner;
|
||||
/* Block nfs4_proc_unlck */
|
||||
mutex_lock(&sp->so_delegreturn_mutex);
|
||||
- seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
|
||||
+ seq = read_seqbegin(&sp->so_reclaim_seqlock);
|
||||
err = nfs4_open_delegation_recall(ctx, state, stateid);
|
||||
if (!err)
|
||||
err = nfs_delegation_claim_locks(state, stateid);
|
||||
- if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
|
||||
+ if (!err && read_seqretry(&sp->so_reclaim_seqlock, seq))
|
||||
err = -EAGAIN;
|
||||
mutex_unlock(&sp->so_delegreturn_mutex);
|
||||
put_nfs_open_context(ctx);
|
||||
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
|
||||
index 5b61520dce88..2771aafaca19 100644
|
||||
--- a/fs/nfs/nfs4_fs.h
|
||||
+++ b/fs/nfs/nfs4_fs.h
|
||||
@@ -114,7 +114,7 @@ struct nfs4_state_owner {
|
||||
unsigned long so_flags;
|
||||
struct list_head so_states;
|
||||
struct nfs_seqid_counter so_seqid;
|
||||
- seqcount_t so_reclaim_seqcount;
|
||||
+ seqlock_t so_reclaim_seqlock;
|
||||
struct mutex so_delegreturn_mutex;
|
||||
};
|
||||
|
||||
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
|
||||
index 668b648064b7..187d411668ed 100644
|
||||
--- a/fs/nfs/nfs4proc.c
|
||||
+++ b/fs/nfs/nfs4proc.c
|
||||
@@ -2870,7 +2870,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
|
||||
unsigned int seq;
|
||||
int ret;
|
||||
|
||||
- seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
|
||||
+ seq = raw_seqcount_begin(&sp->so_reclaim_seqlock.seqcount);
|
||||
|
||||
ret = _nfs4_proc_open(opendata, ctx);
|
||||
if (ret != 0)
|
||||
@@ -2911,7 +2911,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
|
||||
|
||||
if (d_inode(dentry) == state->inode) {
|
||||
nfs_inode_attach_open_context(ctx);
|
||||
- if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
|
||||
+ if (read_seqretry(&sp->so_reclaim_seqlock, seq))
|
||||
nfs4_schedule_stateid_recovery(server, state);
|
||||
}
|
||||
|
||||
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
|
||||
index b3086e99420c..c9bf1eb7e1b2 100644
|
||||
--- a/fs/nfs/nfs4state.c
|
||||
+++ b/fs/nfs/nfs4state.c
|
||||
@@ -515,7 +515,7 @@ nfs4_alloc_state_owner(struct nfs_server *server,
|
||||
nfs4_init_seqid_counter(&sp->so_seqid);
|
||||
atomic_set(&sp->so_count, 1);
|
||||
INIT_LIST_HEAD(&sp->so_lru);
|
||||
- seqcount_init(&sp->so_reclaim_seqcount);
|
||||
+ seqlock_init(&sp->so_reclaim_seqlock);
|
||||
mutex_init(&sp->so_delegreturn_mutex);
|
||||
return sp;
|
||||
}
|
||||
@@ -1583,8 +1583,12 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs
|
||||
* recovering after a network partition or a reboot from a
|
||||
* server that doesn't support a grace period.
|
||||
*/
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+ write_seqlock(&sp->so_reclaim_seqlock);
|
||||
+#else
|
||||
+ write_seqcount_begin(&sp->so_reclaim_seqlock.seqcount);
|
||||
+#endif
|
||||
spin_lock(&sp->so_lock);
|
||||
- raw_write_seqcount_begin(&sp->so_reclaim_seqcount);
|
||||
restart:
|
||||
list_for_each_entry(state, &sp->so_states, open_states) {
|
||||
if (!test_and_clear_bit(ops->state_flag_bit, &state->flags))
|
||||
@@ -1671,14 +1675,20 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs
|
||||
spin_lock(&sp->so_lock);
|
||||
goto restart;
|
||||
}
|
||||
- raw_write_seqcount_end(&sp->so_reclaim_seqcount);
|
||||
spin_unlock(&sp->so_lock);
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+ write_sequnlock(&sp->so_reclaim_seqlock);
|
||||
+#else
|
||||
+ write_seqcount_end(&sp->so_reclaim_seqlock.seqcount);
|
||||
+#endif
|
||||
return 0;
|
||||
out_err:
|
||||
nfs4_put_open_state(state);
|
||||
- spin_lock(&sp->so_lock);
|
||||
- raw_write_seqcount_end(&sp->so_reclaim_seqcount);
|
||||
- spin_unlock(&sp->so_lock);
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+ write_sequnlock(&sp->so_reclaim_seqlock);
|
||||
+#else
|
||||
+ write_seqcount_end(&sp->so_reclaim_seqlock.seqcount);
|
||||
+#endif
|
||||
return status;
|
||||
}
|
||||
|
||||
--
|
||||
2.25.1
|
||||
|
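For context, the seqlock_t the patch switches to pairs a sequence counter with a spinlock: writers serialise through the lock (a sleeping lock on PREEMPT_RT), while readers run lockless and simply retry if a writer was active. A minimal, self-contained sketch with hypothetical names (example_seq, example_val), not taken from the NFS code:

/*
 * Sketch only (hypothetical names): basic seqlock_t writer/reader pair.
 */
#include <linux/seqlock.h>

static DEFINE_SEQLOCK(example_seq);
static unsigned long example_val;

static void example_write(unsigned long v)
{
        write_seqlock(&example_seq);    /* spinlock_t inside, sleeps on RT */
        example_val = v;
        write_sequnlock(&example_seq);
}

static unsigned long example_read(void)
{
        unsigned long v;
        unsigned int seq;

        do {
                seq = read_seqbegin(&example_seq);
                v = example_val;
        } while (read_seqretry(&example_seq, seq));

        return v;
}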
@ -1,784 +0,0 @@
|
||||
From 4906d6c574d916416e92a9de0b959c4d0ed0bc17 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 4 Apr 2017 12:50:16 +0200
Subject: [PATCH 026/328] kernel: sched: Provide a pointer to the valid CPU
 mask
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In commit 4b53a3412d66 ("sched/core: Remove the tsk_nr_cpus_allowed()
wrapper") the tsk_nr_cpus_allowed() wrapper was removed. There was not
much difference in !RT, but in RT we used this to implement
migrate_disable(). Within a migrate_disable() section the CPU mask is
restricted to a single CPU while the "normal" CPU mask remains untouched.

As an alternative implementation Ingo suggested to use

	struct task_struct {
		const cpumask_t	*cpus_ptr;
		cpumask_t	cpus_mask;
	};
with
	t->cpus_allowed_ptr = &t->cpus_allowed;

In -RT we can then switch the cpus_ptr to

	t->cpus_allowed_ptr = &cpumask_of(task_cpu(p));

in a migration-disabled region. The rules are simple:
- Code that 'uses' ->cpus_allowed would use the pointer.
- Code that 'modifies' ->cpus_allowed would use the direct mask.

While converting the existing users I tried to stick with the rules
above. However, mostly CPUFREQ tries to temporarily switch the CPU
mask to do something on a certain CPU and then switches the mask back
to its original value. So in theory `cpus_ptr' could or should be used.
However, if this is invoked in a migration-disabled region (which is not
the case, because it would require something like preempt_disable() and
set_cpus_allowed_ptr() might sleep, so it can't be), then the "restore"
part would restore the wrong mask. So it only looks strange and I go for
the pointer…

Some drivers copy the cpumask without cpumask_copy() and others use
cpumask_copy() but without alloc_cpumask_var(). I did not fix those as
part of this; it could be done as a follow up…

So is this the way we want it?
Is the usage of `cpus_ptr' vs `cpus_mask' for the set + restore part
(see the cpufreq users) what we want? At some point it looks like they
should use a different interface for this. I am not sure why switching
to a certain CPU is important, but maybe it could be done via a
workqueue from the CPUFREQ core (so we have a comment describing why we
are doing this and a get_online_cpus() to ensure that the CPU does not
go offline too early).
|
||||
|
||||
Cc: Peter Zijlstra <peterz@infradead.org>
|
||||
Cc: Thomas Gleixner <tglx@linutronix.de>
|
||||
Cc: Mike Galbraith <efault@gmx.de>
|
||||
Cc: Ingo Molnar <mingo@elte.hu>
|
||||
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
arch/ia64/kernel/mca.c | 2 +-
|
||||
arch/mips/include/asm/switch_to.h | 4 +--
|
||||
arch/mips/kernel/mips-mt-fpaff.c | 2 +-
|
||||
arch/mips/kernel/traps.c | 6 ++--
|
||||
arch/powerpc/platforms/cell/spufs/sched.c | 2 +-
|
||||
arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c | 2 +-
|
||||
drivers/infiniband/hw/hfi1/affinity.c | 6 ++--
|
||||
drivers/infiniband/hw/hfi1/sdma.c | 3 +-
|
||||
drivers/infiniband/hw/qib/qib_file_ops.c | 7 ++--
|
||||
fs/proc/array.c | 4 +--
|
||||
include/linux/sched.h | 5 +--
|
||||
init/init_task.c | 3 +-
|
||||
kernel/cgroup/cpuset.c | 2 +-
|
||||
kernel/fork.c | 2 ++
|
||||
kernel/sched/core.c | 40 ++++++++++-----------
|
||||
kernel/sched/cpudeadline.c | 4 +--
|
||||
kernel/sched/cpupri.c | 4 +--
|
||||
kernel/sched/deadline.c | 6 ++--
|
||||
kernel/sched/fair.c | 32 ++++++++---------
|
||||
kernel/sched/rt.c | 4 +--
|
||||
kernel/trace/trace_hwlat.c | 2 +-
|
||||
lib/smp_processor_id.c | 2 +-
|
||||
samples/trace_events/trace-events-sample.c | 2 +-
|
||||
23 files changed, 74 insertions(+), 72 deletions(-)
|
||||
|
||||
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
|
||||
index 6115464d5f03..f09e34c8409c 100644
|
||||
--- a/arch/ia64/kernel/mca.c
|
||||
+++ b/arch/ia64/kernel/mca.c
|
||||
@@ -1824,7 +1824,7 @@ format_mca_init_stack(void *mca_data, unsigned long offset,
|
||||
ti->cpu = cpu;
|
||||
p->stack = ti;
|
||||
p->state = TASK_UNINTERRUPTIBLE;
|
||||
- cpumask_set_cpu(cpu, &p->cpus_allowed);
|
||||
+ cpumask_set_cpu(cpu, &p->cpus_mask);
|
||||
INIT_LIST_HEAD(&p->tasks);
|
||||
p->parent = p->real_parent = p->group_leader = p;
|
||||
INIT_LIST_HEAD(&p->children);
|
||||
diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h
|
||||
index e610473d61b8..1428b4febbc9 100644
|
||||
--- a/arch/mips/include/asm/switch_to.h
|
||||
+++ b/arch/mips/include/asm/switch_to.h
|
||||
@@ -42,7 +42,7 @@ extern struct task_struct *ll_task;
|
||||
* inline to try to keep the overhead down. If we have been forced to run on
|
||||
* a "CPU" with an FPU because of a previous high level of FP computation,
|
||||
* but did not actually use the FPU during the most recent time-slice (CU1
|
||||
- * isn't set), we undo the restriction on cpus_allowed.
|
||||
+ * isn't set), we undo the restriction on cpus_mask.
|
||||
*
|
||||
* We're not calling set_cpus_allowed() here, because we have no need to
|
||||
* force prompt migration - we're already switching the current CPU to a
|
||||
@@ -57,7 +57,7 @@ do { \
|
||||
test_ti_thread_flag(__prev_ti, TIF_FPUBOUND) && \
|
||||
(!(KSTK_STATUS(prev) & ST0_CU1))) { \
|
||||
clear_ti_thread_flag(__prev_ti, TIF_FPUBOUND); \
|
||||
- prev->cpus_allowed = prev->thread.user_cpus_allowed; \
|
||||
+ prev->cpus_mask = prev->thread.user_cpus_allowed; \
|
||||
} \
|
||||
next->thread.emulated_fp = 0; \
|
||||
} while(0)
|
||||
diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c
|
||||
index a7c0f97e4b0d..1a08428eedcf 100644
|
||||
--- a/arch/mips/kernel/mips-mt-fpaff.c
|
||||
+++ b/arch/mips/kernel/mips-mt-fpaff.c
|
||||
@@ -177,7 +177,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len,
|
||||
if (retval)
|
||||
goto out_unlock;
|
||||
|
||||
- cpumask_or(&allowed, &p->thread.user_cpus_allowed, &p->cpus_allowed);
|
||||
+ cpumask_or(&allowed, &p->thread.user_cpus_allowed, p->cpus_ptr);
|
||||
cpumask_and(&mask, &allowed, cpu_active_mask);
|
||||
|
||||
out_unlock:
|
||||
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
|
||||
index 9dab0ed1b227..3623cf32f5f4 100644
|
||||
--- a/arch/mips/kernel/traps.c
|
||||
+++ b/arch/mips/kernel/traps.c
|
||||
@@ -1174,12 +1174,12 @@ static void mt_ase_fp_affinity(void)
|
||||
* restricted the allowed set to exclude any CPUs with FPUs,
|
||||
* we'll skip the procedure.
|
||||
*/
|
||||
- if (cpumask_intersects(¤t->cpus_allowed, &mt_fpu_cpumask)) {
|
||||
+ if (cpumask_intersects(¤t->cpus_mask, &mt_fpu_cpumask)) {
|
||||
cpumask_t tmask;
|
||||
|
||||
current->thread.user_cpus_allowed
|
||||
- = current->cpus_allowed;
|
||||
- cpumask_and(&tmask, ¤t->cpus_allowed,
|
||||
+ = current->cpus_mask;
|
||||
+ cpumask_and(&tmask, ¤t->cpus_mask,
|
||||
&mt_fpu_cpumask);
|
||||
set_cpus_allowed_ptr(current, &tmask);
|
||||
set_thread_flag(TIF_FPUBOUND);
|
||||
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
|
||||
index c9ef3c532169..cb10249b1125 100644
|
||||
--- a/arch/powerpc/platforms/cell/spufs/sched.c
|
||||
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
|
||||
@@ -141,7 +141,7 @@ void __spu_update_sched_info(struct spu_context *ctx)
|
||||
* runqueue. The context will be rescheduled on the proper node
|
||||
* if it is timesliced or preempted.
|
||||
*/
|
||||
- cpumask_copy(&ctx->cpus_allowed, ¤t->cpus_allowed);
|
||||
+ cpumask_copy(&ctx->cpus_allowed, current->cpus_ptr);
|
||||
|
||||
/* Save the current cpu id for spu interrupt routing. */
|
||||
ctx->last_ran = raw_smp_processor_id();
|
||||
diff --git a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
|
||||
index a999a58ca331..d6410d0740ea 100644
|
||||
--- a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
|
||||
+++ b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
|
||||
@@ -1445,7 +1445,7 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
* may be scheduled elsewhere and invalidate entries in the
|
||||
* pseudo-locked region.
|
||||
*/
|
||||
- if (!cpumask_subset(¤t->cpus_allowed, &plr->d->cpu_mask)) {
|
||||
+ if (!cpumask_subset(current->cpus_ptr, &plr->d->cpu_mask)) {
|
||||
mutex_unlock(&rdtgroup_mutex);
|
||||
return -EINVAL;
|
||||
}
|
||||
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
|
||||
index 01ed0a667928..2c62de6b5bf1 100644
|
||||
--- a/drivers/infiniband/hw/hfi1/affinity.c
|
||||
+++ b/drivers/infiniband/hw/hfi1/affinity.c
|
||||
@@ -1039,7 +1039,7 @@ int hfi1_get_proc_affinity(int node)
|
||||
struct hfi1_affinity_node *entry;
|
||||
cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask;
|
||||
const struct cpumask *node_mask,
|
||||
- *proc_mask = ¤t->cpus_allowed;
|
||||
+ *proc_mask = current->cpus_ptr;
|
||||
struct hfi1_affinity_node_list *affinity = &node_affinity;
|
||||
struct cpu_mask_set *set = &affinity->proc;
|
||||
|
||||
@@ -1047,7 +1047,7 @@ int hfi1_get_proc_affinity(int node)
|
||||
* check whether process/context affinity has already
|
||||
* been set
|
||||
*/
|
||||
- if (cpumask_weight(proc_mask) == 1) {
|
||||
+ if (current->nr_cpus_allowed == 1) {
|
||||
hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl",
|
||||
current->pid, current->comm,
|
||||
cpumask_pr_args(proc_mask));
|
||||
@@ -1058,7 +1058,7 @@ int hfi1_get_proc_affinity(int node)
|
||||
cpu = cpumask_first(proc_mask);
|
||||
cpumask_set_cpu(cpu, &set->used);
|
||||
goto done;
|
||||
- } else if (cpumask_weight(proc_mask) < cpumask_weight(&set->mask)) {
|
||||
+ } else if (current->nr_cpus_allowed < cpumask_weight(&set->mask)) {
|
||||
hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl",
|
||||
current->pid, current->comm,
|
||||
cpumask_pr_args(proc_mask));
|
||||
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
|
||||
index 291c12f588b5..05e7b28a03c1 100644
|
||||
--- a/drivers/infiniband/hw/hfi1/sdma.c
|
||||
+++ b/drivers/infiniband/hw/hfi1/sdma.c
|
||||
@@ -853,14 +853,13 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
|
||||
{
|
||||
struct sdma_rht_node *rht_node;
|
||||
struct sdma_engine *sde = NULL;
|
||||
- const struct cpumask *current_mask = ¤t->cpus_allowed;
|
||||
unsigned long cpu_id;
|
||||
|
||||
/*
|
||||
* To ensure that always the same sdma engine(s) will be
|
||||
* selected make sure the process is pinned to this CPU only.
|
||||
*/
|
||||
- if (cpumask_weight(current_mask) != 1)
|
||||
+ if (current->nr_cpus_allowed != 1)
|
||||
goto out;
|
||||
|
||||
cpu_id = smp_processor_id();
|
||||
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
|
||||
index 98e1ce14fa2a..5d3828625017 100644
|
||||
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
|
||||
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
|
||||
@@ -1142,7 +1142,7 @@ static __poll_t qib_poll(struct file *fp, struct poll_table_struct *pt)
|
||||
static void assign_ctxt_affinity(struct file *fp, struct qib_devdata *dd)
|
||||
{
|
||||
struct qib_filedata *fd = fp->private_data;
|
||||
- const unsigned int weight = cpumask_weight(¤t->cpus_allowed);
|
||||
+ const unsigned int weight = current->nr_cpus_allowed;
|
||||
const struct cpumask *local_mask = cpumask_of_pcibus(dd->pcidev->bus);
|
||||
int local_cpu;
|
||||
|
||||
@@ -1623,9 +1623,8 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
|
||||
ret = find_free_ctxt(i_minor - 1, fp, uinfo);
|
||||
else {
|
||||
int unit;
|
||||
- const unsigned int cpu = cpumask_first(¤t->cpus_allowed);
|
||||
- const unsigned int weight =
|
||||
- cpumask_weight(¤t->cpus_allowed);
|
||||
+ const unsigned int cpu = cpumask_first(current->cpus_ptr);
|
||||
+ const unsigned int weight = current->nr_cpus_allowed;
|
||||
|
||||
if (weight == 1 && !test_bit(cpu, qib_cpulist))
|
||||
if (!find_hca(cpu, &unit) && unit >= 0)
|
||||
diff --git a/fs/proc/array.c b/fs/proc/array.c
|
||||
index 9eb99a43f849..e4d0cfebaac5 100644
|
||||
--- a/fs/proc/array.c
|
||||
+++ b/fs/proc/array.c
|
||||
@@ -381,9 +381,9 @@ static inline void task_context_switch_counts(struct seq_file *m,
|
||||
static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
|
||||
{
|
||||
seq_printf(m, "Cpus_allowed:\t%*pb\n",
|
||||
- cpumask_pr_args(&task->cpus_allowed));
|
||||
+ cpumask_pr_args(task->cpus_ptr));
|
||||
seq_printf(m, "Cpus_allowed_list:\t%*pbl\n",
|
||||
- cpumask_pr_args(&task->cpus_allowed));
|
||||
+ cpumask_pr_args(task->cpus_ptr));
|
||||
}
|
||||
|
||||
static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm)
|
||||
diff --git a/include/linux/sched.h b/include/linux/sched.h
|
||||
index 0530de9a4efc..4298a87b9de6 100644
|
||||
--- a/include/linux/sched.h
|
||||
+++ b/include/linux/sched.h
|
||||
@@ -660,7 +660,8 @@ struct task_struct {
|
||||
|
||||
unsigned int policy;
|
||||
int nr_cpus_allowed;
|
||||
- cpumask_t cpus_allowed;
|
||||
+ const cpumask_t *cpus_ptr;
|
||||
+ cpumask_t cpus_mask;
|
||||
|
||||
#ifdef CONFIG_PREEMPT_RCU
|
||||
int rcu_read_lock_nesting;
|
||||
@@ -1398,7 +1399,7 @@ extern struct pid *cad_pid;
|
||||
#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
|
||||
#define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */
|
||||
#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
|
||||
-#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */
|
||||
+#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */
|
||||
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
|
||||
#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
|
||||
#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */
|
||||
diff --git a/init/init_task.c b/init/init_task.c
|
||||
index 5aebe3be4d7c..0b49b9cf5571 100644
|
||||
--- a/init/init_task.c
|
||||
+++ b/init/init_task.c
|
||||
@@ -71,7 +71,8 @@ struct task_struct init_task
|
||||
.static_prio = MAX_PRIO - 20,
|
||||
.normal_prio = MAX_PRIO - 20,
|
||||
.policy = SCHED_NORMAL,
|
||||
- .cpus_allowed = CPU_MASK_ALL,
|
||||
+ .cpus_ptr = &init_task.cpus_mask,
|
||||
+ .cpus_mask = CPU_MASK_ALL,
|
||||
.nr_cpus_allowed= NR_CPUS,
|
||||
.mm = NULL,
|
||||
.active_mm = &init_mm,
|
||||
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
|
||||
index ff956ccbb6df..7bb129c5b412 100644
|
||||
--- a/kernel/cgroup/cpuset.c
|
||||
+++ b/kernel/cgroup/cpuset.c
|
||||
@@ -2090,7 +2090,7 @@ static void cpuset_fork(struct task_struct *task)
|
||||
if (task_css_is_root(task, cpuset_cgrp_id))
|
||||
return;
|
||||
|
||||
- set_cpus_allowed_ptr(task, ¤t->cpus_allowed);
|
||||
+ set_cpus_allowed_ptr(task, current->cpus_ptr);
|
||||
task->mems_allowed = current->mems_allowed;
|
||||
}
|
||||
|
||||
diff --git a/kernel/fork.c b/kernel/fork.c
|
||||
index 1a2d18e98bf9..bc182d6fa2a9 100644
|
||||
--- a/kernel/fork.c
|
||||
+++ b/kernel/fork.c
|
||||
@@ -850,6 +850,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
|
||||
#ifdef CONFIG_STACKPROTECTOR
|
||||
tsk->stack_canary = get_random_canary();
|
||||
#endif
|
||||
+ if (orig->cpus_ptr == &orig->cpus_mask)
|
||||
+ tsk->cpus_ptr = &tsk->cpus_mask;
|
||||
|
||||
/*
|
||||
* One for us, one for whoever does the "release_task()" (usually
|
||||
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
|
||||
index 2befd2c4ce9e..07dc66137a26 100644
|
||||
--- a/kernel/sched/core.c
|
||||
+++ b/kernel/sched/core.c
|
||||
@@ -878,7 +878,7 @@ static inline bool is_per_cpu_kthread(struct task_struct *p)
|
||||
*/
|
||||
static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
|
||||
{
|
||||
- if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
|
||||
+ if (!cpumask_test_cpu(cpu, p->cpus_ptr))
|
||||
return false;
|
||||
|
||||
if (is_per_cpu_kthread(p))
|
||||
@@ -973,7 +973,7 @@ static int migration_cpu_stop(void *data)
|
||||
local_irq_disable();
|
||||
/*
|
||||
* We need to explicitly wake pending tasks before running
|
||||
- * __migrate_task() such that we will not miss enforcing cpus_allowed
|
||||
+ * __migrate_task() such that we will not miss enforcing cpus_ptr
|
||||
* during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test.
|
||||
*/
|
||||
sched_ttwu_pending();
|
||||
@@ -1004,7 +1004,7 @@ static int migration_cpu_stop(void *data)
|
||||
*/
|
||||
void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask)
|
||||
{
|
||||
- cpumask_copy(&p->cpus_allowed, new_mask);
|
||||
+ cpumask_copy(&p->cpus_mask, new_mask);
|
||||
p->nr_cpus_allowed = cpumask_weight(new_mask);
|
||||
}
|
||||
|
||||
@@ -1074,7 +1074,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
|
||||
goto out;
|
||||
}
|
||||
|
||||
- if (cpumask_equal(&p->cpus_allowed, new_mask))
|
||||
+ if (cpumask_equal(p->cpus_ptr, new_mask))
|
||||
goto out;
|
||||
|
||||
dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
|
||||
@@ -1237,10 +1237,10 @@ static int migrate_swap_stop(void *data)
|
||||
if (task_cpu(arg->src_task) != arg->src_cpu)
|
||||
goto unlock;
|
||||
|
||||
- if (!cpumask_test_cpu(arg->dst_cpu, &arg->src_task->cpus_allowed))
|
||||
+ if (!cpumask_test_cpu(arg->dst_cpu, arg->src_task->cpus_ptr))
|
||||
goto unlock;
|
||||
|
||||
- if (!cpumask_test_cpu(arg->src_cpu, &arg->dst_task->cpus_allowed))
|
||||
+ if (!cpumask_test_cpu(arg->src_cpu, arg->dst_task->cpus_ptr))
|
||||
goto unlock;
|
||||
|
||||
__migrate_swap_task(arg->src_task, arg->dst_cpu);
|
||||
@@ -1282,10 +1282,10 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p,
|
||||
if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu))
|
||||
goto out;
|
||||
|
||||
- if (!cpumask_test_cpu(arg.dst_cpu, &arg.src_task->cpus_allowed))
|
||||
+ if (!cpumask_test_cpu(arg.dst_cpu, arg.src_task->cpus_ptr))
|
||||
goto out;
|
||||
|
||||
- if (!cpumask_test_cpu(arg.src_cpu, &arg.dst_task->cpus_allowed))
|
||||
+ if (!cpumask_test_cpu(arg.src_cpu, arg.dst_task->cpus_ptr))
|
||||
goto out;
|
||||
|
||||
trace_sched_swap_numa(cur, arg.src_cpu, p, arg.dst_cpu);
|
||||
@@ -1430,7 +1430,7 @@ void kick_process(struct task_struct *p)
|
||||
EXPORT_SYMBOL_GPL(kick_process);
|
||||
|
||||
/*
|
||||
- * ->cpus_allowed is protected by both rq->lock and p->pi_lock
|
||||
+ * ->cpus_ptr is protected by both rq->lock and p->pi_lock
|
||||
*
|
||||
* A few notes on cpu_active vs cpu_online:
|
||||
*
|
||||
@@ -1470,14 +1470,14 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
|
||||
for_each_cpu(dest_cpu, nodemask) {
|
||||
if (!cpu_active(dest_cpu))
|
||||
continue;
|
||||
- if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
|
||||
+ if (cpumask_test_cpu(dest_cpu, p->cpus_ptr))
|
||||
return dest_cpu;
|
||||
}
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
/* Any allowed, online CPU? */
|
||||
- for_each_cpu(dest_cpu, &p->cpus_allowed) {
|
||||
+ for_each_cpu(dest_cpu, p->cpus_ptr) {
|
||||
if (!is_cpu_allowed(p, dest_cpu))
|
||||
continue;
|
||||
|
||||
@@ -1521,7 +1521,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
|
||||
}
|
||||
|
||||
/*
|
||||
- * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable.
|
||||
+ * The caller (fork, wakeup) owns p->pi_lock, ->cpus_ptr is stable.
|
||||
*/
|
||||
static inline
|
||||
int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
|
||||
@@ -1531,11 +1531,11 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
|
||||
if (p->nr_cpus_allowed > 1)
|
||||
cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
|
||||
else
|
||||
- cpu = cpumask_any(&p->cpus_allowed);
|
||||
+ cpu = cpumask_any(p->cpus_ptr);
|
||||
|
||||
/*
|
||||
* In order not to call set_task_cpu() on a blocking task we need
|
||||
- * to rely on ttwu() to place the task on a valid ->cpus_allowed
|
||||
+ * to rely on ttwu() to place the task on a valid ->cpus_ptr
|
||||
* CPU.
|
||||
*
|
||||
* Since this is common to all placement strategies, this lives here.
|
||||
@@ -2402,7 +2402,7 @@ void wake_up_new_task(struct task_struct *p)
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
* Fork balancing, do it here and not earlier because:
|
||||
- * - cpus_allowed can change in the fork path
|
||||
+ * - cpus_ptr can change in the fork path
|
||||
* - any previously selected CPU might disappear through hotplug
|
||||
*
|
||||
* Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq,
|
||||
@@ -4316,7 +4316,7 @@ static int __sched_setscheduler(struct task_struct *p,
|
||||
* the entire root_domain to become SCHED_DEADLINE. We
|
||||
* will also fail if there's no bandwidth available.
|
||||
*/
|
||||
- if (!cpumask_subset(span, &p->cpus_allowed) ||
|
||||
+ if (!cpumask_subset(span, p->cpus_ptr) ||
|
||||
rq->rd->dl_bw.bw == 0) {
|
||||
task_rq_unlock(rq, p, &rf);
|
||||
return -EPERM;
|
||||
@@ -4915,7 +4915,7 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
|
||||
goto out_unlock;
|
||||
|
||||
raw_spin_lock_irqsave(&p->pi_lock, flags);
|
||||
- cpumask_and(mask, &p->cpus_allowed, cpu_active_mask);
|
||||
+ cpumask_and(mask, &p->cpus_mask, cpu_active_mask);
|
||||
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
|
||||
|
||||
out_unlock:
|
||||
@@ -5496,7 +5496,7 @@ int task_can_attach(struct task_struct *p,
|
||||
* allowed nodes is unnecessary. Thus, cpusets are not
|
||||
* applicable for such threads. This prevents checking for
|
||||
* success of set_cpus_allowed_ptr() on all attached tasks
|
||||
- * before cpus_allowed may be changed.
|
||||
+ * before cpus_mask may be changed.
|
||||
*/
|
||||
if (p->flags & PF_NO_SETAFFINITY) {
|
||||
ret = -EINVAL;
|
||||
@@ -5523,7 +5523,7 @@ int migrate_task_to(struct task_struct *p, int target_cpu)
|
||||
if (curr_cpu == target_cpu)
|
||||
return 0;
|
||||
|
||||
- if (!cpumask_test_cpu(target_cpu, &p->cpus_allowed))
|
||||
+ if (!cpumask_test_cpu(target_cpu, p->cpus_ptr))
|
||||
return -EINVAL;
|
||||
|
||||
/* TODO: This is not properly updating schedstats */
|
||||
@@ -5661,7 +5661,7 @@ static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
|
||||
put_prev_task(rq, next);
|
||||
|
||||
/*
|
||||
- * Rules for changing task_struct::cpus_allowed are holding
|
||||
+ * Rules for changing task_struct::cpus_mask are holding
|
||||
* both pi_lock and rq->lock, such that holding either
|
||||
* stabilizes the mask.
|
||||
*
|
||||
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
|
||||
index 50316455ea66..d57fb2f8ae67 100644
|
||||
--- a/kernel/sched/cpudeadline.c
|
||||
+++ b/kernel/sched/cpudeadline.c
|
||||
@@ -124,14 +124,14 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
|
||||
const struct sched_dl_entity *dl_se = &p->dl;
|
||||
|
||||
if (later_mask &&
|
||||
- cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) {
|
||||
+ cpumask_and(later_mask, cp->free_cpus, p->cpus_ptr)) {
|
||||
return 1;
|
||||
} else {
|
||||
int best_cpu = cpudl_maximum(cp);
|
||||
|
||||
WARN_ON(best_cpu != -1 && !cpu_present(best_cpu));
|
||||
|
||||
- if (cpumask_test_cpu(best_cpu, &p->cpus_allowed) &&
|
||||
+ if (cpumask_test_cpu(best_cpu, p->cpus_ptr) &&
|
||||
dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
|
||||
if (later_mask)
|
||||
cpumask_set_cpu(best_cpu, later_mask);
|
||||
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
|
||||
index daaadf939ccb..f7d2c10b4c92 100644
|
||||
--- a/kernel/sched/cpupri.c
|
||||
+++ b/kernel/sched/cpupri.c
|
||||
@@ -98,11 +98,11 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
|
||||
if (skip)
|
||||
continue;
|
||||
|
||||
- if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
|
||||
+ if (cpumask_any_and(p->cpus_ptr, vec->mask) >= nr_cpu_ids)
|
||||
continue;
|
||||
|
||||
if (lowest_mask) {
|
||||
- cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
|
||||
+ cpumask_and(lowest_mask, p->cpus_ptr, vec->mask);
|
||||
|
||||
/*
|
||||
* We have to ensure that we have at least one bit
|
||||
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
|
||||
index ebec37cb3be9..4b13df38c069 100644
|
||||
--- a/kernel/sched/deadline.c
|
||||
+++ b/kernel/sched/deadline.c
|
||||
@@ -539,7 +539,7 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p
|
||||
* If we cannot preempt any rq, fall back to pick any
|
||||
* online CPU:
|
||||
*/
|
||||
- cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
|
||||
+ cpu = cpumask_any_and(cpu_active_mask, p->cpus_ptr);
|
||||
if (cpu >= nr_cpu_ids) {
|
||||
/*
|
||||
* Failed to find any suitable CPU.
|
||||
@@ -1856,7 +1856,7 @@ static void set_curr_task_dl(struct rq *rq)
|
||||
static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
|
||||
{
|
||||
if (!task_running(rq, p) &&
|
||||
- cpumask_test_cpu(cpu, &p->cpus_allowed))
|
||||
+ cpumask_test_cpu(cpu, p->cpus_ptr))
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
@@ -2006,7 +2006,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
|
||||
/* Retry if something changed. */
|
||||
if (double_lock_balance(rq, later_rq)) {
|
||||
if (unlikely(task_rq(task) != rq ||
|
||||
- !cpumask_test_cpu(later_rq->cpu, &task->cpus_allowed) ||
|
||||
+ !cpumask_test_cpu(later_rq->cpu, task->cpus_ptr) ||
|
||||
task_running(rq, task) ||
|
||||
!dl_task(task) ||
|
||||
!task_on_rq_queued(task))) {
|
||||
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
|
||||
index 0f1ba3d72336..27f9f9a785c1 100644
|
||||
--- a/kernel/sched/fair.c
|
||||
+++ b/kernel/sched/fair.c
|
||||
@@ -1678,7 +1678,7 @@ static void task_numa_compare(struct task_numa_env *env,
|
||||
* be incurred if the tasks were swapped.
|
||||
*/
|
||||
/* Skip this swap candidate if cannot move to the source cpu */
|
||||
- if (!cpumask_test_cpu(env->src_cpu, &cur->cpus_allowed))
|
||||
+ if (!cpumask_test_cpu(env->src_cpu, cur->cpus_ptr))
|
||||
goto unlock;
|
||||
|
||||
/*
|
||||
@@ -1776,7 +1776,7 @@ static void task_numa_find_cpu(struct task_numa_env *env,
|
||||
|
||||
for_each_cpu(cpu, cpumask_of_node(env->dst_nid)) {
|
||||
/* Skip this CPU if the source task cannot migrate */
|
||||
- if (!cpumask_test_cpu(cpu, &env->p->cpus_allowed))
|
||||
+ if (!cpumask_test_cpu(cpu, env->p->cpus_ptr))
|
||||
continue;
|
||||
|
||||
env->dst_cpu = cpu;
|
||||
@@ -5782,7 +5782,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
|
||||
|
||||
/* Skip over this group if it has no CPUs allowed */
|
||||
if (!cpumask_intersects(sched_group_span(group),
|
||||
- &p->cpus_allowed))
|
||||
+ p->cpus_ptr))
|
||||
continue;
|
||||
|
||||
local_group = cpumask_test_cpu(this_cpu,
|
||||
@@ -5914,7 +5914,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
|
||||
return cpumask_first(sched_group_span(group));
|
||||
|
||||
/* Traverse only the allowed CPUs */
|
||||
- for_each_cpu_and(i, sched_group_span(group), &p->cpus_allowed) {
|
||||
+ for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) {
|
||||
if (available_idle_cpu(i)) {
|
||||
struct rq *rq = cpu_rq(i);
|
||||
struct cpuidle_state *idle = idle_get_state(rq);
|
||||
@@ -5954,7 +5954,7 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
|
||||
{
|
||||
int new_cpu = cpu;
|
||||
|
||||
- if (!cpumask_intersects(sched_domain_span(sd), &p->cpus_allowed))
|
||||
+ if (!cpumask_intersects(sched_domain_span(sd), p->cpus_ptr))
|
||||
return prev_cpu;
|
||||
|
||||
/*
|
||||
@@ -6071,7 +6071,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
|
||||
if (!test_idle_cores(target, false))
|
||||
return -1;
|
||||
|
||||
- cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed);
|
||||
+ cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
|
||||
|
||||
for_each_cpu_wrap(core, cpus, target) {
|
||||
bool idle = true;
|
||||
@@ -6105,7 +6105,7 @@ static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int t
|
||||
return -1;
|
||||
|
||||
for_each_cpu(cpu, cpu_smt_mask(target)) {
|
||||
- if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
|
||||
+ if (!cpumask_test_cpu(cpu, p->cpus_ptr))
|
||||
continue;
|
||||
if (available_idle_cpu(cpu))
|
||||
return cpu;
|
||||
@@ -6168,7 +6168,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
|
||||
for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
|
||||
if (!--nr)
|
||||
return -1;
|
||||
- if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
|
||||
+ if (!cpumask_test_cpu(cpu, p->cpus_ptr))
|
||||
continue;
|
||||
if (available_idle_cpu(cpu))
|
||||
break;
|
||||
@@ -6205,7 +6205,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
|
||||
recent_used_cpu != target &&
|
||||
cpus_share_cache(recent_used_cpu, target) &&
|
||||
available_idle_cpu(recent_used_cpu) &&
|
||||
- cpumask_test_cpu(p->recent_used_cpu, &p->cpus_allowed)) {
|
||||
+ cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr)) {
|
||||
/*
|
||||
* Replace recent_used_cpu with prev as it is a potential
|
||||
* candidate for the next wake:
|
||||
@@ -6423,7 +6423,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
|
||||
if (sd_flag & SD_BALANCE_WAKE) {
|
||||
record_wakee(p);
|
||||
want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu)
|
||||
- && cpumask_test_cpu(cpu, &p->cpus_allowed);
|
||||
+ && cpumask_test_cpu(cpu, p->cpus_ptr);
|
||||
}
|
||||
|
||||
rcu_read_lock();
|
||||
@@ -7162,14 +7162,14 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
|
||||
/*
|
||||
* We do not migrate tasks that are:
|
||||
* 1) throttled_lb_pair, or
|
||||
- * 2) cannot be migrated to this CPU due to cpus_allowed, or
|
||||
+ * 2) cannot be migrated to this CPU due to cpus_ptr, or
|
||||
* 3) running (obviously), or
|
||||
* 4) are cache-hot on their current CPU.
|
||||
*/
|
||||
if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
|
||||
return 0;
|
||||
|
||||
- if (!cpumask_test_cpu(env->dst_cpu, &p->cpus_allowed)) {
|
||||
+ if (!cpumask_test_cpu(env->dst_cpu, p->cpus_ptr)) {
|
||||
int cpu;
|
||||
|
||||
schedstat_inc(p->se.statistics.nr_failed_migrations_affine);
|
||||
@@ -7189,7 +7189,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
|
||||
|
||||
/* Prevent to re-select dst_cpu via env's CPUs: */
|
||||
for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) {
|
||||
- if (cpumask_test_cpu(cpu, &p->cpus_allowed)) {
|
||||
+ if (cpumask_test_cpu(cpu, p->cpus_ptr)) {
|
||||
env->flags |= LBF_DST_PINNED;
|
||||
env->new_dst_cpu = cpu;
|
||||
break;
|
||||
@@ -7786,7 +7786,7 @@ check_cpu_capacity(struct rq *rq, struct sched_domain *sd)
|
||||
|
||||
/*
|
||||
* Group imbalance indicates (and tries to solve) the problem where balancing
|
||||
- * groups is inadequate due to ->cpus_allowed constraints.
|
||||
+ * groups is inadequate due to ->cpus_ptr constraints.
|
||||
*
|
||||
* Imagine a situation of two groups of 4 CPUs each and 4 tasks each with a
|
||||
* cpumask covering 1 CPU of the first group and 3 CPUs of the second group.
|
||||
@@ -8401,7 +8401,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
|
||||
/*
|
||||
* If the busiest group is imbalanced the below checks don't
|
||||
* work because they assume all things are equal, which typically
|
||||
- * isn't true due to cpus_allowed constraints and the like.
|
||||
+ * isn't true due to cpus_ptr constraints and the like.
|
||||
*/
|
||||
if (busiest->group_type == group_imbalanced)
|
||||
goto force_balance;
|
||||
@@ -8797,7 +8797,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
|
||||
* if the curr task on busiest CPU can't be
|
||||
* moved to this_cpu:
|
||||
*/
|
||||
- if (!cpumask_test_cpu(this_cpu, &busiest->curr->cpus_allowed)) {
|
||||
+ if (!cpumask_test_cpu(this_cpu, busiest->curr->cpus_ptr)) {
|
||||
raw_spin_unlock_irqrestore(&busiest->lock,
|
||||
flags);
|
||||
env.flags |= LBF_ALL_PINNED;
|
||||
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
|
||||
index b980cc96604f..b6ca4a630050 100644
|
||||
--- a/kernel/sched/rt.c
|
||||
+++ b/kernel/sched/rt.c
|
||||
@@ -1611,7 +1611,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
|
||||
static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
|
||||
{
|
||||
if (!task_running(rq, p) &&
|
||||
- cpumask_test_cpu(cpu, &p->cpus_allowed))
|
||||
+ cpumask_test_cpu(cpu, p->cpus_ptr))
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
@@ -1748,7 +1748,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
|
||||
* Also make sure that it wasn't scheduled on its rq.
|
||||
*/
|
||||
if (unlikely(task_rq(task) != rq ||
|
||||
- !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_allowed) ||
|
||||
+ !cpumask_test_cpu(lowest_rq->cpu, task->cpus_ptr) ||
|
||||
task_running(rq, task) ||
|
||||
!rt_task(task) ||
|
||||
!task_on_rq_queued(task))) {
|
||||
diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c
|
||||
index 8030e24dbf14..862f4b0139fc 100644
|
||||
--- a/kernel/trace/trace_hwlat.c
|
||||
+++ b/kernel/trace/trace_hwlat.c
|
||||
@@ -279,7 +279,7 @@ static void move_to_next_cpu(void)
|
||||
* of this thread, than stop migrating for the duration
|
||||
* of the current test.
|
||||
*/
|
||||
- if (!cpumask_equal(current_mask, ¤t->cpus_allowed))
|
||||
+ if (!cpumask_equal(current_mask, current->cpus_ptr))
|
||||
goto disable;
|
||||
|
||||
get_online_cpus();
|
||||
diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
|
||||
index 85925aaa4fff..fb35c45b9421 100644
|
||||
--- a/lib/smp_processor_id.c
|
||||
+++ b/lib/smp_processor_id.c
|
||||
@@ -22,7 +22,7 @@ notrace static unsigned int check_preemption_disabled(const char *what1,
|
||||
* Kernel threads bound to a single CPU can safely use
|
||||
* smp_processor_id():
|
||||
*/
|
||||
- if (cpumask_equal(¤t->cpus_allowed, cpumask_of(this_cpu)))
|
||||
+ if (cpumask_equal(current->cpus_ptr, cpumask_of(this_cpu)))
|
||||
goto out;
|
||||
|
||||
/*
|
||||
diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c
|
||||
index 5522692100ba..8b4be8e1802a 100644
|
||||
--- a/samples/trace_events/trace-events-sample.c
|
||||
+++ b/samples/trace_events/trace-events-sample.c
|
||||
@@ -33,7 +33,7 @@ static void simple_thread_func(int cnt)
|
||||
|
||||
/* Silly tracepoints */
|
||||
trace_foo_bar("hello", cnt, array, random_strings[len],
|
||||
- ¤t->cpus_allowed);
|
||||
+ current->cpus_ptr);
|
||||
|
||||
trace_foo_with_template_simple("HELLO", cnt);
|
||||
|
||||
--
|
||||
2.25.1
|
||||
|
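The access rule stated in the message above ('use' goes through the pointer, 'modify' goes through the mask) can be illustrated with a small sketch. The helper names below are hypothetical and not taken from the patch; the modify side simply mirrors what set_cpus_allowed_common() does after the conversion.

/*
 * Sketch only (hypothetical helpers): readers of the affinity mask
 * dereference p->cpus_ptr, writers update p->cpus_mask, so a
 * migrate-disabled task can temporarily point cpus_ptr at a single CPU
 * without losing the user-visible mask.
 */
#include <linux/sched.h>
#include <linux/cpumask.h>

static bool example_can_run_on(struct task_struct *p, int cpu)
{
        return cpumask_test_cpu(cpu, p->cpus_ptr);      /* 'use' side: pointer */
}

static void example_set_affinity(struct task_struct *p,
                                 const struct cpumask *new_mask)
{
        cpumask_copy(&p->cpus_mask, new_mask);          /* 'modify' side: mask */
        p->nr_cpus_allowed = cpumask_weight(new_mask);
}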
@ -1,265 +0,0 @@
|
||||
From 9f9cd889fa22fc1e25802f565f7210b271d136a2 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Sat, 27 May 2017 19:02:06 +0200
Subject: [PATCH 027/328] kernel/sched/core: add migrate_disable()

---
 include/linux/preempt.h |  23 +++++++
 include/linux/sched.h   |   7 +++
 include/linux/smp.h     |   3 +
 kernel/sched/core.c     | 130 +++++++++++++++++++++++++++++++++++++++-
 kernel/sched/debug.c    |   4 ++
 5 files changed, 165 insertions(+), 2 deletions(-)

diff --git a/include/linux/preempt.h b/include/linux/preempt.h
|
||||
index c01813c3fbe9..3196d0e76719 100644
|
||||
--- a/include/linux/preempt.h
|
||||
+++ b/include/linux/preempt.h
|
||||
@@ -185,6 +185,22 @@ do { \
|
||||
|
||||
#define preemptible() (preempt_count() == 0 && !irqs_disabled())
|
||||
|
||||
+#ifdef CONFIG_SMP
|
||||
+
|
||||
+extern void migrate_disable(void);
|
||||
+extern void migrate_enable(void);
|
||||
+
|
||||
+int __migrate_disabled(struct task_struct *p);
|
||||
+
|
||||
+#else
|
||||
+#define migrate_disable() barrier()
|
||||
+#define migrate_enable() barrier()
|
||||
+static inline int __migrate_disabled(struct task_struct *p)
|
||||
+{
|
||||
+ return 0;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
#ifdef CONFIG_PREEMPT
|
||||
#define preempt_enable() \
|
||||
do { \
|
||||
@@ -253,6 +269,13 @@ do { \
|
||||
#define preempt_enable_notrace() barrier()
|
||||
#define preemptible() 0
|
||||
|
||||
+#define migrate_disable() barrier()
|
||||
+#define migrate_enable() barrier()
|
||||
+
|
||||
+static inline int __migrate_disabled(struct task_struct *p)
|
||||
+{
|
||||
+ return 0;
|
||||
+}
|
||||
#endif /* CONFIG_PREEMPT_COUNT */
|
||||
|
||||
#ifdef MODULE
|
||||
diff --git a/include/linux/sched.h b/include/linux/sched.h
|
||||
index 4298a87b9de6..0489d3e0e78c 100644
|
||||
--- a/include/linux/sched.h
|
||||
+++ b/include/linux/sched.h
|
||||
@@ -662,6 +662,13 @@ struct task_struct {
|
||||
int nr_cpus_allowed;
|
||||
const cpumask_t *cpus_ptr;
|
||||
cpumask_t cpus_mask;
|
||||
+#if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP)
|
||||
+ int migrate_disable;
|
||||
+ int migrate_disable_update;
|
||||
+# ifdef CONFIG_SCHED_DEBUG
|
||||
+ int migrate_disable_atomic;
|
||||
+# endif
|
||||
+#endif
|
||||
|
||||
#ifdef CONFIG_PREEMPT_RCU
|
||||
int rcu_read_lock_nesting;
|
||||
diff --git a/include/linux/smp.h b/include/linux/smp.h
|
||||
index 9fb239e12b82..5801e516ba63 100644
|
||||
--- a/include/linux/smp.h
|
||||
+++ b/include/linux/smp.h
|
||||
@@ -202,6 +202,9 @@ static inline int get_boot_cpu_id(void)
|
||||
#define get_cpu() ({ preempt_disable(); smp_processor_id(); })
|
||||
#define put_cpu() preempt_enable()
|
||||
|
||||
+#define get_cpu_light() ({ migrate_disable(); smp_processor_id(); })
|
||||
+#define put_cpu_light() migrate_enable()
|
||||
+
|
||||
/*
|
||||
* Callback to arch code if there's nosmp or maxcpus=0 on the
|
||||
* boot command line:
|
||||
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 07dc66137a26..d0450f06612c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1008,7 +1008,15 @@ void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_ma
p->nr_cpus_allowed = cpumask_weight(new_mask);
}

-void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+#if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP)
+int __migrate_disabled(struct task_struct *p)
+{
+ return p->migrate_disable;
+}
+#endif
+
+static void __do_set_cpus_allowed_tail(struct task_struct *p,
+ const struct cpumask *new_mask)
{
struct rq *rq = task_rq(p);
bool queued, running;
@@ -1037,6 +1045,20 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
set_curr_task(rq, p);
}

+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+{
+#if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP)
+ if (__migrate_disabled(p)) {
+ lockdep_assert_held(&p->pi_lock);
+
+ cpumask_copy(&p->cpus_mask, new_mask);
+ p->migrate_disable_update = 1;
+ return;
+ }
+#endif
+ __do_set_cpus_allowed_tail(p, new_mask);
+}
+
/*
* Change a given task's CPU affinity. Migrate the thread to a
* proper CPU and schedule it away if the CPU it's executing on
@@ -1096,9 +1118,16 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
}

/* Can the task run on the task's current CPU? If so, we're done */
- if (cpumask_test_cpu(task_cpu(p), new_mask))
+ if (cpumask_test_cpu(task_cpu(p), new_mask) || __migrate_disabled(p))
goto out;

+#if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP)
+ if (__migrate_disabled(p)) {
+ p->migrate_disable_update = 1;
+ goto out;
+ }
+#endif
+
if (task_running(rq, p) || p->state == TASK_WAKING) {
struct migration_arg arg = { p, dest_cpu };
/* Need help from migration thread: drop lock and wait. */
@@ -7105,3 +7134,100 @@ const u32 sched_prio_to_wmult[40] = {
};

#undef CREATE_TRACE_POINTS
+
+#if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP)
+
+void migrate_disable(void)
+{
+ struct task_struct *p = current;
+
+ if (in_atomic() || irqs_disabled()) {
+#ifdef CONFIG_SCHED_DEBUG
+ p->migrate_disable_atomic++;
+#endif
+ return;
+ }
+#ifdef CONFIG_SCHED_DEBUG
+ WARN_ON_ONCE(p->migrate_disable_atomic);
+#endif
+
+ if (p->migrate_disable) {
+ p->migrate_disable++;
+ return;
+ }
+
+ preempt_disable();
+ p->migrate_disable = 1;
+
+ p->cpus_ptr = cpumask_of(smp_processor_id());
+ p->nr_cpus_allowed = 1;
+
+ preempt_enable();
+}
+EXPORT_SYMBOL(migrate_disable);
+
+void migrate_enable(void)
+{
+ struct task_struct *p = current;
+
+ if (in_atomic() || irqs_disabled()) {
+#ifdef CONFIG_SCHED_DEBUG
+ p->migrate_disable_atomic--;
+#endif
+ return;
+ }
+
+#ifdef CONFIG_SCHED_DEBUG
+ WARN_ON_ONCE(p->migrate_disable_atomic);
+#endif
+
+ WARN_ON_ONCE(p->migrate_disable <= 0);
+ if (p->migrate_disable > 1) {
+ p->migrate_disable--;
+ return;
+ }
+
+ preempt_disable();
+
+ p->cpus_ptr = &p->cpus_mask;
+ p->nr_cpus_allowed = cpumask_weight(&p->cpus_mask);
+ p->migrate_disable = 0;
+
+ if (p->migrate_disable_update) {
+ struct rq *rq;
+ struct rq_flags rf;
+
+ rq = task_rq_lock(p, &rf);
+ update_rq_clock(rq);
+
+ __do_set_cpus_allowed_tail(p, &p->cpus_mask);
+ task_rq_unlock(rq, p, &rf);
+
+ p->migrate_disable_update = 0;
+
+ WARN_ON(smp_processor_id() != task_cpu(p));
+ if (!cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) {
+ const struct cpumask *cpu_valid_mask = cpu_active_mask;
+ struct migration_arg arg;
+ unsigned int dest_cpu;
+
+ if (p->flags & PF_KTHREAD) {
+ /*
+ * Kernel threads are allowed on online && !active CPUs
+ */
+ cpu_valid_mask = cpu_online_mask;
+ }
+ dest_cpu = cpumask_any_and(cpu_valid_mask, &p->cpus_mask);
+ arg.task = p;
+ arg.dest_cpu = dest_cpu;
+
+ preempt_enable();
+ stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
+ tlb_migrate_finish(p->mm);
+ return;
+ }
+ }
+ preempt_enable();
+}
+EXPORT_SYMBOL(migrate_enable);
+#endif
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 78fadf0438ea..5027158d3908 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -982,6 +982,10 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
P(dl.runtime);
P(dl.deadline);
}
+#if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP)
+ P(migrate_disable);
+#endif
+ P(nr_cpus_allowed);
#undef PN_SCHEDSTAT
#undef PN
#undef __PN
--
2.25.1

@ -1,37 +0,0 @@
From db2220843fd1c19c7b89db5f6e20382b5622fa05 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 9 Oct 2018 17:34:50 +0200
Subject: [PATCH 028/328] sched/migrate_disable: Add export_symbol_gpl for
__migrate_disabled

Jonathan reported that lttng/modules can't use __migrate_disabled().
This function is only used by sched/core itself and the tracing
infrastructure to report the migrate counter (lttng probably does the
same). Since the rework of migrate_disable() it moved from sched.h to
preempt.h and became an exported function instead of a "static
inline" due to the header recursion of preempt vs sched.

Since the compiler inlines the function for sched/core usage, add an
EXPORT_SYMBOL_GPL to allow the module/LTTNG usage.

Reported-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/sched/core.c | 1 +
1 file changed, 1 insertion(+)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d0450f06612c..e6022cc2605b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1013,6 +1013,7 @@ int __migrate_disabled(struct task_struct *p)
{
return p->migrate_disable;
}
+EXPORT_SYMBOL_GPL(__migrate_disabled);
#endif

static void __do_set_cpus_allowed_tail(struct task_struct *p,
--
2.25.1

@ -1,97 +0,0 @@
|
||||
From b978b0a313d26ed5e51a9120c8744385a99e541a Mon Sep 17 00:00:00 2001
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Wed, 9 Mar 2016 10:51:06 +0100
|
||||
Subject: [PATCH 029/328] arm: at91: do not disable/enable clocks in a row
|
||||
|
||||
Currently the driver will disable the clock and enable it one line later
|
||||
if it is switching from periodic mode into one shot.
|
||||
This can be avoided and causes a needless warning on -RT.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
drivers/clocksource/tcb_clksrc.c | 33 ++++++++++++++++++++++++++++----
|
||||
1 file changed, 29 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c
|
||||
index 43f4d5c4d6fa..de6baf564dfe 100644
|
||||
--- a/drivers/clocksource/tcb_clksrc.c
|
||||
+++ b/drivers/clocksource/tcb_clksrc.c
|
||||
@@ -126,6 +126,7 @@ static struct clocksource clksrc = {
|
||||
struct tc_clkevt_device {
|
||||
struct clock_event_device clkevt;
|
||||
struct clk *clk;
|
||||
+ bool clk_enabled;
|
||||
void __iomem *regs;
|
||||
};
|
||||
|
||||
@@ -143,6 +144,24 @@ static struct tc_clkevt_device *to_tc_clkevt(struct clock_event_device *clkevt)
|
||||
*/
|
||||
static u32 timer_clock;
|
||||
|
||||
+static void tc_clk_disable(struct clock_event_device *d)
|
||||
+{
|
||||
+ struct tc_clkevt_device *tcd = to_tc_clkevt(d);
|
||||
+
|
||||
+ clk_disable(tcd->clk);
|
||||
+ tcd->clk_enabled = false;
|
||||
+}
|
||||
+
|
||||
+static void tc_clk_enable(struct clock_event_device *d)
|
||||
+{
|
||||
+ struct tc_clkevt_device *tcd = to_tc_clkevt(d);
|
||||
+
|
||||
+ if (tcd->clk_enabled)
|
||||
+ return;
|
||||
+ clk_enable(tcd->clk);
|
||||
+ tcd->clk_enabled = true;
|
||||
+}
|
||||
+
|
||||
static int tc_shutdown(struct clock_event_device *d)
|
||||
{
|
||||
struct tc_clkevt_device *tcd = to_tc_clkevt(d);
|
||||
@@ -150,8 +169,14 @@ static int tc_shutdown(struct clock_event_device *d)
|
||||
|
||||
writel(0xff, regs + ATMEL_TC_REG(2, IDR));
|
||||
writel(ATMEL_TC_CLKDIS, regs + ATMEL_TC_REG(2, CCR));
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int tc_shutdown_clk_off(struct clock_event_device *d)
|
||||
+{
|
||||
+ tc_shutdown(d);
|
||||
if (!clockevent_state_detached(d))
|
||||
- clk_disable(tcd->clk);
|
||||
+ tc_clk_disable(d);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -164,7 +189,7 @@ static int tc_set_oneshot(struct clock_event_device *d)
|
||||
if (clockevent_state_oneshot(d) || clockevent_state_periodic(d))
|
||||
tc_shutdown(d);
|
||||
|
||||
- clk_enable(tcd->clk);
|
||||
+ tc_clk_enable(d);
|
||||
|
||||
/* slow clock, count up to RC, then irq and stop */
|
||||
writel(timer_clock | ATMEL_TC_CPCSTOP | ATMEL_TC_WAVE |
|
||||
@@ -186,7 +211,7 @@ static int tc_set_periodic(struct clock_event_device *d)
|
||||
/* By not making the gentime core emulate periodic mode on top
|
||||
* of oneshot, we get lower overhead and improved accuracy.
|
||||
*/
|
||||
- clk_enable(tcd->clk);
|
||||
+ tc_clk_enable(d);
|
||||
|
||||
/* slow clock, count up to RC, then irq and restart */
|
||||
writel(timer_clock | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO,
|
||||
@@ -220,7 +245,7 @@ static struct tc_clkevt_device clkevt = {
|
||||
/* Should be lower than at91rm9200's system timer */
|
||||
.rating = 125,
|
||||
.set_next_event = tc_next_event,
|
||||
- .set_state_shutdown = tc_shutdown,
|
||||
+ .set_state_shutdown = tc_shutdown_clk_off,
|
||||
.set_state_periodic = tc_set_periodic,
|
||||
.set_state_oneshot = tc_set_oneshot,
|
||||
},
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,169 +0,0 @@
|
||||
From 8549d4577f0573dddbc34e310c4310920a6bb714 Mon Sep 17 00:00:00 2001
|
||||
From: Benedikt Spranger <b.spranger@linutronix.de>
|
||||
Date: Mon, 8 Mar 2010 18:57:04 +0100
|
||||
Subject: [PATCH 030/328] clocksource: TCLIB: Allow higher clock rates for
|
||||
clock events
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
As default the TCLIB uses the 32KiHz base clock rate for clock events.
|
||||
Add a compile time selection to allow higher clock resolution.
|
||||
|
||||
(fixed up by Sami Pietikäinen <Sami.Pietikainen@wapice.com>)
|
||||
|
||||
Signed-off-by: Benedikt Spranger <b.spranger@linutronix.de>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
drivers/clocksource/tcb_clksrc.c | 36 +++++++++++++++++++-------------
|
||||
drivers/misc/Kconfig | 12 +++++++++--
|
||||
2 files changed, 31 insertions(+), 17 deletions(-)
|
||||
|
||||
diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c
|
||||
index de6baf564dfe..ba15242a6066 100644
|
||||
--- a/drivers/clocksource/tcb_clksrc.c
|
||||
+++ b/drivers/clocksource/tcb_clksrc.c
|
||||
@@ -25,8 +25,7 @@
|
||||
* this 32 bit free-running counter. the second channel is not used.
|
||||
*
|
||||
* - The third channel may be used to provide a 16-bit clockevent
|
||||
- * source, used in either periodic or oneshot mode. This runs
|
||||
- * at 32 KiHZ, and can handle delays of up to two seconds.
|
||||
+ * source, used in either periodic or oneshot mode.
|
||||
*
|
||||
* A boot clocksource and clockevent source are also currently needed,
|
||||
* unless the relevant platforms (ARM/AT91, AVR32/AT32) are changed so
|
||||
@@ -127,6 +126,7 @@ struct tc_clkevt_device {
|
||||
struct clock_event_device clkevt;
|
||||
struct clk *clk;
|
||||
bool clk_enabled;
|
||||
+ u32 freq;
|
||||
void __iomem *regs;
|
||||
};
|
||||
|
||||
@@ -135,13 +135,6 @@ static struct tc_clkevt_device *to_tc_clkevt(struct clock_event_device *clkevt)
|
||||
return container_of(clkevt, struct tc_clkevt_device, clkevt);
|
||||
}
|
||||
|
||||
-/* For now, we always use the 32K clock ... this optimizes for NO_HZ,
|
||||
- * because using one of the divided clocks would usually mean the
|
||||
- * tick rate can never be less than several dozen Hz (vs 0.5 Hz).
|
||||
- *
|
||||
- * A divided clock could be good for high resolution timers, since
|
||||
- * 30.5 usec resolution can seem "low".
|
||||
- */
|
||||
static u32 timer_clock;
|
||||
|
||||
static void tc_clk_disable(struct clock_event_device *d)
|
||||
@@ -191,7 +184,7 @@ static int tc_set_oneshot(struct clock_event_device *d)
|
||||
|
||||
tc_clk_enable(d);
|
||||
|
||||
- /* slow clock, count up to RC, then irq and stop */
|
||||
+ /* count up to RC, then irq and stop */
|
||||
writel(timer_clock | ATMEL_TC_CPCSTOP | ATMEL_TC_WAVE |
|
||||
ATMEL_TC_WAVESEL_UP_AUTO, regs + ATMEL_TC_REG(2, CMR));
|
||||
writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER));
|
||||
@@ -213,10 +206,10 @@ static int tc_set_periodic(struct clock_event_device *d)
|
||||
*/
|
||||
tc_clk_enable(d);
|
||||
|
||||
- /* slow clock, count up to RC, then irq and restart */
|
||||
+ /* count up to RC, then irq and restart */
|
||||
writel(timer_clock | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO,
|
||||
regs + ATMEL_TC_REG(2, CMR));
|
||||
- writel((32768 + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC));
|
||||
+ writel((tcd->freq + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC));
|
||||
|
||||
/* Enable clock and interrupts on RC compare */
|
||||
writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER));
|
||||
@@ -243,7 +236,11 @@ static struct tc_clkevt_device clkevt = {
|
||||
.features = CLOCK_EVT_FEAT_PERIODIC |
|
||||
CLOCK_EVT_FEAT_ONESHOT,
|
||||
/* Should be lower than at91rm9200's system timer */
|
||||
+#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
|
||||
.rating = 125,
|
||||
+#else
|
||||
+ .rating = 200,
|
||||
+#endif
|
||||
.set_next_event = tc_next_event,
|
||||
.set_state_shutdown = tc_shutdown_clk_off,
|
||||
.set_state_periodic = tc_set_periodic,
|
||||
@@ -265,8 +262,9 @@ static irqreturn_t ch2_irq(int irq, void *handle)
|
||||
return IRQ_NONE;
|
||||
}
|
||||
|
||||
-static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx)
|
||||
+static int __init setup_clkevents(struct atmel_tc *tc, int divisor_idx)
|
||||
{
|
||||
+ unsigned divisor = atmel_tc_divisors[divisor_idx];
|
||||
int ret;
|
||||
struct clk *t2_clk = tc->clk[2];
|
||||
int irq = tc->irq[2];
|
||||
@@ -287,7 +285,11 @@ static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx)
|
||||
clkevt.regs = tc->regs;
|
||||
clkevt.clk = t2_clk;
|
||||
|
||||
- timer_clock = clk32k_divisor_idx;
|
||||
+ timer_clock = divisor_idx;
|
||||
+ if (!divisor)
|
||||
+ clkevt.freq = 32768;
|
||||
+ else
|
||||
+ clkevt.freq = clk_get_rate(t2_clk) / divisor;
|
||||
|
||||
clkevt.clkevt.cpumask = cpumask_of(0);
|
||||
|
||||
@@ -298,7 +300,7 @@ static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx)
|
||||
return ret;
|
||||
}
|
||||
|
||||
- clockevents_config_and_register(&clkevt.clkevt, 32768, 1, 0xffff);
|
||||
+ clockevents_config_and_register(&clkevt.clkevt, clkevt.freq, 1, 0xffff);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -435,7 +437,11 @@ static int __init tcb_clksrc_init(void)
|
||||
goto err_disable_t1;
|
||||
|
||||
/* channel 2: periodic and oneshot timer support */
|
||||
+#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
|
||||
ret = setup_clkevents(tc, clk32k_divisor_idx);
|
||||
+#else
|
||||
+ ret = setup_clkevents(tc, best_divisor_idx);
|
||||
+#endif
|
||||
if (ret)
|
||||
goto err_unregister_clksrc;
|
||||
|
||||
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
|
||||
index 3726eacdf65d..0900dec7ec04 100644
|
||||
--- a/drivers/misc/Kconfig
|
||||
+++ b/drivers/misc/Kconfig
|
||||
@@ -69,8 +69,7 @@ config ATMEL_TCB_CLKSRC
|
||||
are combined to make a single 32-bit timer.
|
||||
|
||||
When GENERIC_CLOCKEVENTS is defined, the third timer channel
|
||||
- may be used as a clock event device supporting oneshot mode
|
||||
- (delays of up to two seconds) based on the 32 KiHz clock.
|
||||
+ may be used as a clock event device supporting oneshot mode.
|
||||
|
||||
config ATMEL_TCB_CLKSRC_BLOCK
|
||||
int
|
||||
@@ -83,6 +82,15 @@ config ATMEL_TCB_CLKSRC_BLOCK
|
||||
TC can be used for other purposes, such as PWM generation and
|
||||
interval timing.
|
||||
|
||||
+config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
|
||||
+ bool "TC Block use 32 KiHz clock"
|
||||
+ depends on ATMEL_TCB_CLKSRC
|
||||
+ default y
|
||||
+ help
|
||||
+ Select this to use 32 KiHz base clock rate as TC block clock
|
||||
+ source for clock events.
|
||||
+
|
||||
+
|
||||
config DUMMY_IRQ
|
||||
tristate "Dummy IRQ handler"
|
||||
default n
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,170 +0,0 @@
|
||||
From 4948d026b217faebc43bfe4a21cb3c5e781508ad Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Thu, 14 Feb 2013 22:36:59 +0100
|
||||
Subject: [PATCH 031/328] timekeeping: Split jiffies seqlock
|
||||
|
||||
Replace jiffies_lock seqlock with a simple seqcounter and a rawlock so
|
||||
it can be taken in atomic context on RT.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
kernel/time/jiffies.c | 7 ++++---
|
||||
kernel/time/tick-common.c | 10 ++++++----
|
||||
kernel/time/tick-sched.c | 19 ++++++++++++-------
|
||||
kernel/time/timekeeping.c | 6 ++++--
|
||||
kernel/time/timekeeping.h | 3 ++-
|
||||
5 files changed, 28 insertions(+), 17 deletions(-)
|
||||
|
||||
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
|
||||
index 497719127bf9..62acb8914c9e 100644
|
||||
--- a/kernel/time/jiffies.c
|
||||
+++ b/kernel/time/jiffies.c
|
||||
@@ -74,7 +74,8 @@ static struct clocksource clocksource_jiffies = {
|
||||
.max_cycles = 10,
|
||||
};
|
||||
|
||||
-__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);
|
||||
+__cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(jiffies_lock);
|
||||
+__cacheline_aligned_in_smp seqcount_t jiffies_seq;
|
||||
|
||||
#if (BITS_PER_LONG < 64)
|
||||
u64 get_jiffies_64(void)
|
||||
@@ -83,9 +84,9 @@ u64 get_jiffies_64(void)
|
||||
u64 ret;
|
||||
|
||||
do {
|
||||
- seq = read_seqbegin(&jiffies_lock);
|
||||
+ seq = read_seqcount_begin(&jiffies_seq);
|
||||
ret = jiffies_64;
|
||||
- } while (read_seqretry(&jiffies_lock, seq));
|
||||
+ } while (read_seqcount_retry(&jiffies_seq, seq));
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(get_jiffies_64);
|
||||
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
|
||||
index a02e0f6b287c..32f5101f07ce 100644
|
||||
--- a/kernel/time/tick-common.c
|
||||
+++ b/kernel/time/tick-common.c
|
||||
@@ -79,13 +79,15 @@ int tick_is_oneshot_available(void)
|
||||
static void tick_periodic(int cpu)
|
||||
{
|
||||
if (tick_do_timer_cpu == cpu) {
|
||||
- write_seqlock(&jiffies_lock);
|
||||
+ raw_spin_lock(&jiffies_lock);
|
||||
+ write_seqcount_begin(&jiffies_seq);
|
||||
|
||||
/* Keep track of the next tick event */
|
||||
tick_next_period = ktime_add(tick_next_period, tick_period);
|
||||
|
||||
do_timer(1);
|
||||
- write_sequnlock(&jiffies_lock);
|
||||
+ write_seqcount_end(&jiffies_seq);
|
||||
+ raw_spin_unlock(&jiffies_lock);
|
||||
update_wall_time();
|
||||
}
|
||||
|
||||
@@ -157,9 +159,9 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
|
||||
ktime_t next;
|
||||
|
||||
do {
|
||||
- seq = read_seqbegin(&jiffies_lock);
|
||||
+ seq = read_seqcount_begin(&jiffies_seq);
|
||||
next = tick_next_period;
|
||||
- } while (read_seqretry(&jiffies_lock, seq));
|
||||
+ } while (read_seqcount_retry(&jiffies_seq, seq));
|
||||
|
||||
clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
|
||||
|
||||
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
|
||||
index 48403fb653c2..e774a49176cc 100644
|
||||
--- a/kernel/time/tick-sched.c
|
||||
+++ b/kernel/time/tick-sched.c
|
||||
@@ -68,7 +68,8 @@ static void tick_do_update_jiffies64(ktime_t now)
|
||||
return;
|
||||
|
||||
/* Reevaluate with jiffies_lock held */
|
||||
- write_seqlock(&jiffies_lock);
|
||||
+ raw_spin_lock(&jiffies_lock);
|
||||
+ write_seqcount_begin(&jiffies_seq);
|
||||
|
||||
delta = ktime_sub(now, last_jiffies_update);
|
||||
if (delta >= tick_period) {
|
||||
@@ -94,10 +95,12 @@ static void tick_do_update_jiffies64(ktime_t now)
|
||||
/* Keep the tick_next_period variable up to date */
|
||||
tick_next_period = ktime_add(last_jiffies_update, tick_period);
|
||||
} else {
|
||||
- write_sequnlock(&jiffies_lock);
|
||||
+ write_seqcount_end(&jiffies_seq);
|
||||
+ raw_spin_unlock(&jiffies_lock);
|
||||
return;
|
||||
}
|
||||
- write_sequnlock(&jiffies_lock);
|
||||
+ write_seqcount_end(&jiffies_seq);
|
||||
+ raw_spin_unlock(&jiffies_lock);
|
||||
update_wall_time();
|
||||
}
|
||||
|
||||
@@ -108,12 +111,14 @@ static ktime_t tick_init_jiffy_update(void)
|
||||
{
|
||||
ktime_t period;
|
||||
|
||||
- write_seqlock(&jiffies_lock);
|
||||
+ raw_spin_lock(&jiffies_lock);
|
||||
+ write_seqcount_begin(&jiffies_seq);
|
||||
/* Did we start the jiffies update yet ? */
|
||||
if (last_jiffies_update == 0)
|
||||
last_jiffies_update = tick_next_period;
|
||||
period = last_jiffies_update;
|
||||
- write_sequnlock(&jiffies_lock);
|
||||
+ write_seqcount_end(&jiffies_seq);
|
||||
+ raw_spin_unlock(&jiffies_lock);
|
||||
return period;
|
||||
}
|
||||
|
||||
@@ -656,10 +661,10 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
|
||||
|
||||
/* Read jiffies and the time when jiffies were updated last */
|
||||
do {
|
||||
- seq = read_seqbegin(&jiffies_lock);
|
||||
+ seq = read_seqcount_begin(&jiffies_seq);
|
||||
basemono = last_jiffies_update;
|
||||
basejiff = jiffies;
|
||||
- } while (read_seqretry(&jiffies_lock, seq));
|
||||
+ } while (read_seqcount_retry(&jiffies_seq, seq));
|
||||
ts->last_jiffies = basejiff;
|
||||
ts->timer_expires_base = basemono;
|
||||
|
||||
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
|
||||
index 81ee5b83c920..512db778f442 100644
|
||||
--- a/kernel/time/timekeeping.c
|
||||
+++ b/kernel/time/timekeeping.c
|
||||
@@ -2394,8 +2394,10 @@ EXPORT_SYMBOL(hardpps);
|
||||
*/
|
||||
void xtime_update(unsigned long ticks)
|
||||
{
|
||||
- write_seqlock(&jiffies_lock);
|
||||
+ raw_spin_lock(&jiffies_lock);
|
||||
+ write_seqcount_begin(&jiffies_seq);
|
||||
do_timer(ticks);
|
||||
- write_sequnlock(&jiffies_lock);
|
||||
+ write_seqcount_end(&jiffies_seq);
|
||||
+ raw_spin_unlock(&jiffies_lock);
|
||||
update_wall_time();
|
||||
}
|
||||
diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h
|
||||
index 141ab3ab0354..099737f6f10c 100644
|
||||
--- a/kernel/time/timekeeping.h
|
||||
+++ b/kernel/time/timekeeping.h
|
||||
@@ -25,7 +25,8 @@ static inline void sched_clock_resume(void) { }
|
||||
extern void do_timer(unsigned long ticks);
|
||||
extern void update_wall_time(void);
|
||||
|
||||
-extern seqlock_t jiffies_lock;
|
||||
+extern raw_spinlock_t jiffies_lock;
|
||||
+extern seqcount_t jiffies_seq;
|
||||
|
||||
#define CS_NAME_LEN 32
|
||||
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,37 +0,0 @@
|
||||
From dad624b7531ae0a0275cab3c82ea0d7c6a29cc7c Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Wed, 21 Sep 2011 19:57:12 +0200
|
||||
Subject: [PATCH 032/328] signal: Revert ptrace preempt magic
|
||||
|
||||
Upstream commit '53da1d9456fe7f8 fix ptrace slowness' is nothing more
|
||||
than a bandaid around the ptrace design trainwreck. It's not a
|
||||
correctness issue, it's merely a cosmetic bandaid.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
kernel/signal.c | 8 --------
|
||||
1 file changed, 8 deletions(-)
|
||||
|
||||
diff --git a/kernel/signal.c b/kernel/signal.c
|
||||
index 08911bb6fe9a..5e278f1540ad 100644
|
||||
--- a/kernel/signal.c
|
||||
+++ b/kernel/signal.c
|
||||
@@ -2103,15 +2103,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
|
||||
if (gstop_done && ptrace_reparented(current))
|
||||
do_notify_parent_cldstop(current, false, why);
|
||||
|
||||
- /*
|
||||
- * Don't want to allow preemption here, because
|
||||
- * sys_ptrace() needs this task to be inactive.
|
||||
- *
|
||||
- * XXX: implement read_unlock_no_resched().
|
||||
- */
|
||||
- preempt_disable();
|
||||
read_unlock(&tasklist_lock);
|
||||
- preempt_enable_no_resched();
|
||||
freezable_schedule();
|
||||
} else {
|
||||
/*
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,63 +0,0 @@
|
||||
From 5b974aebb7a0797ecc4c47dda6158e8c6788d50b Mon Sep 17 00:00:00 2001
|
||||
From: Marc Kleine-Budde <mkl@pengutronix.de>
|
||||
Date: Wed, 5 Mar 2014 00:49:47 +0100
|
||||
Subject: [PATCH 033/328] net: sched: Use msleep() instead of yield()
|
||||
|
||||
On PREEMPT_RT enabled systems the interrupt handler run as threads at prio 50
|
||||
(by default). If a high priority userspace process tries to shut down a busy
|
||||
network interface it might spin in a yield loop waiting for the device to
|
||||
become idle. With the interrupt thread having a lower priority than the
|
||||
looping process it might never be scheduled and so result in a deadlock on UP
|
||||
systems.
|
||||
|
||||
With Magic SysRq the following backtrace can be produced:
|
||||
|
||||
> test_app R running 0 174 168 0x00000000
|
||||
> [<c02c7070>] (__schedule+0x220/0x3fc) from [<c02c7870>] (preempt_schedule_irq+0x48/0x80)
|
||||
> [<c02c7870>] (preempt_schedule_irq+0x48/0x80) from [<c0008fa8>] (svc_preempt+0x8/0x20)
|
||||
> [<c0008fa8>] (svc_preempt+0x8/0x20) from [<c001a984>] (local_bh_enable+0x18/0x88)
|
||||
> [<c001a984>] (local_bh_enable+0x18/0x88) from [<c025316c>] (dev_deactivate_many+0x220/0x264)
|
||||
> [<c025316c>] (dev_deactivate_many+0x220/0x264) from [<c023be04>] (__dev_close_many+0x64/0xd4)
|
||||
> [<c023be04>] (__dev_close_many+0x64/0xd4) from [<c023be9c>] (__dev_close+0x28/0x3c)
|
||||
> [<c023be9c>] (__dev_close+0x28/0x3c) from [<c023f7f0>] (__dev_change_flags+0x88/0x130)
|
||||
> [<c023f7f0>] (__dev_change_flags+0x88/0x130) from [<c023f904>] (dev_change_flags+0x10/0x48)
|
||||
> [<c023f904>] (dev_change_flags+0x10/0x48) from [<c024c140>] (do_setlink+0x370/0x7ec)
|
||||
> [<c024c140>] (do_setlink+0x370/0x7ec) from [<c024d2f0>] (rtnl_newlink+0x2b4/0x450)
|
||||
> [<c024d2f0>] (rtnl_newlink+0x2b4/0x450) from [<c024cfa0>] (rtnetlink_rcv_msg+0x158/0x1f4)
|
||||
> [<c024cfa0>] (rtnetlink_rcv_msg+0x158/0x1f4) from [<c0256740>] (netlink_rcv_skb+0xac/0xc0)
|
||||
> [<c0256740>] (netlink_rcv_skb+0xac/0xc0) from [<c024bbd8>] (rtnetlink_rcv+0x18/0x24)
|
||||
> [<c024bbd8>] (rtnetlink_rcv+0x18/0x24) from [<c02561b8>] (netlink_unicast+0x13c/0x198)
|
||||
> [<c02561b8>] (netlink_unicast+0x13c/0x198) from [<c025651c>] (netlink_sendmsg+0x264/0x2e0)
|
||||
> [<c025651c>] (netlink_sendmsg+0x264/0x2e0) from [<c022af98>] (sock_sendmsg+0x78/0x98)
|
||||
> [<c022af98>] (sock_sendmsg+0x78/0x98) from [<c022bb50>] (___sys_sendmsg.part.25+0x268/0x278)
|
||||
> [<c022bb50>] (___sys_sendmsg.part.25+0x268/0x278) from [<c022cf08>] (__sys_sendmsg+0x48/0x78)
|
||||
> [<c022cf08>] (__sys_sendmsg+0x48/0x78) from [<c0009320>] (ret_fast_syscall+0x0/0x2c)
|
||||
|
||||
This patch works around the problem by replacing yield() by msleep(1), giving
|
||||
the interrupt thread time to finish, similar to other changes contained in the
|
||||
rt patch set. Using wait_for_completion() instead would probably be a better
|
||||
solution.
|
||||
|
||||
|
||||
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
net/sched/sch_generic.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
|
||||
index 8a4d01e427a2..4ab20f1138fd 100644
|
||||
--- a/net/sched/sch_generic.c
|
||||
+++ b/net/sched/sch_generic.c
|
||||
@@ -1204,7 +1204,7 @@ void dev_deactivate_many(struct list_head *head)
|
||||
/* Wait for outstanding qdisc_run calls. */
|
||||
list_for_each_entry(dev, head, close_list) {
|
||||
while (some_qdisc_is_busy(dev))
|
||||
- yield();
|
||||
+ msleep(1);
|
||||
/* The new qdisc is assigned at this point so we can safely
|
||||
* unwind stale skb lists and qdisc statistics
|
||||
*/
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,36 +0,0 @@
|
||||
From 38dbd44808bcdd34f0b973698b0f9bd65d2f2db5 Mon Sep 17 00:00:00 2001
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Tue, 27 Mar 2018 16:24:15 +0200
|
||||
Subject: [PATCH 034/328] dm rq: remove BUG_ON(!irqs_disabled) check
|
||||
|
||||
In commit 052189a2ec95 ("dm: remove superfluous irq disablement in
|
||||
dm_request_fn") the spin_lock_irq() was replaced with spin_lock() + a
|
||||
check for disabled interrupts. Later the locking part was removed in
|
||||
commit 2eb6e1e3aa87 ("dm: submit stacked requests in irq enabled
|
||||
context") but the BUG_ON() check remained.
|
||||
|
||||
Since the original purpose for the "are-irqs-off" check is gone (the
|
||||
->queue_lock has been removed) remove it.
|
||||
|
||||
Cc: Keith Busch <keith.busch@intel.com>
|
||||
Cc: Mike Snitzer <snitzer@redhat.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
drivers/md/dm-rq.c | 1 -
|
||||
1 file changed, 1 deletion(-)
|
||||
|
||||
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
|
||||
index 4d36373e1c0f..12ed08245130 100644
|
||||
--- a/drivers/md/dm-rq.c
|
||||
+++ b/drivers/md/dm-rq.c
|
||||
@@ -692,7 +692,6 @@ static void dm_old_request_fn(struct request_queue *q)
|
||||
/* Establish tio->ti before queuing work (map_tio_request) */
|
||||
tio->ti = ti;
|
||||
kthread_queue_work(&md->kworker, &tio->work);
|
||||
- BUG_ON(!irqs_disabled());
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,45 +0,0 @@
|
||||
From f31d5f36bfd80c261ba37fe3b8849f2be819c088 Mon Sep 17 00:00:00 2001
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Fri, 8 Nov 2013 17:34:54 +0100
|
||||
Subject: [PATCH 035/328] usb: do no disable interrupts in giveback
|
||||
|
||||
Since commit 94dfd7ed ("USB: HCD: support giveback of URB in tasklet
|
||||
context") the USB code disables interrupts before invoking the complete
|
||||
callback.
|
||||
This should not be required; the HCD completes the URBs either in hard-irq
|
||||
context or in BH context. Lockdep may report false positives if one has two
|
||||
HCDs (one completes in IRQ and the other in BH context) and is using the same
|
||||
USB driver (device) with both HCDs. This is safe since the same URBs are never
|
||||
mixed with those two HCDs.
|
||||
Long term we should force all HCDs to complete in the same context.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
drivers/usb/core/hcd.c | 3 ---
|
||||
1 file changed, 3 deletions(-)
|
||||
|
||||
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
|
||||
index b82a7d787add..2f3015356124 100644
|
||||
--- a/drivers/usb/core/hcd.c
|
||||
+++ b/drivers/usb/core/hcd.c
|
||||
@@ -1738,7 +1738,6 @@ static void __usb_hcd_giveback_urb(struct urb *urb)
|
||||
struct usb_hcd *hcd = bus_to_hcd(urb->dev->bus);
|
||||
struct usb_anchor *anchor = urb->anchor;
|
||||
int status = urb->unlinked;
|
||||
- unsigned long flags;
|
||||
|
||||
urb->hcpriv = NULL;
|
||||
if (unlikely((urb->transfer_flags & URB_SHORT_NOT_OK) &&
|
||||
@@ -1766,9 +1765,7 @@ static void __usb_hcd_giveback_urb(struct urb *urb)
|
||||
* and no one may trigger the above deadlock situation when
|
||||
* running complete() in tasklet.
|
||||
*/
|
||||
- local_irq_save(flags);
|
||||
urb->complete(urb);
|
||||
- local_irq_restore(flags);
|
||||
|
||||
usb_anchor_resume_wakeups(anchor);
|
||||
atomic_dec(&urb->use_count);
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,63 +0,0 @@
|
||||
From f93f63735dec865d4013677969324e66da7f02c4 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Fri, 17 Jun 2011 12:39:57 +0200
|
||||
Subject: [PATCH 036/328] rt: Provide PREEMPT_RT_BASE config switch
|
||||
|
||||
Introduce PREEMPT_RT_BASE which enables parts of
|
||||
PREEMPT_RT_FULL. Forces interrupt threading and enables some of the RT
|
||||
substitutions for testing.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
kernel/Kconfig.preempt | 21 ++++++++++++++++++---
|
||||
1 file changed, 18 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
|
||||
index cd1655122ec0..027db5976c2f 100644
|
||||
--- a/kernel/Kconfig.preempt
|
||||
+++ b/kernel/Kconfig.preempt
|
||||
@@ -1,3 +1,10 @@
|
||||
+config PREEMPT
|
||||
+ bool
|
||||
+ select PREEMPT_COUNT
|
||||
+
|
||||
+config PREEMPT_RT_BASE
|
||||
+ bool
|
||||
+ select PREEMPT
|
||||
|
||||
choice
|
||||
prompt "Preemption Model"
|
||||
@@ -34,10 +41,10 @@ config PREEMPT_VOLUNTARY
|
||||
|
||||
Select this if you are building a kernel for a desktop system.
|
||||
|
||||
-config PREEMPT
|
||||
+config PREEMPT__LL
|
||||
bool "Preemptible Kernel (Low-Latency Desktop)"
|
||||
depends on !ARCH_NO_PREEMPT
|
||||
- select PREEMPT_COUNT
|
||||
+ select PREEMPT
|
||||
select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
|
||||
help
|
||||
This option reduces the latency of the kernel by making
|
||||
@@ -54,7 +61,15 @@ config PREEMPT
|
||||
embedded system with latency requirements in the milliseconds
|
||||
range.
|
||||
|
||||
+config PREEMPT_RTB
|
||||
+ bool "Preemptible Kernel (Basic RT)"
|
||||
+ select PREEMPT_RT_BASE
|
||||
+ help
|
||||
+ This option is basically the same as (Low-Latency Desktop) but
|
||||
+ enables changes which are preliminary for the full preemptible
|
||||
+ RT kernel.
|
||||
+
|
||||
endchoice
|
||||
|
||||
config PREEMPT_COUNT
|
||||
- bool
|
||||
\ No newline at end of file
|
||||
+ bool
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,75 +0,0 @@
|
||||
From 824fc9b2ae92b317da3e2a42406a49f330e20a6d Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Wed, 14 Dec 2011 01:03:49 +0100
|
||||
Subject: [PATCH 037/328] cpumask: Disable CONFIG_CPUMASK_OFFSTACK for RT
|
||||
|
||||
There are "valid" GFP_ATOMIC allocations such as
|
||||
|
||||
|BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:931
|
||||
|in_atomic(): 1, irqs_disabled(): 0, pid: 2130, name: tar
|
||||
|1 lock held by tar/2130:
|
||||
| #0: (&mm->mmap_sem){++++++}, at: [<ffffffff811d4e89>] SyS_brk+0x39/0x190
|
||||
|Preemption disabled at:[<ffffffff81063048>] flush_tlb_mm_range+0x28/0x350
|
||||
|
|
||||
|CPU: 1 PID: 2130 Comm: tar Tainted: G W 4.8.2-rt2+ #747
|
||||
|Call Trace:
|
||||
| [<ffffffff814d52dc>] dump_stack+0x86/0xca
|
||||
| [<ffffffff810a26fb>] ___might_sleep+0x14b/0x240
|
||||
| [<ffffffff819bc1d4>] rt_spin_lock+0x24/0x60
|
||||
| [<ffffffff81194fba>] get_page_from_freelist+0x83a/0x11b0
|
||||
| [<ffffffff81195e8b>] __alloc_pages_nodemask+0x15b/0x1190
|
||||
| [<ffffffff811f0b81>] alloc_pages_current+0xa1/0x1f0
|
||||
| [<ffffffff811f7df5>] new_slab+0x3e5/0x690
|
||||
| [<ffffffff811fb0d5>] ___slab_alloc+0x495/0x660
|
||||
| [<ffffffff811fb311>] __slab_alloc.isra.79+0x71/0xc0
|
||||
| [<ffffffff811fb447>] __kmalloc_node+0xe7/0x240
|
||||
| [<ffffffff814d4ee0>] alloc_cpumask_var_node+0x20/0x50
|
||||
| [<ffffffff814d4f3e>] alloc_cpumask_var+0xe/0x10
|
||||
| [<ffffffff810430c1>] native_send_call_func_ipi+0x21/0x130
|
||||
| [<ffffffff8111c13f>] smp_call_function_many+0x22f/0x370
|
||||
| [<ffffffff81062b64>] native_flush_tlb_others+0x1a4/0x3a0
|
||||
| [<ffffffff8106309b>] flush_tlb_mm_range+0x7b/0x350
|
||||
| [<ffffffff811c88e2>] tlb_flush_mmu_tlbonly+0x62/0xd0
|
||||
| [<ffffffff811c9af4>] tlb_finish_mmu+0x14/0x50
|
||||
| [<ffffffff811d1c84>] unmap_region+0xe4/0x110
|
||||
| [<ffffffff811d3db3>] do_munmap+0x293/0x470
|
||||
| [<ffffffff811d4f8c>] SyS_brk+0x13c/0x190
|
||||
| [<ffffffff810032e2>] do_fast_syscall_32+0xb2/0x2f0
|
||||
| [<ffffffff819be181>] entry_SYSENTER_compat+0x51/0x60
|
||||
|
||||
which forbid allocations at run-time.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
arch/x86/Kconfig | 2 +-
|
||||
lib/Kconfig | 1 +
|
||||
2 files changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
|
||||
index af35f5caadbe..e40ba59efe7f 100644
|
||||
--- a/arch/x86/Kconfig
|
||||
+++ b/arch/x86/Kconfig
|
||||
@@ -934,7 +934,7 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT
|
||||
config MAXSMP
|
||||
bool "Enable Maximum number of SMP Processors and NUMA Nodes"
|
||||
depends on X86_64 && SMP && DEBUG_KERNEL
|
||||
- select CPUMASK_OFFSTACK
|
||||
+ select CPUMASK_OFFSTACK if !PREEMPT_RT_FULL
|
||||
---help---
|
||||
Enable maximum number of CPUS and NUMA Nodes for this architecture.
|
||||
If unsure, say N.
|
||||
diff --git a/lib/Kconfig b/lib/Kconfig
|
||||
index a3928d4438b5..a50b2158f7cd 100644
|
||||
--- a/lib/Kconfig
|
||||
+++ b/lib/Kconfig
|
||||
@@ -441,6 +441,7 @@ config CHECK_SIGNATURE
|
||||
|
||||
config CPUMASK_OFFSTACK
|
||||
bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS
|
||||
+ depends on !PREEMPT_RT_FULL
|
||||
help
|
||||
Use dynamic allocation for cpumask_var_t, instead of putting
|
||||
them on the stack. This is a bit more expensive, but avoids
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,41 +0,0 @@
|
||||
From feb8e4e9bfee1c054ec0c83ae2a12897d85da9b3 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Wed, 8 Jul 2015 17:14:48 +0200
|
||||
Subject: [PATCH 038/328] jump-label: disable if stop_machine() is used
|
||||
|
||||
Some architectures are using stop_machine() while switching the opcode which
|
||||
leads to latency spikes.
|
||||
The architectures which use stop_machine() atm:
|
||||
- ARM stop machine
|
||||
- s390 stop machine
|
||||
|
||||
The architectures which use other sorcery:
|
||||
- MIPS
|
||||
- X86
|
||||
- powerpc
|
||||
- sparc
|
||||
- arm64
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
[bigeasy: only ARM for now]
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
arch/arm/Kconfig | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
|
||||
index e2f7c50dbace..91ba9fe945ff 100644
|
||||
--- a/arch/arm/Kconfig
|
||||
+++ b/arch/arm/Kconfig
|
||||
@@ -51,7 +51,7 @@ config ARM
|
||||
select HARDIRQS_SW_RESEND
|
||||
select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT)
|
||||
select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
|
||||
- select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
|
||||
+ select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT_BASE
|
||||
select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
|
||||
select HAVE_ARCH_MMAP_RND_BITS if MMU
|
||||
select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,42 +0,0 @@
|
||||
From 521872fd8b19b111a7001f6388912ab9d506f741 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Sun, 24 Jul 2011 12:11:43 +0200
|
||||
Subject: [PATCH 039/328] kconfig: Disable config options which are not RT
|
||||
compatible
|
||||
|
||||
Disable stuff which is known to have issues on RT
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
arch/Kconfig | 1 +
|
||||
mm/Kconfig | 2 +-
|
||||
2 files changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/arch/Kconfig b/arch/Kconfig
|
||||
index a336548487e6..3f537b264852 100644
|
||||
--- a/arch/Kconfig
|
||||
+++ b/arch/Kconfig
|
||||
@@ -28,6 +28,7 @@ config OPROFILE
|
||||
tristate "OProfile system profiling"
|
||||
depends on PROFILING
|
||||
depends on HAVE_OPROFILE
|
||||
+ depends on !PREEMPT_RT_FULL
|
||||
select RING_BUFFER
|
||||
select RING_BUFFER_ALLOW_SWAP
|
||||
help
|
||||
diff --git a/mm/Kconfig b/mm/Kconfig
|
||||
index b457e94ae618..0dddbb2a3282 100644
|
||||
--- a/mm/Kconfig
|
||||
+++ b/mm/Kconfig
|
||||
@@ -377,7 +377,7 @@ config NOMMU_INITIAL_TRIM_EXCESS
|
||||
|
||||
config TRANSPARENT_HUGEPAGE
|
||||
bool "Transparent Hugepage Support"
|
||||
- depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE
|
||||
+ depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT_FULL
|
||||
select COMPACTION
|
||||
select RADIX_TREE_MULTIORDER
|
||||
help
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,34 +0,0 @@
|
||||
From 464fd34dd6dc53c357221cff371377ca19601e14 Mon Sep 17 00:00:00 2001
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Tue, 17 Oct 2017 16:36:18 +0200
|
||||
Subject: [PATCH 040/328] lockdep: disable self-test
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
The self-test wasn't always 100% accurate for RT. We disabled a few
|
||||
tests which failed because they had a different semantic for RT. Some
|
||||
still reported false positives. Now the selftest locks up the system
|
||||
during boot and it needs to be investigated…
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
lib/Kconfig.debug | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
|
||||
index 46a910acce3f..38cf7f81daa7 100644
|
||||
--- a/lib/Kconfig.debug
|
||||
+++ b/lib/Kconfig.debug
|
||||
@@ -1207,7 +1207,7 @@ config DEBUG_ATOMIC_SLEEP
|
||||
|
||||
config DEBUG_LOCKING_API_SELFTESTS
|
||||
bool "Locking API boot-time self-tests"
|
||||
- depends on DEBUG_KERNEL
|
||||
+ depends on DEBUG_KERNEL && !PREEMPT_RT_FULL
|
||||
help
|
||||
Say Y here if you want the kernel to run a short self-test during
|
||||
bootup. The self-test checks whether common types of locking bugs
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,36 +0,0 @@
|
||||
From bb8c948b260e99e7c7ad2dc38ea03a958be18769 Mon Sep 17 00:00:00 2001
|
||||
From: Ingo Molnar <mingo@elte.hu>
|
||||
Date: Fri, 3 Jul 2009 08:44:03 -0500
|
||||
Subject: [PATCH 041/328] mm: Allow only slub on RT
|
||||
|
||||
Disable SLAB and SLOB on -RT. Only SLUB is adapted to -RT needs.
|
||||
|
||||
Signed-off-by: Ingo Molnar <mingo@elte.hu>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
init/Kconfig | 2 ++
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
diff --git a/init/Kconfig b/init/Kconfig
|
||||
index 47035b5a46f6..ae9a0113a699 100644
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -1637,6 +1637,7 @@ choice
|
||||
|
||||
config SLAB
|
||||
bool "SLAB"
|
||||
+ depends on !PREEMPT_RT_FULL
|
||||
select HAVE_HARDENED_USERCOPY_ALLOCATOR
|
||||
help
|
||||
The regular slab allocator that is established and known to work
|
||||
@@ -1657,6 +1658,7 @@ config SLUB
|
||||
config SLOB
|
||||
depends on EXPERT
|
||||
bool "SLOB (Simple Allocator)"
|
||||
+ depends on !PREEMPT_RT_FULL
|
||||
help
|
||||
SLOB replaces the stock allocator with a drastically simpler
|
||||
allocator. SLOB is generally more space efficient but
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,37 +0,0 @@
|
||||
From 6bbedb933d43f1bc2283d96523412298d765b8a2 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Sun, 17 Jul 2011 21:51:45 +0200
|
||||
Subject: [PATCH 042/328] locking: Disable spin on owner for RT
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Drop spin on owner for mutex / rwsem. We are most likely not using it
|
||||
but…
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
kernel/Kconfig.locks | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
|
||||
index 84d882f3e299..af27c4000812 100644
|
||||
--- a/kernel/Kconfig.locks
|
||||
+++ b/kernel/Kconfig.locks
|
||||
@@ -225,11 +225,11 @@ config ARCH_SUPPORTS_ATOMIC_RMW
|
||||
|
||||
config MUTEX_SPIN_ON_OWNER
|
||||
def_bool y
|
||||
- depends on SMP && ARCH_SUPPORTS_ATOMIC_RMW
|
||||
+ depends on SMP && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL
|
||||
|
||||
config RWSEM_SPIN_ON_OWNER
|
||||
def_bool y
|
||||
- depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW
|
||||
+ depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL
|
||||
|
||||
config LOCK_SPIN_ON_OWNER
|
||||
def_bool y
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,29 +0,0 @@
|
||||
From 0942d8d1880802a3a19df4dfdff1ec5769d92fe3 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Sun, 28 Oct 2012 13:26:09 +0000
|
||||
Subject: [PATCH 043/328] rcu: Disable RCU_FAST_NO_HZ on RT
|
||||
|
||||
This uses a timer_list timer from the irq disabled guts of the idle
|
||||
code. Disable it for now to prevent wreckage.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
kernel/rcu/Kconfig | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
|
||||
index 9210379c0353..644264be90f0 100644
|
||||
--- a/kernel/rcu/Kconfig
|
||||
+++ b/kernel/rcu/Kconfig
|
||||
@@ -172,7 +172,7 @@ config RCU_FANOUT_LEAF
|
||||
|
||||
config RCU_FAST_NO_HZ
|
||||
bool "Accelerate last non-dyntick-idle CPU's grace periods"
|
||||
- depends on NO_HZ_COMMON && SMP && RCU_EXPERT
|
||||
+ depends on NO_HZ_COMMON && SMP && RCU_EXPERT && !PREEMPT_RT_FULL
|
||||
default n
|
||||
help
|
||||
This option permits CPUs to enter dynticks-idle state even if
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,33 +0,0 @@
|
||||
From b784c987142020d5cc32de03823004d362b390ec Mon Sep 17 00:00:00 2001
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Fri, 21 Mar 2014 20:19:05 +0100
|
||||
Subject: [PATCH 044/328] rcu: make RCU_BOOST default on RT
|
||||
|
||||
Since it is no longer invoked from the softirq people run into OOM more
|
||||
often if the priority of the RCU thread is too low. Making boosting
|
||||
default on RT should help in those case and it can be switched off if
|
||||
someone knows better.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/rcu/Kconfig | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
|
||||
index 644264be90f0..a243a78ff38c 100644
|
||||
--- a/kernel/rcu/Kconfig
|
||||
+++ b/kernel/rcu/Kconfig
|
||||
@@ -190,8 +190,8 @@ config RCU_FAST_NO_HZ
|
||||
|
||||
config RCU_BOOST
|
||||
bool "Enable RCU priority boosting"
|
||||
- depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT
|
||||
- default n
|
||||
+ depends on (RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT) || PREEMPT_RT_FULL
|
||||
+ default y if PREEMPT_RT_FULL
|
||||
help
|
||||
This option boosts the priority of preempted RCU readers that
|
||||
block the current preemptible RCU grace period for too long.
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,34 +0,0 @@
|
||||
From 648e8c04474df9ed71c649af1d1e5a161cddaf41 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Mon, 18 Jul 2011 17:03:52 +0200
|
||||
Subject: [PATCH 045/328] sched: Disable CONFIG_RT_GROUP_SCHED on RT
|
||||
|
||||
Carsten reported problems when running:
|
||||
|
||||
taskset 01 chrt -f 1 sleep 1
|
||||
|
||||
from within rc.local on a F15 machine. The task stays running and
|
||||
never gets on the run queue because some of the run queues have
|
||||
rt_throttled=1 which does not go away. Works nice from a ssh login
|
||||
shell. Disabling CONFIG_RT_GROUP_SCHED solves that as well.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
init/Kconfig | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/init/Kconfig b/init/Kconfig
|
||||
index ae9a0113a699..61e8b531649b 100644
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -784,6 +784,7 @@ config CFS_BANDWIDTH
|
||||
config RT_GROUP_SCHED
|
||||
bool "Group scheduling for SCHED_RR/FIFO"
|
||||
depends on CGROUP_SCHED
|
||||
+ depends on !PREEMPT_RT_FULL
|
||||
default n
|
||||
help
|
||||
This feature lets you explicitly allocate real CPU bandwidth
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,37 +0,0 @@
|
||||
From 0b90609a04c39529c4ff712a4786aecde55a0733 Mon Sep 17 00:00:00 2001
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Sat, 27 May 2017 19:02:06 +0200
|
||||
Subject: [PATCH 046/328] net/core: disable NET_RX_BUSY_POLL
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
sk_busy_loop() does preempt_disable() followed by a few operations which can
|
||||
take sleeping locks and may get long.
|
||||
I _think_ that we could use preempt_disable_nort() (in sk_busy_loop()) instead
|
||||
but after a successful cmpxchg(&napi->state, …) we would gain the resource
|
||||
and could be scheduled out. At this point nobody knows who (which context) owns
|
||||
it and so it could take a while until the state is released and napi_poll()
|
||||
could be invoked again.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
net/Kconfig | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/net/Kconfig b/net/Kconfig
|
||||
index 228dfa382eec..bc8d01996f22 100644
|
||||
--- a/net/Kconfig
|
||||
+++ b/net/Kconfig
|
||||
@@ -275,7 +275,7 @@ config CGROUP_NET_CLASSID
|
||||
|
||||
config NET_RX_BUSY_POLL
|
||||
bool
|
||||
- default y
|
||||
+ default y if !PREEMPT_RT_FULL
|
||||
|
||||
config BQL
|
||||
bool
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,165 +0,0 @@
|
||||
From af731f1e8edb7e93c5977a0da70bd61c5d9fa7b1 Mon Sep 17 00:00:00 2001
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Fri, 1 Dec 2017 10:42:03 +0100
|
||||
Subject: [PATCH 047/328] arm*: disable NEON in kernel mode
|
||||
|
||||
NEON in kernel mode is used by the crypto algorithms and raid6 code.
|
||||
While the raid6 code looks okay, the crypto algorithms do not: NEON
|
||||
is enabled on first invocation and may allocate/free/map memory before
|
||||
the NEON mode is disabled again.
|
||||
This needs to be changed until it can be enabled.
|
||||
On ARM NEON in kernel mode can be simply disabled. on ARM64 it needs to
|
||||
stay on due to possible EFI callbacks so here I disable each algorithm.
|
||||
|
||||
Cc: stable-rt@vger.kernel.org
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
arch/arm/Kconfig | 2 +-
|
||||
arch/arm64/crypto/Kconfig | 28 ++++++++++++++--------------
|
||||
arch/arm64/crypto/crc32-ce-glue.c | 3 ++-
|
||||
3 files changed, 17 insertions(+), 16 deletions(-)
|
||||
|
||||
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
|
||||
index 91ba9fe945ff..bd9d180db5c7 100644
|
||||
--- a/arch/arm/Kconfig
|
||||
+++ b/arch/arm/Kconfig
|
||||
@@ -2163,7 +2163,7 @@ config NEON
|
||||
|
||||
config KERNEL_MODE_NEON
|
||||
bool "Support for NEON in kernel mode"
|
||||
- depends on NEON && AEABI
|
||||
+ depends on NEON && AEABI && !PREEMPT_RT_BASE
|
||||
help
|
||||
Say Y to include support for NEON in kernel mode.
|
||||
|
||||
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
|
||||
index d51944ff9f91..0d4b3f0cfba6 100644
|
||||
--- a/arch/arm64/crypto/Kconfig
|
||||
+++ b/arch/arm64/crypto/Kconfig
|
||||
@@ -19,43 +19,43 @@ config CRYPTO_SHA512_ARM64
|
||||
|
||||
config CRYPTO_SHA1_ARM64_CE
|
||||
tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)"
|
||||
- depends on KERNEL_MODE_NEON
|
||||
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
|
||||
select CRYPTO_HASH
|
||||
select CRYPTO_SHA1
|
||||
|
||||
config CRYPTO_SHA2_ARM64_CE
|
||||
tristate "SHA-224/SHA-256 digest algorithm (ARMv8 Crypto Extensions)"
|
||||
- depends on KERNEL_MODE_NEON
|
||||
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
|
||||
select CRYPTO_HASH
|
||||
select CRYPTO_SHA256_ARM64
|
||||
|
||||
config CRYPTO_SHA512_ARM64_CE
|
||||
tristate "SHA-384/SHA-512 digest algorithm (ARMv8 Crypto Extensions)"
|
||||
- depends on KERNEL_MODE_NEON
|
||||
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
|
||||
select CRYPTO_HASH
|
||||
select CRYPTO_SHA512_ARM64
|
||||
|
||||
config CRYPTO_SHA3_ARM64
|
||||
tristate "SHA3 digest algorithm (ARMv8.2 Crypto Extensions)"
|
||||
- depends on KERNEL_MODE_NEON
|
||||
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
|
||||
select CRYPTO_HASH
|
||||
select CRYPTO_SHA3
|
||||
|
||||
config CRYPTO_SM3_ARM64_CE
|
||||
tristate "SM3 digest algorithm (ARMv8.2 Crypto Extensions)"
|
||||
- depends on KERNEL_MODE_NEON
|
||||
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
|
||||
select CRYPTO_HASH
|
||||
select CRYPTO_SM3
|
||||
|
||||
config CRYPTO_SM4_ARM64_CE
|
||||
tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)"
|
||||
- depends on KERNEL_MODE_NEON
|
||||
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
|
||||
select CRYPTO_ALGAPI
|
||||
select CRYPTO_SM4
|
||||
|
||||
config CRYPTO_GHASH_ARM64_CE
|
||||
tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions"
|
||||
- depends on KERNEL_MODE_NEON
|
||||
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
|
||||
select CRYPTO_HASH
|
||||
select CRYPTO_GF128MUL
|
||||
select CRYPTO_AES
|
||||
@@ -63,7 +63,7 @@ config CRYPTO_GHASH_ARM64_CE
|
||||
|
||||
config CRYPTO_CRCT10DIF_ARM64_CE
|
||||
tristate "CRCT10DIF digest algorithm using PMULL instructions"
|
||||
- depends on KERNEL_MODE_NEON && CRC_T10DIF
|
||||
+ depends on KERNEL_MODE_NEON && CRC_T10DIF && !PREEMPT_RT_BASE
|
||||
select CRYPTO_HASH
|
||||
|
||||
config CRYPTO_CRC32_ARM64_CE
|
||||
@@ -77,13 +77,13 @@ config CRYPTO_AES_ARM64
|
||||
|
||||
config CRYPTO_AES_ARM64_CE
|
||||
tristate "AES core cipher using ARMv8 Crypto Extensions"
|
||||
- depends on ARM64 && KERNEL_MODE_NEON
|
||||
+ depends on ARM64 && KERNEL_MODE_NEON && !PREEMPT_RT_BASE
|
||||
select CRYPTO_ALGAPI
|
||||
select CRYPTO_AES_ARM64
|
||||
|
||||
config CRYPTO_AES_ARM64_CE_CCM
|
||||
tristate "AES in CCM mode using ARMv8 Crypto Extensions"
|
||||
- depends on ARM64 && KERNEL_MODE_NEON
|
||||
+ depends on ARM64 && KERNEL_MODE_NEON && !PREEMPT_RT_BASE
|
||||
select CRYPTO_ALGAPI
|
||||
select CRYPTO_AES_ARM64_CE
|
||||
select CRYPTO_AES_ARM64
|
||||
@@ -91,7 +91,7 @@ config CRYPTO_AES_ARM64_CE_CCM
|
||||
|
||||
config CRYPTO_AES_ARM64_CE_BLK
|
||||
tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions"
|
||||
- depends on KERNEL_MODE_NEON
|
||||
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
|
||||
select CRYPTO_BLKCIPHER
|
||||
select CRYPTO_AES_ARM64_CE
|
||||
select CRYPTO_AES_ARM64
|
||||
@@ -99,7 +99,7 @@ config CRYPTO_AES_ARM64_CE_BLK
|
||||
|
||||
config CRYPTO_AES_ARM64_NEON_BLK
|
||||
tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions"
|
||||
- depends on KERNEL_MODE_NEON
|
||||
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
|
||||
select CRYPTO_BLKCIPHER
|
||||
select CRYPTO_AES_ARM64
|
||||
select CRYPTO_AES
|
||||
@@ -107,13 +107,13 @@ config CRYPTO_AES_ARM64_NEON_BLK
|
||||
|
||||
config CRYPTO_CHACHA20_NEON
|
||||
tristate "NEON accelerated ChaCha20 symmetric cipher"
|
||||
- depends on KERNEL_MODE_NEON
|
||||
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
|
||||
select CRYPTO_BLKCIPHER
|
||||
select CRYPTO_CHACHA20
|
||||
|
||||
config CRYPTO_AES_ARM64_BS
|
||||
tristate "AES in ECB/CBC/CTR/XTS modes using bit-sliced NEON algorithm"
|
||||
- depends on KERNEL_MODE_NEON
|
||||
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
|
||||
select CRYPTO_BLKCIPHER
|
||||
select CRYPTO_AES_ARM64_NEON_BLK
|
||||
select CRYPTO_AES_ARM64
|
||||
diff --git a/arch/arm64/crypto/crc32-ce-glue.c b/arch/arm64/crypto/crc32-ce-glue.c
|
||||
index 34b4e3d46aab..ae055cdad8cf 100644
|
||||
--- a/arch/arm64/crypto/crc32-ce-glue.c
|
||||
+++ b/arch/arm64/crypto/crc32-ce-glue.c
|
||||
@@ -208,7 +208,8 @@ static struct shash_alg crc32_pmull_algs[] = { {
|
||||
|
||||
static int __init crc32_pmull_mod_init(void)
|
||||
{
|
||||
- if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_PMULL)) {
|
||||
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
|
||||
+ !IS_ENABLED(CONFIG_PREEMPT_RT_BASE) && (elf_hwcap & HWCAP_PMULL)) {
|
||||
crc32_pmull_algs[0].update = crc32_pmull_update;
|
||||
crc32_pmull_algs[1].update = crc32c_pmull_update;
|
||||
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,32 +0,0 @@
|
||||
From c90bc1f0bbce77f2baf2b4213125fb5b7870fc20 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Tue, 14 Jul 2015 14:26:34 +0200
|
||||
Subject: [PATCH 048/328] powerpc: Use generic rwsem on RT
|
||||
|
||||
Use generic code which uses rtmutex
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
arch/powerpc/Kconfig | 3 ++-
|
||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
|
||||
index 6f475dc5829b..3d5c86336072 100644
|
||||
--- a/arch/powerpc/Kconfig
|
||||
+++ b/arch/powerpc/Kconfig
|
||||
@@ -105,10 +105,11 @@ config LOCKDEP_SUPPORT
|
||||
|
||||
config RWSEM_GENERIC_SPINLOCK
|
||||
bool
|
||||
+ default y if PREEMPT_RT_FULL
|
||||
|
||||
config RWSEM_XCHGADD_ALGORITHM
|
||||
bool
|
||||
- default y
|
||||
+ default y if !PREEMPT_RT_FULL
|
||||
|
||||
config GENERIC_LOCKBREAK
|
||||
bool
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,44 +0,0 @@
|
||||
From 3460880af8146f0e3e05acd590e7e52d450bbf80 Mon Sep 17 00:00:00 2001
From: Bogdan Purcareata <bogdan.purcareata@freescale.com>
Date: Fri, 24 Apr 2015 15:53:13 +0000
Subject: [PATCH 049/328] powerpc/kvm: Disable in-kernel MPIC emulation for
PREEMPT_RT_FULL

While converting the openpic emulation code to use a raw_spinlock_t enables
guests to run on RT, there's still a performance issue. For interrupts sent in
directed delivery mode with a multiple CPU mask, the emulated openpic will loop
through all of the VCPUs, and for each VCPUs, it call IRQ_check, which will loop
through all the pending interrupts for that VCPU. This is done while holding the
raw_lock, meaning that in all this time the interrupts and preemption are
disabled on the host Linux. A malicious user app can max both these number and
cause a DoS.

This temporary fix is sent for two reasons. First is so that users who want to
use the in-kernel MPIC emulation are aware of the potential latencies, thus
making sure that the hardware MPIC and their usage scenario does not involve
interrupts sent in directed delivery mode, and the number of possible pending
interrupts is kept small. Secondly, this should incentivize the development of a
proper openpic emulation that would be better suited for RT.

Acked-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Bogdan Purcareata <bogdan.purcareata@freescale.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/powerpc/kvm/Kconfig | 1 +
1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 68a0e9d5b440..6f4d5d7615af 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -178,6 +178,7 @@ config KVM_E500MC
config KVM_MPIC
bool "KVM in-kernel MPIC emulation"
depends on KVM && E500
+ depends on !PREEMPT_RT_FULL
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQFD
select HAVE_KVM_IRQ_ROUTING
--
2.25.1

@ -1,28 +0,0 @@
From 13e6a60aad3edc7b4efd2168abcca0447ff20763 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 18 Jul 2011 17:08:34 +0200
Subject: [PATCH 050/328] powerpc: Disable highmem on RT

The current highmem handling on -RT is not compatible and needs fixups.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/powerpc/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 3d5c86336072..1b332f69dd36 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -399,7 +399,7 @@ menu "Kernel options"

config HIGHMEM
bool "High memory support"
- depends on PPC32
+ depends on PPC32 && !PREEMPT_RT_FULL

source kernel/Kconfig.hz

--
2.25.1

@ -1,28 +0,0 @@
From 55ff21a4418f35a443f2c210779a9ff4dee33e93 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 18 Jul 2011 17:10:12 +0200
Subject: [PATCH 051/328] mips: Disable highmem on RT

The current highmem handling on -RT is not compatible and needs fixups.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/mips/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index a830a9701e50..3d5fae3891be 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2518,7 +2518,7 @@ config MIPS_CRC_SUPPORT
#
config HIGHMEM
bool "High Memory Support"
- depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA
+ depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA && !PREEMPT_RT_FULL

config CPU_SUPPORTS_HIGHMEM
bool
--
2.25.1

@ -1,33 +0,0 @@
From d0b5d43931b3de89c64c8a697256eb60eb9c0ebb Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 26 Jul 2009 02:21:32 +0200
Subject: [PATCH 052/328] x86: Use generic rwsem_spinlocks on -rt

Simplifies the separation of anon_rw_semaphores and rw_semaphores for
-rt.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/Kconfig | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e40ba59efe7f..f22e787329cf 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -264,8 +264,11 @@ config ARCH_MAY_HAVE_PC_FDC
def_bool y
depends on ISA_DMA_API

+config RWSEM_GENERIC_SPINLOCK
+ def_bool PREEMPT_RT_FULL
+
config RWSEM_XCHGADD_ALGORITHM
- def_bool y
+ def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL

config GENERIC_CALIBRATE_DELAY
def_bool y
--
2.25.1

@ -1,40 +0,0 @@
From 9c164cac4dbebd9bf5376428113db97b366625a0 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 23 Jan 2014 14:45:59 +0100
Subject: [PATCH 053/328] leds: trigger: disable CPU trigger on -RT

as it triggers:
|CPU: 0 PID: 0 Comm: swapper Not tainted 3.12.8-rt10 #141
|[<c0014aa4>] (unwind_backtrace+0x0/0xf8) from [<c0012788>] (show_stack+0x1c/0x20)
|[<c0012788>] (show_stack+0x1c/0x20) from [<c043c8dc>] (dump_stack+0x20/0x2c)
|[<c043c8dc>] (dump_stack+0x20/0x2c) from [<c004c5e8>] (__might_sleep+0x13c/0x170)
|[<c004c5e8>] (__might_sleep+0x13c/0x170) from [<c043f270>] (__rt_spin_lock+0x28/0x38)
|[<c043f270>] (__rt_spin_lock+0x28/0x38) from [<c043fa00>] (rt_read_lock+0x68/0x7c)
|[<c043fa00>] (rt_read_lock+0x68/0x7c) from [<c036cf74>] (led_trigger_event+0x2c/0x5c)
|[<c036cf74>] (led_trigger_event+0x2c/0x5c) from [<c036e0bc>] (ledtrig_cpu+0x54/0x5c)
|[<c036e0bc>] (ledtrig_cpu+0x54/0x5c) from [<c000ffd8>] (arch_cpu_idle_exit+0x18/0x1c)
|[<c000ffd8>] (arch_cpu_idle_exit+0x18/0x1c) from [<c00590b8>] (cpu_startup_entry+0xa8/0x234)
|[<c00590b8>] (cpu_startup_entry+0xa8/0x234) from [<c043b2cc>] (rest_init+0xb8/0xe0)
|[<c043b2cc>] (rest_init+0xb8/0xe0) from [<c061ebe0>] (start_kernel+0x2c4/0x380)


Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/leds/trigger/Kconfig | 1 +
1 file changed, 1 insertion(+)

diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig
index 4018af769969..b4ce8c115949 100644
--- a/drivers/leds/trigger/Kconfig
+++ b/drivers/leds/trigger/Kconfig
@@ -63,6 +63,7 @@ config LEDS_TRIGGER_BACKLIGHT

config LEDS_TRIGGER_CPU
bool "LED CPU Trigger"
+ depends on !PREEMPT_RT_BASE
help
This allows LEDs to be controlled by active CPUs. This shows
the active CPUs across an array of LEDs so you can see which
--
2.25.1

@ -1,38 +0,0 @@
From fa67192faa15cd98f554bcf82f0ecc40a26d9165 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 9 Apr 2015 15:23:01 +0200
Subject: [PATCH 054/328] cpufreq: drop K8's driver from beeing selected

Ralf posted a picture of a backtrace from

| powernowk8_target_fn() -> transition_frequency_fidvid() and then at the
| end:
| 932 policy = cpufreq_cpu_get(smp_processor_id());
| 933 cpufreq_cpu_put(policy);

crashing the system on -RT. I assumed that policy was a NULL pointer but
was rulled out. Since Ralf can't do any more investigations on this and
I have no machine with this, I simply switch it off.

Reported-by: Ralf Mardorf <ralf.mardorf@alice-dsl.net>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/cpufreq/Kconfig.x86 | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
index 35f71825b7f3..bb4a6160d0f7 100644
--- a/drivers/cpufreq/Kconfig.x86
+++ b/drivers/cpufreq/Kconfig.x86
@@ -125,7 +125,7 @@ config X86_POWERNOW_K7_ACPI

config X86_POWERNOW_K8
tristate "AMD Opteron/Athlon64 PowerNow!"
- depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ
+ depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ && !PREEMPT_RT_BASE
help
This adds the CPUFreq driver for K8/early Opteron/Athlon64 processors.
Support for K10 and newer processors is now in acpi-cpufreq.
--
2.25.1

@ -1,40 +0,0 @@
From 274246f0cc33aabdc562929c114eae24541eb9a3 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 29 Aug 2013 11:48:57 +0200
Subject: [PATCH 055/328] md: disable bcache
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It uses anon semaphores
|drivers/md/bcache/request.c: In function ‘cached_dev_write_complete’:
|drivers/md/bcache/request.c:1007:2: error: implicit declaration of function ‘up_read_non_owner’ [-Werror=implicit-function-declaration]
| up_read_non_owner(&dc->writeback_lock);
| ^
|drivers/md/bcache/request.c: In function ‘request_write’:
|drivers/md/bcache/request.c:1033:2: error: implicit declaration of function ‘down_read_non_owner’ [-Werror=implicit-function-declaration]
| down_read_non_owner(&dc->writeback_lock);
| ^

either we get rid of those or we have to introduce them…

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/md/bcache/Kconfig | 1 +
1 file changed, 1 insertion(+)

diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig
index f6e0a8b3a61e..18c03d79a442 100644
--- a/drivers/md/bcache/Kconfig
+++ b/drivers/md/bcache/Kconfig
@@ -1,6 +1,7 @@

config BCACHE
tristate "Block device as cache"
+ depends on !PREEMPT_RT_FULL
select CRC64
help
Allows a block device to be used as cache for other devices; uses
--
2.25.1

@ -1,45 +0,0 @@
From b453602d0f02b94edf714d46e6293b037ea94b67 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 26 Jul 2018 15:03:16 +0200
Subject: [PATCH 056/328] efi: Disable runtime services on RT

Based on meassurements the EFI functions get_variable /
get_next_variable take up to 2us which looks okay.
The functions get_time, set_time take around 10ms. Those 10ms are too
much. Even one ms would be too much.
Ard mentioned that SetVariable might even trigger larger latencies if
the firware will erase flash blocks on NOR.

The time-functions are used by efi-rtc and can be triggered during
runtimed (either via explicit read/write or ntp sync).

The variable write could be used by pstore.
These functions can be disabled without much of a loss. The poweroff /
reboot hooks may be provided by PSCI.

Disable EFI's runtime wrappers.

This was observed on "EFI v2.60 by SoftIron Overdrive 1000".

Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/firmware/efi/efi.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 5db20908aa9c..1708505fdf5d 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -87,7 +87,7 @@ struct mm_struct efi_mm = {

struct workqueue_struct *efi_rts_wq;

-static bool disable_runtime;
+static bool disable_runtime = IS_ENABLED(CONFIG_PREEMPT_RT_BASE);
static int __init setup_noefi(char *arg)
{
disable_runtime = true;
--
2.25.1

@ -1,173 +0,0 @@
From dc16c6a47edf44cb0f69ff6124d6a85ee6dd3dcc Mon Sep 17 00:00:00 2001
|
||||
From: Ingo Molnar <mingo@elte.hu>
|
||||
Date: Fri, 22 Jul 2011 17:58:40 +0200
|
||||
Subject: [PATCH 057/328] printk: Add a printk kill switch
|
||||
|
||||
Add a prinkt-kill-switch. This is used from (NMI) watchdog to ensure that
|
||||
it does not dead-lock with the early printk code.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
include/linux/printk.h | 2 ++
|
||||
kernel/printk/printk.c | 79 +++++++++++++++++++++++++++++++-----------
|
||||
kernel/watchdog_hld.c | 10 ++++++
|
||||
3 files changed, 71 insertions(+), 20 deletions(-)
|
||||
|
||||
diff --git a/include/linux/printk.h b/include/linux/printk.h
|
||||
index cf3eccfe1543..30ebf5f82a7c 100644
|
||||
--- a/include/linux/printk.h
|
||||
+++ b/include/linux/printk.h
|
||||
@@ -140,9 +140,11 @@ struct va_format {
|
||||
#ifdef CONFIG_EARLY_PRINTK
|
||||
extern asmlinkage __printf(1, 2)
|
||||
void early_printk(const char *fmt, ...);
|
||||
+extern void printk_kill(void);
|
||||
#else
|
||||
static inline __printf(1, 2) __cold
|
||||
void early_printk(const char *s, ...) { }
|
||||
+static inline void printk_kill(void) { }
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_PRINTK_NMI
|
||||
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
|
||||
index 7a2fdc097c8c..29838e532f46 100644
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -405,6 +405,58 @@ DEFINE_RAW_SPINLOCK(logbuf_lock);
|
||||
printk_safe_exit_irqrestore(flags); \
|
||||
} while (0)
|
||||
|
||||
+#ifdef CONFIG_EARLY_PRINTK
|
||||
+struct console *early_console;
|
||||
+
|
||||
+static void early_vprintk(const char *fmt, va_list ap)
|
||||
+{
|
||||
+ if (early_console) {
|
||||
+ char buf[512];
|
||||
+ int n = vscnprintf(buf, sizeof(buf), fmt, ap);
|
||||
+
|
||||
+ early_console->write(early_console, buf, n);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+asmlinkage void early_printk(const char *fmt, ...)
|
||||
+{
|
||||
+ va_list ap;
|
||||
+
|
||||
+ va_start(ap, fmt);
|
||||
+ early_vprintk(fmt, ap);
|
||||
+ va_end(ap);
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * This is independent of any log levels - a global
|
||||
+ * kill switch that turns off all of printk.
|
||||
+ *
|
||||
+ * Used by the NMI watchdog if early-printk is enabled.
|
||||
+ */
|
||||
+static bool __read_mostly printk_killswitch;
|
||||
+
|
||||
+void printk_kill(void)
|
||||
+{
|
||||
+ printk_killswitch = true;
|
||||
+}
|
||||
+
|
||||
+#ifdef CONFIG_PRINTK
|
||||
+static int forced_early_printk(const char *fmt, va_list ap)
|
||||
+{
|
||||
+ if (!printk_killswitch)
|
||||
+ return 0;
|
||||
+ early_vprintk(fmt, ap);
|
||||
+ return 1;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+#else
|
||||
+static inline int forced_early_printk(const char *fmt, va_list ap)
|
||||
+{
|
||||
+ return 0;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
#ifdef CONFIG_PRINTK
|
||||
DECLARE_WAIT_QUEUE_HEAD(log_wait);
|
||||
/* the next printk record to read by syslog(READ) or /proc/kmsg */
|
||||
@@ -1905,6 +1957,13 @@ asmlinkage int vprintk_emit(int facility, int level,
|
||||
unsigned long flags;
|
||||
u64 curr_log_seq;
|
||||
|
||||
+ /*
|
||||
+ * Fall back to early_printk if a debugging subsystem has
|
||||
+ * killed printk output
|
||||
+ */
|
||||
+ if (unlikely(forced_early_printk(fmt, args)))
|
||||
+ return 1;
|
||||
+
|
||||
if (level == LOGLEVEL_SCHED) {
|
||||
level = LOGLEVEL_DEFAULT;
|
||||
in_sched = true;
|
||||
@@ -2049,26 +2108,6 @@ static bool suppress_message_printing(int level) { return false; }
|
||||
|
||||
#endif /* CONFIG_PRINTK */
|
||||
|
||||
-#ifdef CONFIG_EARLY_PRINTK
|
||||
-struct console *early_console;
|
||||
-
|
||||
-asmlinkage __visible void early_printk(const char *fmt, ...)
|
||||
-{
|
||||
- va_list ap;
|
||||
- char buf[512];
|
||||
- int n;
|
||||
-
|
||||
- if (!early_console)
|
||||
- return;
|
||||
-
|
||||
- va_start(ap, fmt);
|
||||
- n = vscnprintf(buf, sizeof(buf), fmt, ap);
|
||||
- va_end(ap);
|
||||
-
|
||||
- early_console->write(early_console, buf, n);
|
||||
-}
|
||||
-#endif
|
||||
-
|
||||
static int __add_preferred_console(char *name, int idx, char *options,
|
||||
char *brl_options)
|
||||
{
|
||||
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
|
||||
index 71381168dede..685443375dc0 100644
|
||||
--- a/kernel/watchdog_hld.c
|
||||
+++ b/kernel/watchdog_hld.c
|
||||
@@ -24,6 +24,8 @@ static DEFINE_PER_CPU(bool, hard_watchdog_warn);
|
||||
static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
|
||||
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
|
||||
static DEFINE_PER_CPU(struct perf_event *, dead_event);
|
||||
+static DEFINE_RAW_SPINLOCK(watchdog_output_lock);
|
||||
+
|
||||
static struct cpumask dead_events_mask;
|
||||
|
||||
static unsigned long hardlockup_allcpu_dumped;
|
||||
@@ -134,6 +136,13 @@ static void watchdog_overflow_callback(struct perf_event *event,
|
||||
/* only print hardlockups once */
|
||||
if (__this_cpu_read(hard_watchdog_warn) == true)
|
||||
return;
|
||||
+ /*
|
||||
+ * If early-printk is enabled then make sure we do not
|
||||
+ * lock up in printk() and kill console logging:
|
||||
+ */
|
||||
+ printk_kill();
|
||||
+
|
||||
+ raw_spin_lock(&watchdog_output_lock);
|
||||
|
||||
pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
|
||||
print_modules();
|
||||
@@ -151,6 +160,7 @@ static void watchdog_overflow_callback(struct perf_event *event,
|
||||
!test_and_set_bit(0, &hardlockup_allcpu_dumped))
|
||||
trigger_allbutself_cpu_backtrace();
|
||||
|
||||
+ raw_spin_unlock(&watchdog_output_lock);
|
||||
if (hardlockup_panic)
|
||||
nmi_panic(regs, "Hard LOCKUP");
|
||||
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,38 +0,0 @@
|
||||
From bfe8e0affbc2a1d3f23e9aec43198b31d1115f3d Mon Sep 17 00:00:00 2001
|
||||
From: Peter Zijlstra <peterz@infradead.org>
|
||||
Date: Fri, 2 Sep 2011 14:41:29 +0200
|
||||
Subject: [PATCH 058/328] printk: Add "force_early_printk" boot param to help
|
||||
with debugging
|
||||
|
||||
Gives me an option to screw printk and actually see what the machine
|
||||
says.
|
||||
|
||||
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
|
||||
Link: http://lkml.kernel.org/r/1314967289.1301.11.camel@twins
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Link: http://lkml.kernel.org/n/tip-ykb97nsfmobq44xketrxs977@git.kernel.org
|
||||
---
|
||||
kernel/printk/printk.c | 7 +++++++
|
||||
1 file changed, 7 insertions(+)
|
||||
|
||||
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
|
||||
index 29838e532f46..f934baed564d 100644
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -435,6 +435,13 @@ asmlinkage void early_printk(const char *fmt, ...)
|
||||
*/
|
||||
static bool __read_mostly printk_killswitch;
|
||||
|
||||
+static int __init force_early_printk_setup(char *str)
|
||||
+{
|
||||
+ printk_killswitch = true;
|
||||
+ return 0;
|
||||
+}
|
||||
+early_param("force_early_printk", force_early_printk_setup);
|
||||
+
|
||||
void printk_kill(void)
|
||||
{
|
||||
printk_killswitch = true;
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,52 +0,0 @@
|
||||
From f3b0b7d6dafcddd6bed43f78535a5494b0e09aa8 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Fri, 24 Jul 2009 12:38:56 +0200
|
||||
Subject: [PATCH 059/328] preempt: Provide preempt_*_(no)rt variants
|
||||
|
||||
RT needs a few preempt_disable/enable points which are not necessary
|
||||
otherwise. Implement variants to avoid #ifdeffery.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
include/linux/preempt.h | 18 +++++++++++++++++-
|
||||
1 file changed, 17 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
|
||||
index 3196d0e76719..f7a17fcc3fec 100644
|
||||
--- a/include/linux/preempt.h
|
||||
+++ b/include/linux/preempt.h
|
||||
@@ -181,7 +181,11 @@ do { \
|
||||
preempt_count_dec(); \
|
||||
} while (0)
|
||||
|
||||
-#define preempt_enable_no_resched() sched_preempt_enable_no_resched()
|
||||
+#ifdef CONFIG_PREEMPT_RT_BASE
|
||||
+# define preempt_enable_no_resched() sched_preempt_enable_no_resched()
|
||||
+#else
|
||||
+# define preempt_enable_no_resched() preempt_enable()
|
||||
+#endif
|
||||
|
||||
#define preemptible() (preempt_count() == 0 && !irqs_disabled())
|
||||
|
||||
@@ -298,6 +302,18 @@ do { \
|
||||
set_preempt_need_resched(); \
|
||||
} while (0)
|
||||
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+# define preempt_disable_rt() preempt_disable()
|
||||
+# define preempt_enable_rt() preempt_enable()
|
||||
+# define preempt_disable_nort() barrier()
|
||||
+# define preempt_enable_nort() barrier()
|
||||
+#else
|
||||
+# define preempt_disable_rt() barrier()
|
||||
+# define preempt_enable_rt() barrier()
|
||||
+# define preempt_disable_nort() preempt_disable()
|
||||
+# define preempt_enable_nort() preempt_enable()
|
||||
+#endif
|
||||
+
|
||||
#ifdef CONFIG_PREEMPT_NOTIFIERS
|
||||
|
||||
struct preempt_notifier;
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,69 +0,0 @@
|
||||
From a2a505f1e5d127a2c3b1ee184d27d0f402dcdf63 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Wed, 8 Mar 2017 14:23:35 +0100
|
||||
Subject: [PATCH 060/328] futex: workaround migrate_disable/enable in different
|
||||
context
|
||||
|
||||
migrate_disable()/migrate_enable() takes a different path in atomic() vs
|
||||
!atomic() context. These little hacks ensure that we don't underflow / overflow
|
||||
the migrate code counts properly while we lock the hb lockwith interrupts
|
||||
enabled and unlock it with interrupts disabled.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/futex.c | 19 +++++++++++++++++++
|
||||
1 file changed, 19 insertions(+)
|
||||
|
||||
diff --git a/kernel/futex.c b/kernel/futex.c
|
||||
index e75ad30aa7bc..5c8053098fc8 100644
|
||||
--- a/kernel/futex.c
|
||||
+++ b/kernel/futex.c
|
||||
@@ -2879,6 +2879,14 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
|
||||
* before __rt_mutex_start_proxy_lock() is done.
|
||||
*/
|
||||
raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
|
||||
+ /*
|
||||
+ * the migrate_disable() here disables migration in the in_atomic() fast
|
||||
+ * path which is enabled again in the following spin_unlock(). We have
|
||||
+ * one migrate_disable() pending in the slow-path which is reversed
|
||||
+ * after the raw_spin_unlock_irq() where we leave the atomic context.
|
||||
+ */
|
||||
+ migrate_disable();
|
||||
+
|
||||
spin_unlock(q.lock_ptr);
|
||||
/*
|
||||
* __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter
|
||||
@@ -2887,6 +2895,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
|
||||
*/
|
||||
ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
|
||||
raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);
|
||||
+ migrate_enable();
|
||||
|
||||
if (ret) {
|
||||
if (ret == 1)
|
||||
@@ -3035,11 +3044,21 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
|
||||
* rt_waiter. Also see the WARN in wake_futex_pi().
|
||||
*/
|
||||
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
|
||||
+ /*
|
||||
+ * Magic trickery for now to make the RT migrate disable
|
||||
+ * logic happy. The following spin_unlock() happens with
|
||||
+ * interrupts disabled so the internal migrate_enable()
|
||||
+ * won't undo the migrate_disable() which was issued when
|
||||
+ * locking hb->lock.
|
||||
+ */
|
||||
+ migrate_disable();
|
||||
spin_unlock(&hb->lock);
|
||||
|
||||
/* drops pi_state->pi_mutex.wait_lock */
|
||||
ret = wake_futex_pi(uaddr, uval, pi_state);
|
||||
|
||||
+ migrate_enable();
|
||||
+
|
||||
put_pi_state(pi_state);
|
||||
|
||||
/*
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,340 +0,0 @@
|
||||
From 4db63a0605ac780bf1525c6a90667aef3f897dc1 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Mon, 20 Jun 2011 09:03:47 +0200
|
||||
Subject: [PATCH 061/328] rt: Add local irq locks
|
||||
|
||||
Introduce locallock. For !RT this maps to preempt_disable()/
|
||||
local_irq_disable() so there is not much that changes. For RT this will
|
||||
map to a spinlock. This makes preemption possible and locked "ressource"
|
||||
gets the lockdep anotation it wouldn't have otherwise. The locks are
|
||||
recursive for owner == current. Also, all locks user migrate_disable()
|
||||
which ensures that the task is not migrated to another CPU while the lock
|
||||
is held and the owner is preempted.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
include/linux/locallock.h | 271 ++++++++++++++++++++++++++++++++++++++
|
||||
include/linux/percpu.h | 29 ++++
|
||||
2 files changed, 300 insertions(+)
|
||||
create mode 100644 include/linux/locallock.h
|
||||
|
||||
diff --git a/include/linux/locallock.h b/include/linux/locallock.h
|
||||
new file mode 100644
|
||||
index 000000000000..d658c2552601
|
||||
--- /dev/null
|
||||
+++ b/include/linux/locallock.h
|
||||
@@ -0,0 +1,271 @@
|
||||
+#ifndef _LINUX_LOCALLOCK_H
|
||||
+#define _LINUX_LOCALLOCK_H
|
||||
+
|
||||
+#include <linux/percpu.h>
|
||||
+#include <linux/spinlock.h>
|
||||
+
|
||||
+#ifdef CONFIG_PREEMPT_RT_BASE
|
||||
+
|
||||
+#ifdef CONFIG_DEBUG_SPINLOCK
|
||||
+# define LL_WARN(cond) WARN_ON(cond)
|
||||
+#else
|
||||
+# define LL_WARN(cond) do { } while (0)
|
||||
+#endif
|
||||
+
|
||||
+/*
|
||||
+ * per cpu lock based substitute for local_irq_*()
|
||||
+ */
|
||||
+struct local_irq_lock {
|
||||
+ spinlock_t lock;
|
||||
+ struct task_struct *owner;
|
||||
+ int nestcnt;
|
||||
+ unsigned long flags;
|
||||
+};
|
||||
+
|
||||
+#define DEFINE_LOCAL_IRQ_LOCK(lvar) \
|
||||
+ DEFINE_PER_CPU(struct local_irq_lock, lvar) = { \
|
||||
+ .lock = __SPIN_LOCK_UNLOCKED((lvar).lock) }
|
||||
+
|
||||
+#define DECLARE_LOCAL_IRQ_LOCK(lvar) \
|
||||
+ DECLARE_PER_CPU(struct local_irq_lock, lvar)
|
||||
+
|
||||
+#define local_irq_lock_init(lvar) \
|
||||
+ do { \
|
||||
+ int __cpu; \
|
||||
+ for_each_possible_cpu(__cpu) \
|
||||
+ spin_lock_init(&per_cpu(lvar, __cpu).lock); \
|
||||
+ } while (0)
|
||||
+
|
||||
+static inline void __local_lock(struct local_irq_lock *lv)
|
||||
+{
|
||||
+ if (lv->owner != current) {
|
||||
+ spin_lock(&lv->lock);
|
||||
+ LL_WARN(lv->owner);
|
||||
+ LL_WARN(lv->nestcnt);
|
||||
+ lv->owner = current;
|
||||
+ }
|
||||
+ lv->nestcnt++;
|
||||
+}
|
||||
+
|
||||
+#define local_lock(lvar) \
|
||||
+ do { __local_lock(&get_local_var(lvar)); } while (0)
|
||||
+
|
||||
+#define local_lock_on(lvar, cpu) \
|
||||
+ do { __local_lock(&per_cpu(lvar, cpu)); } while (0)
|
||||
+
|
||||
+static inline int __local_trylock(struct local_irq_lock *lv)
|
||||
+{
|
||||
+ if (lv->owner != current && spin_trylock(&lv->lock)) {
|
||||
+ LL_WARN(lv->owner);
|
||||
+ LL_WARN(lv->nestcnt);
|
||||
+ lv->owner = current;
|
||||
+ lv->nestcnt = 1;
|
||||
+ return 1;
|
||||
+ } else if (lv->owner == current) {
|
||||
+ lv->nestcnt++;
|
||||
+ return 1;
|
||||
+ }
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+#define local_trylock(lvar) \
|
||||
+ ({ \
|
||||
+ int __locked; \
|
||||
+ __locked = __local_trylock(&get_local_var(lvar)); \
|
||||
+ if (!__locked) \
|
||||
+ put_local_var(lvar); \
|
||||
+ __locked; \
|
||||
+ })
|
||||
+
|
||||
+static inline void __local_unlock(struct local_irq_lock *lv)
|
||||
+{
|
||||
+ LL_WARN(lv->nestcnt == 0);
|
||||
+ LL_WARN(lv->owner != current);
|
||||
+ if (--lv->nestcnt)
|
||||
+ return;
|
||||
+
|
||||
+ lv->owner = NULL;
|
||||
+ spin_unlock(&lv->lock);
|
||||
+}
|
||||
+
|
||||
+#define local_unlock(lvar) \
|
||||
+ do { \
|
||||
+ __local_unlock(this_cpu_ptr(&lvar)); \
|
||||
+ put_local_var(lvar); \
|
||||
+ } while (0)
|
||||
+
|
||||
+#define local_unlock_on(lvar, cpu) \
|
||||
+ do { __local_unlock(&per_cpu(lvar, cpu)); } while (0)
|
||||
+
|
||||
+static inline void __local_lock_irq(struct local_irq_lock *lv)
|
||||
+{
|
||||
+ spin_lock_irqsave(&lv->lock, lv->flags);
|
||||
+ LL_WARN(lv->owner);
|
||||
+ LL_WARN(lv->nestcnt);
|
||||
+ lv->owner = current;
|
||||
+ lv->nestcnt = 1;
|
||||
+}
|
||||
+
|
||||
+#define local_lock_irq(lvar) \
|
||||
+ do { __local_lock_irq(&get_local_var(lvar)); } while (0)
|
||||
+
|
||||
+#define local_lock_irq_on(lvar, cpu) \
|
||||
+ do { __local_lock_irq(&per_cpu(lvar, cpu)); } while (0)
|
||||
+
|
||||
+static inline void __local_unlock_irq(struct local_irq_lock *lv)
|
||||
+{
|
||||
+ LL_WARN(!lv->nestcnt);
|
||||
+ LL_WARN(lv->owner != current);
|
||||
+ lv->owner = NULL;
|
||||
+ lv->nestcnt = 0;
|
||||
+ spin_unlock_irq(&lv->lock);
|
||||
+}
|
||||
+
|
||||
+#define local_unlock_irq(lvar) \
|
||||
+ do { \
|
||||
+ __local_unlock_irq(this_cpu_ptr(&lvar)); \
|
||||
+ put_local_var(lvar); \
|
||||
+ } while (0)
|
||||
+
|
||||
+#define local_unlock_irq_on(lvar, cpu) \
|
||||
+ do { \
|
||||
+ __local_unlock_irq(&per_cpu(lvar, cpu)); \
|
||||
+ } while (0)
|
||||
+
|
||||
+static inline int __local_lock_irqsave(struct local_irq_lock *lv)
|
||||
+{
|
||||
+ if (lv->owner != current) {
|
||||
+ __local_lock_irq(lv);
|
||||
+ return 0;
|
||||
+ } else {
|
||||
+ lv->nestcnt++;
|
||||
+ return 1;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+#define local_lock_irqsave(lvar, _flags) \
|
||||
+ do { \
|
||||
+ if (__local_lock_irqsave(&get_local_var(lvar))) \
|
||||
+ put_local_var(lvar); \
|
||||
+ _flags = __this_cpu_read(lvar.flags); \
|
||||
+ } while (0)
|
||||
+
|
||||
+#define local_lock_irqsave_on(lvar, _flags, cpu) \
|
||||
+ do { \
|
||||
+ __local_lock_irqsave(&per_cpu(lvar, cpu)); \
|
||||
+ _flags = per_cpu(lvar, cpu).flags; \
|
||||
+ } while (0)
|
||||
+
|
||||
+static inline int __local_unlock_irqrestore(struct local_irq_lock *lv,
|
||||
+ unsigned long flags)
|
||||
+{
|
||||
+ LL_WARN(!lv->nestcnt);
|
||||
+ LL_WARN(lv->owner != current);
|
||||
+ if (--lv->nestcnt)
|
||||
+ return 0;
|
||||
+
|
||||
+ lv->owner = NULL;
|
||||
+ spin_unlock_irqrestore(&lv->lock, lv->flags);
|
||||
+ return 1;
|
||||
+}
|
||||
+
|
||||
+#define local_unlock_irqrestore(lvar, flags) \
|
||||
+ do { \
|
||||
+ if (__local_unlock_irqrestore(this_cpu_ptr(&lvar), flags)) \
|
||||
+ put_local_var(lvar); \
|
||||
+ } while (0)
|
||||
+
|
||||
+#define local_unlock_irqrestore_on(lvar, flags, cpu) \
|
||||
+ do { \
|
||||
+ __local_unlock_irqrestore(&per_cpu(lvar, cpu), flags); \
|
||||
+ } while (0)
|
||||
+
|
||||
+#define local_spin_trylock_irq(lvar, lock) \
|
||||
+ ({ \
|
||||
+ int __locked; \
|
||||
+ local_lock_irq(lvar); \
|
||||
+ __locked = spin_trylock(lock); \
|
||||
+ if (!__locked) \
|
||||
+ local_unlock_irq(lvar); \
|
||||
+ __locked; \
|
||||
+ })
|
||||
+
|
||||
+#define local_spin_lock_irq(lvar, lock) \
|
||||
+ do { \
|
||||
+ local_lock_irq(lvar); \
|
||||
+ spin_lock(lock); \
|
||||
+ } while (0)
|
||||
+
|
||||
+#define local_spin_unlock_irq(lvar, lock) \
|
||||
+ do { \
|
||||
+ spin_unlock(lock); \
|
||||
+ local_unlock_irq(lvar); \
|
||||
+ } while (0)
|
||||
+
|
||||
+#define local_spin_lock_irqsave(lvar, lock, flags) \
|
||||
+ do { \
|
||||
+ local_lock_irqsave(lvar, flags); \
|
||||
+ spin_lock(lock); \
|
||||
+ } while (0)
|
||||
+
|
||||
+#define local_spin_unlock_irqrestore(lvar, lock, flags) \
|
||||
+ do { \
|
||||
+ spin_unlock(lock); \
|
||||
+ local_unlock_irqrestore(lvar, flags); \
|
||||
+ } while (0)
|
||||
+
|
||||
+#define get_locked_var(lvar, var) \
|
||||
+ (*({ \
|
||||
+ local_lock(lvar); \
|
||||
+ this_cpu_ptr(&var); \
|
||||
+ }))
|
||||
+
|
||||
+#define put_locked_var(lvar, var) local_unlock(lvar);
|
||||
+
|
||||
+#define local_lock_cpu(lvar) \
|
||||
+ ({ \
|
||||
+ local_lock(lvar); \
|
||||
+ smp_processor_id(); \
|
||||
+ })
|
||||
+
|
||||
+#define local_unlock_cpu(lvar) local_unlock(lvar)
|
||||
+
|
||||
+#else /* PREEMPT_RT_BASE */
|
||||
+
|
||||
+#define DEFINE_LOCAL_IRQ_LOCK(lvar) __typeof__(const int) lvar
|
||||
+#define DECLARE_LOCAL_IRQ_LOCK(lvar) extern __typeof__(const int) lvar
|
||||
+
|
||||
+static inline void local_irq_lock_init(int lvar) { }
|
||||
+
|
||||
+#define local_trylock(lvar) \
|
||||
+ ({ \
|
||||
+ preempt_disable(); \
|
||||
+ 1; \
|
||||
+ })
|
||||
+
|
||||
+#define local_lock(lvar) preempt_disable()
|
||||
+#define local_unlock(lvar) preempt_enable()
|
||||
+#define local_lock_irq(lvar) local_irq_disable()
|
||||
+#define local_lock_irq_on(lvar, cpu) local_irq_disable()
|
||||
+#define local_unlock_irq(lvar) local_irq_enable()
|
||||
+#define local_unlock_irq_on(lvar, cpu) local_irq_enable()
|
||||
+#define local_lock_irqsave(lvar, flags) local_irq_save(flags)
|
||||
+#define local_unlock_irqrestore(lvar, flags) local_irq_restore(flags)
|
||||
+
|
||||
+#define local_spin_trylock_irq(lvar, lock) spin_trylock_irq(lock)
|
||||
+#define local_spin_lock_irq(lvar, lock) spin_lock_irq(lock)
|
||||
+#define local_spin_unlock_irq(lvar, lock) spin_unlock_irq(lock)
|
||||
+#define local_spin_lock_irqsave(lvar, lock, flags) \
|
||||
+ spin_lock_irqsave(lock, flags)
|
||||
+#define local_spin_unlock_irqrestore(lvar, lock, flags) \
|
||||
+ spin_unlock_irqrestore(lock, flags)
|
||||
+
|
||||
+#define get_locked_var(lvar, var) get_cpu_var(var)
|
||||
+#define put_locked_var(lvar, var) put_cpu_var(var)
|
||||
+
|
||||
+#define local_lock_cpu(lvar) get_cpu()
|
||||
+#define local_unlock_cpu(lvar) put_cpu()
|
||||
+
|
||||
+#endif
|
||||
+
|
||||
+#endif
|
||||
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
|
||||
index 70b7123f38c7..24421bf8c4b3 100644
|
||||
--- a/include/linux/percpu.h
|
||||
+++ b/include/linux/percpu.h
|
||||
@@ -19,6 +19,35 @@
|
||||
#define PERCPU_MODULE_RESERVE 0
|
||||
#endif
|
||||
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+
|
||||
+#define get_local_var(var) (*({ \
|
||||
+ migrate_disable(); \
|
||||
+ this_cpu_ptr(&var); }))
|
||||
+
|
||||
+#define put_local_var(var) do { \
|
||||
+ (void)&(var); \
|
||||
+ migrate_enable(); \
|
||||
+} while (0)
|
||||
+
|
||||
+# define get_local_ptr(var) ({ \
|
||||
+ migrate_disable(); \
|
||||
+ this_cpu_ptr(var); })
|
||||
+
|
||||
+# define put_local_ptr(var) do { \
|
||||
+ (void)(var); \
|
||||
+ migrate_enable(); \
|
||||
+} while (0)
|
||||
+
|
||||
+#else
|
||||
+
|
||||
+#define get_local_var(var) get_cpu_var(var)
|
||||
+#define put_local_var(var) put_cpu_var(var)
|
||||
+#define get_local_ptr(var) get_cpu_ptr(var)
|
||||
+#define put_local_ptr(var) put_cpu_ptr(var)
|
||||
+
|
||||
+#endif
|
||||
+
|
||||
/* minimum unit size, also is the maximum supported allocation size */
|
||||
#define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10)
|
||||
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,48 +0,0 @@
|
||||
From 0e057064ed654b5f7fa22cc5f159ed67eeb332dc Mon Sep 17 00:00:00 2001
|
||||
From: Julia Cartwright <julia@ni.com>
|
||||
Date: Mon, 7 May 2018 08:58:56 -0500
|
||||
Subject: [PATCH 062/328] locallock: provide {get,put}_locked_ptr() variants
|
||||
|
||||
Provide a set of locallocked accessors for pointers to per-CPU data;
|
||||
this is useful for dynamically-allocated per-CPU regions, for example.
|
||||
|
||||
These are symmetric with the {get,put}_cpu_ptr() per-CPU accessor
|
||||
variants.
|
||||
|
||||
Signed-off-by: Julia Cartwright <julia@ni.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/locallock.h | 10 ++++++++++
|
||||
1 file changed, 10 insertions(+)
|
||||
|
||||
diff --git a/include/linux/locallock.h b/include/linux/locallock.h
|
||||
index d658c2552601..921eab83cd34 100644
|
||||
--- a/include/linux/locallock.h
|
||||
+++ b/include/linux/locallock.h
|
||||
@@ -222,6 +222,14 @@ static inline int __local_unlock_irqrestore(struct local_irq_lock *lv,
|
||||
|
||||
#define put_locked_var(lvar, var) local_unlock(lvar);
|
||||
|
||||
+#define get_locked_ptr(lvar, var) \
|
||||
+ ({ \
|
||||
+ local_lock(lvar); \
|
||||
+ this_cpu_ptr(var); \
|
||||
+ })
|
||||
+
|
||||
+#define put_locked_ptr(lvar, var) local_unlock(lvar);
|
||||
+
|
||||
#define local_lock_cpu(lvar) \
|
||||
({ \
|
||||
local_lock(lvar); \
|
||||
@@ -262,6 +270,8 @@ static inline void local_irq_lock_init(int lvar) { }
|
||||
|
||||
#define get_locked_var(lvar, var) get_cpu_var(var)
|
||||
#define put_locked_var(lvar, var) put_cpu_var(var)
|
||||
+#define get_locked_ptr(lvar, var) get_cpu_ptr(var)
|
||||
+#define put_locked_ptr(lvar, var) put_cpu_ptr(var)
|
||||
|
||||
#define local_lock_cpu(lvar) get_cpu()
|
||||
#define local_unlock_cpu(lvar) put_cpu()
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,29 +0,0 @@
|
||||
From a132d9a98679bcc505c36c80270ddaa741c15cbc Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Fri, 3 Jul 2009 08:44:34 -0500
|
||||
Subject: [PATCH 063/328] mm/scatterlist: Do not disable irqs on RT
|
||||
|
||||
For -RT it is enough to keep pagefault disabled (which is currently handled by
|
||||
kmap_atomic()).
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
lib/scatterlist.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
|
||||
index 60e7eca2f4be..aad8b9ecd496 100644
|
||||
--- a/lib/scatterlist.c
|
||||
+++ b/lib/scatterlist.c
|
||||
@@ -777,7 +777,7 @@ void sg_miter_stop(struct sg_mapping_iter *miter)
|
||||
flush_kernel_dcache_page(miter->page);
|
||||
|
||||
if (miter->__flags & SG_MITER_ATOMIC) {
|
||||
- WARN_ON_ONCE(preemptible());
|
||||
+ WARN_ON_ONCE(!pagefault_disabled());
|
||||
kunmap_atomic(miter->addr);
|
||||
} else
|
||||
kunmap(miter->page);
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,153 +0,0 @@
|
||||
From f95acea987d23816f8094d7db13ae2afb94136ce Mon Sep 17 00:00:00 2001
|
||||
From: Oleg Nesterov <oleg@redhat.com>
|
||||
Date: Tue, 14 Jul 2015 14:26:34 +0200
|
||||
Subject: [PATCH 064/328] signal/x86: Delay calling signals in atomic
|
||||
|
||||
On x86_64 we must disable preemption before we enable interrupts
|
||||
for stack faults, int3 and debugging, because the current task is using
|
||||
a per CPU debug stack defined by the IST. If we schedule out, another task
|
||||
can come in and use the same stack and cause the stack to be corrupted
|
||||
and crash the kernel on return.
|
||||
|
||||
When CONFIG_PREEMPT_RT_FULL is enabled, spin_locks become mutexes, and
|
||||
one of these is the spin lock used in signal handling.
|
||||
|
||||
Some of the debug code (int3) causes do_trap() to send a signal.
|
||||
This function calls a spin lock that has been converted to a mutex
|
||||
and has the possibility to sleep. If this happens, the above issues with
|
||||
the corrupted stack is possible.
|
||||
|
||||
Instead of calling the signal right away, for PREEMPT_RT and x86_64,
|
||||
the signal information is stored on the stacks task_struct and
|
||||
TIF_NOTIFY_RESUME is set. Then on exit of the trap, the signal resume
|
||||
code will send the signal when preemption is enabled.
|
||||
|
||||
[ rostedt: Switched from #ifdef CONFIG_PREEMPT_RT_FULL to
|
||||
ARCH_RT_DELAYS_SIGNAL_SEND and added comments to the code. ]
|
||||
|
||||
|
||||
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
|
||||
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
arch/x86/entry/common.c | 7 +++++++
|
||||
arch/x86/include/asm/signal.h | 13 ++++++++++++
|
||||
include/linux/sched.h | 4 ++++
|
||||
kernel/signal.c | 37 +++++++++++++++++++++++++++++++++--
|
||||
4 files changed, 59 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
|
||||
index 8353348ddeaf..91676b0d2d4c 100644
|
||||
--- a/arch/x86/entry/common.c
|
||||
+++ b/arch/x86/entry/common.c
|
||||
@@ -152,6 +152,13 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
|
||||
if (cached_flags & _TIF_NEED_RESCHED)
|
||||
schedule();
|
||||
|
||||
+#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
|
||||
+ if (unlikely(current->forced_info.si_signo)) {
|
||||
+ struct task_struct *t = current;
|
||||
+ force_sig_info(t->forced_info.si_signo, &t->forced_info, t);
|
||||
+ t->forced_info.si_signo = 0;
|
||||
+ }
|
||||
+#endif
|
||||
if (cached_flags & _TIF_UPROBE)
|
||||
uprobe_notify_resume(regs);
|
||||
|
||||
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
|
||||
index 33d3c88a7225..fb0438d06ca7 100644
|
||||
--- a/arch/x86/include/asm/signal.h
|
||||
+++ b/arch/x86/include/asm/signal.h
|
||||
@@ -28,6 +28,19 @@ typedef struct {
|
||||
#define SA_IA32_ABI 0x02000000u
|
||||
#define SA_X32_ABI 0x01000000u
|
||||
|
||||
+/*
|
||||
+ * Because some traps use the IST stack, we must keep preemption
|
||||
+ * disabled while calling do_trap(), but do_trap() may call
|
||||
+ * force_sig_info() which will grab the signal spin_locks for the
|
||||
+ * task, which in PREEMPT_RT_FULL are mutexes. By defining
|
||||
+ * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set
|
||||
+ * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the
|
||||
+ * trap.
|
||||
+ */
|
||||
+#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_X86_64)
|
||||
+#define ARCH_RT_DELAYS_SIGNAL_SEND
|
||||
+#endif
|
||||
+
|
||||
#ifndef CONFIG_COMPAT
|
||||
typedef sigset_t compat_sigset_t;
|
||||
#endif
|
||||
diff --git a/include/linux/sched.h b/include/linux/sched.h
|
||||
index 0489d3e0e78c..e4af260f81c5 100644
|
||||
--- a/include/linux/sched.h
|
||||
+++ b/include/linux/sched.h
|
||||
@@ -881,6 +881,10 @@ struct task_struct {
|
||||
/* Restored if set_restore_sigmask() was used: */
|
||||
sigset_t saved_sigmask;
|
||||
struct sigpending pending;
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+ /* TODO: move me into ->restart_block ? */
|
||||
+ struct siginfo forced_info;
|
||||
+#endif
|
||||
unsigned long sas_ss_sp;
|
||||
size_t sas_ss_size;
|
||||
unsigned int sas_ss_flags;
|
||||
diff --git a/kernel/signal.c b/kernel/signal.c
|
||||
index 5e278f1540ad..d5e764bb2444 100644
|
||||
--- a/kernel/signal.c
|
||||
+++ b/kernel/signal.c
|
||||
@@ -1277,8 +1277,8 @@ int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p,
|
||||
* We don't want to have recursive SIGSEGV's etc, for example,
|
||||
* that is why we also clear SIGNAL_UNKILLABLE.
|
||||
*/
|
||||
-int
|
||||
-force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
|
||||
+static int
|
||||
+do_force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
|
||||
{
|
||||
unsigned long int flags;
|
||||
int ret, blocked, ignored;
|
||||
@@ -1307,6 +1307,39 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
|
||||
return ret;
|
||||
}
|
||||
|
||||
+int force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
|
||||
+{
|
||||
+/*
|
||||
+ * On some archs, PREEMPT_RT has to delay sending a signal from a trap
|
||||
+ * since it can not enable preemption, and the signal code's spin_locks
|
||||
+ * turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME which will
|
||||
+ * send the signal on exit of the trap.
|
||||
+ */
|
||||
+#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
|
||||
+ if (in_atomic()) {
|
||||
+ if (WARN_ON_ONCE(t != current))
|
||||
+ return 0;
|
||||
+ if (WARN_ON_ONCE(t->forced_info.si_signo))
|
||||
+ return 0;
|
||||
+
|
||||
+ if (is_si_special(info)) {
|
||||
+ WARN_ON_ONCE(info != SEND_SIG_PRIV);
|
||||
+ t->forced_info.si_signo = sig;
|
||||
+ t->forced_info.si_errno = 0;
|
||||
+ t->forced_info.si_code = SI_KERNEL;
|
||||
+ t->forced_info.si_pid = 0;
|
||||
+ t->forced_info.si_uid = 0;
|
||||
+ } else {
|
||||
+ t->forced_info = *info;
|
||||
+ }
|
||||
+
|
||||
+ set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
|
||||
+ return 0;
|
||||
+ }
|
||||
+#endif
|
||||
+ return do_force_sig_info(sig, info, t);
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Nuke all other threads in the group.
|
||||
*/
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,48 +0,0 @@
|
||||
From 44575d6c7e6fb548a6bf67f427d151301cd1dfd8 Mon Sep 17 00:00:00 2001
|
||||
From: Yang Shi <yang.shi@linaro.org>
|
||||
Date: Thu, 10 Dec 2015 10:58:51 -0800
|
||||
Subject: [PATCH 065/328] x86/signal: delay calling signals on 32bit
|
||||
|
||||
When running some ptrace single step tests on x86-32 machine, the below problem
|
||||
is triggered:
|
||||
|
||||
BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:917
|
||||
in_atomic(): 1, irqs_disabled(): 0, pid: 1041, name: dummy2
|
||||
Preemption disabled at:[<c100326f>] do_debug+0x1f/0x1a0
|
||||
|
||||
CPU: 10 PID: 1041 Comm: dummy2 Tainted: G W 4.1.13-rt13 #1
|
||||
Call Trace:
|
||||
[<c1aa8306>] dump_stack+0x46/0x5c
|
||||
[<c1080517>] ___might_sleep+0x137/0x220
|
||||
[<c1ab0eff>] rt_spin_lock+0x1f/0x80
|
||||
[<c1064b5a>] do_force_sig_info+0x2a/0xc0
|
||||
[<c106567d>] force_sig_info+0xd/0x10
|
||||
[<c1010cff>] send_sigtrap+0x6f/0x80
|
||||
[<c10033b1>] do_debug+0x161/0x1a0
|
||||
[<c1ab2921>] debug_stack_correct+0x2e/0x35
|
||||
|
||||
This happens since 959274753857 ("x86, traps: Track entry into and exit
|
||||
from IST context") which was merged in v4.1-rc1.
|
||||
|
||||
Signed-off-by: Yang Shi <yang.shi@linaro.org>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
arch/x86/include/asm/signal.h | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
|
||||
index fb0438d06ca7..c00e27af2205 100644
|
||||
--- a/arch/x86/include/asm/signal.h
|
||||
+++ b/arch/x86/include/asm/signal.h
|
||||
@@ -37,7 +37,7 @@ typedef struct {
|
||||
* TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the
|
||||
* trap.
|
||||
*/
|
||||
-#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_X86_64)
|
||||
+#if defined(CONFIG_PREEMPT_RT_FULL)
|
||||
#define ARCH_RT_DELAYS_SIGNAL_SEND
|
||||
#endif
|
||||
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,196 +0,0 @@
|
||||
From 6b9121d4d6cf25eabc1b638027345308486f88b1 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Fri, 18 Mar 2011 09:18:52 +0100
|
||||
Subject: [PATCH 066/328] buffer_head: Replace bh_uptodate_lock for -rt
|
||||
|
||||
Wrap the bit_spin_lock calls into a separate inline and add the RT
|
||||
replacements with a real spinlock.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
fs/buffer.c | 21 +++++++--------------
|
||||
fs/ext4/page-io.c | 6 ++----
|
||||
fs/ntfs/aops.c | 10 +++-------
|
||||
include/linux/buffer_head.h | 34 ++++++++++++++++++++++++++++++++++
|
||||
4 files changed, 46 insertions(+), 25 deletions(-)
|
||||
|
||||
diff --git a/fs/buffer.c b/fs/buffer.c
|
||||
index a550e0d8e965..a5b3a456dbff 100644
|
||||
--- a/fs/buffer.c
|
||||
+++ b/fs/buffer.c
|
||||
@@ -274,8 +274,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
|
||||
* decide that the page is now completely done.
|
||||
*/
|
||||
first = page_buffers(page);
|
||||
- local_irq_save(flags);
|
||||
- bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
|
||||
+ flags = bh_uptodate_lock_irqsave(first);
|
||||
clear_buffer_async_read(bh);
|
||||
unlock_buffer(bh);
|
||||
tmp = bh;
|
||||
@@ -288,8 +287,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
|
||||
}
|
||||
tmp = tmp->b_this_page;
|
||||
} while (tmp != bh);
|
||||
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
|
||||
- local_irq_restore(flags);
|
||||
+ bh_uptodate_unlock_irqrestore(first, flags);
|
||||
|
||||
/*
|
||||
* If none of the buffers had errors and they are all
|
||||
@@ -301,9 +299,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
|
||||
return;
|
||||
|
||||
still_busy:
|
||||
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
|
||||
- local_irq_restore(flags);
|
||||
- return;
|
||||
+ bh_uptodate_unlock_irqrestore(first, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -330,8 +326,7 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate)
|
||||
}
|
||||
|
||||
first = page_buffers(page);
|
||||
- local_irq_save(flags);
|
||||
- bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
|
||||
+ flags = bh_uptodate_lock_irqsave(first);
|
||||
|
||||
clear_buffer_async_write(bh);
|
||||
unlock_buffer(bh);
|
||||
@@ -343,15 +338,12 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate)
|
||||
}
|
||||
tmp = tmp->b_this_page;
|
||||
}
|
||||
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
|
||||
- local_irq_restore(flags);
|
||||
+ bh_uptodate_unlock_irqrestore(first, flags);
|
||||
end_page_writeback(page);
|
||||
return;
|
||||
|
||||
still_busy:
|
||||
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
|
||||
- local_irq_restore(flags);
|
||||
- return;
|
||||
+ bh_uptodate_unlock_irqrestore(first, flags);
|
||||
}
|
||||
EXPORT_SYMBOL(end_buffer_async_write);
|
||||
|
||||
@@ -3368,6 +3360,7 @@ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
|
||||
struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
|
||||
if (ret) {
|
||||
INIT_LIST_HEAD(&ret->b_assoc_buffers);
|
||||
+ buffer_head_init_locks(ret);
|
||||
preempt_disable();
|
||||
__this_cpu_inc(bh_accounting.nr);
|
||||
recalc_bh_state();
|
||||
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
|
||||
index 9cc79b7b0df1..3f4ba2011499 100644
|
||||
--- a/fs/ext4/page-io.c
|
||||
+++ b/fs/ext4/page-io.c
|
||||
@@ -95,8 +95,7 @@ static void ext4_finish_bio(struct bio *bio)
|
||||
* We check all buffers in the page under BH_Uptodate_Lock
|
||||
* to avoid races with other end io clearing async_write flags
|
||||
*/
|
||||
- local_irq_save(flags);
|
||||
- bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
|
||||
+ flags = bh_uptodate_lock_irqsave(head);
|
||||
do {
|
||||
if (bh_offset(bh) < bio_start ||
|
||||
bh_offset(bh) + bh->b_size > bio_end) {
|
||||
@@ -108,8 +107,7 @@ static void ext4_finish_bio(struct bio *bio)
|
||||
if (bio->bi_status)
|
||||
buffer_io_error(bh);
|
||||
} while ((bh = bh->b_this_page) != head);
|
||||
- bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
|
||||
- local_irq_restore(flags);
|
||||
+ bh_uptodate_unlock_irqrestore(head, flags);
|
||||
if (!under_io) {
|
||||
#ifdef CONFIG_EXT4_FS_ENCRYPTION
|
||||
if (data_page)
|
||||
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
|
||||
index 8946130c87ad..71d0b3ba70f8 100644
|
||||
--- a/fs/ntfs/aops.c
|
||||
+++ b/fs/ntfs/aops.c
|
||||
@@ -106,8 +106,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
|
||||
"0x%llx.", (unsigned long long)bh->b_blocknr);
|
||||
}
|
||||
first = page_buffers(page);
|
||||
- local_irq_save(flags);
|
||||
- bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
|
||||
+ flags = bh_uptodate_lock_irqsave(first);
|
||||
clear_buffer_async_read(bh);
|
||||
unlock_buffer(bh);
|
||||
tmp = bh;
|
||||
@@ -122,8 +121,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
|
||||
}
|
||||
tmp = tmp->b_this_page;
|
||||
} while (tmp != bh);
|
||||
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
|
||||
- local_irq_restore(flags);
|
||||
+ bh_uptodate_unlock_irqrestore(first, flags);
|
||||
/*
|
||||
* If none of the buffers had errors then we can set the page uptodate,
|
||||
* but we first have to perform the post read mst fixups, if the
|
||||
@@ -156,9 +154,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
|
||||
unlock_page(page);
|
||||
return;
|
||||
still_busy:
|
||||
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
|
||||
- local_irq_restore(flags);
|
||||
- return;
|
||||
+ bh_uptodate_unlock_irqrestore(first, flags);
|
||||
}
|
||||
|
||||
/**
|
||||
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
|
||||
index 96225a77c112..8a1bcfb145d7 100644
|
||||
--- a/include/linux/buffer_head.h
|
||||
+++ b/include/linux/buffer_head.h
|
||||
@@ -76,8 +76,42 @@ struct buffer_head {
|
||||
struct address_space *b_assoc_map; /* mapping this buffer is
|
||||
associated with */
|
||||
atomic_t b_count; /* users using this buffer_head */
|
||||
+#ifdef CONFIG_PREEMPT_RT_BASE
|
||||
+ spinlock_t b_uptodate_lock;
|
||||
+#endif
|
||||
};
|
||||
|
||||
+static inline unsigned long bh_uptodate_lock_irqsave(struct buffer_head *bh)
|
||||
+{
|
||||
+ unsigned long flags;
|
||||
+
|
||||
+#ifndef CONFIG_PREEMPT_RT_BASE
|
||||
+ local_irq_save(flags);
|
||||
+ bit_spin_lock(BH_Uptodate_Lock, &bh->b_state);
|
||||
+#else
|
||||
+ spin_lock_irqsave(&bh->b_uptodate_lock, flags);
|
||||
+#endif
|
||||
+ return flags;
|
||||
+}
|
||||
+
|
||||
+static inline void
|
||||
+bh_uptodate_unlock_irqrestore(struct buffer_head *bh, unsigned long flags)
|
||||
+{
|
||||
+#ifndef CONFIG_PREEMPT_RT_BASE
|
||||
+ bit_spin_unlock(BH_Uptodate_Lock, &bh->b_state);
|
||||
+ local_irq_restore(flags);
|
||||
+#else
|
||||
+ spin_unlock_irqrestore(&bh->b_uptodate_lock, flags);
|
||||
+#endif
|
||||
+}
|
||||
+
|
||||
+static inline void buffer_head_init_locks(struct buffer_head *bh)
|
||||
+{
|
||||
+#ifdef CONFIG_PREEMPT_RT_BASE
|
||||
+ spin_lock_init(&bh->b_uptodate_lock);
|
||||
+#endif
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* macro tricks to expand the set_buffer_foo(), clear_buffer_foo()
|
||||
* and buffer_foo() functions.
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,109 +0,0 @@
|
||||
From 72ca6594764d9a6523352dc609644bea68a3a74b Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Fri, 18 Mar 2011 10:11:25 +0100
|
||||
Subject: [PATCH 067/328] fs: jbd/jbd2: Make state lock and journal head lock
|
||||
rt safe
|
||||
|
||||
bit_spin_locks break under RT.
|
||||
|
||||
Based on a previous patch from Steven Rostedt <rostedt@goodmis.org>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
--
|
||||
include/linux/buffer_head.h | 8 ++++++++
|
||||
include/linux/jbd2.h | 24 ++++++++++++++++++++++++
|
||||
2 files changed, 32 insertions(+)
|
||||
---
|
||||
include/linux/buffer_head.h | 8 ++++++++
|
||||
include/linux/jbd2.h | 24 ++++++++++++++++++++++++
|
||||
2 files changed, 32 insertions(+)
|
||||
|
||||
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
|
||||
index 8a1bcfb145d7..5869330d1f38 100644
|
||||
--- a/include/linux/buffer_head.h
|
||||
+++ b/include/linux/buffer_head.h
|
||||
@@ -78,6 +78,10 @@ struct buffer_head {
|
||||
atomic_t b_count; /* users using this buffer_head */
|
||||
#ifdef CONFIG_PREEMPT_RT_BASE
|
||||
spinlock_t b_uptodate_lock;
|
||||
+#if IS_ENABLED(CONFIG_JBD2)
|
||||
+ spinlock_t b_state_lock;
|
||||
+ spinlock_t b_journal_head_lock;
|
||||
+#endif
|
||||
#endif
|
||||
};
|
||||
|
||||
@@ -109,6 +113,10 @@ static inline void buffer_head_init_locks(struct buffer_head *bh)
|
||||
{
|
||||
#ifdef CONFIG_PREEMPT_RT_BASE
|
||||
spin_lock_init(&bh->b_uptodate_lock);
|
||||
+#if IS_ENABLED(CONFIG_JBD2)
|
||||
+ spin_lock_init(&bh->b_state_lock);
|
||||
+ spin_lock_init(&bh->b_journal_head_lock);
|
||||
+#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
|
||||
index 268f3000d1b3..8f5d6ecb802e 100644
|
||||
--- a/include/linux/jbd2.h
|
||||
+++ b/include/linux/jbd2.h
|
||||
@@ -347,32 +347,56 @@ static inline struct journal_head *bh2jh(struct buffer_head *bh)
|
||||
|
||||
static inline void jbd_lock_bh_state(struct buffer_head *bh)
|
||||
{
|
||||
+#ifndef CONFIG_PREEMPT_RT_BASE
|
||||
bit_spin_lock(BH_State, &bh->b_state);
|
||||
+#else
|
||||
+ spin_lock(&bh->b_state_lock);
|
||||
+#endif
|
||||
}
|
||||
|
||||
static inline int jbd_trylock_bh_state(struct buffer_head *bh)
|
||||
{
|
||||
+#ifndef CONFIG_PREEMPT_RT_BASE
|
||||
return bit_spin_trylock(BH_State, &bh->b_state);
|
||||
+#else
|
||||
+ return spin_trylock(&bh->b_state_lock);
|
||||
+#endif
|
||||
}
|
||||
|
||||
static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
|
||||
{
|
||||
+#ifndef CONFIG_PREEMPT_RT_BASE
|
||||
return bit_spin_is_locked(BH_State, &bh->b_state);
|
||||
+#else
|
||||
+ return spin_is_locked(&bh->b_state_lock);
|
||||
+#endif
|
||||
}
|
||||
|
||||
static inline void jbd_unlock_bh_state(struct buffer_head *bh)
|
||||
{
|
||||
+#ifndef CONFIG_PREEMPT_RT_BASE
|
||||
bit_spin_unlock(BH_State, &bh->b_state);
|
||||
+#else
|
||||
+ spin_unlock(&bh->b_state_lock);
|
||||
+#endif
|
||||
}
|
||||
|
||||
static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
|
||||
{
|
||||
+#ifndef CONFIG_PREEMPT_RT_BASE
|
||||
bit_spin_lock(BH_JournalHead, &bh->b_state);
|
||||
+#else
|
||||
+ spin_lock(&bh->b_journal_head_lock);
|
||||
+#endif
|
||||
}
|
||||
|
||||
static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
|
||||
{
|
||||
+#ifndef CONFIG_PREEMPT_RT_BASE
|
||||
bit_spin_unlock(BH_JournalHead, &bh->b_state);
|
||||
+#else
|
||||
+ spin_unlock(&bh->b_journal_head_lock);
|
||||
+#endif
|
||||
}
|
||||
|
||||
#define J_ASSERT(assert) BUG_ON(!(assert))
|
||||
--
|
||||
2.25.1
|
||||
|
@@ -1,120 +0,0 @@
From eb37c3d2df6895d5c86504fdb1a509d075414f52 Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Fri, 21 Jun 2013 15:07:25 -0400
Subject: [PATCH 068/328] list_bl: Make list head locking RT safe

As per changes in include/linux/jbd_common.h for avoiding the
bit_spin_locks on RT ("fs: jbd/jbd2: Make state lock and journal
head lock rt safe") we do the same thing here.

We use the non atomic __set_bit and __clear_bit inside the scope of
the lock to preserve the ability of the existing LIST_DEBUG code to
use the zero'th bit in the sanity checks.

As a bit spinlock, we had no lockdep visibility into the usage
of the list head locking. Now, if we were to implement it as a
standard non-raw spinlock, we would see:

BUG: sleeping function called from invalid context at kernel/rtmutex.c:658
in_atomic(): 1, irqs_disabled(): 0, pid: 122, name: udevd
5 locks held by udevd/122:
#0: (&sb->s_type->i_mutex_key#7/1){+.+.+.}, at: [<ffffffff811967e8>] lock_rename+0xe8/0xf0
#1: (rename_lock){+.+...}, at: [<ffffffff811a277c>] d_move+0x2c/0x60
#2: (&dentry->d_lock){+.+...}, at: [<ffffffff811a0763>] dentry_lock_for_move+0xf3/0x130
#3: (&dentry->d_lock/2){+.+...}, at: [<ffffffff811a0734>] dentry_lock_for_move+0xc4/0x130
#4: (&dentry->d_lock/3){+.+...}, at: [<ffffffff811a0747>] dentry_lock_for_move+0xd7/0x130
Pid: 122, comm: udevd Not tainted 3.4.47-rt62 #7
Call Trace:
[<ffffffff810b9624>] __might_sleep+0x134/0x1f0
[<ffffffff817a24d4>] rt_spin_lock+0x24/0x60
[<ffffffff811a0c4c>] __d_shrink+0x5c/0xa0
[<ffffffff811a1b2d>] __d_drop+0x1d/0x40
[<ffffffff811a24be>] __d_move+0x8e/0x320
[<ffffffff811a278e>] d_move+0x3e/0x60
[<ffffffff81199598>] vfs_rename+0x198/0x4c0
[<ffffffff8119b093>] sys_renameat+0x213/0x240
[<ffffffff817a2de5>] ? _raw_spin_unlock+0x35/0x60
[<ffffffff8107781c>] ? do_page_fault+0x1ec/0x4b0
[<ffffffff817a32ca>] ? retint_swapgs+0xe/0x13
[<ffffffff813eb0e6>] ? trace_hardirqs_on_thunk+0x3a/0x3f
[<ffffffff8119b0db>] sys_rename+0x1b/0x20
[<ffffffff817a3b96>] system_call_fastpath+0x1a/0x1f

Since we are only taking the lock during short lived list operations,
lets assume for now that it being raw won't be a significant latency
concern.


Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/list_bl.h | 28 ++++++++++++++++++++++++++--
1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h
index 3fc2cc57ba1b..69b659259bac 100644
--- a/include/linux/list_bl.h
+++ b/include/linux/list_bl.h
@@ -3,6 +3,7 @@
#define _LINUX_LIST_BL_H

#include <linux/list.h>
+#include <linux/spinlock.h>
#include <linux/bit_spinlock.h>

/*
@@ -33,13 +34,22 @@

struct hlist_bl_head {
struct hlist_bl_node *first;
+#ifdef CONFIG_PREEMPT_RT_BASE
+ raw_spinlock_t lock;
+#endif
};

struct hlist_bl_node {
struct hlist_bl_node *next, **pprev;
};
-#define INIT_HLIST_BL_HEAD(ptr) \
- ((ptr)->first = NULL)
+
+static inline void INIT_HLIST_BL_HEAD(struct hlist_bl_head *h)
+{
+ h->first = NULL;
+#ifdef CONFIG_PREEMPT_RT_BASE
+ raw_spin_lock_init(&h->lock);
+#endif
+}

static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h)
{
@@ -119,12 +129,26 @@ static inline void hlist_bl_del_init(struct hlist_bl_node *n)

static inline void hlist_bl_lock(struct hlist_bl_head *b)
{
+#ifndef CONFIG_PREEMPT_RT_BASE
bit_spin_lock(0, (unsigned long *)b);
+#else
+ raw_spin_lock(&b->lock);
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
+ __set_bit(0, (unsigned long *)b);
+#endif
+#endif
}

static inline void hlist_bl_unlock(struct hlist_bl_head *b)
{
+#ifndef CONFIG_PREEMPT_RT_BASE
__bit_spin_unlock(0, (unsigned long *)b);
+#else
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
+ __clear_bit(0, (unsigned long *)b);
+#endif
+ raw_spin_unlock(&b->lock);
+#endif
}

static inline bool hlist_bl_is_locked(struct hlist_bl_head *b)
--
2.25.1
@@ -1,103 +0,0 @@
From a294373c35c31ae762358146f49c3c48f1429526 Mon Sep 17 00:00:00 2001
From: Josh Cartwright <joshc@ni.com>
Date: Thu, 31 Mar 2016 00:04:25 -0500
Subject: [PATCH 069/328] list_bl: fixup bogus lockdep warning

At first glance, the use of 'static inline' seems appropriate for
INIT_HLIST_BL_HEAD().

However, when a 'static inline' function invocation is inlined by gcc,
all callers share any static local data declared within that inline
function.

This presents a problem for how lockdep classes are setup. raw_spinlocks, for
example, when CONFIG_DEBUG_SPINLOCK,

# define raw_spin_lock_init(lock) \
do { \
static struct lock_class_key __key; \
\
__raw_spin_lock_init((lock), #lock, &__key); \
} while (0)

When this macro is expanded into a 'static inline' caller, like
INIT_HLIST_BL_HEAD():

static inline INIT_HLIST_BL_HEAD(struct hlist_bl_head *h)
{
h->first = NULL;
raw_spin_lock_init(&h->lock);
}

...the static local lock_class_key object is made a function static.

For compilation units which initialize invoke INIT_HLIST_BL_HEAD() more
than once, then, all of the invocations share this same static local
object.

This can lead to some very confusing lockdep splats (example below).
Solve this problem by forcing the INIT_HLIST_BL_HEAD() to be a macro,
which prevents the lockdep class object sharing.

=============================================
[ INFO: possible recursive locking detected ]
4.4.4-rt11 #4 Not tainted
---------------------------------------------
kswapd0/59 is trying to acquire lock:
(&h->lock#2){+.+.-.}, at: mb_cache_shrink_scan

but task is already holding lock:
(&h->lock#2){+.+.-.}, at: mb_cache_shrink_scan

other info that might help us debug this:
Possible unsafe locking scenario:

CPU0
----
lock(&h->lock#2);
lock(&h->lock#2);

*** DEADLOCK ***

May be due to missing lock nesting notation

2 locks held by kswapd0/59:
#0: (shrinker_rwsem){+.+...}, at: rt_down_read_trylock
#1: (&h->lock#2){+.+.-.}, at: mb_cache_shrink_scan

Reported-by: Luis Claudio R. Goncalves <lclaudio@uudg.org>
Tested-by: Luis Claudio R. Goncalves <lclaudio@uudg.org>
Signed-off-by: Josh Cartwright <joshc@ni.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/list_bl.h | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h
index 69b659259bac..0b5de7d9ffcf 100644
--- a/include/linux/list_bl.h
+++ b/include/linux/list_bl.h
@@ -43,13 +43,15 @@ struct hlist_bl_node {
struct hlist_bl_node *next, **pprev;
};

-static inline void INIT_HLIST_BL_HEAD(struct hlist_bl_head *h)
-{
- h->first = NULL;
#ifdef CONFIG_PREEMPT_RT_BASE
- raw_spin_lock_init(&h->lock);
+#define INIT_HLIST_BL_HEAD(h) \
+do { \
+ (h)->first = NULL; \
+ raw_spin_lock_init(&(h)->lock); \
+} while (0)
+#else
+#define INIT_HLIST_BL_HEAD(h) (h)->first = NULL
#endif
-}

static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h)
{
--
2.25.1
@@ -1,42 +0,0 @@
From 974bfebe6d809861b9a25af561668633ef3168a7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:29:57 -0500
Subject: [PATCH 070/328] genirq: Disable irqpoll on -rt

Creates long latencies for no value

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
kernel/irq/spurious.c | 8 ++++++++
1 file changed, 8 insertions(+)

diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index d867d6ddafdd..cd12ee86c01e 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -442,6 +442,10 @@ MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true");

static int __init irqfixup_setup(char *str)
{
+#ifdef CONFIG_PREEMPT_RT_BASE
+ pr_warn("irqfixup boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n");
+ return 1;
+#endif
irqfixup = 1;
printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
printk(KERN_WARNING "This may impact system performance.\n");
@@ -454,6 +458,10 @@ module_param(irqfixup, int, 0644);

static int __init irqpoll_setup(char *str)
{
+#ifdef CONFIG_PREEMPT_RT_BASE
+ pr_warn("irqpoll boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n");
+ return 1;
+#endif
irqfixup = 2;
printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
"enabled\n");
--
2.25.1
@@ -1,53 +0,0 @@
From 056e2667682c7afa14f71cb6d97ccf2217c0c08b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 3 Apr 2011 11:57:29 +0200
Subject: [PATCH 071/328] genirq: Force interrupt thread on RT

Force threaded_irqs and optimize the code (force_irqthreads) in regard
to this.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/interrupt.h | 4 ++++
kernel/irq/manage.c | 2 ++
2 files changed, 6 insertions(+)

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index eeceac3376fc..315f852b4981 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -427,7 +427,11 @@ extern int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
bool state);

#ifdef CONFIG_IRQ_FORCED_THREADING
+# ifdef CONFIG_PREEMPT_RT_BASE
+# define force_irqthreads (true)
+# else
extern bool force_irqthreads;
+# endif
#else
#define force_irqthreads (0)
#endif
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 23bcfa71077f..3c26d0708709 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -24,6 +24,7 @@
#include "internals.h"

#ifdef CONFIG_IRQ_FORCED_THREADING
+# ifndef CONFIG_PREEMPT_RT_BASE
__read_mostly bool force_irqthreads;
EXPORT_SYMBOL_GPL(force_irqthreads);

@@ -33,6 +34,7 @@ static int __init setup_forced_irqthreads(char *arg)
return 0;
}
early_param("threadirqs", setup_forced_irqthreads);
+# endif
#endif

static void __synchronize_hardirq(struct irq_desc *desc, bool sync_chip)
--
2.25.1
||||
|
@@ -1,172 +0,0 @@
|
||||
From 4c6015fdf87fb7f0f38ce92c85d5630d79c6ae23 Mon Sep 17 00:00:00 2001
|
||||
From: Peter Zijlstra <peterz@infradead.org>
|
||||
Date: Mon, 28 May 2018 15:24:20 +0200
|
||||
Subject: [PATCH 072/328] Split IRQ-off and zone->lock while freeing pages from
|
||||
PCP list #1
|
||||
|
||||
Split the IRQ-off section while accessing the PCP list from zone->lock
|
||||
while freeing pages.
|
||||
Introcude isolate_pcp_pages() which separates the pages from the PCP
|
||||
list onto a temporary list and then free the temporary list via
|
||||
free_pcppages_bulk().
|
||||
|
||||
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
mm/page_alloc.c | 82 +++++++++++++++++++++++++++++++------------------
|
||||
1 file changed, 52 insertions(+), 30 deletions(-)
|
||||
|
||||
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
|
||||
index e5c610d711f3..0cfcd42517a4 100644
|
||||
--- a/mm/page_alloc.c
|
||||
+++ b/mm/page_alloc.c
|
||||
@@ -1095,7 +1095,7 @@ static inline void prefetch_buddy(struct page *page)
|
||||
}
|
||||
|
||||
/*
|
||||
- * Frees a number of pages from the PCP lists
|
||||
+ * Frees a number of pages which have been collected from the pcp lists.
|
||||
* Assumes all pages on list are in same zone, and of same order.
|
||||
* count is the number of pages to free.
|
||||
*
|
||||
@@ -1106,14 +1106,41 @@ static inline void prefetch_buddy(struct page *page)
|
||||
* pinned" detection logic.
|
||||
*/
|
||||
static void free_pcppages_bulk(struct zone *zone, int count,
|
||||
- struct per_cpu_pages *pcp)
|
||||
+ struct list_head *head)
|
||||
+{
|
||||
+ bool isolated_pageblocks;
|
||||
+ struct page *page, *tmp;
|
||||
+ unsigned long flags;
|
||||
+
|
||||
+ spin_lock_irqsave(&zone->lock, flags);
|
||||
+ isolated_pageblocks = has_isolate_pageblock(zone);
|
||||
+
|
||||
+ /*
|
||||
+ * Use safe version since after __free_one_page(),
|
||||
+ * page->lru.next will not point to original list.
|
||||
+ */
|
||||
+ list_for_each_entry_safe(page, tmp, head, lru) {
|
||||
+ int mt = get_pcppage_migratetype(page);
|
||||
+ /* MIGRATE_ISOLATE page should not go to pcplists */
|
||||
+ VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
|
||||
+ /* Pageblock could have been isolated meanwhile */
|
||||
+ if (unlikely(isolated_pageblocks))
|
||||
+ mt = get_pageblock_migratetype(page);
|
||||
+
|
||||
+ __free_one_page(page, page_to_pfn(page), zone, 0, mt);
|
||||
+ trace_mm_page_pcpu_drain(page, 0, mt);
|
||||
+ }
|
||||
+ spin_unlock_irqrestore(&zone->lock, flags);
|
||||
+}
|
||||
+
|
||||
+static void isolate_pcp_pages(int count, struct per_cpu_pages *pcp,
|
||||
+ struct list_head *dst)
|
||||
+
|
||||
{
|
||||
int migratetype = 0;
|
||||
int batch_free = 0;
|
||||
int prefetch_nr = 0;
|
||||
- bool isolated_pageblocks;
|
||||
- struct page *page, *tmp;
|
||||
- LIST_HEAD(head);
|
||||
+ struct page *page;
|
||||
|
||||
while (count) {
|
||||
struct list_head *list;
|
||||
@@ -1145,7 +1172,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
|
||||
if (bulkfree_pcp_prepare(page))
|
||||
continue;
|
||||
|
||||
- list_add_tail(&page->lru, &head);
|
||||
+ list_add_tail(&page->lru, dst);
|
||||
|
||||
/*
|
||||
* We are going to put the page back to the global
|
||||
@@ -1160,26 +1187,6 @@ static void free_pcppages_bulk(struct zone *zone, int count,
|
||||
prefetch_buddy(page);
|
||||
} while (--count && --batch_free && !list_empty(list));
|
||||
}
|
||||
-
|
||||
- spin_lock(&zone->lock);
|
||||
- isolated_pageblocks = has_isolate_pageblock(zone);
|
||||
-
|
||||
- /*
|
||||
- * Use safe version since after __free_one_page(),
|
||||
- * page->lru.next will not point to original list.
|
||||
- */
|
||||
- list_for_each_entry_safe(page, tmp, &head, lru) {
|
||||
- int mt = get_pcppage_migratetype(page);
|
||||
- /* MIGRATE_ISOLATE page should not go to pcplists */
|
||||
- VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
|
||||
- /* Pageblock could have been isolated meanwhile */
|
||||
- if (unlikely(isolated_pageblocks))
|
||||
- mt = get_pageblock_migratetype(page);
|
||||
-
|
||||
- __free_one_page(page, page_to_pfn(page), zone, 0, mt);
|
||||
- trace_mm_page_pcpu_drain(page, 0, mt);
|
||||
- }
|
||||
- spin_unlock(&zone->lock);
|
||||
}
|
||||
|
||||
static void free_one_page(struct zone *zone,
|
||||
@@ -2544,13 +2551,18 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
|
||||
{
|
||||
unsigned long flags;
|
||||
int to_drain, batch;
|
||||
+ LIST_HEAD(dst);
|
||||
|
||||
local_irq_save(flags);
|
||||
batch = READ_ONCE(pcp->batch);
|
||||
to_drain = min(pcp->count, batch);
|
||||
if (to_drain > 0)
|
||||
- free_pcppages_bulk(zone, to_drain, pcp);
|
||||
+ isolate_pcp_pages(to_drain, pcp, &dst);
|
||||
+
|
||||
local_irq_restore(flags);
|
||||
+
|
||||
+ if (to_drain > 0)
|
||||
+ free_pcppages_bulk(zone, to_drain, &dst);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -2566,14 +2578,21 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone)
|
||||
unsigned long flags;
|
||||
struct per_cpu_pageset *pset;
|
||||
struct per_cpu_pages *pcp;
|
||||
+ LIST_HEAD(dst);
|
||||
+ int count;
|
||||
|
||||
local_irq_save(flags);
|
||||
pset = per_cpu_ptr(zone->pageset, cpu);
|
||||
|
||||
pcp = &pset->pcp;
|
||||
- if (pcp->count)
|
||||
- free_pcppages_bulk(zone, pcp->count, pcp);
|
||||
+ count = pcp->count;
|
||||
+ if (count)
|
||||
+ isolate_pcp_pages(count, pcp, &dst);
|
||||
+
|
||||
local_irq_restore(flags);
|
||||
+
|
||||
+ if (count)
|
||||
+ free_pcppages_bulk(zone, count, &dst);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2795,7 +2814,10 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn)
|
||||
pcp->count++;
|
||||
if (pcp->count >= pcp->high) {
|
||||
unsigned long batch = READ_ONCE(pcp->batch);
|
||||
- free_pcppages_bulk(zone, batch, pcp);
|
||||
+ LIST_HEAD(dst);
|
||||
+
|
||||
+ isolate_pcp_pages(batch, pcp, &dst);
|
||||
+ free_pcppages_bulk(zone, batch, &dst);
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.25.1
|
||||
|
@@ -1,171 +0,0 @@
|
||||
From dba8e6d7ab200ab5fe544af8c6093bcb3d215320 Mon Sep 17 00:00:00 2001
|
||||
From: Peter Zijlstra <peterz@infradead.org>
|
||||
Date: Mon, 28 May 2018 15:24:21 +0200
|
||||
Subject: [PATCH 073/328] Split IRQ-off and zone->lock while freeing pages from
|
||||
PCP list #2
|
||||
|
||||
Split the IRQ-off section while accessing the PCP list from zone->lock
|
||||
while freeing pages.
|
||||
Introcude isolate_pcp_pages() which separates the pages from the PCP
|
||||
list onto a temporary list and then free the temporary list via
|
||||
free_pcppages_bulk().
|
||||
|
||||
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
mm/page_alloc.c | 60 ++++++++++++++++++++++++++++++++++++++++---------
|
||||
1 file changed, 50 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
|
||||
index 0cfcd42517a4..9a4d150ea5b7 100644
|
||||
--- a/mm/page_alloc.c
|
||||
+++ b/mm/page_alloc.c
|
||||
@@ -1105,8 +1105,8 @@ static inline void prefetch_buddy(struct page *page)
|
||||
* And clear the zone's pages_scanned counter, to hold off the "all pages are
|
||||
* pinned" detection logic.
|
||||
*/
|
||||
-static void free_pcppages_bulk(struct zone *zone, int count,
|
||||
- struct list_head *head)
|
||||
+static void free_pcppages_bulk(struct zone *zone, struct list_head *head,
|
||||
+ bool zone_retry)
|
||||
{
|
||||
bool isolated_pageblocks;
|
||||
struct page *page, *tmp;
|
||||
@@ -1121,12 +1121,27 @@ static void free_pcppages_bulk(struct zone *zone, int count,
|
||||
*/
|
||||
list_for_each_entry_safe(page, tmp, head, lru) {
|
||||
int mt = get_pcppage_migratetype(page);
|
||||
+
|
||||
+ if (page_zone(page) != zone) {
|
||||
+ /*
|
||||
+ * free_unref_page_list() sorts pages by zone. If we end
|
||||
+ * up with pages from a different NUMA nodes belonging
|
||||
+ * to the same ZONE index then we need to redo with the
|
||||
+ * correct ZONE pointer. Skip the page for now, redo it
|
||||
+ * on the next iteration.
|
||||
+ */
|
||||
+ WARN_ON_ONCE(zone_retry == false);
|
||||
+ if (zone_retry)
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
/* MIGRATE_ISOLATE page should not go to pcplists */
|
||||
VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
|
||||
/* Pageblock could have been isolated meanwhile */
|
||||
if (unlikely(isolated_pageblocks))
|
||||
mt = get_pageblock_migratetype(page);
|
||||
|
||||
+ list_del(&page->lru);
|
||||
__free_one_page(page, page_to_pfn(page), zone, 0, mt);
|
||||
trace_mm_page_pcpu_drain(page, 0, mt);
|
||||
}
|
||||
@@ -2562,7 +2577,7 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
|
||||
local_irq_restore(flags);
|
||||
|
||||
if (to_drain > 0)
|
||||
- free_pcppages_bulk(zone, to_drain, &dst);
|
||||
+ free_pcppages_bulk(zone, &dst, false);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -2592,7 +2607,7 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone)
|
||||
local_irq_restore(flags);
|
||||
|
||||
if (count)
|
||||
- free_pcppages_bulk(zone, count, &dst);
|
||||
+ free_pcppages_bulk(zone, &dst, false);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2785,7 +2800,8 @@ static bool free_unref_page_prepare(struct page *page, unsigned long pfn)
|
||||
return true;
|
||||
}
|
||||
|
||||
-static void free_unref_page_commit(struct page *page, unsigned long pfn)
|
||||
+static void free_unref_page_commit(struct page *page, unsigned long pfn,
|
||||
+ struct list_head *dst)
|
||||
{
|
||||
struct zone *zone = page_zone(page);
|
||||
struct per_cpu_pages *pcp;
|
||||
@@ -2814,10 +2830,8 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn)
|
||||
pcp->count++;
|
||||
if (pcp->count >= pcp->high) {
|
||||
unsigned long batch = READ_ONCE(pcp->batch);
|
||||
- LIST_HEAD(dst);
|
||||
|
||||
- isolate_pcp_pages(batch, pcp, &dst);
|
||||
- free_pcppages_bulk(zone, batch, &dst);
|
||||
+ isolate_pcp_pages(batch, pcp, dst);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2828,13 +2842,17 @@ void free_unref_page(struct page *page)
|
||||
{
|
||||
unsigned long flags;
|
||||
unsigned long pfn = page_to_pfn(page);
|
||||
+ struct zone *zone = page_zone(page);
|
||||
+ LIST_HEAD(dst);
|
||||
|
||||
if (!free_unref_page_prepare(page, pfn))
|
||||
return;
|
||||
|
||||
local_irq_save(flags);
|
||||
- free_unref_page_commit(page, pfn);
|
||||
+ free_unref_page_commit(page, pfn, &dst);
|
||||
local_irq_restore(flags);
|
||||
+ if (!list_empty(&dst))
|
||||
+ free_pcppages_bulk(zone, &dst, false);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2845,6 +2863,11 @@ void free_unref_page_list(struct list_head *list)
|
||||
struct page *page, *next;
|
||||
unsigned long flags, pfn;
|
||||
int batch_count = 0;
|
||||
+ struct list_head dsts[__MAX_NR_ZONES];
|
||||
+ int i;
|
||||
+
|
||||
+ for (i = 0; i < __MAX_NR_ZONES; i++)
|
||||
+ INIT_LIST_HEAD(&dsts[i]);
|
||||
|
||||
/* Prepare pages for freeing */
|
||||
list_for_each_entry_safe(page, next, list, lru) {
|
||||
@@ -2857,10 +2880,12 @@ void free_unref_page_list(struct list_head *list)
|
||||
local_irq_save(flags);
|
||||
list_for_each_entry_safe(page, next, list, lru) {
|
||||
unsigned long pfn = page_private(page);
|
||||
+ enum zone_type type;
|
||||
|
||||
set_page_private(page, 0);
|
||||
trace_mm_page_free_batched(page);
|
||||
- free_unref_page_commit(page, pfn);
|
||||
+ type = page_zonenum(page);
|
||||
+ free_unref_page_commit(page, pfn, &dsts[type]);
|
||||
|
||||
/*
|
||||
* Guard against excessive IRQ disabled times when we get
|
||||
@@ -2873,6 +2898,21 @@ void free_unref_page_list(struct list_head *list)
|
||||
}
|
||||
}
|
||||
local_irq_restore(flags);
|
||||
+
|
||||
+ for (i = 0; i < __MAX_NR_ZONES; ) {
|
||||
+ struct page *page;
|
||||
+ struct zone *zone;
|
||||
+
|
||||
+ if (list_empty(&dsts[i])) {
|
||||
+ i++;
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ page = list_first_entry(&dsts[i], struct page, lru);
|
||||
+ zone = page_zone(page);
|
||||
+
|
||||
+ free_pcppages_bulk(zone, &dsts[i], true);
|
||||
+ }
|
||||
}
|
||||
|
||||
/*
|
||||
--
|
||||
2.25.1
|
||||
|
@@ -1,618 +0,0 @@
|
||||
From abbdf6516e6ac19a92a3c08fc7a2f1ecc66c2bc6 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Mon, 28 May 2018 15:24:22 +0200
|
||||
Subject: [PATCH 074/328] mm/SLxB: change list_lock to raw_spinlock_t
|
||||
|
||||
The list_lock is used with used with IRQs off on RT. Make it a raw_spinlock_t
|
||||
otherwise the interrupts won't be disabled on -RT. The locking rules remain
|
||||
the same on !RT.
|
||||
This patch changes it for SLAB and SLUB since both share the same header
|
||||
file for struct kmem_cache_node defintion.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
mm/slab.c | 94 +++++++++++++++++++++++++++----------------------------
|
||||
mm/slab.h | 2 +-
|
||||
mm/slub.c | 50 ++++++++++++++---------------
|
||||
3 files changed, 73 insertions(+), 73 deletions(-)
|
||||
|
||||
diff --git a/mm/slab.c b/mm/slab.c
|
||||
index 46f21e73db2f..38f6609343b3 100644
|
||||
--- a/mm/slab.c
|
||||
+++ b/mm/slab.c
|
||||
@@ -233,7 +233,7 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent)
|
||||
parent->shared = NULL;
|
||||
parent->alien = NULL;
|
||||
parent->colour_next = 0;
|
||||
- spin_lock_init(&parent->list_lock);
|
||||
+ raw_spin_lock_init(&parent->list_lock);
|
||||
parent->free_objects = 0;
|
||||
parent->free_touched = 0;
|
||||
}
|
||||
@@ -600,9 +600,9 @@ static noinline void cache_free_pfmemalloc(struct kmem_cache *cachep,
|
||||
page_node = page_to_nid(page);
|
||||
n = get_node(cachep, page_node);
|
||||
|
||||
- spin_lock(&n->list_lock);
|
||||
+ raw_spin_lock(&n->list_lock);
|
||||
free_block(cachep, &objp, 1, page_node, &list);
|
||||
- spin_unlock(&n->list_lock);
|
||||
+ raw_spin_unlock(&n->list_lock);
|
||||
|
||||
slabs_destroy(cachep, &list);
|
||||
}
|
||||
@@ -731,7 +731,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
|
||||
struct kmem_cache_node *n = get_node(cachep, node);
|
||||
|
||||
if (ac->avail) {
|
||||
- spin_lock(&n->list_lock);
|
||||
+ raw_spin_lock(&n->list_lock);
|
||||
/*
|
||||
* Stuff objects into the remote nodes shared array first.
|
||||
* That way we could avoid the overhead of putting the objects
|
||||
@@ -742,7 +742,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
|
||||
|
||||
free_block(cachep, ac->entry, ac->avail, node, list);
|
||||
ac->avail = 0;
|
||||
- spin_unlock(&n->list_lock);
|
||||
+ raw_spin_unlock(&n->list_lock);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -815,9 +815,9 @@ static int __cache_free_alien(struct kmem_cache *cachep, void *objp,
|
||||
slabs_destroy(cachep, &list);
|
||||
} else {
|
||||
n = get_node(cachep, page_node);
|
||||
- spin_lock(&n->list_lock);
|
||||
+ raw_spin_lock(&n->list_lock);
|
||||
free_block(cachep, &objp, 1, page_node, &list);
|
||||
- spin_unlock(&n->list_lock);
|
||||
+ raw_spin_unlock(&n->list_lock);
|
||||
slabs_destroy(cachep, &list);
|
||||
}
|
||||
return 1;
|
||||
@@ -858,10 +858,10 @@ static int init_cache_node(struct kmem_cache *cachep, int node, gfp_t gfp)
|
||||
*/
|
||||
n = get_node(cachep, node);
|
||||
if (n) {
|
||||
- spin_lock_irq(&n->list_lock);
|
||||
+ raw_spin_lock_irq(&n->list_lock);
|
||||
n->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount +
|
||||
cachep->num;
|
||||
- spin_unlock_irq(&n->list_lock);
|
||||
+ raw_spin_unlock_irq(&n->list_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -940,7 +940,7 @@ static int setup_kmem_cache_node(struct kmem_cache *cachep,
|
||||
goto fail;
|
||||
|
||||
n = get_node(cachep, node);
|
||||
- spin_lock_irq(&n->list_lock);
|
||||
+ raw_spin_lock_irq(&n->list_lock);
|
||||
if (n->shared && force_change) {
|
||||
free_block(cachep, n->shared->entry,
|
||||
n->shared->avail, node, &list);
|
||||
@@ -958,7 +958,7 @@ static int setup_kmem_cache_node(struct kmem_cache *cachep,
|
||||
new_alien = NULL;
|
||||
}
|
||||
|
||||
- spin_unlock_irq(&n->list_lock);
|
||||
+ raw_spin_unlock_irq(&n->list_lock);
|
||||
slabs_destroy(cachep, &list);
|
||||
|
||||
/*
|
||||
@@ -997,7 +997,7 @@ static void cpuup_canceled(long cpu)
|
||||
if (!n)
|
||||
continue;
|
||||
|
||||
- spin_lock_irq(&n->list_lock);
|
||||
+ raw_spin_lock_irq(&n->list_lock);
|
||||
|
||||
/* Free limit for this kmem_cache_node */
|
||||
n->free_limit -= cachep->batchcount;
|
||||
@@ -1010,7 +1010,7 @@ static void cpuup_canceled(long cpu)
|
||||
}
|
||||
|
||||
if (!cpumask_empty(mask)) {
|
||||
- spin_unlock_irq(&n->list_lock);
|
||||
+ raw_spin_unlock_irq(&n->list_lock);
|
||||
goto free_slab;
|
||||
}
|
||||
|
||||
@@ -1024,7 +1024,7 @@ static void cpuup_canceled(long cpu)
|
||||
alien = n->alien;
|
||||
n->alien = NULL;
|
||||
|
||||
- spin_unlock_irq(&n->list_lock);
|
||||
+ raw_spin_unlock_irq(&n->list_lock);
|
||||
|
||||
kfree(shared);
|
||||
if (alien) {
|
||||
@@ -1208,7 +1208,7 @@ static void __init init_list(struct kmem_cache *cachep, struct kmem_cache_node *
|
||||
/*
|
||||
* Do not assume that spinlocks can be initialized via memcpy:
|
||||
*/
|
||||
- spin_lock_init(&ptr->list_lock);
|
||||
+ raw_spin_lock_init(&ptr->list_lock);
|
||||
|
||||
MAKE_ALL_LISTS(cachep, ptr, nodeid);
|
||||
cachep->node[nodeid] = ptr;
|
||||
@@ -1379,11 +1379,11 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
|
||||
for_each_kmem_cache_node(cachep, node, n) {
|
||||
unsigned long total_slabs, free_slabs, free_objs;
|
||||
|
||||
- spin_lock_irqsave(&n->list_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&n->list_lock, flags);
|
||||
total_slabs = n->total_slabs;
|
||||
free_slabs = n->free_slabs;
|
||||
free_objs = n->free_objects;
|
||||
- spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
|
||||
pr_warn(" node %d: slabs: %ld/%ld, objs: %ld/%ld\n",
|
||||
node, total_slabs - free_slabs, total_slabs,
|
||||
@@ -2178,7 +2178,7 @@ static void check_spinlock_acquired(struct kmem_cache *cachep)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
check_irq_off();
|
||||
- assert_spin_locked(&get_node(cachep, numa_mem_id())->list_lock);
|
||||
+ assert_raw_spin_locked(&get_node(cachep, numa_mem_id())->list_lock);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -2186,7 +2186,7 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
check_irq_off();
|
||||
- assert_spin_locked(&get_node(cachep, node)->list_lock);
|
||||
+ assert_raw_spin_locked(&get_node(cachep, node)->list_lock);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -2226,9 +2226,9 @@ static void do_drain(void *arg)
|
||||
check_irq_off();
|
||||
ac = cpu_cache_get(cachep);
|
||||
n = get_node(cachep, node);
|
||||
- spin_lock(&n->list_lock);
|
||||
+ raw_spin_lock(&n->list_lock);
|
||||
free_block(cachep, ac->entry, ac->avail, node, &list);
|
||||
- spin_unlock(&n->list_lock);
|
||||
+ raw_spin_unlock(&n->list_lock);
|
||||
slabs_destroy(cachep, &list);
|
||||
ac->avail = 0;
|
||||
}
|
||||
@@ -2246,9 +2246,9 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
|
||||
drain_alien_cache(cachep, n->alien);
|
||||
|
||||
for_each_kmem_cache_node(cachep, node, n) {
|
||||
- spin_lock_irq(&n->list_lock);
|
||||
+ raw_spin_lock_irq(&n->list_lock);
|
||||
drain_array_locked(cachep, n->shared, node, true, &list);
|
||||
- spin_unlock_irq(&n->list_lock);
|
||||
+ raw_spin_unlock_irq(&n->list_lock);
|
||||
|
||||
slabs_destroy(cachep, &list);
|
||||
}
|
||||
@@ -2270,10 +2270,10 @@ static int drain_freelist(struct kmem_cache *cache,
|
||||
nr_freed = 0;
|
||||
while (nr_freed < tofree && !list_empty(&n->slabs_free)) {
|
||||
|
||||
- spin_lock_irq(&n->list_lock);
|
||||
+ raw_spin_lock_irq(&n->list_lock);
|
||||
p = n->slabs_free.prev;
|
||||
if (p == &n->slabs_free) {
|
||||
- spin_unlock_irq(&n->list_lock);
|
||||
+ raw_spin_unlock_irq(&n->list_lock);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -2286,7 +2286,7 @@ static int drain_freelist(struct kmem_cache *cache,
|
||||
* to the cache.
|
||||
*/
|
||||
n->free_objects -= cache->num;
|
||||
- spin_unlock_irq(&n->list_lock);
|
||||
+ raw_spin_unlock_irq(&n->list_lock);
|
||||
slab_destroy(cache, page);
|
||||
nr_freed++;
|
||||
}
|
||||
@@ -2734,7 +2734,7 @@ static void cache_grow_end(struct kmem_cache *cachep, struct page *page)
|
||||
INIT_LIST_HEAD(&page->lru);
|
||||
n = get_node(cachep, page_to_nid(page));
|
||||
|
||||
- spin_lock(&n->list_lock);
|
||||
+ raw_spin_lock(&n->list_lock);
|
||||
n->total_slabs++;
|
||||
if (!page->active) {
|
||||
list_add_tail(&page->lru, &(n->slabs_free));
|
||||
@@ -2744,7 +2744,7 @@ static void cache_grow_end(struct kmem_cache *cachep, struct page *page)
|
||||
|
||||
STATS_INC_GROWN(cachep);
|
||||
n->free_objects += cachep->num - page->active;
|
||||
- spin_unlock(&n->list_lock);
|
||||
+ raw_spin_unlock(&n->list_lock);
|
||||
|
||||
fixup_objfreelist_debug(cachep, &list);
|
||||
}
|
||||
@@ -2912,7 +2912,7 @@ static struct page *get_first_slab(struct kmem_cache_node *n, bool pfmemalloc)
|
||||
{
|
||||
struct page *page;
|
||||
|
||||
- assert_spin_locked(&n->list_lock);
|
||||
+ assert_raw_spin_locked(&n->list_lock);
|
||||
page = list_first_entry_or_null(&n->slabs_partial, struct page, lru);
|
||||
if (!page) {
|
||||
n->free_touched = 1;
|
||||
@@ -2938,10 +2938,10 @@ static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep,
|
||||
if (!gfp_pfmemalloc_allowed(flags))
|
||||
return NULL;
|
||||
|
||||
- spin_lock(&n->list_lock);
|
||||
+ raw_spin_lock(&n->list_lock);
|
||||
page = get_first_slab(n, true);
|
||||
if (!page) {
|
||||
- spin_unlock(&n->list_lock);
|
||||
+ raw_spin_unlock(&n->list_lock);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -2950,7 +2950,7 @@ static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep,
|
||||
|
||||
fixup_slab_list(cachep, n, page, &list);
|
||||
|
||||
- spin_unlock(&n->list_lock);
|
||||
+ raw_spin_unlock(&n->list_lock);
|
||||
fixup_objfreelist_debug(cachep, &list);
|
||||
|
||||
return obj;
|
||||
@@ -3009,7 +3009,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
|
||||
if (!n->free_objects && (!shared || !shared->avail))
|
||||
goto direct_grow;
|
||||
|
||||
- spin_lock(&n->list_lock);
|
||||
+ raw_spin_lock(&n->list_lock);
|
||||
shared = READ_ONCE(n->shared);
|
||||
|
||||
/* See if we can refill from the shared array */
|
||||
@@ -3033,7 +3033,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
|
||||
must_grow:
|
||||
n->free_objects -= ac->avail;
|
||||
alloc_done:
|
||||
- spin_unlock(&n->list_lock);
|
||||
+ raw_spin_unlock(&n->list_lock);
|
||||
fixup_objfreelist_debug(cachep, &list);
|
||||
|
||||
direct_grow:
|
||||
@@ -3258,7 +3258,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
|
||||
BUG_ON(!n);
|
||||
|
||||
check_irq_off();
|
||||
- spin_lock(&n->list_lock);
|
||||
+ raw_spin_lock(&n->list_lock);
|
||||
page = get_first_slab(n, false);
|
||||
if (!page)
|
||||
goto must_grow;
|
||||
@@ -3276,12 +3276,12 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
|
||||
|
||||
fixup_slab_list(cachep, n, page, &list);
|
||||
|
||||
- spin_unlock(&n->list_lock);
|
||||
+ raw_spin_unlock(&n->list_lock);
|
||||
fixup_objfreelist_debug(cachep, &list);
|
||||
return obj;
|
||||
|
||||
must_grow:
|
||||
- spin_unlock(&n->list_lock);
|
||||
+ raw_spin_unlock(&n->list_lock);
|
||||
page = cache_grow_begin(cachep, gfp_exact_node(flags), nodeid);
|
||||
if (page) {
|
||||
/* This slab isn't counted yet so don't update free_objects */
|
||||
@@ -3457,7 +3457,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
|
||||
|
||||
check_irq_off();
|
||||
n = get_node(cachep, node);
|
||||
- spin_lock(&n->list_lock);
|
||||
+ raw_spin_lock(&n->list_lock);
|
||||
if (n->shared) {
|
||||
struct array_cache *shared_array = n->shared;
|
||||
int max = shared_array->limit - shared_array->avail;
|
||||
@@ -3486,7 +3486,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
|
||||
STATS_SET_FREEABLE(cachep, i);
|
||||
}
|
||||
#endif
|
||||
- spin_unlock(&n->list_lock);
|
||||
+ raw_spin_unlock(&n->list_lock);
|
||||
slabs_destroy(cachep, &list);
|
||||
ac->avail -= batchcount;
|
||||
memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
|
||||
@@ -3896,9 +3896,9 @@ static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
|
||||
|
||||
node = cpu_to_mem(cpu);
|
||||
n = get_node(cachep, node);
|
||||
- spin_lock_irq(&n->list_lock);
|
||||
+ raw_spin_lock_irq(&n->list_lock);
|
||||
free_block(cachep, ac->entry, ac->avail, node, &list);
|
||||
- spin_unlock_irq(&n->list_lock);
|
||||
+ raw_spin_unlock_irq(&n->list_lock);
|
||||
slabs_destroy(cachep, &list);
|
||||
}
|
||||
free_percpu(prev);
|
||||
@@ -4023,9 +4023,9 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
|
||||
return;
|
||||
}
|
||||
|
||||
- spin_lock_irq(&n->list_lock);
|
||||
+ raw_spin_lock_irq(&n->list_lock);
|
||||
drain_array_locked(cachep, ac, node, false, &list);
|
||||
- spin_unlock_irq(&n->list_lock);
|
||||
+ raw_spin_unlock_irq(&n->list_lock);
|
||||
|
||||
slabs_destroy(cachep, &list);
|
||||
}
|
||||
@@ -4109,7 +4109,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
|
||||
|
||||
for_each_kmem_cache_node(cachep, node, n) {
|
||||
check_irq_on();
|
||||
- spin_lock_irq(&n->list_lock);
|
||||
+ raw_spin_lock_irq(&n->list_lock);
|
||||
|
||||
total_slabs += n->total_slabs;
|
||||
free_slabs += n->free_slabs;
|
||||
@@ -4118,7 +4118,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
|
||||
if (n->shared)
|
||||
shared_avail += n->shared->avail;
|
||||
|
||||
- spin_unlock_irq(&n->list_lock);
|
||||
+ raw_spin_unlock_irq(&n->list_lock);
|
||||
}
|
||||
num_objs = total_slabs * cachep->num;
|
||||
active_slabs = total_slabs - free_slabs;
|
||||
@@ -4338,13 +4338,13 @@ static int leaks_show(struct seq_file *m, void *p)
|
||||
for_each_kmem_cache_node(cachep, node, n) {
|
||||
|
||||
check_irq_on();
|
||||
- spin_lock_irq(&n->list_lock);
|
||||
+ raw_spin_lock_irq(&n->list_lock);
|
||||
|
||||
list_for_each_entry(page, &n->slabs_full, lru)
|
||||
handle_slab(x, cachep, page);
|
||||
list_for_each_entry(page, &n->slabs_partial, lru)
|
||||
handle_slab(x, cachep, page);
|
||||
- spin_unlock_irq(&n->list_lock);
|
||||
+ raw_spin_unlock_irq(&n->list_lock);
|
||||
}
|
||||
} while (!is_store_user_clean(cachep));
|
||||
|
||||
diff --git a/mm/slab.h b/mm/slab.h
|
||||
index 9632772e14be..d6b01d61f768 100644
|
||||
--- a/mm/slab.h
|
||||
+++ b/mm/slab.h
|
||||
@@ -454,7 +454,7 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
|
||||
* The slab lists for all objects.
|
||||
*/
|
||||
struct kmem_cache_node {
|
||||
- spinlock_t list_lock;
|
||||
+ raw_spinlock_t list_lock;
|
||||
|
||||
#ifdef CONFIG_SLAB
|
||||
struct list_head slabs_partial; /* partial list first, better asm code */
|
||||
diff --git a/mm/slub.c b/mm/slub.c
|
||||
index 9c3937c5ce38..ba20c68a9cfd 100644
|
||||
--- a/mm/slub.c
|
||||
+++ b/mm/slub.c
|
||||
@@ -1167,7 +1167,7 @@ static noinline int free_debug_processing(
|
||||
unsigned long uninitialized_var(flags);
|
||||
int ret = 0;
|
||||
|
||||
- spin_lock_irqsave(&n->list_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&n->list_lock, flags);
|
||||
slab_lock(page);
|
||||
|
||||
if (s->flags & SLAB_CONSISTENCY_CHECKS) {
|
||||
@@ -1202,7 +1202,7 @@ static noinline int free_debug_processing(
|
||||
bulk_cnt, cnt);
|
||||
|
||||
slab_unlock(page);
|
||||
- spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
if (!ret)
|
||||
slab_fix(s, "Object at 0x%p not freed", object);
|
||||
return ret;
|
||||
@@ -1802,7 +1802,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
|
||||
if (!n || !n->nr_partial)
|
||||
return NULL;
|
||||
|
||||
- spin_lock(&n->list_lock);
|
||||
+ raw_spin_lock(&n->list_lock);
|
||||
list_for_each_entry_safe(page, page2, &n->partial, lru) {
|
||||
void *t;
|
||||
|
||||
@@ -1827,7 +1827,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
|
||||
break;
|
||||
|
||||
}
|
||||
- spin_unlock(&n->list_lock);
|
||||
+ raw_spin_unlock(&n->list_lock);
|
||||
return object;
|
||||
}
|
||||
|
||||
@@ -2073,7 +2073,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
|
||||
* that acquire_slab() will see a slab page that
|
||||
* is frozen
|
||||
*/
|
||||
- spin_lock(&n->list_lock);
|
||||
+ raw_spin_lock(&n->list_lock);
|
||||
}
|
||||
} else {
|
||||
m = M_FULL;
|
||||
@@ -2084,7 +2084,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
|
||||
* slabs from diagnostic functions will not see
|
||||
* any frozen slabs.
|
||||
*/
|
||||
- spin_lock(&n->list_lock);
|
||||
+ raw_spin_lock(&n->list_lock);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2119,7 +2119,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
|
||||
goto redo;
|
||||
|
||||
if (lock)
|
||||
- spin_unlock(&n->list_lock);
|
||||
+ raw_spin_unlock(&n->list_lock);
|
||||
|
||||
if (m == M_FREE) {
|
||||
stat(s, DEACTIVATE_EMPTY);
|
||||
@@ -2154,10 +2154,10 @@ static void unfreeze_partials(struct kmem_cache *s,
|
||||
n2 = get_node(s, page_to_nid(page));
|
||||
if (n != n2) {
|
||||
if (n)
|
||||
- spin_unlock(&n->list_lock);
|
||||
+ raw_spin_unlock(&n->list_lock);
|
||||
|
||||
n = n2;
|
||||
- spin_lock(&n->list_lock);
|
||||
+ raw_spin_lock(&n->list_lock);
|
||||
}
|
||||
|
||||
do {
|
||||
@@ -2186,7 +2186,7 @@ static void unfreeze_partials(struct kmem_cache *s,
|
||||
}
|
||||
|
||||
if (n)
|
||||
- spin_unlock(&n->list_lock);
|
||||
+ raw_spin_unlock(&n->list_lock);
|
||||
|
||||
while (discard_page) {
|
||||
page = discard_page;
|
||||
@@ -2355,10 +2355,10 @@ static unsigned long count_partial(struct kmem_cache_node *n,
|
||||
unsigned long x = 0;
|
||||
struct page *page;
|
||||
|
||||
- spin_lock_irqsave(&n->list_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&n->list_lock, flags);
|
||||
list_for_each_entry(page, &n->partial, lru)
|
||||
x += get_count(page);
|
||||
- spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
return x;
|
||||
}
|
||||
#endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */
|
||||
@@ -2793,7 +2793,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
|
||||
|
||||
do {
|
||||
if (unlikely(n)) {
|
||||
- spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
n = NULL;
|
||||
}
|
||||
prior = page->freelist;
|
||||
@@ -2825,7 +2825,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
|
||||
* Otherwise the list_lock will synchronize with
|
||||
* other processors updating the list of slabs.
|
||||
*/
|
||||
- spin_lock_irqsave(&n->list_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&n->list_lock, flags);
|
||||
|
||||
}
|
||||
}
|
||||
@@ -2867,7 +2867,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
|
||||
add_partial(n, page, DEACTIVATE_TO_TAIL);
|
||||
stat(s, FREE_ADD_PARTIAL);
|
||||
}
|
||||
- spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
return;
|
||||
|
||||
slab_empty:
|
||||
@@ -2882,7 +2882,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
|
||||
remove_full(s, n, page);
|
||||
}
|
||||
|
||||
- spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
stat(s, FREE_SLAB);
|
||||
discard_slab(s, page);
|
||||
}
|
||||
@@ -3269,7 +3269,7 @@ static void
|
||||
init_kmem_cache_node(struct kmem_cache_node *n)
|
||||
{
|
||||
n->nr_partial = 0;
|
||||
- spin_lock_init(&n->list_lock);
|
||||
+ raw_spin_lock_init(&n->list_lock);
|
||||
INIT_LIST_HEAD(&n->partial);
|
||||
#ifdef CONFIG_SLUB_DEBUG
|
||||
atomic_long_set(&n->nr_slabs, 0);
|
||||
@@ -3656,7 +3656,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
|
||||
struct page *page, *h;
|
||||
|
||||
BUG_ON(irqs_disabled());
|
||||
- spin_lock_irq(&n->list_lock);
|
||||
+ raw_spin_lock_irq(&n->list_lock);
|
||||
list_for_each_entry_safe(page, h, &n->partial, lru) {
|
||||
if (!page->inuse) {
|
||||
remove_partial(n, page);
|
||||
@@ -3666,7 +3666,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
|
||||
"Objects remaining in %s on __kmem_cache_shutdown()");
|
||||
}
|
||||
}
|
||||
- spin_unlock_irq(&n->list_lock);
|
||||
+ raw_spin_unlock_irq(&n->list_lock);
|
||||
|
||||
list_for_each_entry_safe(page, h, &discard, lru)
|
||||
discard_slab(s, page);
|
||||
@@ -3939,7 +3939,7 @@ int __kmem_cache_shrink(struct kmem_cache *s)
|
||||
for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
|
||||
INIT_LIST_HEAD(promote + i);
|
||||
|
||||
- spin_lock_irqsave(&n->list_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&n->list_lock, flags);
|
||||
|
||||
/*
|
||||
* Build lists of slabs to discard or promote.
|
||||
@@ -3970,7 +3970,7 @@ int __kmem_cache_shrink(struct kmem_cache *s)
|
||||
for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
|
||||
list_splice(promote + i, &n->partial);
|
||||
|
||||
- spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
|
||||
/* Release empty slabs */
|
||||
list_for_each_entry_safe(page, t, &discard, lru)
|
||||
@@ -4384,7 +4384,7 @@ static int validate_slab_node(struct kmem_cache *s,
|
||||
struct page *page;
|
||||
unsigned long flags;
|
||||
|
||||
- spin_lock_irqsave(&n->list_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&n->list_lock, flags);
|
||||
|
||||
list_for_each_entry(page, &n->partial, lru) {
|
||||
validate_slab_slab(s, page, map);
|
||||
@@ -4406,7 +4406,7 @@ static int validate_slab_node(struct kmem_cache *s,
|
||||
s->name, count, atomic_long_read(&n->nr_slabs));
|
||||
|
||||
out:
|
||||
- spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
return count;
|
||||
}
|
||||
|
||||
@@ -4596,12 +4596,12 @@ static int list_locations(struct kmem_cache *s, char *buf,
|
||||
if (!atomic_long_read(&n->nr_slabs))
|
||||
continue;
|
||||
|
||||
- spin_lock_irqsave(&n->list_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&n->list_lock, flags);
|
||||
list_for_each_entry(page, &n->partial, lru)
|
||||
process_slab(&t, s, page, alloc, map);
|
||||
list_for_each_entry(page, &n->full, lru)
|
||||
process_slab(&t, s, page, alloc, map);
|
||||
- spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
}
|
||||
|
||||
for (i = 0; i < t.count; i++) {
|
||||
--
|
||||
2.25.1
|
||||
|
@@ -1,222 +0,0 @@
|
||||
From b3c42996e1092269d6fac0652b22140250f11b7b Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Thu, 21 Jun 2018 17:29:19 +0200
|
||||
Subject: [PATCH 075/328] mm/SLUB: delay giving back empty slubs to IRQ enabled
|
||||
regions
|
||||
|
||||
__free_slab() is invoked with disabled interrupts which increases the
|
||||
irq-off time while __free_pages() is doing the work.
|
||||
Allow __free_slab() to be invoked with enabled interrupts and move
|
||||
everything from interrupts-off invocations to a temporary per-CPU list
|
||||
so it can be processed later.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
mm/slub.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++----
|
||||
1 file changed, 69 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/mm/slub.c b/mm/slub.c
|
||||
index ba20c68a9cfd..224663e20772 100644
|
||||
--- a/mm/slub.c
|
||||
+++ b/mm/slub.c
|
||||
@@ -1330,6 +1330,12 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node,
|
||||
|
||||
#endif /* CONFIG_SLUB_DEBUG */
|
||||
|
||||
+struct slub_free_list {
|
||||
+ raw_spinlock_t lock;
|
||||
+ struct list_head list;
|
||||
+};
|
||||
+static DEFINE_PER_CPU(struct slub_free_list, slub_free_list);
|
||||
+
|
||||
/*
|
||||
* Hooks for other subsystems that check memory allocations. In a typical
|
||||
* production configuration these hooks all should produce no code at all.
|
||||
@@ -1684,6 +1690,16 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
|
||||
__free_pages(page, order);
|
||||
}
|
||||
|
||||
+static void free_delayed(struct list_head *h)
|
||||
+{
|
||||
+ while (!list_empty(h)) {
|
||||
+ struct page *page = list_first_entry(h, struct page, lru);
|
||||
+
|
||||
+ list_del(&page->lru);
|
||||
+ __free_slab(page->slab_cache, page);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static void rcu_free_slab(struct rcu_head *h)
|
||||
{
|
||||
struct page *page = container_of(h, struct page, rcu_head);
|
||||
@@ -1695,6 +1711,12 @@ static void free_slab(struct kmem_cache *s, struct page *page)
|
||||
{
|
||||
if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
|
||||
call_rcu(&page->rcu_head, rcu_free_slab);
|
||||
+ } else if (irqs_disabled()) {
|
||||
+ struct slub_free_list *f = this_cpu_ptr(&slub_free_list);
|
||||
+
|
||||
+ raw_spin_lock(&f->lock);
|
||||
+ list_add(&page->lru, &f->list);
|
||||
+ raw_spin_unlock(&f->lock);
|
||||
} else
|
||||
__free_slab(s, page);
|
||||
}
|
||||
@@ -2223,14 +2245,21 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
|
||||
pobjects = oldpage->pobjects;
|
||||
pages = oldpage->pages;
|
||||
if (drain && pobjects > s->cpu_partial) {
|
||||
+ struct slub_free_list *f;
|
||||
unsigned long flags;
|
||||
+ LIST_HEAD(tofree);
|
||||
/*
|
||||
* partial array is full. Move the existing
|
||||
* set to the per node partial list.
|
||||
*/
|
||||
local_irq_save(flags);
|
||||
unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
|
||||
+ f = this_cpu_ptr(&slub_free_list);
|
||||
+ raw_spin_lock(&f->lock);
|
||||
+ list_splice_init(&f->list, &tofree);
|
||||
+ raw_spin_unlock(&f->lock);
|
||||
local_irq_restore(flags);
|
||||
+ free_delayed(&tofree);
|
||||
oldpage = NULL;
|
||||
pobjects = 0;
|
||||
pages = 0;
|
||||
@@ -2300,7 +2329,22 @@ static bool has_cpu_slab(int cpu, void *info)
|
||||
|
||||
static void flush_all(struct kmem_cache *s)
|
||||
{
|
||||
+ LIST_HEAD(tofree);
|
||||
+ int cpu;
|
||||
+
|
||||
on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
|
||||
+ for_each_online_cpu(cpu) {
|
||||
+ struct slub_free_list *f;
|
||||
+
|
||||
+ if (!has_cpu_slab(cpu, s))
|
||||
+ continue;
|
||||
+
|
||||
+ f = &per_cpu(slub_free_list, cpu);
|
||||
+ raw_spin_lock_irq(&f->lock);
|
||||
+ list_splice_init(&f->list, &tofree);
|
||||
+ raw_spin_unlock_irq(&f->lock);
|
||||
+ free_delayed(&tofree);
|
||||
+ }
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2498,8 +2542,10 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page)
|
||||
* already disabled (which is the case for bulk allocation).
|
||||
*/
|
||||
static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
|
||||
- unsigned long addr, struct kmem_cache_cpu *c)
|
||||
+ unsigned long addr, struct kmem_cache_cpu *c,
|
||||
+ struct list_head *to_free)
|
||||
{
|
||||
+ struct slub_free_list *f;
|
||||
void *freelist;
|
||||
struct page *page;
|
||||
|
||||
@@ -2555,6 +2601,13 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
|
||||
VM_BUG_ON(!c->page->frozen);
|
||||
c->freelist = get_freepointer(s, freelist);
|
||||
c->tid = next_tid(c->tid);
|
||||
+
|
||||
+out:
|
||||
+ f = this_cpu_ptr(&slub_free_list);
|
||||
+ raw_spin_lock(&f->lock);
|
||||
+ list_splice_init(&f->list, to_free);
|
||||
+ raw_spin_unlock(&f->lock);
|
||||
+
|
||||
return freelist;
|
||||
|
||||
new_slab:
|
||||
@@ -2570,7 +2623,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
|
||||
|
||||
if (unlikely(!freelist)) {
|
||||
slab_out_of_memory(s, gfpflags, node);
|
||||
- return NULL;
|
||||
+ goto out;
|
||||
}
|
||||
|
||||
page = c->page;
|
||||
@@ -2583,7 +2636,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
|
||||
goto new_slab; /* Slab failed checks. Next slab needed */
|
||||
|
||||
deactivate_slab(s, page, get_freepointer(s, freelist), c);
|
||||
- return freelist;
|
||||
+ goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2595,6 +2648,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
|
||||
{
|
||||
void *p;
|
||||
unsigned long flags;
|
||||
+ LIST_HEAD(tofree);
|
||||
|
||||
local_irq_save(flags);
|
||||
#ifdef CONFIG_PREEMPT
|
||||
@@ -2606,8 +2660,9 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
|
||||
c = this_cpu_ptr(s->cpu_slab);
|
||||
#endif
|
||||
|
||||
- p = ___slab_alloc(s, gfpflags, node, addr, c);
|
||||
+ p = ___slab_alloc(s, gfpflags, node, addr, c, &tofree);
|
||||
local_irq_restore(flags);
|
||||
+ free_delayed(&tofree);
|
||||
return p;
|
||||
}
|
||||
|
||||
@@ -3085,6 +3140,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
|
||||
void **p)
|
||||
{
|
||||
struct kmem_cache_cpu *c;
|
||||
+ LIST_HEAD(to_free);
|
||||
int i;
|
||||
|
||||
/* memcg and kmem_cache debug support */
|
||||
@@ -3108,7 +3164,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
|
||||
* of re-populating per CPU c->freelist
|
||||
*/
|
||||
p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
|
||||
- _RET_IP_, c);
|
||||
+ _RET_IP_, c, &to_free);
|
||||
if (unlikely(!p[i]))
|
||||
goto error;
|
||||
|
||||
@@ -3120,6 +3176,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
|
||||
}
|
||||
c->tid = next_tid(c->tid);
|
||||
local_irq_enable();
|
||||
+ free_delayed(&to_free);
|
||||
|
||||
/* Clear memory outside IRQ disabled fastpath loop */
|
||||
if (unlikely(flags & __GFP_ZERO)) {
|
||||
@@ -3134,6 +3191,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
|
||||
return i;
|
||||
error:
|
||||
local_irq_enable();
|
||||
+ free_delayed(&to_free);
|
||||
slab_post_alloc_hook(s, flags, i, p);
|
||||
__kmem_cache_free_bulk(s, i, p);
|
||||
return 0;
|
||||
@@ -4183,6 +4241,12 @@ void __init kmem_cache_init(void)
|
||||
{
|
||||
static __initdata struct kmem_cache boot_kmem_cache,
|
||||
boot_kmem_cache_node;
|
||||
+ int cpu;
|
||||
+
|
||||
+ for_each_possible_cpu(cpu) {
|
||||
+ raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock);
|
||||
+ INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list);
|
||||
+ }
|
||||
|
||||
if (debug_guardpage_minorder())
|
||||
slub_max_order = 0;
|
||||
--
|
||||
2.25.1
|
||||
|
@@ -1,238 +0,0 @@
|
||||
From d3dec69695332f82af11a39b0fd327ad173c4715 Mon Sep 17 00:00:00 2001
|
||||
From: Ingo Molnar <mingo@elte.hu>
|
||||
Date: Fri, 3 Jul 2009 08:29:37 -0500
|
||||
Subject: [PATCH 076/328] mm: page_alloc: rt-friendly per-cpu pages
|
||||
|
||||
rt-friendly per-cpu pages: convert the irqs-off per-cpu locking
|
||||
method into a preemptible, explicit-per-cpu-locks method.
|
||||
|
||||
Contains fixes from:
|
||||
Peter Zijlstra <a.p.zijlstra@chello.nl>
|
||||
Thomas Gleixner <tglx@linutronix.de>
|
||||
|
||||
Signed-off-by: Ingo Molnar <mingo@elte.hu>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
mm/page_alloc.c | 63 +++++++++++++++++++++++++++++++++----------------
|
||||
1 file changed, 43 insertions(+), 20 deletions(-)
|
||||
|
||||
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
|
||||
index 9a4d150ea5b7..d6f9be9c6635 100644
|
||||
--- a/mm/page_alloc.c
|
||||
+++ b/mm/page_alloc.c
|
||||
@@ -60,6 +60,7 @@
|
||||
#include <linux/hugetlb.h>
|
||||
#include <linux/sched/rt.h>
|
||||
#include <linux/sched/mm.h>
|
||||
+#include <linux/locallock.h>
|
||||
#include <linux/page_owner.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/memcontrol.h>
|
||||
@@ -291,6 +292,18 @@ EXPORT_SYMBOL(nr_node_ids);
|
||||
EXPORT_SYMBOL(nr_online_nodes);
|
||||
#endif
|
||||
|
||||
+static DEFINE_LOCAL_IRQ_LOCK(pa_lock);
|
||||
+
|
||||
+#ifdef CONFIG_PREEMPT_RT_BASE
|
||||
+# define cpu_lock_irqsave(cpu, flags) \
|
||||
+ local_lock_irqsave_on(pa_lock, flags, cpu)
|
||||
+# define cpu_unlock_irqrestore(cpu, flags) \
|
||||
+ local_unlock_irqrestore_on(pa_lock, flags, cpu)
|
||||
+#else
|
||||
+# define cpu_lock_irqsave(cpu, flags) local_irq_save(flags)
|
||||
+# define cpu_unlock_irqrestore(cpu, flags) local_irq_restore(flags)
|
||||
+#endif
|
||||
+
|
||||
int page_group_by_mobility_disabled __read_mostly;
|
||||
|
||||
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
|
||||
@@ -1296,10 +1309,10 @@ static void __free_pages_ok(struct page *page, unsigned int order)
|
||||
return;
|
||||
|
||||
migratetype = get_pfnblock_migratetype(page, pfn);
|
||||
- local_irq_save(flags);
|
||||
+ local_lock_irqsave(pa_lock, flags);
|
||||
__count_vm_events(PGFREE, 1 << order);
|
||||
free_one_page(page_zone(page), page, pfn, order, migratetype);
|
||||
- local_irq_restore(flags);
|
||||
+ local_unlock_irqrestore(pa_lock, flags);
|
||||
}
|
||||
|
||||
static void __init __free_pages_boot_core(struct page *page, unsigned int order)
|
||||
@@ -2568,13 +2581,13 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
|
||||
int to_drain, batch;
|
||||
LIST_HEAD(dst);
|
||||
|
||||
- local_irq_save(flags);
|
||||
+ local_lock_irqsave(pa_lock, flags);
|
||||
batch = READ_ONCE(pcp->batch);
|
||||
to_drain = min(pcp->count, batch);
|
||||
if (to_drain > 0)
|
||||
isolate_pcp_pages(to_drain, pcp, &dst);
|
||||
|
||||
- local_irq_restore(flags);
|
||||
+ local_unlock_irqrestore(pa_lock, flags);
|
||||
|
||||
if (to_drain > 0)
|
||||
free_pcppages_bulk(zone, &dst, false);
|
||||
@@ -2596,7 +2609,7 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone)
|
||||
LIST_HEAD(dst);
|
||||
int count;
|
||||
|
||||
- local_irq_save(flags);
|
||||
+ cpu_lock_irqsave(cpu, flags);
|
||||
pset = per_cpu_ptr(zone->pageset, cpu);
|
||||
|
||||
pcp = &pset->pcp;
|
||||
@@ -2604,7 +2617,7 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone)
|
||||
if (count)
|
||||
isolate_pcp_pages(count, pcp, &dst);
|
||||
|
||||
- local_irq_restore(flags);
|
||||
+ cpu_unlock_irqrestore(cpu, flags);
|
||||
|
||||
if (count)
|
||||
free_pcppages_bulk(zone, &dst, false);
|
||||
@@ -2642,6 +2655,7 @@ void drain_local_pages(struct zone *zone)
|
||||
drain_pages(cpu);
|
||||
}
|
||||
|
||||
+#ifndef CONFIG_PREEMPT_RT_BASE
|
||||
static void drain_local_pages_wq(struct work_struct *work)
|
||||
{
|
||||
/*
|
||||
@@ -2655,6 +2669,7 @@ static void drain_local_pages_wq(struct work_struct *work)
|
||||
drain_local_pages(NULL);
|
||||
preempt_enable();
|
||||
}
|
||||
+#endif
|
||||
|
||||
/*
|
||||
* Spill all the per-cpu pages from all CPUs back into the buddy allocator.
|
||||
@@ -2721,7 +2736,14 @@ void drain_all_pages(struct zone *zone)
|
||||
else
|
||||
cpumask_clear_cpu(cpu, &cpus_with_pcps);
|
||||
}
|
||||
-
|
||||
+#ifdef CONFIG_PREEMPT_RT_BASE
|
||||
+ for_each_cpu(cpu, &cpus_with_pcps) {
|
||||
+ if (zone)
|
||||
+ drain_pages_zone(cpu, zone);
|
||||
+ else
|
||||
+ drain_pages(cpu);
|
||||
+ }
|
||||
+#else
|
||||
for_each_cpu(cpu, &cpus_with_pcps) {
|
||||
struct work_struct *work = per_cpu_ptr(&pcpu_drain, cpu);
|
||||
INIT_WORK(work, drain_local_pages_wq);
|
||||
@@ -2729,6 +2751,7 @@ void drain_all_pages(struct zone *zone)
|
||||
}
|
||||
for_each_cpu(cpu, &cpus_with_pcps)
|
||||
flush_work(per_cpu_ptr(&pcpu_drain, cpu));
|
||||
+#endif
|
||||
|
||||
mutex_unlock(&pcpu_drain_mutex);
|
||||
}
|
||||
@@ -2848,9 +2871,9 @@ void free_unref_page(struct page *page)
|
||||
if (!free_unref_page_prepare(page, pfn))
|
||||
return;
|
||||
|
||||
- local_irq_save(flags);
|
||||
+ local_lock_irqsave(pa_lock, flags);
|
||||
free_unref_page_commit(page, pfn, &dst);
|
||||
- local_irq_restore(flags);
|
||||
+ local_unlock_irqrestore(pa_lock, flags);
|
||||
if (!list_empty(&dst))
|
||||
free_pcppages_bulk(zone, &dst, false);
|
||||
}
|
||||
@@ -2877,7 +2900,7 @@ void free_unref_page_list(struct list_head *list)
|
||||
set_page_private(page, pfn);
|
||||
}
|
||||
|
||||
- local_irq_save(flags);
|
||||
+ local_lock_irqsave(pa_lock, flags);
|
||||
list_for_each_entry_safe(page, next, list, lru) {
|
||||
unsigned long pfn = page_private(page);
|
||||
enum zone_type type;
|
||||
@@ -2892,12 +2915,12 @@ void free_unref_page_list(struct list_head *list)
|
||||
* a large list of pages to free.
|
||||
*/
|
||||
if (++batch_count == SWAP_CLUSTER_MAX) {
|
||||
- local_irq_restore(flags);
|
||||
+ local_unlock_irqrestore(pa_lock, flags);
|
||||
batch_count = 0;
|
||||
- local_irq_save(flags);
|
||||
+ local_lock_irqsave(pa_lock, flags);
|
||||
}
|
||||
}
|
||||
- local_irq_restore(flags);
|
||||
+ local_unlock_irqrestore(pa_lock, flags);
|
||||
|
||||
for (i = 0; i < __MAX_NR_ZONES; ) {
|
||||
struct page *page;
|
||||
@@ -3046,7 +3069,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
|
||||
struct page *page;
|
||||
unsigned long flags;
|
||||
|
||||
- local_irq_save(flags);
|
||||
+ local_lock_irqsave(pa_lock, flags);
|
||||
pcp = &this_cpu_ptr(zone->pageset)->pcp;
|
||||
list = &pcp->lists[migratetype];
|
||||
page = __rmqueue_pcplist(zone, migratetype, pcp, list);
|
||||
@@ -3054,7 +3077,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
|
||||
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
|
||||
zone_statistics(preferred_zone, zone);
|
||||
}
|
||||
- local_irq_restore(flags);
|
||||
+ local_unlock_irqrestore(pa_lock, flags);
|
||||
return page;
|
||||
}
|
||||
|
||||
@@ -3081,7 +3104,7 @@ struct page *rmqueue(struct zone *preferred_zone,
|
||||
* allocate greater than order-1 page units with __GFP_NOFAIL.
|
||||
*/
|
||||
WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
|
||||
- spin_lock_irqsave(&zone->lock, flags);
|
||||
+ local_spin_lock_irqsave(pa_lock, &zone->lock, flags);
|
||||
|
||||
do {
|
||||
page = NULL;
|
||||
@@ -3101,14 +3124,14 @@ struct page *rmqueue(struct zone *preferred_zone,
|
||||
|
||||
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
|
||||
zone_statistics(preferred_zone, zone);
|
||||
- local_irq_restore(flags);
|
||||
+ local_unlock_irqrestore(pa_lock, flags);
|
||||
|
||||
out:
|
||||
VM_BUG_ON_PAGE(page && bad_range(zone, page), page);
|
||||
return page;
|
||||
|
||||
failed:
|
||||
- local_irq_restore(flags);
|
||||
+ local_unlock_irqrestore(pa_lock, flags);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -8129,7 +8152,7 @@ void zone_pcp_reset(struct zone *zone)
|
||||
struct per_cpu_pageset *pset;
|
||||
|
||||
/* avoid races with drain_pages() */
|
||||
- local_irq_save(flags);
|
||||
+ local_lock_irqsave(pa_lock, flags);
|
||||
if (zone->pageset != &boot_pageset) {
|
||||
for_each_online_cpu(cpu) {
|
||||
pset = per_cpu_ptr(zone->pageset, cpu);
|
||||
@@ -8138,7 +8161,7 @@ void zone_pcp_reset(struct zone *zone)
|
||||
free_percpu(zone->pageset);
|
||||
zone->pageset = &boot_pageset;
|
||||
}
|
||||
- local_irq_restore(flags);
|
||||
+ local_unlock_irqrestore(pa_lock, flags);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTREMOVE
|
||||
--
|
||||
2.25.1
|
||||
|
@@ -1,210 +0,0 @@
|
||||
From a03a4ee82ac46307acebdfe58e602aea9835a9a1 Mon Sep 17 00:00:00 2001
|
||||
From: Ingo Molnar <mingo@elte.hu>
|
||||
Date: Fri, 3 Jul 2009 08:29:51 -0500
|
||||
Subject: [PATCH 077/328] mm/swap: Convert to percpu locked
|
||||
|
||||
Replace global locks (get_cpu + local_irq_save) with "local_locks()".
|
||||
Currently there is one of for "rotate" and one for "swap".
|
||||
|
||||
Signed-off-by: Ingo Molnar <mingo@elte.hu>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
include/linux/swap.h | 2 ++
|
||||
mm/compaction.c | 6 ++++--
|
||||
mm/page_alloc.c | 3 ++-
|
||||
mm/swap.c | 38 ++++++++++++++++++++++----------------
|
||||
4 files changed, 30 insertions(+), 19 deletions(-)
|
||||
|
||||
diff --git a/include/linux/swap.h b/include/linux/swap.h
|
||||
index ee8f9f554a9e..2ad000e362bd 100644
|
||||
--- a/include/linux/swap.h
|
||||
+++ b/include/linux/swap.h
|
||||
@@ -12,6 +12,7 @@
|
||||
#include <linux/fs.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/page-flags.h>
|
||||
+#include <linux/locallock.h>
|
||||
#include <asm/page.h>
|
||||
|
||||
struct notifier_block;
|
||||
@@ -331,6 +332,7 @@ extern unsigned long nr_free_pagecache_pages(void);
|
||||
|
||||
|
||||
/* linux/mm/swap.c */
|
||||
+DECLARE_LOCAL_IRQ_LOCK(swapvec_lock);
|
||||
extern void lru_cache_add(struct page *);
|
||||
extern void lru_cache_add_anon(struct page *page);
|
||||
extern void lru_cache_add_file(struct page *page);
|
||||
diff --git a/mm/compaction.c b/mm/compaction.c
|
||||
index 5079ddbec8f9..c40d3a13cbbd 100644
|
||||
--- a/mm/compaction.c
|
||||
+++ b/mm/compaction.c
|
||||
@@ -1668,10 +1668,12 @@ static enum compact_result compact_zone(struct zone *zone, struct compact_contro
|
||||
block_start_pfn(cc->migrate_pfn, cc->order);
|
||||
|
||||
if (cc->last_migrated_pfn < current_block_start) {
|
||||
- cpu = get_cpu();
|
||||
+ cpu = get_cpu_light();
|
||||
+ local_lock_irq(swapvec_lock);
|
||||
lru_add_drain_cpu(cpu);
|
||||
+ local_unlock_irq(swapvec_lock);
|
||||
drain_local_pages(zone);
|
||||
- put_cpu();
|
||||
+ put_cpu_light();
|
||||
/* No more flushing until we migrate again */
|
||||
cc->last_migrated_pfn = 0;
|
||||
}
|
||||
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
|
||||
index d6f9be9c6635..a1547f1be42c 100644
|
||||
--- a/mm/page_alloc.c
|
||||
+++ b/mm/page_alloc.c
|
||||
@@ -7242,8 +7242,9 @@ void __init free_area_init(unsigned long *zones_size)
|
||||
|
||||
static int page_alloc_cpu_dead(unsigned int cpu)
|
||||
{
|
||||
-
|
||||
+ local_lock_irq_on(swapvec_lock, cpu);
|
||||
lru_add_drain_cpu(cpu);
|
||||
+ local_unlock_irq_on(swapvec_lock, cpu);
|
||||
drain_pages(cpu);
|
||||
|
||||
/*
|
||||
diff --git a/mm/swap.c b/mm/swap.c
|
||||
index 45fdbfb6b2a6..92f994b962f0 100644
|
||||
--- a/mm/swap.c
|
||||
+++ b/mm/swap.c
|
||||
@@ -33,6 +33,7 @@
|
||||
#include <linux/memcontrol.h>
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/uio.h>
|
||||
+#include <linux/locallock.h>
|
||||
#include <linux/hugetlb.h>
|
||||
#include <linux/page_idle.h>
|
||||
|
||||
@@ -51,6 +52,8 @@ static DEFINE_PER_CPU(struct pagevec, lru_lazyfree_pvecs);
|
||||
#ifdef CONFIG_SMP
|
||||
static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);
|
||||
#endif
|
||||
+static DEFINE_LOCAL_IRQ_LOCK(rotate_lock);
|
||||
+DEFINE_LOCAL_IRQ_LOCK(swapvec_lock);
|
||||
|
||||
/*
|
||||
* This path almost never happens for VM activity - pages are normally
|
||||
@@ -253,11 +256,11 @@ void rotate_reclaimable_page(struct page *page)
|
||||
unsigned long flags;
|
||||
|
||||
get_page(page);
|
||||
- local_irq_save(flags);
|
||||
+ local_lock_irqsave(rotate_lock, flags);
|
||||
pvec = this_cpu_ptr(&lru_rotate_pvecs);
|
||||
if (!pagevec_add(pvec, page) || PageCompound(page))
|
||||
pagevec_move_tail(pvec);
|
||||
- local_irq_restore(flags);
|
||||
+ local_unlock_irqrestore(rotate_lock, flags);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -307,12 +310,13 @@ void activate_page(struct page *page)
|
||||
{
|
||||
page = compound_head(page);
|
||||
if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
|
||||
- struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
|
||||
+ struct pagevec *pvec = &get_locked_var(swapvec_lock,
|
||||
+ activate_page_pvecs);
|
||||
|
||||
get_page(page);
|
||||
if (!pagevec_add(pvec, page) || PageCompound(page))
|
||||
pagevec_lru_move_fn(pvec, __activate_page, NULL);
|
||||
- put_cpu_var(activate_page_pvecs);
|
||||
+ put_locked_var(swapvec_lock, activate_page_pvecs);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -334,7 +338,7 @@ void activate_page(struct page *page)
|
||||
|
||||
static void __lru_cache_activate_page(struct page *page)
|
||||
{
|
||||
- struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
|
||||
+ struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec);
|
||||
int i;
|
||||
|
||||
/*
|
||||
@@ -356,7 +360,7 @@ static void __lru_cache_activate_page(struct page *page)
|
||||
}
|
||||
}
|
||||
|
||||
- put_cpu_var(lru_add_pvec);
|
||||
+ put_locked_var(swapvec_lock, lru_add_pvec);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -398,12 +402,12 @@ EXPORT_SYMBOL(mark_page_accessed);
|
||||
|
||||
static void __lru_cache_add(struct page *page)
|
||||
{
|
||||
- struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
|
||||
+ struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec);
|
||||
|
||||
get_page(page);
|
||||
if (!pagevec_add(pvec, page) || PageCompound(page))
|
||||
__pagevec_lru_add(pvec);
|
||||
- put_cpu_var(lru_add_pvec);
|
||||
+ put_locked_var(swapvec_lock, lru_add_pvec);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -581,9 +585,9 @@ void lru_add_drain_cpu(int cpu)
|
||||
unsigned long flags;
|
||||
|
||||
/* No harm done if a racing interrupt already did this */
|
||||
- local_irq_save(flags);
|
||||
+ local_lock_irqsave(rotate_lock, flags);
|
||||
pagevec_move_tail(pvec);
|
||||
- local_irq_restore(flags);
|
||||
+ local_unlock_irqrestore(rotate_lock, flags);
|
||||
}
|
||||
|
||||
pvec = &per_cpu(lru_deactivate_file_pvecs, cpu);
|
||||
@@ -615,11 +619,12 @@ void deactivate_file_page(struct page *page)
|
||||
return;
|
||||
|
||||
if (likely(get_page_unless_zero(page))) {
|
||||
- struct pagevec *pvec = &get_cpu_var(lru_deactivate_file_pvecs);
|
||||
+ struct pagevec *pvec = &get_locked_var(swapvec_lock,
|
||||
+ lru_deactivate_file_pvecs);
|
||||
|
||||
if (!pagevec_add(pvec, page) || PageCompound(page))
|
||||
pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
|
||||
- put_cpu_var(lru_deactivate_file_pvecs);
|
||||
+ put_locked_var(swapvec_lock, lru_deactivate_file_pvecs);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -634,19 +639,20 @@ void mark_page_lazyfree(struct page *page)
|
||||
{
|
||||
if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) &&
|
||||
!PageSwapCache(page) && !PageUnevictable(page)) {
|
||||
- struct pagevec *pvec = &get_cpu_var(lru_lazyfree_pvecs);
|
||||
+ struct pagevec *pvec = &get_locked_var(swapvec_lock,
|
||||
+ lru_lazyfree_pvecs);
|
||||
|
||||
get_page(page);
|
||||
if (!pagevec_add(pvec, page) || PageCompound(page))
|
||||
pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL);
|
||||
- put_cpu_var(lru_lazyfree_pvecs);
|
||||
+ put_locked_var(swapvec_lock, lru_lazyfree_pvecs);
|
||||
}
|
||||
}
|
||||
|
||||
void lru_add_drain(void)
|
||||
{
|
||||
- lru_add_drain_cpu(get_cpu());
|
||||
- put_cpu();
|
||||
+ lru_add_drain_cpu(local_lock_cpu(swapvec_lock));
|
||||
+ local_unlock_cpu(swapvec_lock);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
--
|
||||
2.25.1
|
||||
|
@@ -1,108 +0,0 @@
|
||||
From c48feb8fe1bad2aed0a15440a28da0bca8b5292a Mon Sep 17 00:00:00 2001
|
||||
From: Luiz Capitulino <lcapitulino@redhat.com>
|
||||
Date: Fri, 27 May 2016 15:03:28 +0200
|
||||
Subject: [PATCH 078/328] mm: perform lru_add_drain_all() remotely
|
||||
|
||||
lru_add_drain_all() works by scheduling lru_add_drain_cpu() to run
|
||||
on all CPUs that have non-empty LRU pagevecs and then waiting for
|
||||
the scheduled work to complete. However, workqueue threads may never
|
||||
have the chance to run on a CPU that's running a SCHED_FIFO task.
|
||||
This causes lru_add_drain_all() to block forever.
|
||||
|
||||
This commit solves this problem by changing lru_add_drain_all()
|
||||
to drain the LRU pagevecs of remote CPUs. This is done by grabbing
|
||||
swapvec_lock and calling lru_add_drain_cpu().
|
||||
|
||||
PS: This is based on an idea and initial implementation by
|
||||
Rik van Riel.
|
||||
|
||||
Signed-off-by: Rik van Riel <riel@redhat.com>
|
||||
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
mm/swap.c | 36 ++++++++++++++++++++++++++++++------
|
||||
1 file changed, 30 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/mm/swap.c b/mm/swap.c
|
||||
index 92f994b962f0..3885645a45ce 100644
|
||||
--- a/mm/swap.c
|
||||
+++ b/mm/swap.c
|
||||
@@ -585,9 +585,15 @@ void lru_add_drain_cpu(int cpu)
|
||||
unsigned long flags;
|
||||
|
||||
/* No harm done if a racing interrupt already did this */
|
||||
+#ifdef CONFIG_PREEMPT_RT_BASE
|
||||
+ local_lock_irqsave_on(rotate_lock, flags, cpu);
|
||||
+ pagevec_move_tail(pvec);
|
||||
+ local_unlock_irqrestore_on(rotate_lock, flags, cpu);
|
||||
+#else
|
||||
local_lock_irqsave(rotate_lock, flags);
|
||||
pagevec_move_tail(pvec);
|
||||
local_unlock_irqrestore(rotate_lock, flags);
|
||||
+#endif
|
||||
}
|
||||
|
||||
pvec = &per_cpu(lru_deactivate_file_pvecs, cpu);
|
||||
@@ -657,6 +663,16 @@ void lru_add_drain(void)
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
+#ifdef CONFIG_PREEMPT_RT_BASE
|
||||
+static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work)
|
||||
+{
|
||||
+ local_lock_on(swapvec_lock, cpu);
|
||||
+ lru_add_drain_cpu(cpu);
|
||||
+ local_unlock_on(swapvec_lock, cpu);
|
||||
+}
|
||||
+
|
||||
+#else
|
||||
+
|
||||
static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
|
||||
|
||||
static void lru_add_drain_per_cpu(struct work_struct *dummy)
|
||||
@@ -664,6 +680,16 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy)
|
||||
lru_add_drain();
|
||||
}
|
||||
|
||||
+static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work)
|
||||
+{
|
||||
+ struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
|
||||
+
|
||||
+ INIT_WORK(work, lru_add_drain_per_cpu);
|
||||
+ queue_work_on(cpu, mm_percpu_wq, work);
|
||||
+ cpumask_set_cpu(cpu, has_work);
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
/*
|
||||
* Doesn't need any cpu hotplug locking because we do rely on per-cpu
|
||||
* kworkers being shut down before our page_alloc_cpu_dead callback is
|
||||
@@ -688,21 +714,19 @@ void lru_add_drain_all(void)
|
||||
cpumask_clear(&has_work);
|
||||
|
||||
for_each_online_cpu(cpu) {
|
||||
- struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
|
||||
|
||||
if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
|
||||
pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
|
||||
pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||
|
||||
pagevec_count(&per_cpu(lru_lazyfree_pvecs, cpu)) ||
|
||||
- need_activate_page_drain(cpu)) {
|
||||
- INIT_WORK(work, lru_add_drain_per_cpu);
|
||||
- queue_work_on(cpu, mm_percpu_wq, work);
|
||||
- cpumask_set_cpu(cpu, &has_work);
|
||||
- }
|
||||
+ need_activate_page_drain(cpu))
|
||||
+ remote_lru_add_drain(cpu, &has_work);
|
||||
}
|
||||
|
||||
+#ifndef CONFIG_PREEMPT_RT_BASE
|
||||
for_each_cpu(cpu, &has_work)
|
||||
flush_work(&per_cpu(lru_add_drain_work, cpu));
|
||||
+#endif
|
||||
|
||||
mutex_unlock(&lock);
|
||||
}
|
||||
--
|
||||
2.25.1
|
||||
|
@@ -1,144 +0,0 @@
|
||||
From 4e41266214b4e88cf9fb9d2c20b5bbc83dcfbdcc Mon Sep 17 00:00:00 2001
|
||||
From: Ingo Molnar <mingo@elte.hu>
|
||||
Date: Fri, 3 Jul 2009 08:30:13 -0500
|
||||
Subject: [PATCH 079/328] mm/vmstat: Protect per cpu variables with preempt
|
||||
disable on RT
|
||||
|
||||
Disable preemption on -RT for the vmstat code. On vanila the code runs in
|
||||
IRQ-off regions while on -RT it is not. "preempt_disable" ensures that the
|
||||
same ressources is not updated in parallel due to preemption.
|
||||
|
||||
Signed-off-by: Ingo Molnar <mingo@elte.hu>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
include/linux/vmstat.h | 4 ++++
|
||||
mm/vmstat.c | 12 ++++++++++++
|
||||
2 files changed, 16 insertions(+)
|
||||
|
||||
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
|
||||
index f25cef84b41d..febee8649220 100644
|
||||
--- a/include/linux/vmstat.h
|
||||
+++ b/include/linux/vmstat.h
|
||||
@@ -54,7 +54,9 @@ DECLARE_PER_CPU(struct vm_event_state, vm_event_states);
|
||||
*/
|
||||
static inline void __count_vm_event(enum vm_event_item item)
|
||||
{
|
||||
+ preempt_disable_rt();
|
||||
raw_cpu_inc(vm_event_states.event[item]);
|
||||
+ preempt_enable_rt();
|
||||
}
|
||||
|
||||
static inline void count_vm_event(enum vm_event_item item)
|
||||
@@ -64,7 +66,9 @@ static inline void count_vm_event(enum vm_event_item item)
|
||||
|
||||
static inline void __count_vm_events(enum vm_event_item item, long delta)
|
||||
{
|
||||
+ preempt_disable_rt();
|
||||
raw_cpu_add(vm_event_states.event[item], delta);
|
||||
+ preempt_enable_rt();
|
||||
}
|
||||
|
||||
static inline void count_vm_events(enum vm_event_item item, long delta)
|
||||
diff --git a/mm/vmstat.c b/mm/vmstat.c
|
||||
index ce81b0a7d018..cfa2a3bbdf91 100644
|
||||
--- a/mm/vmstat.c
|
||||
+++ b/mm/vmstat.c
|
||||
@@ -320,6 +320,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
|
||||
long x;
|
||||
long t;
|
||||
|
||||
+ preempt_disable_rt();
|
||||
x = delta + __this_cpu_read(*p);
|
||||
|
||||
t = __this_cpu_read(pcp->stat_threshold);
|
||||
@@ -329,6 +330,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
|
||||
x = 0;
|
||||
}
|
||||
__this_cpu_write(*p, x);
|
||||
+ preempt_enable_rt();
|
||||
}
|
||||
EXPORT_SYMBOL(__mod_zone_page_state);
|
||||
|
||||
@@ -340,6 +342,7 @@ void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
|
||||
long x;
|
||||
long t;
|
||||
|
||||
+ preempt_disable_rt();
|
||||
x = delta + __this_cpu_read(*p);
|
||||
|
||||
t = __this_cpu_read(pcp->stat_threshold);
|
||||
@@ -349,6 +352,7 @@ void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
|
||||
x = 0;
|
||||
}
|
||||
__this_cpu_write(*p, x);
|
||||
+ preempt_enable_rt();
|
||||
}
|
||||
EXPORT_SYMBOL(__mod_node_page_state);
|
||||
|
||||
@@ -381,6 +385,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
|
||||
s8 __percpu *p = pcp->vm_stat_diff + item;
|
||||
s8 v, t;
|
||||
|
||||
+ preempt_disable_rt();
|
||||
v = __this_cpu_inc_return(*p);
|
||||
t = __this_cpu_read(pcp->stat_threshold);
|
||||
if (unlikely(v > t)) {
|
||||
@@ -389,6 +394,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
|
||||
zone_page_state_add(v + overstep, zone, item);
|
||||
__this_cpu_write(*p, -overstep);
|
||||
}
|
||||
+ preempt_enable_rt();
|
||||
}
|
||||
|
||||
void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
|
||||
@@ -397,6 +403,7 @@ void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
|
||||
s8 __percpu *p = pcp->vm_node_stat_diff + item;
|
||||
s8 v, t;
|
||||
|
||||
+ preempt_disable_rt();
|
||||
v = __this_cpu_inc_return(*p);
|
||||
t = __this_cpu_read(pcp->stat_threshold);
|
||||
if (unlikely(v > t)) {
|
||||
@@ -405,6 +412,7 @@ void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
|
||||
node_page_state_add(v + overstep, pgdat, item);
|
||||
__this_cpu_write(*p, -overstep);
|
||||
}
|
||||
+ preempt_enable_rt();
|
||||
}
|
||||
|
||||
void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
|
||||
@@ -425,6 +433,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
|
||||
s8 __percpu *p = pcp->vm_stat_diff + item;
|
||||
s8 v, t;
|
||||
|
||||
+ preempt_disable_rt();
|
||||
v = __this_cpu_dec_return(*p);
|
||||
t = __this_cpu_read(pcp->stat_threshold);
|
||||
if (unlikely(v < - t)) {
|
||||
@@ -433,6 +442,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
|
||||
zone_page_state_add(v - overstep, zone, item);
|
||||
__this_cpu_write(*p, overstep);
|
||||
}
|
||||
+ preempt_enable_rt();
|
||||
}
|
||||
|
||||
void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
|
||||
@@ -441,6 +451,7 @@ void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
|
||||
s8 __percpu *p = pcp->vm_node_stat_diff + item;
|
||||
s8 v, t;
|
||||
|
||||
+ preempt_disable_rt();
|
||||
v = __this_cpu_dec_return(*p);
|
||||
t = __this_cpu_read(pcp->stat_threshold);
|
||||
if (unlikely(v < - t)) {
|
||||
@@ -449,6 +460,7 @@ void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
|
||||
node_page_state_add(v - overstep, pgdat, item);
|
||||
__this_cpu_write(*p, overstep);
|
||||
}
|
||||
+ preempt_enable_rt();
|
||||
}
|
||||
|
||||
void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
|
||||
--
|
||||
2.25.1
|
||||
|
@@ -1,75 +0,0 @@
|
||||
From 5af4ea849237914c63d3fd50079e6975aa28f9b2 Mon Sep 17 00:00:00 2001
|
||||
From: Frank Rowand <frank.rowand@am.sony.com>
|
||||
Date: Sat, 1 Oct 2011 18:58:13 -0700
|
||||
Subject: [PATCH 080/328] ARM: Initialize split page table locks for vector
|
||||
page
|
||||
|
||||
Without this patch, ARM can not use SPLIT_PTLOCK_CPUS if
|
||||
PREEMPT_RT_FULL=y because vectors_user_mapping() creates a
|
||||
VM_ALWAYSDUMP mapping of the vector page (address 0xffff0000), but no
|
||||
ptl->lock has been allocated for the page. An attempt to coredump
|
||||
that page will result in a kernel NULL pointer dereference when
|
||||
follow_page() attempts to lock the page.
|
||||
|
||||
The call tree to the NULL pointer dereference is:
|
||||
|
||||
do_notify_resume()
|
||||
get_signal_to_deliver()
|
||||
do_coredump()
|
||||
elf_core_dump()
|
||||
get_dump_page()
|
||||
__get_user_pages()
|
||||
follow_page()
|
||||
pte_offset_map_lock() <----- a #define
|
||||
...
|
||||
rt_spin_lock()
|
||||
|
||||
The underlying problem is exposed by mm-shrink-the-page-frame-to-rt-size.patch.
|
||||
|
||||
Signed-off-by: Frank Rowand <frank.rowand@am.sony.com>
|
||||
Cc: Frank <Frank_Rowand@sonyusa.com>
|
||||
Cc: Peter Zijlstra <peterz@infradead.org>
|
||||
Link: http://lkml.kernel.org/r/4E87C535.2030907@am.sony.com
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
arch/arm/kernel/process.c | 24 ++++++++++++++++++++++++
|
||||
1 file changed, 24 insertions(+)
|
||||
|
||||
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
|
||||
index 82ab015bf42b..8d3c7ce34c24 100644
|
||||
--- a/arch/arm/kernel/process.c
|
||||
+++ b/arch/arm/kernel/process.c
|
||||
@@ -324,6 +324,30 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
+/*
|
||||
+ * CONFIG_SPLIT_PTLOCK_CPUS results in a page->ptl lock. If the lock is not
|
||||
+ * initialized by pgtable_page_ctor() then a coredump of the vector page will
|
||||
+ * fail.
|
||||
+ */
|
||||
+static int __init vectors_user_mapping_init_page(void)
|
||||
+{
|
||||
+ struct page *page;
|
||||
+ unsigned long addr = 0xffff0000;
|
||||
+ pgd_t *pgd;
|
||||
+ pud_t *pud;
|
||||
+ pmd_t *pmd;
|
||||
+
|
||||
+ pgd = pgd_offset_k(addr);
|
||||
+ pud = pud_offset(pgd, addr);
|
||||
+ pmd = pmd_offset(pud, addr);
|
||||
+ page = pmd_page(*(pmd));
|
||||
+
|
||||
+ pgtable_page_ctor(page);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+late_initcall(vectors_user_mapping_init_page);
|
||||
+
|
||||
#ifdef CONFIG_KUSER_HELPERS
|
||||
/*
|
||||
* The vectors page is always readable from user space for the
|
||||
--
|
||||
2.25.1
|
||||
|
@@ -1,41 +0,0 @@
|
||||
From 5bc9982e129b20ecb8f6c32d3d342af5087ffdae Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Thu, 25 Oct 2012 10:32:35 +0100
|
||||
Subject: [PATCH 081/328] mm: Enable SLUB for RT
|
||||
|
||||
Avoid the memory allocation in IRQ section
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
[bigeasy: factor out everything except the kcalloc() workaorund ]
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
mm/slub.c | 6 ++++++
|
||||
1 file changed, 6 insertions(+)
|
||||
|
||||
diff --git a/mm/slub.c b/mm/slub.c
|
||||
index 224663e20772..cbe47408c6eb 100644
|
||||
--- a/mm/slub.c
|
||||
+++ b/mm/slub.c
|
||||
@@ -3680,6 +3680,11 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
|
||||
const char *text)
|
||||
{
|
||||
#ifdef CONFIG_SLUB_DEBUG
|
||||
+#ifdef CONFIG_PREEMPT_RT_BASE
|
||||
+ /* XXX move out of irq-off section */
|
||||
+ slab_err(s, page, text, s->name);
|
||||
+#else
|
||||
+
|
||||
void *addr = page_address(page);
|
||||
void *p;
|
||||
unsigned long *map = kcalloc(BITS_TO_LONGS(page->objects),
|
||||
@@ -3701,6 +3706,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
|
||||
slab_unlock(page);
|
||||
kfree(map);
|
||||
#endif
|
||||
+#endif
|
||||
}
|
||||
|
||||
/*
|
||||
--
|
||||
2.25.1
|
||||
|
@@ -1,47 +0,0 @@
|
||||
From c908b13d5d4d0e1154b41e93d9fb8349b7b7197a Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Wed, 9 Jan 2013 12:08:15 +0100
|
||||
Subject: [PATCH 082/328] slub: Enable irqs for __GFP_WAIT
|
||||
|
||||
SYSTEM_RUNNING might be too late for enabling interrupts. Allocations
|
||||
with GFP_WAIT can happen before that. So use this as an indicator.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
mm/slub.c | 9 ++++++++-
|
||||
1 file changed, 8 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/mm/slub.c b/mm/slub.c
|
||||
index cbe47408c6eb..81c32ceab228 100644
|
||||
--- a/mm/slub.c
|
||||
+++ b/mm/slub.c
|
||||
@@ -1570,10 +1570,17 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
|
||||
void *start, *p;
|
||||
int idx, order;
|
||||
bool shuffle;
|
||||
+ bool enableirqs = false;
|
||||
|
||||
flags &= gfp_allowed_mask;
|
||||
|
||||
if (gfpflags_allow_blocking(flags))
|
||||
+ enableirqs = true;
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+ if (system_state > SYSTEM_BOOTING)
|
||||
+ enableirqs = true;
|
||||
+#endif
|
||||
+ if (enableirqs)
|
||||
local_irq_enable();
|
||||
|
||||
flags |= s->allocflags;
|
||||
@@ -1632,7 +1639,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
|
||||
page->frozen = 1;
|
||||
|
||||
out:
|
||||
- if (gfpflags_allow_blocking(flags))
|
||||
+ if (enableirqs)
|
||||
local_irq_disable();
|
||||
if (!page)
|
||||
return NULL;
|
||||
--
|
||||
2.25.1
|
||||
|
@@ -1,53 +0,0 @@
|
||||
From 41e0143022ef4180dafc14f033e72efa7ac652de Mon Sep 17 00:00:00 2001
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Wed, 15 Apr 2015 19:00:47 +0200
|
||||
Subject: [PATCH 083/328] slub: Disable SLUB_CPU_PARTIAL
|
||||
|
||||
|BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:915
|
||||
|in_atomic(): 1, irqs_disabled(): 0, pid: 87, name: rcuop/7
|
||||
|1 lock held by rcuop/7/87:
|
||||
| #0: (rcu_callback){......}, at: [<ffffffff8112c76a>] rcu_nocb_kthread+0x1ca/0x5d0
|
||||
|Preemption disabled at:[<ffffffff811eebd9>] put_cpu_partial+0x29/0x220
|
||||
|
|
||||
|CPU: 0 PID: 87 Comm: rcuop/7 Tainted: G W 4.0.0-rt0+ #477
|
||||
|Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014
|
||||
| 000000000007a9fc ffff88013987baf8 ffffffff817441c7 0000000000000007
|
||||
| 0000000000000000 ffff88013987bb18 ffffffff810eee51 0000000000000000
|
||||
| ffff88013fc10200 ffff88013987bb48 ffffffff8174a1c4 000000000007a9fc
|
||||
|Call Trace:
|
||||
| [<ffffffff817441c7>] dump_stack+0x4f/0x90
|
||||
| [<ffffffff810eee51>] ___might_sleep+0x121/0x1b0
|
||||
| [<ffffffff8174a1c4>] rt_spin_lock+0x24/0x60
|
||||
| [<ffffffff811a689a>] __free_pages_ok+0xaa/0x540
|
||||
| [<ffffffff811a729d>] __free_pages+0x1d/0x30
|
||||
| [<ffffffff811eddd5>] __free_slab+0xc5/0x1e0
|
||||
| [<ffffffff811edf46>] free_delayed+0x56/0x70
|
||||
| [<ffffffff811eecfd>] put_cpu_partial+0x14d/0x220
|
||||
| [<ffffffff811efc98>] __slab_free+0x158/0x2c0
|
||||
| [<ffffffff811f0021>] kmem_cache_free+0x221/0x2d0
|
||||
| [<ffffffff81204d0c>] file_free_rcu+0x2c/0x40
|
||||
| [<ffffffff8112c7e3>] rcu_nocb_kthread+0x243/0x5d0
|
||||
| [<ffffffff810e951c>] kthread+0xfc/0x120
|
||||
| [<ffffffff8174abc8>] ret_from_fork+0x58/0x90
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
init/Kconfig | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/init/Kconfig b/init/Kconfig
|
||||
index 61e8b531649b..b4e88fb19c26 100644
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -1701,7 +1701,7 @@ config SLAB_FREELIST_HARDENED
|
||||
|
||||
config SLUB_CPU_PARTIAL
|
||||
default y
|
||||
- depends on SLUB && SMP
|
||||
+ depends on SLUB && SMP && !PREEMPT_RT_FULL
|
||||
bool "SLUB per cpu partial cache"
|
||||
help
|
||||
Per cpu partial caches accellerate objects allocation and freeing
|
||||
--
|
||||
2.25.1
|
||||
|
@@ -1,74 +0,0 @@
|
||||
From 5d6ef143b9e65be0cda54dcea9150f3cfa951ffd Mon Sep 17 00:00:00 2001
|
||||
From: Yang Shi <yang.shi@windriver.com>
|
||||
Date: Wed, 30 Oct 2013 11:48:33 -0700
|
||||
Subject: [PATCH 084/328] mm/memcontrol: Don't call schedule_work_on in
|
||||
preemption disabled context
|
||||
|
||||
The following trace is triggered when running ltp oom test cases:
|
||||
|
||||
BUG: sleeping function called from invalid context at kernel/rtmutex.c:659
|
||||
in_atomic(): 1, irqs_disabled(): 0, pid: 17188, name: oom03
|
||||
Preemption disabled at:[<ffffffff8112ba70>] mem_cgroup_reclaim+0x90/0xe0
|
||||
|
||||
CPU: 2 PID: 17188 Comm: oom03 Not tainted 3.10.10-rt3 #2
|
||||
Hardware name: Intel Corporation Calpella platform/MATXM-CORE-411-B, BIOS 4.6.3 08/18/2010
|
||||
ffff88007684d730 ffff880070df9b58 ffffffff8169918d ffff880070df9b70
|
||||
ffffffff8106db31 ffff88007688b4a0 ffff880070df9b88 ffffffff8169d9c0
|
||||
ffff88007688b4a0 ffff880070df9bc8 ffffffff81059da1 0000000170df9bb0
|
||||
Call Trace:
|
||||
[<ffffffff8169918d>] dump_stack+0x19/0x1b
|
||||
[<ffffffff8106db31>] __might_sleep+0xf1/0x170
|
||||
[<ffffffff8169d9c0>] rt_spin_lock+0x20/0x50
|
||||
[<ffffffff81059da1>] queue_work_on+0x61/0x100
|
||||
[<ffffffff8112b361>] drain_all_stock+0xe1/0x1c0
|
||||
[<ffffffff8112ba70>] mem_cgroup_reclaim+0x90/0xe0
|
||||
[<ffffffff8112beda>] __mem_cgroup_try_charge+0x41a/0xc40
|
||||
[<ffffffff810f1c91>] ? release_pages+0x1b1/0x1f0
|
||||
[<ffffffff8106f200>] ? sched_exec+0x40/0xb0
|
||||
[<ffffffff8112cc87>] mem_cgroup_charge_common+0x37/0x70
|
||||
[<ffffffff8112e2c6>] mem_cgroup_newpage_charge+0x26/0x30
|
||||
[<ffffffff8110af68>] handle_pte_fault+0x618/0x840
|
||||
[<ffffffff8103ecf6>] ? unpin_current_cpu+0x16/0x70
|
||||
[<ffffffff81070f94>] ? migrate_enable+0xd4/0x200
|
||||
[<ffffffff8110cde5>] handle_mm_fault+0x145/0x1e0
|
||||
[<ffffffff810301e1>] __do_page_fault+0x1a1/0x4c0
|
||||
[<ffffffff8169c9eb>] ? preempt_schedule_irq+0x4b/0x70
|
||||
[<ffffffff8169e3b7>] ? retint_kernel+0x37/0x40
|
||||
[<ffffffff8103053e>] do_page_fault+0xe/0x10
|
||||
[<ffffffff8169e4c2>] page_fault+0x22/0x30
|
||||
|
||||
So, to prevent schedule_work_on from being called in preempt disabled context,
|
||||
replace the pair of get/put_cpu() to get/put_cpu_light().
|
||||
|
||||
|
||||
Signed-off-by: Yang Shi <yang.shi@windriver.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
mm/memcontrol.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
|
||||
index 3a3d109dce21..cf9e81fb342d 100644
|
||||
--- a/mm/memcontrol.c
|
||||
+++ b/mm/memcontrol.c
|
||||
@@ -2082,7 +2082,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
|
||||
* as well as workers from this path always operate on the local
|
||||
* per-cpu data. CPU up doesn't touch memcg_stock at all.
|
||||
*/
|
||||
- curcpu = get_cpu();
|
||||
+ curcpu = get_cpu_light();
|
||||
for_each_online_cpu(cpu) {
|
||||
struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
|
||||
struct mem_cgroup *memcg;
|
||||
@@ -2102,7 +2102,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
|
||||
}
|
||||
css_put(&memcg->css);
|
||||
}
|
||||
- put_cpu();
|
||||
+ put_cpu_light();
|
||||
mutex_unlock(&percpu_charge_mutex);
|
||||
}
|
||||
|
||||
--
|
||||
2.25.1
|
||||
|
@@ -1,123 +0,0 @@
|
||||
From ab73b56574e07b881a37aa1a4b0040a331352d7c Mon Sep 17 00:00:00 2001
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Wed, 28 Jan 2015 17:14:16 +0100
|
||||
Subject: [PATCH 085/328] mm/memcontrol: Replace local_irq_disable with local
|
||||
locks
|
||||
|
||||
There are a few local_irq_disable() which then take sleeping locks. This
|
||||
patch converts them local locks.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
mm/memcontrol.c | 24 ++++++++++++++++--------
|
||||
1 file changed, 16 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
|
||||
index cf9e81fb342d..421ac74450f6 100644
|
||||
--- a/mm/memcontrol.c
|
||||
+++ b/mm/memcontrol.c
|
||||
@@ -69,6 +69,7 @@
|
||||
#include <net/sock.h>
|
||||
#include <net/ip.h>
|
||||
#include "slab.h"
|
||||
+#include <linux/locallock.h>
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
@@ -94,6 +95,8 @@ int do_swap_account __read_mostly;
|
||||
#define do_swap_account 0
|
||||
#endif
|
||||
|
||||
+static DEFINE_LOCAL_IRQ_LOCK(event_lock);
|
||||
+
|
||||
/* Whether legacy memory+swap accounting is active */
|
||||
static bool do_memsw_account(void)
|
||||
{
|
||||
@@ -4922,12 +4925,12 @@ static int mem_cgroup_move_account(struct page *page,
|
||||
|
||||
ret = 0;
|
||||
|
||||
- local_irq_disable();
|
||||
+ local_lock_irq(event_lock);
|
||||
mem_cgroup_charge_statistics(to, page, compound, nr_pages);
|
||||
memcg_check_events(to, page);
|
||||
mem_cgroup_charge_statistics(from, page, compound, -nr_pages);
|
||||
memcg_check_events(from, page);
|
||||
- local_irq_enable();
|
||||
+ local_unlock_irq(event_lock);
|
||||
out_unlock:
|
||||
unlock_page(page);
|
||||
out:
|
||||
@@ -6046,10 +6049,10 @@ void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
|
||||
|
||||
commit_charge(page, memcg, lrucare);
|
||||
|
||||
- local_irq_disable();
|
||||
+ local_lock_irq(event_lock);
|
||||
mem_cgroup_charge_statistics(memcg, page, compound, nr_pages);
|
||||
memcg_check_events(memcg, page);
|
||||
- local_irq_enable();
|
||||
+ local_unlock_irq(event_lock);
|
||||
|
||||
if (do_memsw_account() && PageSwapCache(page)) {
|
||||
swp_entry_t entry = { .val = page_private(page) };
|
||||
@@ -6118,7 +6121,7 @@ static void uncharge_batch(const struct uncharge_gather *ug)
|
||||
memcg_oom_recover(ug->memcg);
|
||||
}
|
||||
|
||||
- local_irq_save(flags);
|
||||
+ local_lock_irqsave(event_lock, flags);
|
||||
__mod_memcg_state(ug->memcg, MEMCG_RSS, -ug->nr_anon);
|
||||
__mod_memcg_state(ug->memcg, MEMCG_CACHE, -ug->nr_file);
|
||||
__mod_memcg_state(ug->memcg, MEMCG_RSS_HUGE, -ug->nr_huge);
|
||||
@@ -6126,7 +6129,7 @@ static void uncharge_batch(const struct uncharge_gather *ug)
|
||||
__count_memcg_events(ug->memcg, PGPGOUT, ug->pgpgout);
|
||||
__this_cpu_add(ug->memcg->stat_cpu->nr_page_events, nr_pages);
|
||||
memcg_check_events(ug->memcg, ug->dummy_page);
|
||||
- local_irq_restore(flags);
|
||||
+ local_unlock_irqrestore(event_lock, flags);
|
||||
|
||||
if (!mem_cgroup_is_root(ug->memcg))
|
||||
css_put_many(&ug->memcg->css, nr_pages);
|
||||
@@ -6289,10 +6292,10 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage)
|
||||
|
||||
commit_charge(newpage, memcg, false);
|
||||
|
||||
- local_irq_save(flags);
|
||||
+ local_lock_irqsave(event_lock, flags);
|
||||
mem_cgroup_charge_statistics(memcg, newpage, compound, nr_pages);
|
||||
memcg_check_events(memcg, newpage);
|
||||
- local_irq_restore(flags);
|
||||
+ local_unlock_irqrestore(event_lock, flags);
|
||||
}
|
||||
|
||||
DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key);
|
||||
@@ -6484,6 +6487,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
|
||||
struct mem_cgroup *memcg, *swap_memcg;
|
||||
unsigned int nr_entries;
|
||||
unsigned short oldid;
|
||||
+ unsigned long flags;
|
||||
|
||||
VM_BUG_ON_PAGE(PageLRU(page), page);
|
||||
VM_BUG_ON_PAGE(page_count(page), page);
|
||||
@@ -6529,13 +6533,17 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
|
||||
* important here to have the interrupts disabled because it is the
|
||||
* only synchronisation we have for updating the per-CPU variables.
|
||||
*/
|
||||
+ local_lock_irqsave(event_lock, flags);
|
||||
+#ifndef CONFIG_PREEMPT_RT_BASE
|
||||
VM_BUG_ON(!irqs_disabled());
|
||||
+#endif
|
||||
mem_cgroup_charge_statistics(memcg, page, PageTransHuge(page),
|
||||
-nr_entries);
|
||||
memcg_check_events(memcg, page);
|
||||
|
||||
if (!mem_cgroup_is_root(memcg))
|
||||
css_put_many(&memcg->css, nr_entries);
|
||||
+ local_unlock_irqrestore(event_lock, flags);
|
||||
}
|
||||
|
||||
/**
|
||||
--
|
||||
2.25.1
|
||||
|
@@ -1,202 +0,0 @@
|
||||
From c8eeca08279f6363742db822856cc18ae1b7bdbd Mon Sep 17 00:00:00 2001
|
||||
From: Mike Galbraith <umgwanakikbuti@gmail.com>
|
||||
Date: Tue, 22 Mar 2016 11:16:09 +0100
|
||||
Subject: [PATCH 086/328] mm/zsmalloc: copy with get_cpu_var() and locking
|
||||
|
||||
get_cpu_var() disables preemption and triggers a might_sleep() splat later.
|
||||
This is replaced with get_locked_var().
|
||||
This bitspinlocks are replaced with a proper mutex which requires a slightly
|
||||
larger struct to allocate.
|
||||
|
||||
Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
|
||||
[bigeasy: replace the bitspin_lock() with a mutex, get_locked_var(). Mike then
|
||||
fixed the size magic]
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
mm/zsmalloc.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++----
|
||||
1 file changed, 74 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
|
||||
index 85cc29c93d93..63e83b47fa99 100644
|
||||
--- a/mm/zsmalloc.c
|
||||
+++ b/mm/zsmalloc.c
|
||||
@@ -56,6 +56,7 @@
|
||||
#include <linux/wait.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/fs.h>
|
||||
+#include <linux/locallock.h>
|
||||
|
||||
#define ZSPAGE_MAGIC 0x58
|
||||
|
||||
@@ -73,9 +74,22 @@
|
||||
*/
|
||||
#define ZS_MAX_ZSPAGE_ORDER 2
|
||||
#define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER)
|
||||
-
|
||||
#define ZS_HANDLE_SIZE (sizeof(unsigned long))
|
||||
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+
|
||||
+struct zsmalloc_handle {
|
||||
+ unsigned long addr;
|
||||
+ struct mutex lock;
|
||||
+};
|
||||
+
|
||||
+#define ZS_HANDLE_ALLOC_SIZE (sizeof(struct zsmalloc_handle))
|
||||
+
|
||||
+#else
|
||||
+
|
||||
+#define ZS_HANDLE_ALLOC_SIZE (sizeof(unsigned long))
|
||||
+#endif
|
||||
+
|
||||
/*
|
||||
* Object location (<PFN>, <obj_idx>) is encoded as
|
||||
* as single (unsigned long) handle value.
|
||||
@@ -325,7 +339,7 @@ static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {}
|
||||
|
||||
static int create_cache(struct zs_pool *pool)
|
||||
{
|
||||
- pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE,
|
||||
+ pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_ALLOC_SIZE,
|
||||
0, 0, NULL);
|
||||
if (!pool->handle_cachep)
|
||||
return 1;
|
||||
@@ -349,10 +363,27 @@ static void destroy_cache(struct zs_pool *pool)
|
||||
|
||||
static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
|
||||
{
|
||||
- return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
|
||||
- gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
|
||||
+ void *p;
|
||||
+
|
||||
+ p = kmem_cache_alloc(pool->handle_cachep,
|
||||
+ gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+ if (p) {
|
||||
+ struct zsmalloc_handle *zh = p;
|
||||
+
|
||||
+ mutex_init(&zh->lock);
|
||||
+ }
|
||||
+#endif
|
||||
+ return (unsigned long)p;
|
||||
}
|
||||
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+static struct zsmalloc_handle *zs_get_pure_handle(unsigned long handle)
|
||||
+{
|
||||
+ return (void *)(handle &~((1 << OBJ_TAG_BITS) - 1));
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
|
||||
{
|
||||
kmem_cache_free(pool->handle_cachep, (void *)handle);
|
||||
@@ -371,12 +402,18 @@ static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
|
||||
|
||||
static void record_obj(unsigned long handle, unsigned long obj)
|
||||
{
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+ struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
|
||||
+
|
||||
+ WRITE_ONCE(zh->addr, obj);
|
||||
+#else
|
||||
/*
|
||||
* lsb of @obj represents handle lock while other bits
|
||||
* represent object value the handle is pointing so
|
||||
* updating shouldn't do store tearing.
|
||||
*/
|
||||
WRITE_ONCE(*(unsigned long *)handle, obj);
|
||||
+#endif
|
||||
}
|
||||
|
||||
/* zpool driver */
|
||||
@@ -458,6 +495,7 @@ MODULE_ALIAS("zpool-zsmalloc");
|
||||
|
||||
/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
|
||||
static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
|
||||
+static DEFINE_LOCAL_IRQ_LOCK(zs_map_area_lock);
|
||||
|
||||
static bool is_zspage_isolated(struct zspage *zspage)
|
||||
{
|
||||
@@ -887,7 +925,13 @@ static unsigned long location_to_obj(struct page *page, unsigned int obj_idx)
|
||||
|
||||
static unsigned long handle_to_obj(unsigned long handle)
|
||||
{
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+ struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
|
||||
+
|
||||
+ return zh->addr;
|
||||
+#else
|
||||
return *(unsigned long *)handle;
|
||||
+#endif
|
||||
}
|
||||
|
||||
static unsigned long obj_to_head(struct page *page, void *obj)
|
||||
@@ -901,22 +945,46 @@ static unsigned long obj_to_head(struct page *page, void *obj)
|
||||
|
||||
static inline int testpin_tag(unsigned long handle)
|
||||
{
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+ struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
|
||||
+
|
||||
+ return mutex_is_locked(&zh->lock);
|
||||
+#else
|
||||
return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle);
|
||||
+#endif
|
||||
}
|
||||
|
||||
static inline int trypin_tag(unsigned long handle)
|
||||
{
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+ struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
|
||||
+
|
||||
+ return mutex_trylock(&zh->lock);
|
||||
+#else
|
||||
return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle);
|
||||
+#endif
|
||||
}
|
||||
|
||||
static void pin_tag(unsigned long handle)
|
||||
{
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+ struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
|
||||
+
|
||||
+ return mutex_lock(&zh->lock);
|
||||
+#else
|
||||
bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle);
|
||||
+#endif
|
||||
}
|
||||
|
||||
static void unpin_tag(unsigned long handle)
|
||||
{
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+ struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
|
||||
+
|
||||
+ return mutex_unlock(&zh->lock);
|
||||
+#else
|
||||
bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle);
|
||||
+#endif
|
||||
}
|
||||
|
||||
static void reset_page(struct page *page)
|
||||
@@ -1342,7 +1410,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
|
||||
class = pool->size_class[class_idx];
|
||||
off = (class->size * obj_idx) & ~PAGE_MASK;
|
||||
|
||||
- area = &get_cpu_var(zs_map_area);
|
||||
+ area = &get_locked_var(zs_map_area_lock, zs_map_area);
|
||||
area->vm_mm = mm;
|
||||
if (off + class->size <= PAGE_SIZE) {
|
||||
/* this object is contained entirely within a page */
|
||||
@@ -1396,7 +1464,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
|
||||
|
||||
__zs_unmap_object(area, pages, off, class->size);
|
||||
}
|
||||
- put_cpu_var(zs_map_area);
|
||||
+ put_locked_var(zs_map_area_lock, zs_map_area);
|
||||
|
||||
migrate_read_unlock(zspage);
|
||||
unpin_tag(handle);
|
||||
--
|
||||
2.25.1
|
||||
|
@@ -1,61 +0,0 @@
|
||||
From 58952b3995a060f4fc7fbc02552ac489639d565e Mon Sep 17 00:00:00 2001
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Tue, 11 Dec 2018 21:53:43 +0100
|
||||
Subject: [PATCH 087/328] x86/mm/pat: disable preemption __split_large_page()
|
||||
after spin_lock()
|
||||
|
||||
Commit "x86/mm/pat: Disable preemption around __flush_tlb_all()" added a
|
||||
warning if __flush_tlb_all() is invoked in preemptible context. On !RT
|
||||
the warning does not trigger because a spin lock is acquired which
|
||||
disables preemption. On RT the spin lock does not disable preemption and
|
||||
so the warning is seen.
|
||||
|
||||
Disable preemption to avoid the warning __flush_tlb_all().
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
arch/x86/mm/pageattr.c | 8 ++++++++
|
||||
1 file changed, 8 insertions(+)
|
||||
|
||||
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
|
||||
index 101f3ad0d6ad..0b0396261ca1 100644
|
||||
--- a/arch/x86/mm/pageattr.c
|
||||
+++ b/arch/x86/mm/pageattr.c
|
||||
@@ -687,12 +687,18 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
|
||||
pgprot_t ref_prot;
|
||||
|
||||
spin_lock(&pgd_lock);
|
||||
+ /*
|
||||
+ * Keep preemption disabled after __flush_tlb_all() which expects not be
|
||||
+ * preempted during the flush of the local TLB.
|
||||
+ */
|
||||
+ preempt_disable();
|
||||
/*
|
||||
* Check for races, another CPU might have split this page
|
||||
* up for us already:
|
||||
*/
|
||||
tmp = _lookup_address_cpa(cpa, address, &level);
|
||||
if (tmp != kpte) {
|
||||
+ preempt_enable();
|
||||
spin_unlock(&pgd_lock);
|
||||
return 1;
|
||||
}
|
||||
@@ -726,6 +732,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
|
||||
break;
|
||||
|
||||
default:
|
||||
+ preempt_enable();
|
||||
spin_unlock(&pgd_lock);
|
||||
return 1;
|
||||
}
|
||||
@@ -764,6 +771,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
|
||||
* going on.
|
||||
*/
|
||||
__flush_tlb_all();
|
||||
+ preempt_enable();
|
||||
spin_unlock(&pgd_lock);
|
||||
|
||||
return 0;
|
||||
--
|
||||
2.25.1
|
||||
|
@@ -1,175 +0,0 @@
|
||||
From 7f7e6402ea1895f3d2197122d4379c46a3a7fe14 Mon Sep 17 00:00:00 2001
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Wed, 25 Jan 2017 16:34:27 +0100
|
||||
Subject: [PATCH 088/328] radix-tree: use local locks
|
||||
|
||||
The preload functionality uses per-CPU variables and preempt-disable to
|
||||
ensure that it does not switch CPUs during its usage. This patch adds
|
||||
local_locks() instead preempt_disable() for the same purpose and to
|
||||
remain preemptible on -RT.
|
||||
|
||||
Cc: stable-rt@vger.kernel.org
|
||||
Reported-and-debugged-by: Mike Galbraith <efault@gmx.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/idr.h | 5 +----
|
||||
include/linux/radix-tree.h | 7 ++-----
|
||||
lib/radix-tree.c | 32 +++++++++++++++++++++++---------
|
||||
3 files changed, 26 insertions(+), 18 deletions(-)
|
||||
|
||||
diff --git a/include/linux/idr.h b/include/linux/idr.h
|
||||
index b6c6151c7446..81c9df5c04fa 100644
|
||||
--- a/include/linux/idr.h
|
||||
+++ b/include/linux/idr.h
|
||||
@@ -169,10 +169,7 @@ static inline bool idr_is_empty(const struct idr *idr)
|
||||
* Each idr_preload() should be matched with an invocation of this
|
||||
* function. See idr_preload() for details.
|
||||
*/
|
||||
-static inline void idr_preload_end(void)
|
||||
-{
|
||||
- preempt_enable();
|
||||
-}
|
||||
+void idr_preload_end(void);
|
||||
|
||||
/**
|
||||
* idr_for_each_entry() - Iterate over an IDR's elements of a given type.
|
||||
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
|
||||
index 34149e8b5f73..affb0fc4c5b6 100644
|
||||
--- a/include/linux/radix-tree.h
|
||||
+++ b/include/linux/radix-tree.h
|
||||
@@ -330,6 +330,8 @@ unsigned int radix_tree_gang_lookup_slot(const struct radix_tree_root *,
|
||||
int radix_tree_preload(gfp_t gfp_mask);
|
||||
int radix_tree_maybe_preload(gfp_t gfp_mask);
|
||||
int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order);
|
||||
+void radix_tree_preload_end(void);
|
||||
+
|
||||
void radix_tree_init(void);
|
||||
void *radix_tree_tag_set(struct radix_tree_root *,
|
||||
unsigned long index, unsigned int tag);
|
||||
@@ -349,11 +351,6 @@ unsigned int radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *,
|
||||
unsigned int max_items, unsigned int tag);
|
||||
int radix_tree_tagged(const struct radix_tree_root *, unsigned int tag);
|
||||
|
||||
-static inline void radix_tree_preload_end(void)
|
||||
-{
|
||||
- preempt_enable();
|
||||
-}
|
||||
-
|
||||
int radix_tree_split_preload(unsigned old_order, unsigned new_order, gfp_t);
|
||||
int radix_tree_split(struct radix_tree_root *, unsigned long index,
|
||||
unsigned new_order);
|
||||
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
|
||||
index e5cab5c4e383..9309e813bc1f 100644
|
||||
--- a/lib/radix-tree.c
|
||||
+++ b/lib/radix-tree.c
|
||||
@@ -38,7 +38,7 @@
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/string.h>
|
||||
-
|
||||
+#include <linux/locallock.h>
|
||||
|
||||
/* Number of nodes in fully populated tree of given height */
|
||||
static unsigned long height_to_maxnodes[RADIX_TREE_MAX_PATH + 1] __read_mostly;
|
||||
@@ -87,6 +87,7 @@ struct radix_tree_preload {
|
||||
struct radix_tree_node *nodes;
|
||||
};
|
||||
static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
|
||||
+static DEFINE_LOCAL_IRQ_LOCK(radix_tree_preloads_lock);
|
||||
|
||||
static inline struct radix_tree_node *entry_to_node(void *ptr)
|
||||
{
|
||||
@@ -405,12 +406,13 @@ radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent,
|
||||
* succeed in getting a node here (and never reach
|
||||
* kmem_cache_alloc)
|
||||
*/
|
||||
- rtp = this_cpu_ptr(&radix_tree_preloads);
|
||||
+ rtp = &get_locked_var(radix_tree_preloads_lock, radix_tree_preloads);
|
||||
if (rtp->nr) {
|
||||
ret = rtp->nodes;
|
||||
rtp->nodes = ret->parent;
|
||||
rtp->nr--;
|
||||
}
|
||||
+ put_locked_var(radix_tree_preloads_lock, radix_tree_preloads);
|
||||
/*
|
||||
* Update the allocation stack trace as this is more useful
|
||||
* for debugging.
|
||||
@@ -476,14 +478,14 @@ static __must_check int __radix_tree_preload(gfp_t gfp_mask, unsigned nr)
|
||||
*/
|
||||
gfp_mask &= ~__GFP_ACCOUNT;
|
||||
|
||||
- preempt_disable();
|
||||
+ local_lock(radix_tree_preloads_lock);
|
||||
rtp = this_cpu_ptr(&radix_tree_preloads);
|
||||
while (rtp->nr < nr) {
|
||||
- preempt_enable();
|
||||
+ local_unlock(radix_tree_preloads_lock);
|
||||
node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
|
||||
if (node == NULL)
|
||||
goto out;
|
||||
- preempt_disable();
|
||||
+ local_lock(radix_tree_preloads_lock);
|
||||
rtp = this_cpu_ptr(&radix_tree_preloads);
|
||||
if (rtp->nr < nr) {
|
||||
node->parent = rtp->nodes;
|
||||
@@ -525,7 +527,7 @@ int radix_tree_maybe_preload(gfp_t gfp_mask)
|
||||
if (gfpflags_allow_blocking(gfp_mask))
|
||||
return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
|
||||
/* Preloading doesn't help anything with this gfp mask, skip it */
|
||||
- preempt_disable();
|
||||
+ local_lock(radix_tree_preloads_lock);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(radix_tree_maybe_preload);
|
||||
@@ -563,7 +565,7 @@ int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order)
|
||||
|
||||
/* Preloading doesn't help anything with this gfp mask, skip it */
|
||||
if (!gfpflags_allow_blocking(gfp_mask)) {
|
||||
- preempt_disable();
|
||||
+ local_lock(radix_tree_preloads_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -597,6 +599,12 @@ int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order)
|
||||
return __radix_tree_preload(gfp_mask, nr_nodes);
|
||||
}
|
||||
|
||||
+void radix_tree_preload_end(void)
|
||||
+{
|
||||
+ local_unlock(radix_tree_preloads_lock);
|
||||
+}
|
||||
+EXPORT_SYMBOL(radix_tree_preload_end);
|
||||
+
|
||||
static unsigned radix_tree_load_root(const struct radix_tree_root *root,
|
||||
struct radix_tree_node **nodep, unsigned long *maxindex)
|
||||
{
|
||||
@@ -2102,10 +2110,16 @@ EXPORT_SYMBOL(radix_tree_tagged);
|
||||
void idr_preload(gfp_t gfp_mask)
|
||||
{
|
||||
if (__radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE))
|
||||
- preempt_disable();
|
||||
+ local_lock(radix_tree_preloads_lock);
|
||||
}
|
||||
EXPORT_SYMBOL(idr_preload);
|
||||
|
||||
+void idr_preload_end(void)
|
||||
+{
|
||||
+ local_unlock(radix_tree_preloads_lock);
|
||||
+}
|
||||
+EXPORT_SYMBOL(idr_preload_end);
|
||||
+
|
||||
int ida_pre_get(struct ida *ida, gfp_t gfp)
|
||||
{
|
||||
/*
|
||||
@@ -2114,7 +2128,7 @@ int ida_pre_get(struct ida *ida, gfp_t gfp)
|
||||
* to return to the ida_pre_get() step.
|
||||
*/
|
||||
if (!__radix_tree_preload(gfp, IDA_PRELOAD_SIZE))
|
||||
- preempt_enable();
|
||||
+ local_unlock(radix_tree_preloads_lock);
|
||||
|
||||
if (!this_cpu_read(ida_bitmap)) {
|
||||
struct ida_bitmap *bitmap = kzalloc(sizeof(*bitmap), gfp);
|
||||
--
|
||||
2.25.1
|
||||
|
@@ -1,175 +0,0 @@
|
||||
From bb113ab4c9dea8e53db84af84d34864c4f3e9b2d Mon Sep 17 00:00:00 2001
|
||||
From: Ingo Molnar <mingo@elte.hu>
|
||||
Date: Fri, 3 Jul 2009 08:29:34 -0500
|
||||
Subject: [PATCH 089/328] timers: Prepare for full preemption
|
||||
|
||||
When softirqs can be preempted we need to make sure that cancelling
|
||||
the timer from the active thread can not deadlock vs. a running timer
|
||||
callback. Add a waitqueue to resolve that.
|
||||
|
||||
Signed-off-by: Ingo Molnar <mingo@elte.hu>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
include/linux/timer.h | 2 +-
|
||||
kernel/sched/core.c | 9 +++++++--
|
||||
kernel/time/timer.c | 45 +++++++++++++++++++++++++++++++++++++++----
|
||||
3 files changed, 49 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/include/linux/timer.h b/include/linux/timer.h
|
||||
index 7b066fd38248..54627d046b3a 100644
|
||||
--- a/include/linux/timer.h
|
||||
+++ b/include/linux/timer.h
|
||||
@@ -172,7 +172,7 @@ extern void add_timer(struct timer_list *timer);
|
||||
|
||||
extern int try_to_del_timer_sync(struct timer_list *timer);
|
||||
|
||||
-#ifdef CONFIG_SMP
|
||||
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
|
||||
extern int del_timer_sync(struct timer_list *timer);
|
||||
#else
|
||||
# define del_timer_sync(t) del_timer(t)
|
||||
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
|
||||
index e6022cc2605b..986ed04425be 100644
|
||||
--- a/kernel/sched/core.c
|
||||
+++ b/kernel/sched/core.c
|
||||
@@ -498,11 +498,14 @@ void resched_cpu(int cpu)
|
||||
*/
|
||||
int get_nohz_timer_target(void)
|
||||
{
|
||||
- int i, cpu = smp_processor_id();
|
||||
+ int i, cpu;
|
||||
struct sched_domain *sd;
|
||||
|
||||
+ preempt_disable_rt();
|
||||
+ cpu = smp_processor_id();
|
||||
+
|
||||
if (!idle_cpu(cpu) && housekeeping_cpu(cpu, HK_FLAG_TIMER))
|
||||
- return cpu;
|
||||
+ goto preempt_en_rt;
|
||||
|
||||
rcu_read_lock();
|
||||
for_each_domain(cpu, sd) {
|
||||
@@ -521,6 +524,8 @@ int get_nohz_timer_target(void)
|
||||
cpu = housekeeping_any_cpu(HK_FLAG_TIMER);
|
||||
unlock:
|
||||
rcu_read_unlock();
|
||||
+preempt_en_rt:
|
||||
+ preempt_enable_rt();
|
||||
return cpu;
|
||||
}
|
||||
|
||||
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
|
||||
index ae64cb819a9a..9019c9caf146 100644
|
||||
--- a/kernel/time/timer.c
|
||||
+++ b/kernel/time/timer.c
|
||||
@@ -44,6 +44,7 @@
|
||||
#include <linux/sched/debug.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/compat.h>
|
||||
+#include <linux/swait.h>
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
#include <asm/unistd.h>
|
||||
@@ -197,6 +198,9 @@ EXPORT_SYMBOL(jiffies_64);
|
||||
struct timer_base {
|
||||
raw_spinlock_t lock;
|
||||
struct timer_list *running_timer;
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+ struct swait_queue_head wait_for_running_timer;
|
||||
+#endif
|
||||
unsigned long clk;
|
||||
unsigned long next_expiry;
|
||||
unsigned int cpu;
|
||||
@@ -1178,6 +1182,33 @@ void add_timer_on(struct timer_list *timer, int cpu)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(add_timer_on);
|
||||
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+/*
|
||||
+ * Wait for a running timer
|
||||
+ */
|
||||
+static void wait_for_running_timer(struct timer_list *timer)
|
||||
+{
|
||||
+ struct timer_base *base;
|
||||
+ u32 tf = timer->flags;
|
||||
+
|
||||
+ if (tf & TIMER_MIGRATING)
|
||||
+ return;
|
||||
+
|
||||
+ base = get_timer_base(tf);
|
||||
+ swait_event_exclusive(base->wait_for_running_timer,
|
||||
+ base->running_timer != timer);
|
||||
+}
|
||||
+
|
||||
+# define wakeup_timer_waiters(b) swake_up_all(&(b)->wait_for_running_timer)
|
||||
+#else
|
||||
+static inline void wait_for_running_timer(struct timer_list *timer)
|
||||
+{
|
||||
+ cpu_relax();
|
||||
+}
|
||||
+
|
||||
+# define wakeup_timer_waiters(b) do { } while (0)
|
||||
+#endif
|
||||
+
|
||||
/**
|
||||
* del_timer - deactivate a timer.
|
||||
* @timer: the timer to be deactivated
|
||||
@@ -1233,7 +1264,7 @@ int try_to_del_timer_sync(struct timer_list *timer)
|
||||
}
|
||||
EXPORT_SYMBOL(try_to_del_timer_sync);
|
||||
|
||||
-#ifdef CONFIG_SMP
|
||||
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
|
||||
/**
|
||||
* del_timer_sync - deactivate a timer and wait for the handler to finish.
|
||||
* @timer: the timer to be deactivated
|
||||
@@ -1293,7 +1324,7 @@ int del_timer_sync(struct timer_list *timer)
|
||||
int ret = try_to_del_timer_sync(timer);
|
||||
if (ret >= 0)
|
||||
return ret;
|
||||
- cpu_relax();
|
||||
+ wait_for_running_timer(timer);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(del_timer_sync);
|
||||
@@ -1354,13 +1385,16 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head)
|
||||
|
||||
fn = timer->function;
|
||||
|
||||
- if (timer->flags & TIMER_IRQSAFE) {
|
||||
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL) &&
|
||||
+ timer->flags & TIMER_IRQSAFE) {
|
||||
raw_spin_unlock(&base->lock);
|
||||
call_timer_fn(timer, fn);
|
||||
+ base->running_timer = NULL;
|
||||
raw_spin_lock(&base->lock);
|
||||
} else {
|
||||
raw_spin_unlock_irq(&base->lock);
|
||||
call_timer_fn(timer, fn);
|
||||
+ base->running_timer = NULL;
|
||||
raw_spin_lock_irq(&base->lock);
|
||||
}
|
||||
}
|
||||
@@ -1683,8 +1717,8 @@ static inline void __run_timers(struct timer_base *base)
|
||||
while (levels--)
|
||||
expire_timers(base, heads + levels);
|
||||
}
|
||||
- base->running_timer = NULL;
|
||||
raw_spin_unlock_irq(&base->lock);
|
||||
+ wakeup_timer_waiters(base);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1929,6 +1963,9 @@ static void __init init_timer_cpu(int cpu)
|
||||
base->cpu = cpu;
|
||||
raw_spin_lock_init(&base->lock);
|
||||
base->clk = jiffies;
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+ init_swait_queue_head(&base->wait_for_running_timer);
|
||||
+#endif
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.25.1
|
||||
|
@ -1,36 +0,0 @@
From c87615728aaaf5a59575f49682ed6339a9cb116f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 6 Nov 2011 12:26:18 +0100
Subject: [PATCH 090/328] x86: kvm Require const tsc for RT

Non constant TSC is a nightmare on bare metal already, but with
virtualization it becomes a complete disaster because the workarounds
are horrible latency wise. That's also a preliminary for running RT in
a guest on top of a RT host.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/kvm/x86.c | 7 +++++++
1 file changed, 7 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ade694f94a49..2dfb7c81743e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6873,6 +6873,13 @@ int kvm_arch_init(void *opaque)
goto out;
}

+#ifdef CONFIG_PREEMPT_RT_FULL
+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+ printk(KERN_ERR "RT requires X86_FEATURE_CONSTANT_TSC\n");
+ return -EOPNOTSUPP;
+ }
+#endif
+
r = kvm_mmu_module_init();
if (r)
goto out_free_percpu;
--
2.25.1

@ -1,114 +0,0 @@
From d46161e1a4fa5ff7b32deb64ac2e7698d0a56e49 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 4 Oct 2017 10:24:23 +0200
Subject: [PATCH 091/328] pci/switchtec: Don't use completion's wait queue

The poll callback is using completion's wait_queue_head_t member and
puts it in poll_wait() so the poll() caller gets a wakeup after command
completed. This does not work on RT because we don't have a
wait_queue_head_t in our completion implementation. Nobody in tree does
like that in tree so this is the only driver that breaks.

Instead of using the completion here is waitqueue with a status flag as
suggested by Logan.

I don't have the HW so I have no idea if it works as expected, so please
test it.

Cc: Kurt Schwemmer <kurt.schwemmer@microsemi.com>
Cc: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/pci/switch/switchtec.c | 22 +++++++++++++---------
1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c
index 43431816412c..a8df847bedee 100644
--- a/drivers/pci/switch/switchtec.c
+++ b/drivers/pci/switch/switchtec.c
@@ -43,10 +43,11 @@ struct switchtec_user {

enum mrpc_state state;

- struct completion comp;
+ wait_queue_head_t cmd_comp;
struct kref kref;
struct list_head list;

+ bool cmd_done;
u32 cmd;
u32 status;
u32 return_code;
@@ -68,7 +69,7 @@ static struct switchtec_user *stuser_create(struct switchtec_dev *stdev)
stuser->stdev = stdev;
kref_init(&stuser->kref);
INIT_LIST_HEAD(&stuser->list);
- init_completion(&stuser->comp);
+ init_waitqueue_head(&stuser->cmd_comp);
stuser->event_cnt = atomic_read(&stdev->event_cnt);

dev_dbg(&stdev->dev, "%s: %p\n", __func__, stuser);
@@ -147,7 +148,7 @@ static int mrpc_queue_cmd(struct switchtec_user *stuser)
kref_get(&stuser->kref);
stuser->read_len = sizeof(stuser->data);
stuser_set_state(stuser, MRPC_QUEUED);
- init_completion(&stuser->comp);
+ stuser->cmd_done = false;
list_add_tail(&stuser->list, &stdev->mrpc_queue);

mrpc_cmd_submit(stdev);
@@ -184,7 +185,8 @@ static void mrpc_complete_cmd(struct switchtec_dev *stdev)
stuser->read_len);

out:
- complete_all(&stuser->comp);
+ stuser->cmd_done = true;
+ wake_up_interruptible(&stuser->cmd_comp);
list_del_init(&stuser->list);
stuser_put(stuser);
stdev->mrpc_busy = 0;
@@ -454,10 +456,11 @@ static ssize_t switchtec_dev_read(struct file *filp, char __user *data,
mutex_unlock(&stdev->mrpc_mutex);

if (filp->f_flags & O_NONBLOCK) {
- if (!try_wait_for_completion(&stuser->comp))
+ if (!READ_ONCE(stuser->cmd_done))
return -EAGAIN;
} else {
- rc = wait_for_completion_interruptible(&stuser->comp);
+ rc = wait_event_interruptible(stuser->cmd_comp,
+ stuser->cmd_done);
if (rc < 0)
return rc;
}
@@ -505,7 +508,7 @@ static __poll_t switchtec_dev_poll(struct file *filp, poll_table *wait)
struct switchtec_dev *stdev = stuser->stdev;
__poll_t ret = 0;

- poll_wait(filp, &stuser->comp.wait, wait);
+ poll_wait(filp, &stuser->cmd_comp, wait);
poll_wait(filp, &stdev->event_wq, wait);

if (lock_mutex_and_test_alive(stdev))
@@ -513,7 +516,7 @@ static __poll_t switchtec_dev_poll(struct file *filp, poll_table *wait)

mutex_unlock(&stdev->mrpc_mutex);

- if (try_wait_for_completion(&stuser->comp))
+ if (READ_ONCE(stuser->cmd_done))
ret |= EPOLLIN | EPOLLRDNORM;

if (stuser->event_cnt != atomic_read(&stdev->event_cnt))
@@ -1037,7 +1040,8 @@ static void stdev_kill(struct switchtec_dev *stdev)

/* Wake up and kill any users waiting on an MRPC request */
list_for_each_entry_safe(stuser, tmpuser, &stdev->mrpc_queue, list) {
- complete_all(&stuser->comp);
+ stuser->cmd_done = true;
+ wake_up_interruptible(&stuser->cmd_comp);
list_del_init(&stuser->list);
stuser_put(stuser);
}
--
2.25.1

@ -1,41 +0,0 @@
From 5048f6148f091b822260d482639172336a66cbc3 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 28 Oct 2013 12:19:57 +0100
Subject: [PATCH 092/328] wait.h: include atomic.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

| CC init/main.o
|In file included from include/linux/mmzone.h:9:0,
| from include/linux/gfp.h:4,
| from include/linux/kmod.h:22,
| from include/linux/module.h:13,
| from init/main.c:15:
|include/linux/wait.h: In function ‘wait_on_atomic_t’:
|include/linux/wait.h:982:2: error: implicit declaration of function ‘atomic_read’ [-Werror=implicit-function-declaration]
| if (atomic_read(val) == 0)
| ^

This pops up on ARM. Non-RT gets its atomic.h include from spinlock.h

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/wait.h | 1 +
1 file changed, 1 insertion(+)

diff --git a/include/linux/wait.h b/include/linux/wait.h
index ed7c122cb31f..2b5ef8e94d19 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -10,6 +10,7 @@

#include <asm/current.h>
#include <uapi/linux/wait.h>
+#include <linux/atomic.h>

typedef struct wait_queue_entry wait_queue_entry_t;

--
2.25.1

@ -1,245 +0,0 @@
From 370c2439db620266b1bb104cc624841eec515e5c Mon Sep 17 00:00:00 2001
From: Daniel Wagner <daniel.wagner@bmw-carit.de>
Date: Fri, 11 Jul 2014 15:26:11 +0200
Subject: [PATCH 093/328] work-simple: Simple work queue implemenation

Provides a framework for enqueuing callbacks from irq context
PREEMPT_RT_FULL safe. The callbacks are executed in kthread context.

Bases on wait-simple.

Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de>
---
include/linux/swork.h | 24 ++++++
kernel/sched/Makefile | 2 +-
kernel/sched/swork.c | 173 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 198 insertions(+), 1 deletion(-)
create mode 100644 include/linux/swork.h
create mode 100644 kernel/sched/swork.c

diff --git a/include/linux/swork.h b/include/linux/swork.h
new file mode 100644
index 000000000000..f175fa9a6016
--- /dev/null
+++ b/include/linux/swork.h
@@ -0,0 +1,24 @@
+#ifndef _LINUX_SWORK_H
+#define _LINUX_SWORK_H
+
+#include <linux/list.h>
+
+struct swork_event {
+ struct list_head item;
+ unsigned long flags;
+ void (*func)(struct swork_event *);
+};
+
+static inline void INIT_SWORK(struct swork_event *event,
+ void (*func)(struct swork_event *))
+{
+ event->flags = 0;
+ event->func = func;
+}
+
+bool swork_queue(struct swork_event *sev);
+
+int swork_get(void);
+void swork_put(void);
+
+#endif /* _LINUX_SWORK_H */
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 7fe183404c38..2b765aa4e2c4 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -18,7 +18,7 @@ endif

obj-y += core.o loadavg.o clock.o cputime.o
obj-y += idle.o fair.o rt.o deadline.o
-obj-y += wait.o wait_bit.o swait.o completion.o
+obj-y += wait.o wait_bit.o swait.o swork.o completion.o

obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o
obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
diff --git a/kernel/sched/swork.c b/kernel/sched/swork.c
new file mode 100644
index 000000000000..a5b89fdacf19
--- /dev/null
+++ b/kernel/sched/swork.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (C) 2014 BMW Car IT GmbH, Daniel Wagner daniel.wagner@bmw-carit.de
+ *
+ * Provides a framework for enqueuing callbacks from irq context
+ * PREEMPT_RT_FULL safe. The callbacks are executed in kthread context.
+ */
+
+#include <linux/swait.h>
+#include <linux/swork.h>
+#include <linux/kthread.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/export.h>
+
+#define SWORK_EVENT_PENDING (1 << 0)
+
+static DEFINE_MUTEX(worker_mutex);
+static struct sworker *glob_worker;
+
+struct sworker {
+ struct list_head events;
+ struct swait_queue_head wq;
+
+ raw_spinlock_t lock;
+
+ struct task_struct *task;
+ int refs;
+};
+
+static bool swork_readable(struct sworker *worker)
+{
+ bool r;
+
+ if (kthread_should_stop())
+ return true;
+
+ raw_spin_lock_irq(&worker->lock);
+ r = !list_empty(&worker->events);
+ raw_spin_unlock_irq(&worker->lock);
+
+ return r;
+}
+
+static int swork_kthread(void *arg)
+{
+ struct sworker *worker = arg;
+
+ for (;;) {
+ swait_event_interruptible_exclusive(worker->wq,
+ swork_readable(worker));
+ if (kthread_should_stop())
+ break;
+
+ raw_spin_lock_irq(&worker->lock);
+ while (!list_empty(&worker->events)) {
+ struct swork_event *sev;
+
+ sev = list_first_entry(&worker->events,
+ struct swork_event, item);
+ list_del(&sev->item);
+ raw_spin_unlock_irq(&worker->lock);
+
+ WARN_ON_ONCE(!test_and_clear_bit(SWORK_EVENT_PENDING,
+ &sev->flags));
+ sev->func(sev);
+ raw_spin_lock_irq(&worker->lock);
+ }
+ raw_spin_unlock_irq(&worker->lock);
+ }
+ return 0;
+}
+
+static struct sworker *swork_create(void)
+{
+ struct sworker *worker;
+
+ worker = kzalloc(sizeof(*worker), GFP_KERNEL);
+ if (!worker)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&worker->events);
+ raw_spin_lock_init(&worker->lock);
+ init_swait_queue_head(&worker->wq);
+
+ worker->task = kthread_run(swork_kthread, worker, "kswork");
+ if (IS_ERR(worker->task)) {
+ kfree(worker);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return worker;
+}
+
+static void swork_destroy(struct sworker *worker)
+{
+ kthread_stop(worker->task);
+
+ WARN_ON(!list_empty(&worker->events));
+ kfree(worker);
+}
+
+/**
+ * swork_queue - queue swork
+ *
+ * Returns %false if @work was already on a queue, %true otherwise.
+ *
+ * The work is queued and processed on a random CPU
+ */
+bool swork_queue(struct swork_event *sev)
+{
+ unsigned long flags;
+
+ if (test_and_set_bit(SWORK_EVENT_PENDING, &sev->flags))
+ return false;
+
+ raw_spin_lock_irqsave(&glob_worker->lock, flags);
+ list_add_tail(&sev->item, &glob_worker->events);
+ raw_spin_unlock_irqrestore(&glob_worker->lock, flags);
+
+ swake_up_one(&glob_worker->wq);
+ return true;
+}
+EXPORT_SYMBOL_GPL(swork_queue);
+
+/**
+ * swork_get - get an instance of the sworker
+ *
+ * Returns an negative error code if the initialization if the worker did not
+ * work, %0 otherwise.
+ *
+ */
+int swork_get(void)
+{
+ struct sworker *worker;
+
+ mutex_lock(&worker_mutex);
+ if (!glob_worker) {
+ worker = swork_create();
+ if (IS_ERR(worker)) {
+ mutex_unlock(&worker_mutex);
+ return -ENOMEM;
+ }
+
+ glob_worker = worker;
+ }
+
+ glob_worker->refs++;
+ mutex_unlock(&worker_mutex);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(swork_get);
+
+/**
+ * swork_put - puts an instance of the sworker
+ *
+ * Will destroy the sworker thread. This function must not be called until all
+ * queued events have been completed.
+ */
+void swork_put(void)
+{
+ mutex_lock(&worker_mutex);
+
+ glob_worker->refs--;
+ if (glob_worker->refs > 0)
+ goto out;
+
+ swork_destroy(glob_worker);
+ glob_worker = NULL;
+out:
+ mutex_unlock(&worker_mutex);
+}
+EXPORT_SYMBOL_GPL(swork_put);
--
2.25.1

@ -1,36 +0,0 @@
From 8c88098a7081d7cd354fb9e2a64598e6e10ce525 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 10 Sep 2018 18:00:31 +0200
Subject: [PATCH 094/328] work-simple: drop a shit statement in
 SWORK_EVENT_PENDING

Dan Carpenter reported
| smatch warnings:
|kernel/sched/swork.c:63 swork_kthread() warn: test_bit() takes a bit number

This is not a bug because we shift by zero (and use the same value in
both places).
Nevertheless I'm dropping that shift by zero to keep smatch quiet.

Cc: Daniel Wagner <daniel.wagner@siemens.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/sched/swork.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sched/swork.c b/kernel/sched/swork.c
index a5b89fdacf19..c90d14b9b126 100644
--- a/kernel/sched/swork.c
+++ b/kernel/sched/swork.c
@@ -12,7 +12,7 @@
#include <linux/spinlock.h>
#include <linux/export.h>

-#define SWORK_EVENT_PENDING (1 << 0)
+#define SWORK_EVENT_PENDING 1

static DEFINE_MUTEX(worker_mutex);
static struct sworker *glob_worker;
--
2.25.1

@ -1,390 +0,0 @@
From ae24940034c02ed671e3a5cc9c4cf31ebfc24fed Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 11 Jan 2013 11:23:51 +0100
Subject: [PATCH 095/328] completion: Use simple wait queues

Completions have no long lasting callbacks and therefor do not need
the complex waitqueue variant. Use simple waitqueues which reduces the
contention on the waitqueue lock.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/powerpc/platforms/ps3/device-init.c | 4 +--
.../wireless/intersil/orinoco/orinoco_usb.c | 4 +--
drivers/usb/gadget/function/f_fs.c | 2 +-
drivers/usb/gadget/legacy/inode.c | 4 +--
include/linux/completion.h | 8 ++---
include/linux/suspend.h | 6 ++++
include/linux/swait.h | 2 ++
kernel/power/hibernate.c | 7 ++++
kernel/power/suspend.c | 4 +++
kernel/sched/completion.c | 34 +++++++++----------
kernel/sched/core.c | 10 ++++--
kernel/sched/swait.c | 21 +++++++++++-
12 files changed, 75 insertions(+), 31 deletions(-)

diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c
index e7075aaff1bb..1580464a9d5b 100644
--- a/arch/powerpc/platforms/ps3/device-init.c
+++ b/arch/powerpc/platforms/ps3/device-init.c
@@ -752,8 +752,8 @@ static int ps3_notification_read_write(struct ps3_notification_device *dev,
}
pr_debug("%s:%u: notification %s issued\n", __func__, __LINE__, op);

- res = wait_event_interruptible(dev->done.wait,
- dev->done.done || kthread_should_stop());
+ res = swait_event_interruptible_exclusive(dev->done.wait,
+ dev->done.done || kthread_should_stop());
if (kthread_should_stop())
res = -EINTR;
if (res) {
diff --git a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c
index b704e4bce171..c364abaac548 100644
--- a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c
+++ b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c
@@ -697,8 +697,8 @@ static void ezusb_req_ctx_wait(struct ezusb_priv *upriv,
while (!ctx->done.done && msecs--)
udelay(1000);
} else {
- wait_event_interruptible(ctx->done.wait,
- ctx->done.done);
+ swait_event_interruptible_exclusive(ctx->done.wait,
+ ctx->done.done);
}
break;
default:
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 2050993fb58b..e2ca75a6e241 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -1626,7 +1626,7 @@ static void ffs_data_put(struct ffs_data *ffs)
pr_info("%s(): freeing\n", __func__);
ffs_data_clear(ffs);
BUG_ON(waitqueue_active(&ffs->ev.waitq) ||
- waitqueue_active(&ffs->ep0req_completion.wait) ||
+ swait_active(&ffs->ep0req_completion.wait) ||
waitqueue_active(&ffs->wait));
destroy_workqueue(ffs->io_completion_wq);
kfree(ffs->dev_name);
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index 37ca0e669bd8..56a16587b221 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -343,7 +343,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len)
spin_unlock_irq (&epdata->dev->lock);

if (likely (value == 0)) {
- value = wait_event_interruptible (done.wait, done.done);
+ value = swait_event_interruptible_exclusive(done.wait, done.done);
if (value != 0) {
spin_lock_irq (&epdata->dev->lock);
if (likely (epdata->ep != NULL)) {
@@ -352,7 +352,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len)
usb_ep_dequeue (epdata->ep, epdata->req);
spin_unlock_irq (&epdata->dev->lock);

- wait_event (done.wait, done.done);
+ swait_event_exclusive(done.wait, done.done);
if (epdata->status == -ECONNRESET)
epdata->status = -EINTR;
} else {
diff --git a/include/linux/completion.h b/include/linux/completion.h
index 519e94915d18..bf8e77001f18 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -9,7 +9,7 @@
* See kernel/sched/completion.c for details.
*/

-#include <linux/wait.h>
+#include <linux/swait.h>

/*
* struct completion - structure used to maintain state for a "completion"
@@ -25,7 +25,7 @@
*/
struct completion {
unsigned int done;
- wait_queue_head_t wait;
+ struct swait_queue_head wait;
};

#define init_completion_map(x, m) __init_completion(x)
@@ -34,7 +34,7 @@ static inline void complete_acquire(struct completion *x) {}
static inline void complete_release(struct completion *x) {}

#define COMPLETION_INITIALIZER(work) \
- { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
+ { 0, __SWAIT_QUEUE_HEAD_INITIALIZER((work).wait) }

#define COMPLETION_INITIALIZER_ONSTACK_MAP(work, map) \
(*({ init_completion_map(&(work), &(map)); &(work); }))
@@ -85,7 +85,7 @@ static inline void complete_release(struct completion *x) {}
static inline void __init_completion(struct completion *x)
{
x->done = 0;
- init_waitqueue_head(&x->wait);
+ init_swait_queue_head(&x->wait);
}

/**
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 3f529ad9a9d2..328439ce71f5 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -196,6 +196,12 @@ struct platform_s2idle_ops {
void (*end)(void);
};

+#if defined(CONFIG_SUSPEND) || defined(CONFIG_HIBERNATION)
+extern bool pm_in_action;
+#else
+# define pm_in_action false
+#endif
+
#ifdef CONFIG_SUSPEND
extern suspend_state_t mem_sleep_current;
extern suspend_state_t mem_sleep_default;
diff --git a/include/linux/swait.h b/include/linux/swait.h
index 73e06e9986d4..f426a0661aa0 100644
--- a/include/linux/swait.h
+++ b/include/linux/swait.h
@@ -160,7 +160,9 @@ static inline bool swq_has_sleeper(struct swait_queue_head *wq)
extern void swake_up_one(struct swait_queue_head *q);
extern void swake_up_all(struct swait_queue_head *q);
extern void swake_up_locked(struct swait_queue_head *q);
+extern void swake_up_all_locked(struct swait_queue_head *q);

+extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);
extern void prepare_to_swait_exclusive(struct swait_queue_head *q, struct swait_queue *wait, int state);
extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state);

diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index f5ce9f7ec132..0f00ba01376f 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -690,6 +690,10 @@ static int load_image_and_restore(void)
return error;
}

+#ifndef CONFIG_SUSPEND
+bool pm_in_action;
+#endif
+
/**
* hibernate - Carry out system hibernation, including saving the image.
*/
@@ -703,6 +707,8 @@ int hibernate(void)
return -EPERM;
}

+ pm_in_action = true;
+
lock_system_sleep();
/* The snapshot device should not be opened while we're running */
if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
@@ -781,6 +787,7 @@ int hibernate(void)
atomic_inc(&snapshot_device_available);
Unlock:
unlock_system_sleep();
+ pm_in_action = false;
pr_info("hibernation exit\n");

return error;
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 0bd595a0b610..a4456772d98e 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -600,6 +600,8 @@ static int enter_state(suspend_state_t state)
return error;
}

+bool pm_in_action;
+
/**
* pm_suspend - Externally visible function for suspending the system.
* @state: System sleep state to enter.
@@ -614,6 +616,7 @@ int pm_suspend(suspend_state_t state)
if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX)
return -EINVAL;

+ pm_in_action = true;
pr_info("suspend entry (%s)\n", mem_sleep_labels[state]);
error = enter_state(state);
if (error) {
@@ -623,6 +626,7 @@ int pm_suspend(suspend_state_t state)
suspend_stats.success++;
}
pr_info("suspend exit\n");
+ pm_in_action = false;
return error;
}
EXPORT_SYMBOL(pm_suspend);
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index a1ad5b7d5521..755a58084978 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -29,12 +29,12 @@ void complete(struct completion *x)
{
unsigned long flags;

- spin_lock_irqsave(&x->wait.lock, flags);
+ raw_spin_lock_irqsave(&x->wait.lock, flags);

if (x->done != UINT_MAX)
x->done++;
- __wake_up_locked(&x->wait, TASK_NORMAL, 1);
- spin_unlock_irqrestore(&x->wait.lock, flags);
+ swake_up_locked(&x->wait);
+ raw_spin_unlock_irqrestore(&x->wait.lock, flags);
}
EXPORT_SYMBOL(complete);

@@ -58,10 +58,10 @@ void complete_all(struct completion *x)
{
unsigned long flags;

- spin_lock_irqsave(&x->wait.lock, flags);
+ raw_spin_lock_irqsave(&x->wait.lock, flags);
x->done = UINT_MAX;
- __wake_up_locked(&x->wait, TASK_NORMAL, 0);
- spin_unlock_irqrestore(&x->wait.lock, flags);
+ swake_up_all_locked(&x->wait);
+ raw_spin_unlock_irqrestore(&x->wait.lock, flags);
}
EXPORT_SYMBOL(complete_all);

@@ -70,20 +70,20 @@ do_wait_for_common(struct completion *x,
long (*action)(long), long timeout, int state)
{
if (!x->done) {
- DECLARE_WAITQUEUE(wait, current);
+ DECLARE_SWAITQUEUE(wait);

- __add_wait_queue_entry_tail_exclusive(&x->wait, &wait);
+ __prepare_to_swait(&x->wait, &wait);
do {
if (signal_pending_state(state, current)) {
timeout = -ERESTARTSYS;
break;
}
__set_current_state(state);
- spin_unlock_irq(&x->wait.lock);
+ raw_spin_unlock_irq(&x->wait.lock);
timeout = action(timeout);
- spin_lock_irq(&x->wait.lock);
+ raw_spin_lock_irq(&x->wait.lock);
} while (!x->done && timeout);
- __remove_wait_queue(&x->wait, &wait);
+ __finish_swait(&x->wait, &wait);
if (!x->done)
return timeout;
}
@@ -100,9 +100,9 @@ __wait_for_common(struct completion *x,

complete_acquire(x);

- spin_lock_irq(&x->wait.lock);
+ raw_spin_lock_irq(&x->wait.lock);
timeout = do_wait_for_common(x, action, timeout, state);
- spin_unlock_irq(&x->wait.lock);
+ raw_spin_unlock_irq(&x->wait.lock);

complete_release(x);

@@ -291,12 +291,12 @@ bool try_wait_for_completion(struct completion *x)
if (!READ_ONCE(x->done))
return false;

- spin_lock_irqsave(&x->wait.lock, flags);
+ raw_spin_lock_irqsave(&x->wait.lock, flags);
if (!x->done)
ret = false;
else if (x->done != UINT_MAX)
x->done--;
- spin_unlock_irqrestore(&x->wait.lock, flags);
+ raw_spin_unlock_irqrestore(&x->wait.lock, flags);
return ret;
}
EXPORT_SYMBOL(try_wait_for_completion);
@@ -322,8 +322,8 @@ bool completion_done(struct completion *x)
* otherwise we can end up freeing the completion before complete()
* is done referencing it.
*/
- spin_lock_irqsave(&x->wait.lock, flags);
- spin_unlock_irqrestore(&x->wait.lock, flags);
+ raw_spin_lock_irqsave(&x->wait.lock, flags);
+ raw_spin_unlock_irqrestore(&x->wait.lock, flags);
return true;
}
EXPORT_SYMBOL(completion_done);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 986ed04425be..584978640512 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7154,7 +7154,10 @@ void migrate_disable(void)
return;
}
#ifdef CONFIG_SCHED_DEBUG
- WARN_ON_ONCE(p->migrate_disable_atomic);
+ if (unlikely(p->migrate_disable_atomic)) {
+ tracing_off();
+ WARN_ON_ONCE(1);
+ }
#endif

if (p->migrate_disable) {
@@ -7184,7 +7187,10 @@ void migrate_enable(void)
}

#ifdef CONFIG_SCHED_DEBUG
- WARN_ON_ONCE(p->migrate_disable_atomic);
+ if (unlikely(p->migrate_disable_atomic)) {
+ tracing_off();
+ WARN_ON_ONCE(1);
+ }
#endif

WARN_ON_ONCE(p->migrate_disable <= 0);
diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c
index 66b59ac77c22..c7cb30cdd1b7 100644
--- a/kernel/sched/swait.c
+++ b/kernel/sched/swait.c
@@ -32,6 +32,25 @@ void swake_up_locked(struct swait_queue_head *q)
}
EXPORT_SYMBOL(swake_up_locked);

+void swake_up_all_locked(struct swait_queue_head *q)
+{
+ struct swait_queue *curr;
+ int wakes = 0;
+
+ while (!list_empty(&q->task_list)) {
+
+ curr = list_first_entry(&q->task_list, typeof(*curr),
+ task_list);
+ wake_up_process(curr->task);
+ list_del_init(&curr->task_list);
+ wakes++;
+ }
+ if (pm_in_action)
+ return;
+ WARN(wakes > 2, "complete_all() with %d waiters\n", wakes);
+}
+EXPORT_SYMBOL(swake_up_all_locked);
+
void swake_up_one(struct swait_queue_head *q)
{
unsigned long flags;
@@ -69,7 +88,7 @@ void swake_up_all(struct swait_queue_head *q)
}
EXPORT_SYMBOL(swake_up_all);

-static void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait)
+void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait)
{
wait->task = current;
if (list_empty(&wait->task_list))
--
2.25.1

@ -1,88 +0,0 @@
From 4ab27b1ec5f678a5dd444c6e1d3cdff6eeabfa12 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 16 Feb 2015 18:49:10 +0100
Subject: [PATCH 096/328] fs/aio: simple simple work

|BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:768
|in_atomic(): 1, irqs_disabled(): 0, pid: 26, name: rcuos/2
|2 locks held by rcuos/2/26:
| #0: (rcu_callback){.+.+..}, at: [<ffffffff810b1a12>] rcu_nocb_kthread+0x1e2/0x380
| #1: (rcu_read_lock_sched){.+.+..}, at: [<ffffffff812acd26>] percpu_ref_kill_rcu+0xa6/0x1c0
|Preemption disabled at:[<ffffffff810b1a93>] rcu_nocb_kthread+0x263/0x380
|Call Trace:
| [<ffffffff81582e9e>] dump_stack+0x4e/0x9c
| [<ffffffff81077aeb>] __might_sleep+0xfb/0x170
| [<ffffffff81589304>] rt_spin_lock+0x24/0x70
| [<ffffffff811c5790>] free_ioctx_users+0x30/0x130
| [<ffffffff812ace34>] percpu_ref_kill_rcu+0x1b4/0x1c0
| [<ffffffff810b1a93>] rcu_nocb_kthread+0x263/0x380
| [<ffffffff8106e046>] kthread+0xd6/0xf0
| [<ffffffff81591eec>] ret_from_fork+0x7c/0xb0

replace this preempt_disable() friendly swork.

Reported-By: Mike Galbraith <umgwanakikbuti@gmail.com>
Suggested-by: Benjamin LaHaise <bcrl@kvack.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/aio.c | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index b5fbf2061868..93f8cf7fdeab 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -42,6 +42,7 @@
#include <linux/ramfs.h>
#include <linux/percpu-refcount.h>
#include <linux/mount.h>
+#include <linux/swork.h>

#include <asm/kmap_types.h>
#include <linux/uaccess.h>
@@ -121,6 +122,7 @@ struct kioctx {
long nr_pages;

struct rcu_work free_rwork; /* see free_ioctx() */
+ struct swork_event free_swork; /* see free_ioctx() */

/*
* signals when all in-flight requests are done
@@ -265,6 +267,7 @@ static int __init aio_setup(void)
.mount = aio_mount,
.kill_sb = kill_anon_super,
};
+ BUG_ON(swork_get());
aio_mnt = kern_mount(&aio_fs);
if (IS_ERR(aio_mnt))
panic("Failed to create aio fs mount.");
@@ -606,9 +609,9 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
* and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
* now it's safe to cancel any that need to be.
*/
-static void free_ioctx_users(struct percpu_ref *ref)
+static void free_ioctx_users_work(struct swork_event *sev)
{
- struct kioctx *ctx = container_of(ref, struct kioctx, users);
+ struct kioctx *ctx = container_of(sev, struct kioctx, free_swork);
struct aio_kiocb *req;

spin_lock_irq(&ctx->ctx_lock);
@@ -626,6 +629,14 @@ static void free_ioctx_users(struct percpu_ref *ref)
percpu_ref_put(&ctx->reqs);
}

+static void free_ioctx_users(struct percpu_ref *ref)
+{
+ struct kioctx *ctx = container_of(ref, struct kioctx, users);
+
+ INIT_SWORK(&ctx->free_swork, free_ioctx_users_work);
+ swork_queue(&ctx->free_swork);
+}
+
static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
{
unsigned i, new_nr;
--
2.25.1

Some files were not shown because too many files have changed in this diff Show More