enable 4.19.x-rt with preempt-rt Linux 4.19.15
Signed-off-by: Tiejun Chen <tiejun.china@gmail.com>
@@ -1,5 +1,5 @@
 kernel:
-  image: linuxkit/kernel:4.14.87-rt
+  image: linuxkit/kernel:4.19.25-rt
   cmdline: "console=tty0"
 init:
   - linuxkit/init:a2166a6048ce041eebe005ab99454cfdeaa5c848
@@ -265,12 +265,14 @@ $(eval $(call kernel,4.20.13,4.20.x,$(EXTRA),$(DEBUG)))
 $(eval $(call kernel,4.19.26,4.19.x,$(EXTRA),$(DEBUG)))
 $(eval $(call kernel,4.14.104,4.14.x,$(EXTRA),$(DEBUG)))
 $(eval $(call kernel,4.14.104,4.14.x,,-dbg))
+$(eval $(call kernel,4.19.25,4.19.x,-rt,))
 $(eval $(call kernel,4.9.161,4.9.x,$(EXTRA),$(DEBUG)))

 else ifeq ($(ARCH),aarch64)
 $(eval $(call kernel,4.20.13,4.20.x,$(EXTRA),$(DEBUG)))
 $(eval $(call kernel,4.19.26,4.19.x,$(EXTRA),$(DEBUG)))
 $(eval $(call kernel,4.14.104,4.14.x,$(EXTRA),$(DEBUG)))
+$(eval $(call kernel,4.19.25,4.19.x,-rt,))

 else ifeq ($(ARCH),s390x)
 $(eval $(call kernel,4.20.13,4.20.x,$(EXTRA),$(DEBUG)))
kernel/config-4.19.x-aarch64-rt (new file, 20 lines)
@@ -0,0 +1,20 @@
CONFIG_SLUB_DEBUG=y
# CONFIG_SLUB_MEMCG_SYSFS_ON is not set
CONFIG_SLUB=y
# CONFIG_SLAB_FREELIST_HARDENED is not set
CONFIG_HAVE_ALIGNED_STRUCT_PAGE=y
CONFIG_PREEMPT=y
CONFIG_PREEMPT_RT_BASE=y
CONFIG_HAVE_PREEMPT_LAZY=y
CONFIG_PREEMPT_LAZY=y
# CONFIG_PREEMPT_VOLUNTARY is not set
# CONFIG_PREEMPT__LL is not set
# CONFIG_PREEMPT_RTB is not set
CONFIG_PREEMPT_RT_FULL=y
CONFIG_PREEMPT_COUNT=y
# CONFIG_SLUB_DEBUG_ON is not set
# CONFIG_SLUB_STATS is not set
CONFIG_DEBUG_PREEMPT=y
# CONFIG_PREEMPT_TRACER is not set
CONFIG_HZ_1000=y
CONFIG_HZ=1000
kernel/config-4.19.x-x86_64-rt (new file, 22 lines)
@@ -0,0 +1,22 @@
CONFIG_RWSEM_GENERIC_SPINLOCK=y
# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set
CONFIG_PREEMPT_RCU=y
CONFIG_TASKS_RCU=y
CONFIG_SLUB_DEBUG=y
# CONFIG_SLUB_MEMCG_SYSFS_ON is not set
CONFIG_SLUB=y
# CONFIG_SLAB_FREELIST_HARDENED is not set
CONFIG_HAVE_ALIGNED_STRUCT_PAGE=y
CONFIG_PREEMPT=y
CONFIG_PREEMPT_RT_BASE=y
CONFIG_HAVE_PREEMPT_LAZY=y
CONFIG_PREEMPT_LAZY=y
# CONFIG_PREEMPT_VOLUNTARY is not set
# CONFIG_PREEMPT__LL is not set
# CONFIG_PREEMPT_RTB is not set
CONFIG_PREEMPT_RT_FULL=y
CONFIG_PREEMPT_COUNT=y
# CONFIG_SLUB_DEBUG_ON is not set
# CONFIG_SLUB_STATS is not set
CONFIG_DEBUG_PREEMPT=y
# CONFIG_PREEMPT_TRACER is not set
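
The two config files above contain only the preempt-rt related options; presumably they are layered on top of the existing per-architecture base configs during the kernel build. As a quick sanity check that a booted image really runs the rt kernel, something like the following works (a sketch, not part of this commit; it assumes the kernel was built with CONFIG_IKCONFIG_PROC so /proc/config.gz exists):

    # Sketch: verify preempt-rt options in a running image (assumes /proc/config.gz is available)
    zcat /proc/config.gz | grep -E 'CONFIG_PREEMPT_RT_FULL|CONFIG_PREEMPT_LAZY|^CONFIG_HZ='
    # An rt kernel also normally identifies itself in its version banner:
    uname -v    # expect something like "#1 SMP PREEMPT RT ..."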
@@ -0,0 +1,202 @@
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Thu, 13 Sep 2018 13:30:18 +0200
Subject: [PATCH 1/7] ARM: at91: add TCB registers definitions

Add registers and bits definitions for the timer counter blocks found on
Atmel ARM SoCs.

Tested-by: Alexander Dahl <ada@thorsis.com>
Tested-by: Andras Szemzo <szemzo.andras@gmail.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/soc/at91/atmel_tcb.h | 183 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 183 insertions(+)
create mode 100644 include/soc/at91/atmel_tcb.h

--- /dev/null
+++ b/include/soc/at91/atmel_tcb.h
@@ -0,0 +1,183 @@
+//SPDX-License-Identifier: GPL-2.0
|
||||
+/* Copyright (C) 2018 Microchip */
|
||||
+
|
||||
+#ifndef __SOC_ATMEL_TCB_H
|
||||
+#define __SOC_ATMEL_TCB_H
|
||||
+
|
||||
+/* Channel registers */
|
||||
+#define ATMEL_TC_COFFS(c) ((c) * 0x40)
|
||||
+#define ATMEL_TC_CCR(c) ATMEL_TC_COFFS(c)
|
||||
+#define ATMEL_TC_CMR(c) (ATMEL_TC_COFFS(c) + 0x4)
|
||||
+#define ATMEL_TC_SMMR(c) (ATMEL_TC_COFFS(c) + 0x8)
|
||||
+#define ATMEL_TC_RAB(c) (ATMEL_TC_COFFS(c) + 0xc)
|
||||
+#define ATMEL_TC_CV(c) (ATMEL_TC_COFFS(c) + 0x10)
|
||||
+#define ATMEL_TC_RA(c) (ATMEL_TC_COFFS(c) + 0x14)
|
||||
+#define ATMEL_TC_RB(c) (ATMEL_TC_COFFS(c) + 0x18)
|
||||
+#define ATMEL_TC_RC(c) (ATMEL_TC_COFFS(c) + 0x1c)
|
||||
+#define ATMEL_TC_SR(c) (ATMEL_TC_COFFS(c) + 0x20)
|
||||
+#define ATMEL_TC_IER(c) (ATMEL_TC_COFFS(c) + 0x24)
|
||||
+#define ATMEL_TC_IDR(c) (ATMEL_TC_COFFS(c) + 0x28)
|
||||
+#define ATMEL_TC_IMR(c) (ATMEL_TC_COFFS(c) + 0x2c)
|
||||
+#define ATMEL_TC_EMR(c) (ATMEL_TC_COFFS(c) + 0x30)
|
||||
+
|
||||
+/* Block registers */
|
||||
+#define ATMEL_TC_BCR 0xc0
|
||||
+#define ATMEL_TC_BMR 0xc4
|
||||
+#define ATMEL_TC_QIER 0xc8
|
||||
+#define ATMEL_TC_QIDR 0xcc
|
||||
+#define ATMEL_TC_QIMR 0xd0
|
||||
+#define ATMEL_TC_QISR 0xd4
|
||||
+#define ATMEL_TC_FMR 0xd8
|
||||
+#define ATMEL_TC_WPMR 0xe4
|
||||
+
|
||||
+/* CCR fields */
|
||||
+#define ATMEL_TC_CCR_CLKEN BIT(0)
|
||||
+#define ATMEL_TC_CCR_CLKDIS BIT(1)
|
||||
+#define ATMEL_TC_CCR_SWTRG BIT(2)
|
||||
+
|
||||
+/* Common CMR fields */
|
||||
+#define ATMEL_TC_CMR_TCLKS_MSK GENMASK(2, 0)
|
||||
+#define ATMEL_TC_CMR_TCLK(x) (x)
|
||||
+#define ATMEL_TC_CMR_XC(x) ((x) + 5)
|
||||
+#define ATMEL_TC_CMR_CLKI BIT(3)
|
||||
+#define ATMEL_TC_CMR_BURST_MSK GENMASK(5, 4)
|
||||
+#define ATMEL_TC_CMR_BURST_XC(x) (((x) + 1) << 4)
|
||||
+#define ATMEL_TC_CMR_WAVE BIT(15)
|
||||
+
|
||||
+/* Capture mode CMR fields */
|
||||
+#define ATMEL_TC_CMR_LDBSTOP BIT(6)
|
||||
+#define ATMEL_TC_CMR_LDBDIS BIT(7)
|
||||
+#define ATMEL_TC_CMR_ETRGEDG_MSK GENMASK(9, 8)
|
||||
+#define ATMEL_TC_CMR_ETRGEDG_NONE (0 << 8)
|
||||
+#define ATMEL_TC_CMR_ETRGEDG_RISING (1 << 8)
|
||||
+#define ATMEL_TC_CMR_ETRGEDG_FALLING (2 << 8)
|
||||
+#define ATMEL_TC_CMR_ETRGEDG_BOTH (3 << 8)
|
||||
+#define ATMEL_TC_CMR_ABETRG BIT(10)
|
||||
+#define ATMEL_TC_CMR_CPCTRG BIT(14)
|
||||
+#define ATMEL_TC_CMR_LDRA_MSK GENMASK(17, 16)
|
||||
+#define ATMEL_TC_CMR_LDRA_NONE (0 << 16)
|
||||
+#define ATMEL_TC_CMR_LDRA_RISING (1 << 16)
|
||||
+#define ATMEL_TC_CMR_LDRA_FALLING (2 << 16)
|
||||
+#define ATMEL_TC_CMR_LDRA_BOTH (3 << 16)
|
||||
+#define ATMEL_TC_CMR_LDRB_MSK GENMASK(19, 18)
|
||||
+#define ATMEL_TC_CMR_LDRB_NONE (0 << 18)
|
||||
+#define ATMEL_TC_CMR_LDRB_RISING (1 << 18)
|
||||
+#define ATMEL_TC_CMR_LDRB_FALLING (2 << 18)
|
||||
+#define ATMEL_TC_CMR_LDRB_BOTH (3 << 18)
|
||||
+#define ATMEL_TC_CMR_SBSMPLR_MSK GENMASK(22, 20)
|
||||
+#define ATMEL_TC_CMR_SBSMPLR(x) ((x) << 20)
|
||||
+
|
||||
+/* Waveform mode CMR fields */
|
||||
+#define ATMEL_TC_CMR_CPCSTOP BIT(6)
|
||||
+#define ATMEL_TC_CMR_CPCDIS BIT(7)
|
||||
+#define ATMEL_TC_CMR_EEVTEDG_MSK GENMASK(9, 8)
|
||||
+#define ATMEL_TC_CMR_EEVTEDG_NONE (0 << 8)
|
||||
+#define ATMEL_TC_CMR_EEVTEDG_RISING (1 << 8)
|
||||
+#define ATMEL_TC_CMR_EEVTEDG_FALLING (2 << 8)
|
||||
+#define ATMEL_TC_CMR_EEVTEDG_BOTH (3 << 8)
|
||||
+#define ATMEL_TC_CMR_EEVT_MSK GENMASK(11, 10)
|
||||
+#define ATMEL_TC_CMR_EEVT_XC(x) (((x) + 1) << 10)
|
||||
+#define ATMEL_TC_CMR_ENETRG BIT(12)
|
||||
+#define ATMEL_TC_CMR_WAVESEL_MSK GENMASK(14, 13)
|
||||
+#define ATMEL_TC_CMR_WAVESEL_UP (0 << 13)
|
||||
+#define ATMEL_TC_CMR_WAVESEL_UPDOWN (1 << 13)
|
||||
+#define ATMEL_TC_CMR_WAVESEL_UPRC (2 << 13)
|
||||
+#define ATMEL_TC_CMR_WAVESEL_UPDOWNRC (3 << 13)
|
||||
+#define ATMEL_TC_CMR_ACPA_MSK GENMASK(17, 16)
|
||||
+#define ATMEL_TC_CMR_ACPA(a) (ATMEL_TC_CMR_ACTION_##a << 16)
|
||||
+#define ATMEL_TC_CMR_ACPC_MSK GENMASK(19, 18)
|
||||
+#define ATMEL_TC_CMR_ACPC(a) (ATMEL_TC_CMR_ACTION_##a << 18)
|
||||
+#define ATMEL_TC_CMR_AEEVT_MSK GENMASK(21, 20)
|
||||
+#define ATMEL_TC_CMR_AEEVT(a) (ATMEL_TC_CMR_ACTION_##a << 20)
|
||||
+#define ATMEL_TC_CMR_ASWTRG_MSK GENMASK(23, 22)
|
||||
+#define ATMEL_TC_CMR_ASWTRG(a) (ATMEL_TC_CMR_ACTION_##a << 22)
|
||||
+#define ATMEL_TC_CMR_BCPB_MSK GENMASK(25, 24)
|
||||
+#define ATMEL_TC_CMR_BCPB(a) (ATMEL_TC_CMR_ACTION_##a << 24)
|
||||
+#define ATMEL_TC_CMR_BCPC_MSK GENMASK(27, 26)
|
||||
+#define ATMEL_TC_CMR_BCPC(a) (ATMEL_TC_CMR_ACTION_##a << 26)
|
||||
+#define ATMEL_TC_CMR_BEEVT_MSK GENMASK(29, 28)
|
||||
+#define ATMEL_TC_CMR_BEEVT(a) (ATMEL_TC_CMR_ACTION_##a << 28)
|
||||
+#define ATMEL_TC_CMR_BSWTRG_MSK GENMASK(31, 30)
|
||||
+#define ATMEL_TC_CMR_BSWTRG(a) (ATMEL_TC_CMR_ACTION_##a << 30)
|
||||
+#define ATMEL_TC_CMR_ACTION_NONE 0
|
||||
+#define ATMEL_TC_CMR_ACTION_SET 1
|
||||
+#define ATMEL_TC_CMR_ACTION_CLEAR 2
|
||||
+#define ATMEL_TC_CMR_ACTION_TOGGLE 3
|
||||
+
|
||||
+/* SMMR fields */
|
||||
+#define ATMEL_TC_SMMR_GCEN BIT(0)
|
||||
+#define ATMEL_TC_SMMR_DOWN BIT(1)
|
||||
+
|
||||
+/* SR/IER/IDR/IMR fields */
|
||||
+#define ATMEL_TC_COVFS BIT(0)
|
||||
+#define ATMEL_TC_LOVRS BIT(1)
|
||||
+#define ATMEL_TC_CPAS BIT(2)
|
||||
+#define ATMEL_TC_CPBS BIT(3)
|
||||
+#define ATMEL_TC_CPCS BIT(4)
|
||||
+#define ATMEL_TC_LDRAS BIT(5)
|
||||
+#define ATMEL_TC_LDRBS BIT(6)
|
||||
+#define ATMEL_TC_ETRGS BIT(7)
|
||||
+#define ATMEL_TC_CLKSTA BIT(16)
|
||||
+#define ATMEL_TC_MTIOA BIT(17)
|
||||
+#define ATMEL_TC_MTIOB BIT(18)
|
||||
+
|
||||
+/* EMR fields */
|
||||
+#define ATMEL_TC_EMR_TRIGSRCA_MSK GENMASK(1, 0)
|
||||
+#define ATMEL_TC_EMR_TRIGSRCA_TIOA 0
|
||||
+#define ATMEL_TC_EMR_TRIGSRCA_PWMX 1
|
||||
+#define ATMEL_TC_EMR_TRIGSRCB_MSK GENMASK(5, 4)
|
||||
+#define ATMEL_TC_EMR_TRIGSRCB_TIOB (0 << 4)
|
||||
+#define ATMEL_TC_EMR_TRIGSRCB_PWM (1 << 4)
|
||||
+#define ATMEL_TC_EMR_NOCLKDIV BIT(8)
|
||||
+
|
||||
+/* BCR fields */
|
||||
+#define ATMEL_TC_BCR_SYNC BIT(0)
|
||||
+
|
||||
+/* BMR fields */
|
||||
+#define ATMEL_TC_BMR_TCXC_MSK(c) GENMASK(((c) * 2) + 1, (c) * 2)
|
||||
+#define ATMEL_TC_BMR_TCXC(x, c) ((x) << (2 * (c)))
|
||||
+#define ATMEL_TC_BMR_QDEN BIT(8)
|
||||
+#define ATMEL_TC_BMR_POSEN BIT(9)
|
||||
+#define ATMEL_TC_BMR_SPEEDEN BIT(10)
|
||||
+#define ATMEL_TC_BMR_QDTRANS BIT(11)
|
||||
+#define ATMEL_TC_BMR_EDGPHA BIT(12)
|
||||
+#define ATMEL_TC_BMR_INVA BIT(13)
|
||||
+#define ATMEL_TC_BMR_INVB BIT(14)
|
||||
+#define ATMEL_TC_BMR_INVIDX BIT(15)
|
||||
+#define ATMEL_TC_BMR_SWAP BIT(16)
|
||||
+#define ATMEL_TC_BMR_IDXPHB BIT(17)
|
||||
+#define ATMEL_TC_BMR_AUTOC BIT(18)
|
||||
+#define ATMEL_TC_MAXFILT_MSK GENMASK(25, 20)
|
||||
+#define ATMEL_TC_MAXFILT(x) (((x) - 1) << 20)
|
||||
+#define ATMEL_TC_MAXCMP_MSK GENMASK(29, 26)
|
||||
+#define ATMEL_TC_MAXCMP(x) ((x) << 26)
|
||||
+
|
||||
+/* QEDC fields */
|
||||
+#define ATMEL_TC_QEDC_IDX BIT(0)
|
||||
+#define ATMEL_TC_QEDC_DIRCHG BIT(1)
|
||||
+#define ATMEL_TC_QEDC_QERR BIT(2)
|
||||
+#define ATMEL_TC_QEDC_MPE BIT(3)
|
||||
+#define ATMEL_TC_QEDC_DIR BIT(8)
|
||||
+
|
||||
+/* FMR fields */
|
||||
+#define ATMEL_TC_FMR_ENCF(x) BIT(x)
|
||||
+
|
||||
+/* WPMR fields */
|
||||
+#define ATMEL_TC_WPMR_WPKEY (0x54494d << 8)
|
||||
+#define ATMEL_TC_WPMR_WPEN BIT(0)
|
||||
+
|
||||
+static const u8 atmel_tc_divisors[5] = { 2, 8, 32, 128, 0, };
|
||||
+
|
||||
+static const struct of_device_id atmel_tcb_dt_ids[] = {
|
||||
+ {
|
||||
+ .compatible = "atmel,at91rm9200-tcb",
|
||||
+ .data = (void *)16,
|
||||
+ }, {
|
||||
+ .compatible = "atmel,at91sam9x5-tcb",
|
||||
+ .data = (void *)32,
|
||||
+ }, {
|
||||
+ /* sentinel */
|
||||
+ }
|
||||
+};
|
||||
+
|
||||
+#endif /* __SOC_ATMEL_TCB_H */
|
||||
@@ -0,0 +1,473 @@
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Thu, 13 Sep 2018 13:30:19 +0200
Subject: [PATCH 2/7] clocksource/drivers: Add a new driver for the Atmel ARM
TC blocks

Add a driver for the Atmel Timer Counter Blocks. This driver provides a
clocksource and two clockevent devices.

One of the clockevent devices is linked to the clocksource counter and so it
will run at the same frequency. This will be used when there is only one TCB
channel available for timers.

The other clockevent device runs on a separate TCB channel when available.

This driver uses regmap and syscon to be able to probe early in the boot
and avoid having to switch on the TCB clocksource later. Using regmap also
means that unused TCB channels may be used by other drivers (PWM for
example). readl/writel are still used to access channel-specific registers
to avoid the performance impact of regmap (mainly locking).

Tested-by: Alexander Dahl <ada@thorsis.com>
Tested-by: Andras Szemzo <szemzo.andras@gmail.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/clocksource/Kconfig | 8
drivers/clocksource/Makefile | 3
drivers/clocksource/timer-atmel-tcb.c | 410 ++++++++++++++++++++++++++++++++++
3 files changed, 420 insertions(+), 1 deletion(-)
create mode 100644 drivers/clocksource/timer-atmel-tcb.c

--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -404,6 +404,14 @@ config ATMEL_ST
help
Support for the Atmel ST timer.

+config ATMEL_ARM_TCB_CLKSRC
+ bool "Microchip ARM TC Block" if COMPILE_TEST
+ select REGMAP_MMIO
+ depends on GENERIC_CLOCKEVENTS
+ help
+ This enables build of clocksource and clockevent driver for
+ the integrated Timer Counter Blocks in Microchip ARM SoCs.
+
config CLKSRC_EXYNOS_MCT
bool "Exynos multi core timer driver" if COMPILE_TEST
depends on ARM || ARM64
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -3,7 +3,8 @@ obj-$(CONFIG_TIMER_OF) += timer-of.o
obj-$(CONFIG_TIMER_PROBE) += timer-probe.o
obj-$(CONFIG_ATMEL_PIT) += timer-atmel-pit.o
obj-$(CONFIG_ATMEL_ST) += timer-atmel-st.o
-obj-$(CONFIG_ATMEL_TCB_CLKSRC) += tcb_clksrc.o
+obj-$(CONFIG_ATMEL_TCB_CLKSRC) += tcb_clksrc.o
+obj-$(CONFIG_ATMEL_ARM_TCB_CLKSRC) += timer-atmel-tcb.o
obj-$(CONFIG_X86_PM_TIMER) += acpi_pm.o
obj-$(CONFIG_SCx200HR_TIMER) += scx200_hrt.o
obj-$(CONFIG_CS5535_CLOCK_EVENT_SRC) += cs5535-clockevt.o
--- /dev/null
|
||||
+++ b/drivers/clocksource/timer-atmel-tcb.c
|
||||
@@ -0,0 +1,410 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0
|
||||
+#include <linux/clk.h>
|
||||
+#include <linux/clockchips.h>
|
||||
+#include <linux/clocksource.h>
|
||||
+#include <linux/interrupt.h>
|
||||
+#include <linux/kernel.h>
|
||||
+#include <linux/mfd/syscon.h>
|
||||
+#include <linux/of_address.h>
|
||||
+#include <linux/of_irq.h>
|
||||
+#include <linux/regmap.h>
|
||||
+#include <linux/sched_clock.h>
|
||||
+#include <soc/at91/atmel_tcb.h>
|
||||
+
|
||||
+struct atmel_tcb_clksrc {
|
||||
+ struct clocksource clksrc;
|
||||
+ struct clock_event_device clkevt;
|
||||
+ struct regmap *regmap;
|
||||
+ void __iomem *base;
|
||||
+ struct clk *clk[2];
|
||||
+ char name[20];
|
||||
+ int channels[2];
|
||||
+ int bits;
|
||||
+ int irq;
|
||||
+ struct {
|
||||
+ u32 cmr;
|
||||
+ u32 imr;
|
||||
+ u32 rc;
|
||||
+ bool clken;
|
||||
+ } cache[2];
|
||||
+ u32 bmr_cache;
|
||||
+ bool registered;
|
||||
+ bool clk_enabled;
|
||||
+};
|
||||
+
|
||||
+static struct atmel_tcb_clksrc tc;
|
||||
+
|
||||
+static struct clk *tcb_clk_get(struct device_node *node, int channel)
|
||||
+{
|
||||
+ struct clk *clk;
|
||||
+ char clk_name[] = "t0_clk";
|
||||
+
|
||||
+ clk_name[1] += channel;
|
||||
+ clk = of_clk_get_by_name(node->parent, clk_name);
|
||||
+ if (!IS_ERR(clk))
|
||||
+ return clk;
|
||||
+
|
||||
+ return of_clk_get_by_name(node->parent, "t0_clk");
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * Clocksource and clockevent using the same channel(s)
|
||||
+ */
|
||||
+static u64 tc_get_cycles(struct clocksource *cs)
|
||||
+{
|
||||
+ u32 lower, upper;
|
||||
+
|
||||
+ do {
|
||||
+ upper = readl_relaxed(tc.base + ATMEL_TC_CV(tc.channels[1]));
|
||||
+ lower = readl_relaxed(tc.base + ATMEL_TC_CV(tc.channels[0]));
|
||||
+ } while (upper != readl_relaxed(tc.base + ATMEL_TC_CV(tc.channels[1])));
|
||||
+
|
||||
+ return (upper << 16) | lower;
|
||||
+}
|
||||
+
|
||||
+static u64 tc_get_cycles32(struct clocksource *cs)
|
||||
+{
|
||||
+ return readl_relaxed(tc.base + ATMEL_TC_CV(tc.channels[0]));
|
||||
+}
|
||||
+
|
||||
+static u64 notrace tc_sched_clock_read(void)
|
||||
+{
|
||||
+ return tc_get_cycles(&tc.clksrc);
|
||||
+}
|
||||
+
|
||||
+static u64 notrace tc_sched_clock_read32(void)
|
||||
+{
|
||||
+ return tc_get_cycles32(&tc.clksrc);
|
||||
+}
|
||||
+
|
||||
+static int tcb_clkevt_next_event(unsigned long delta,
|
||||
+ struct clock_event_device *d)
|
||||
+{
|
||||
+ u32 old, next, cur;
|
||||
+
|
||||
+ old = readl(tc.base + ATMEL_TC_CV(tc.channels[0]));
|
||||
+ next = old + delta;
|
||||
+ writel(next, tc.base + ATMEL_TC_RC(tc.channels[0]));
|
||||
+ cur = readl(tc.base + ATMEL_TC_CV(tc.channels[0]));
|
||||
+
|
||||
+ /* check whether the delta elapsed while setting the register */
|
||||
+ if ((next < old && cur < old && cur > next) ||
|
||||
+ (next > old && (cur < old || cur > next))) {
|
||||
+ /*
|
||||
+ * Clear the CPCS bit in the status register to avoid
|
||||
+ * generating a spurious interrupt next time a valid
|
||||
+ * timer event is configured.
|
||||
+ */
|
||||
+ old = readl(tc.base + ATMEL_TC_SR(tc.channels[0]));
|
||||
+ return -ETIME;
|
||||
+ }
|
||||
+
|
||||
+ writel(ATMEL_TC_CPCS, tc.base + ATMEL_TC_IER(tc.channels[0]));
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static irqreturn_t tc_clkevt_irq(int irq, void *handle)
|
||||
+{
|
||||
+ unsigned int sr;
|
||||
+
|
||||
+ sr = readl(tc.base + ATMEL_TC_SR(tc.channels[0]));
|
||||
+ if (sr & ATMEL_TC_CPCS) {
|
||||
+ tc.clkevt.event_handler(&tc.clkevt);
|
||||
+ return IRQ_HANDLED;
|
||||
+ }
|
||||
+
|
||||
+ return IRQ_NONE;
|
||||
+}
|
||||
+
|
||||
+static int tcb_clkevt_oneshot(struct clock_event_device *dev)
|
||||
+{
|
||||
+ if (clockevent_state_oneshot(dev))
|
||||
+ return 0;
|
||||
+
|
||||
+ /*
|
||||
+ * Because both clockevent devices may share the same IRQ, we don't want
|
||||
+ * the less likely one to stay requested
|
||||
+ */
|
||||
+ return request_irq(tc.irq, tc_clkevt_irq, IRQF_TIMER | IRQF_SHARED,
|
||||
+ tc.name, &tc);
|
||||
+}
|
||||
+
|
||||
+static int tcb_clkevt_shutdown(struct clock_event_device *dev)
|
||||
+{
|
||||
+ writel(0xff, tc.base + ATMEL_TC_IDR(tc.channels[0]));
|
||||
+ if (tc.bits == 16)
|
||||
+ writel(0xff, tc.base + ATMEL_TC_IDR(tc.channels[1]));
|
||||
+
|
||||
+ if (!clockevent_state_detached(dev))
|
||||
+ free_irq(tc.irq, &tc);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static void __init tcb_setup_dual_chan(struct atmel_tcb_clksrc *tc,
|
||||
+ int mck_divisor_idx)
|
||||
+{
|
||||
+ /* first channel: waveform mode, input mclk/8, clock TIOA on overflow */
|
||||
+ writel(mck_divisor_idx /* likely divide-by-8 */
|
||||
+ | ATMEL_TC_CMR_WAVE
|
||||
+ | ATMEL_TC_CMR_WAVESEL_UP /* free-run */
|
||||
+ | ATMEL_TC_CMR_ACPA(SET) /* TIOA rises at 0 */
|
||||
+ | ATMEL_TC_CMR_ACPC(CLEAR), /* (duty cycle 50%) */
|
||||
+ tc->base + ATMEL_TC_CMR(tc->channels[0]));
|
||||
+ writel(0x0000, tc->base + ATMEL_TC_RA(tc->channels[0]));
|
||||
+ writel(0x8000, tc->base + ATMEL_TC_RC(tc->channels[0]));
|
||||
+ writel(0xff, tc->base + ATMEL_TC_IDR(tc->channels[0])); /* no irqs */
|
||||
+ writel(ATMEL_TC_CCR_CLKEN, tc->base + ATMEL_TC_CCR(tc->channels[0]));
|
||||
+
|
||||
+ /* second channel: waveform mode, input TIOA */
|
||||
+ writel(ATMEL_TC_CMR_XC(tc->channels[1]) /* input: TIOA */
|
||||
+ | ATMEL_TC_CMR_WAVE
|
||||
+ | ATMEL_TC_CMR_WAVESEL_UP, /* free-run */
|
||||
+ tc->base + ATMEL_TC_CMR(tc->channels[1]));
|
||||
+ writel(0xff, tc->base + ATMEL_TC_IDR(tc->channels[1])); /* no irqs */
|
||||
+ writel(ATMEL_TC_CCR_CLKEN, tc->base + ATMEL_TC_CCR(tc->channels[1]));
|
||||
+
|
||||
+ /* chain both channel, we assume the previous channel */
|
||||
+ regmap_write(tc->regmap, ATMEL_TC_BMR,
|
||||
+ ATMEL_TC_BMR_TCXC(1 + tc->channels[1], tc->channels[1]));
|
||||
+ /* then reset all the timers */
|
||||
+ regmap_write(tc->regmap, ATMEL_TC_BCR, ATMEL_TC_BCR_SYNC);
|
||||
+}
|
||||
+
|
||||
+static void __init tcb_setup_single_chan(struct atmel_tcb_clksrc *tc,
|
||||
+ int mck_divisor_idx)
|
||||
+{
|
||||
+ /* channel 0: waveform mode, input mclk/8 */
|
||||
+ writel(mck_divisor_idx /* likely divide-by-8 */
|
||||
+ | ATMEL_TC_CMR_WAVE
|
||||
+ | ATMEL_TC_CMR_WAVESEL_UP, /* free-run */
|
||||
+ tc->base + ATMEL_TC_CMR(tc->channels[0]));
|
||||
+ writel(0xff, tc->base + ATMEL_TC_IDR(tc->channels[0])); /* no irqs */
|
||||
+ writel(ATMEL_TC_CCR_CLKEN, tc->base + ATMEL_TC_CCR(tc->channels[0]));
|
||||
+
|
||||
+ /* then reset all the timers */
|
||||
+ regmap_write(tc->regmap, ATMEL_TC_BCR, ATMEL_TC_BCR_SYNC);
|
||||
+}
|
||||
+
|
||||
+static void tc_clksrc_suspend(struct clocksource *cs)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ for (i = 0; i < 1 + (tc.bits == 16); i++) {
|
||||
+ tc.cache[i].cmr = readl(tc.base + ATMEL_TC_CMR(tc.channels[i]));
|
||||
+ tc.cache[i].imr = readl(tc.base + ATMEL_TC_IMR(tc.channels[i]));
|
||||
+ tc.cache[i].rc = readl(tc.base + ATMEL_TC_RC(tc.channels[i]));
|
||||
+ tc.cache[i].clken = !!(readl(tc.base +
|
||||
+ ATMEL_TC_SR(tc.channels[i])) &
|
||||
+ ATMEL_TC_CLKSTA);
|
||||
+ }
|
||||
+
|
||||
+ if (tc.bits == 16)
|
||||
+ regmap_read(tc.regmap, ATMEL_TC_BMR, &tc.bmr_cache);
|
||||
+}
|
||||
+
|
||||
+static void tc_clksrc_resume(struct clocksource *cs)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ for (i = 0; i < 1 + (tc.bits == 16); i++) {
|
||||
+ /* Restore registers for the channel, RA and RB are not used */
|
||||
+ writel(tc.cache[i].cmr, tc.base + ATMEL_TC_CMR(tc.channels[i]));
|
||||
+ writel(tc.cache[i].rc, tc.base + ATMEL_TC_RC(tc.channels[i]));
|
||||
+ writel(0, tc.base + ATMEL_TC_RA(tc.channels[i]));
|
||||
+ writel(0, tc.base + ATMEL_TC_RB(tc.channels[i]));
|
||||
+ /* Disable all the interrupts */
|
||||
+ writel(0xff, tc.base + ATMEL_TC_IDR(tc.channels[i]));
|
||||
+ /* Reenable interrupts that were enabled before suspending */
|
||||
+ writel(tc.cache[i].imr, tc.base + ATMEL_TC_IER(tc.channels[i]));
|
||||
+
|
||||
+ /* Start the clock if it was used */
|
||||
+ if (tc.cache[i].clken)
|
||||
+ writel(ATMEL_TC_CCR_CLKEN, tc.base +
|
||||
+ ATMEL_TC_CCR(tc.channels[i]));
|
||||
+ }
|
||||
+
|
||||
+ /* in case of dual channel, chain channels */
|
||||
+ if (tc.bits == 16)
|
||||
+ regmap_write(tc.regmap, ATMEL_TC_BMR, tc.bmr_cache);
|
||||
+ /* Finally, trigger all the channels*/
|
||||
+ regmap_write(tc.regmap, ATMEL_TC_BCR, ATMEL_TC_BCR_SYNC);
|
||||
+}
|
||||
+
|
||||
+static int __init tcb_clksrc_register(struct device_node *node,
|
||||
+ struct regmap *regmap, void __iomem *base,
|
||||
+ int channel, int channel1, int irq,
|
||||
+ int bits)
|
||||
+{
|
||||
+ u32 rate, divided_rate = 0;
|
||||
+ int best_divisor_idx = -1;
|
||||
+ int i, err = -1;
|
||||
+ u64 (*tc_sched_clock)(void);
|
||||
+
|
||||
+ tc.regmap = regmap;
|
||||
+ tc.base = base;
|
||||
+ tc.channels[0] = channel;
|
||||
+ tc.channels[1] = channel1;
|
||||
+ tc.irq = irq;
|
||||
+ tc.bits = bits;
|
||||
+
|
||||
+ tc.clk[0] = tcb_clk_get(node, tc.channels[0]);
|
||||
+ if (IS_ERR(tc.clk[0]))
|
||||
+ return PTR_ERR(tc.clk[0]);
|
||||
+ err = clk_prepare_enable(tc.clk[0]);
|
||||
+ if (err) {
|
||||
+ pr_debug("can't enable T0 clk\n");
|
||||
+ goto err_clk;
|
||||
+ }
|
||||
+
|
||||
+ /* How fast will we be counting? Pick something over 5 MHz. */
|
||||
+ rate = (u32)clk_get_rate(tc.clk[0]);
|
||||
+ for (i = 0; i < 5; i++) {
|
||||
+ unsigned int divisor = atmel_tc_divisors[i];
|
||||
+ unsigned int tmp;
|
||||
+
|
||||
+ if (!divisor)
|
||||
+ continue;
|
||||
+
|
||||
+ tmp = rate / divisor;
|
||||
+ pr_debug("TC: %u / %-3u [%d] --> %u\n", rate, divisor, i, tmp);
|
||||
+ if (best_divisor_idx > 0) {
|
||||
+ if (tmp < 5 * 1000 * 1000)
|
||||
+ continue;
|
||||
+ }
|
||||
+ divided_rate = tmp;
|
||||
+ best_divisor_idx = i;
|
||||
+ }
|
||||
+
|
||||
+ if (tc.bits == 32) {
|
||||
+ tc.clksrc.read = tc_get_cycles32;
|
||||
+ tcb_setup_single_chan(&tc, best_divisor_idx);
|
||||
+ tc_sched_clock = tc_sched_clock_read32;
|
||||
+ snprintf(tc.name, sizeof(tc.name), "%s:%d",
|
||||
+ kbasename(node->parent->full_name), tc.channels[0]);
|
||||
+ } else {
|
||||
+ tc.clk[1] = tcb_clk_get(node, tc.channels[1]);
|
||||
+ if (IS_ERR(tc.clk[1]))
|
||||
+ goto err_disable_t0;
|
||||
+
|
||||
+ err = clk_prepare_enable(tc.clk[1]);
|
||||
+ if (err) {
|
||||
+ pr_debug("can't enable T1 clk\n");
|
||||
+ goto err_clk1;
|
||||
+ }
|
||||
+ tc.clksrc.read = tc_get_cycles,
|
||||
+ tcb_setup_dual_chan(&tc, best_divisor_idx);
|
||||
+ tc_sched_clock = tc_sched_clock_read;
|
||||
+ snprintf(tc.name, sizeof(tc.name), "%s:%d,%d",
|
||||
+ kbasename(node->parent->full_name), tc.channels[0],
|
||||
+ tc.channels[1]);
|
||||
+ }
|
||||
+
|
||||
+ pr_debug("%s at %d.%03d MHz\n", tc.name,
|
||||
+ divided_rate / 1000000,
|
||||
+ ((divided_rate + 500000) % 1000000) / 1000);
|
||||
+
|
||||
+ tc.clksrc.name = tc.name;
|
||||
+ tc.clksrc.suspend = tc_clksrc_suspend;
|
||||
+ tc.clksrc.resume = tc_clksrc_resume;
|
||||
+ tc.clksrc.rating = 200;
|
||||
+ tc.clksrc.mask = CLOCKSOURCE_MASK(32);
|
||||
+ tc.clksrc.flags = CLOCK_SOURCE_IS_CONTINUOUS;
|
||||
+
|
||||
+ err = clocksource_register_hz(&tc.clksrc, divided_rate);
|
||||
+ if (err)
|
||||
+ goto err_disable_t1;
|
||||
+
|
||||
+ sched_clock_register(tc_sched_clock, 32, divided_rate);
|
||||
+
|
||||
+ tc.registered = true;
|
||||
+
|
||||
+ /* Set up and register clockevents */
|
||||
+ tc.clkevt.name = tc.name;
|
||||
+ tc.clkevt.cpumask = cpumask_of(0);
|
||||
+ tc.clkevt.set_next_event = tcb_clkevt_next_event;
|
||||
+ tc.clkevt.set_state_oneshot = tcb_clkevt_oneshot;
|
||||
+ tc.clkevt.set_state_shutdown = tcb_clkevt_shutdown;
|
||||
+ tc.clkevt.features = CLOCK_EVT_FEAT_ONESHOT;
|
||||
+ tc.clkevt.rating = 125;
|
||||
+
|
||||
+ clockevents_config_and_register(&tc.clkevt, divided_rate, 1,
|
||||
+ BIT(tc.bits) - 1);
|
||||
+
|
||||
+ return 0;
|
||||
+
|
||||
+err_disable_t1:
|
||||
+ if (tc.bits == 16)
|
||||
+ clk_disable_unprepare(tc.clk[1]);
|
||||
+
|
||||
+err_clk1:
|
||||
+ if (tc.bits == 16)
|
||||
+ clk_put(tc.clk[1]);
|
||||
+
|
||||
+err_disable_t0:
|
||||
+ clk_disable_unprepare(tc.clk[0]);
|
||||
+
|
||||
+err_clk:
|
||||
+ clk_put(tc.clk[0]);
|
||||
+
|
||||
+ pr_err("%s: unable to register clocksource/clockevent\n",
|
||||
+ tc.clksrc.name);
|
||||
+
|
||||
+ return err;
|
||||
+}
|
||||
+
|
||||
+static int __init tcb_clksrc_init(struct device_node *node)
|
||||
+{
|
||||
+ const struct of_device_id *match;
|
||||
+ struct regmap *regmap;
|
||||
+ void __iomem *tcb_base;
|
||||
+ u32 channel;
|
||||
+ int irq, err, chan1 = -1;
|
||||
+ unsigned bits;
|
||||
+
|
||||
+ if (tc.registered)
|
||||
+ return -ENODEV;
|
||||
+
|
||||
+ /*
|
||||
+ * The regmap has to be used to access registers that are shared
|
||||
+ * between channels on the same TCB but we keep direct IO access for
|
||||
+ * the counters to avoid the impact on performance
|
||||
+ */
|
||||
+ regmap = syscon_node_to_regmap(node->parent);
|
||||
+ if (IS_ERR(regmap))
|
||||
+ return PTR_ERR(regmap);
|
||||
+
|
||||
+ tcb_base = of_iomap(node->parent, 0);
|
||||
+ if (!tcb_base) {
|
||||
+ pr_err("%s +%d %s\n", __FILE__, __LINE__, __func__);
|
||||
+ return -ENXIO;
|
||||
+ }
|
||||
+
|
||||
+ match = of_match_node(atmel_tcb_dt_ids, node->parent);
|
||||
+ bits = (uintptr_t)match->data;
|
||||
+
|
||||
+ err = of_property_read_u32_index(node, "reg", 0, &channel);
|
||||
+ if (err)
|
||||
+ return err;
|
||||
+
|
||||
+ irq = of_irq_get(node->parent, channel);
|
||||
+ if (irq < 0) {
|
||||
+ irq = of_irq_get(node->parent, 0);
|
||||
+ if (irq < 0)
|
||||
+ return irq;
|
||||
+ }
|
||||
+
|
||||
+ if (bits == 16) {
|
||||
+ of_property_read_u32_index(node, "reg", 1, &chan1);
|
||||
+ if (chan1 == -1) {
|
||||
+ pr_err("%s: clocksource needs two channels\n",
|
||||
+ node->parent->full_name);
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return tcb_clksrc_register(node, regmap, tcb_base, channel, chan1, irq,
|
||||
+ bits);
|
||||
+}
|
||||
+TIMER_OF_DECLARE(atmel_tcb_clksrc, "atmel,tcb-timer", tcb_clksrc_init);
|
||||
@@ -0,0 +1,264 @@
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Thu, 13 Sep 2018 13:30:20 +0200
Subject: [PATCH 3/7] clocksource/drivers: timer-atmel-tcb: add clockevent
device on separate channel

Add another clockevent device that uses a separate TCB channel when
available.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/clocksource/timer-atmel-tcb.c | 217 +++++++++++++++++++++++++++++++++-
1 file changed, 212 insertions(+), 5 deletions(-)

--- a/drivers/clocksource/timer-atmel-tcb.c
|
||||
+++ b/drivers/clocksource/timer-atmel-tcb.c
|
||||
@@ -32,7 +32,7 @@ struct atmel_tcb_clksrc {
|
||||
bool clk_enabled;
|
||||
};
|
||||
|
||||
-static struct atmel_tcb_clksrc tc;
|
||||
+static struct atmel_tcb_clksrc tc, tce;
|
||||
|
||||
static struct clk *tcb_clk_get(struct device_node *node, int channel)
|
||||
{
|
||||
@@ -48,6 +48,203 @@ static struct clk *tcb_clk_get(struct de
|
||||
}
|
||||
|
||||
/*
|
||||
+ * Clockevent device using its own channel
|
||||
+ */
|
||||
+
|
||||
+static void tc_clkevt2_clk_disable(struct clock_event_device *d)
|
||||
+{
|
||||
+ clk_disable(tce.clk[0]);
|
||||
+ tce.clk_enabled = false;
|
||||
+}
|
||||
+
|
||||
+static void tc_clkevt2_clk_enable(struct clock_event_device *d)
|
||||
+{
|
||||
+ if (tce.clk_enabled)
|
||||
+ return;
|
||||
+ clk_enable(tce.clk[0]);
|
||||
+ tce.clk_enabled = true;
|
||||
+}
|
||||
+
|
||||
+static int tc_clkevt2_stop(struct clock_event_device *d)
|
||||
+{
|
||||
+ writel(0xff, tce.base + ATMEL_TC_IDR(tce.channels[0]));
|
||||
+ writel(ATMEL_TC_CCR_CLKDIS, tce.base + ATMEL_TC_CCR(tce.channels[0]));
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int tc_clkevt2_shutdown(struct clock_event_device *d)
|
||||
+{
|
||||
+ tc_clkevt2_stop(d);
|
||||
+ if (!clockevent_state_detached(d))
|
||||
+ tc_clkevt2_clk_disable(d);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* For now, we always use the 32K clock ... this optimizes for NO_HZ,
|
||||
+ * because using one of the divided clocks would usually mean the
|
||||
+ * tick rate can never be less than several dozen Hz (vs 0.5 Hz).
|
||||
+ *
|
||||
+ * A divided clock could be good for high resolution timers, since
|
||||
+ * 30.5 usec resolution can seem "low".
|
||||
+ */
|
||||
+static int tc_clkevt2_set_oneshot(struct clock_event_device *d)
|
||||
+{
|
||||
+ if (clockevent_state_oneshot(d) || clockevent_state_periodic(d))
|
||||
+ tc_clkevt2_stop(d);
|
||||
+
|
||||
+ tc_clkevt2_clk_enable(d);
|
||||
+
|
||||
+ /* slow clock, count up to RC, then irq and stop */
|
||||
+ writel(ATMEL_TC_CMR_TCLK(4) | ATMEL_TC_CMR_CPCSTOP |
|
||||
+ ATMEL_TC_CMR_WAVE | ATMEL_TC_CMR_WAVESEL_UPRC,
|
||||
+ tce.base + ATMEL_TC_CMR(tce.channels[0]));
|
||||
+ writel(ATMEL_TC_CPCS, tce.base + ATMEL_TC_IER(tce.channels[0]));
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int tc_clkevt2_set_periodic(struct clock_event_device *d)
|
||||
+{
|
||||
+ if (clockevent_state_oneshot(d) || clockevent_state_periodic(d))
|
||||
+ tc_clkevt2_stop(d);
|
||||
+
|
||||
+ /* By not making the gentime core emulate periodic mode on top
|
||||
+ * of oneshot, we get lower overhead and improved accuracy.
|
||||
+ */
|
||||
+ tc_clkevt2_clk_enable(d);
|
||||
+
|
||||
+ /* slow clock, count up to RC, then irq and restart */
|
||||
+ writel(ATMEL_TC_CMR_TCLK(4) | ATMEL_TC_CMR_WAVE |
|
||||
+ ATMEL_TC_CMR_WAVESEL_UPRC,
|
||||
+ tce.base + ATMEL_TC_CMR(tce.channels[0]));
|
||||
+ writel((32768 + HZ / 2) / HZ, tce.base + ATMEL_TC_RC(tce.channels[0]));
|
||||
+
|
||||
+ /* Enable clock and interrupts on RC compare */
|
||||
+ writel(ATMEL_TC_CPCS, tce.base + ATMEL_TC_IER(tce.channels[0]));
|
||||
+ writel(ATMEL_TC_CCR_CLKEN | ATMEL_TC_CCR_SWTRG,
|
||||
+ tce.base + ATMEL_TC_CCR(tce.channels[0]));
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int tc_clkevt2_next_event(unsigned long delta,
|
||||
+ struct clock_event_device *d)
|
||||
+{
|
||||
+ writel(delta, tce.base + ATMEL_TC_RC(tce.channels[0]));
|
||||
+ writel(ATMEL_TC_CCR_CLKEN | ATMEL_TC_CCR_SWTRG,
|
||||
+ tce.base + ATMEL_TC_CCR(tce.channels[0]));
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static irqreturn_t tc_clkevt2_irq(int irq, void *handle)
|
||||
+{
|
||||
+ unsigned int sr;
|
||||
+
|
||||
+ sr = readl(tce.base + ATMEL_TC_SR(tce.channels[0]));
|
||||
+ if (sr & ATMEL_TC_CPCS) {
|
||||
+ tce.clkevt.event_handler(&tce.clkevt);
|
||||
+ return IRQ_HANDLED;
|
||||
+ }
|
||||
+
|
||||
+ return IRQ_NONE;
|
||||
+}
|
||||
+
|
||||
+static void tc_clkevt2_suspend(struct clock_event_device *d)
|
||||
+{
|
||||
+ tce.cache[0].cmr = readl(tce.base + ATMEL_TC_CMR(tce.channels[0]));
|
||||
+ tce.cache[0].imr = readl(tce.base + ATMEL_TC_IMR(tce.channels[0]));
|
||||
+ tce.cache[0].rc = readl(tce.base + ATMEL_TC_RC(tce.channels[0]));
|
||||
+ tce.cache[0].clken = !!(readl(tce.base + ATMEL_TC_SR(tce.channels[0])) &
|
||||
+ ATMEL_TC_CLKSTA);
|
||||
+}
|
||||
+
|
||||
+static void tc_clkevt2_resume(struct clock_event_device *d)
|
||||
+{
|
||||
+ /* Restore registers for the channel, RA and RB are not used */
|
||||
+ writel(tce.cache[0].cmr, tc.base + ATMEL_TC_CMR(tce.channels[0]));
|
||||
+ writel(tce.cache[0].rc, tc.base + ATMEL_TC_RC(tce.channels[0]));
|
||||
+ writel(0, tc.base + ATMEL_TC_RA(tce.channels[0]));
|
||||
+ writel(0, tc.base + ATMEL_TC_RB(tce.channels[0]));
|
||||
+ /* Disable all the interrupts */
|
||||
+ writel(0xff, tc.base + ATMEL_TC_IDR(tce.channels[0]));
|
||||
+ /* Reenable interrupts that were enabled before suspending */
|
||||
+ writel(tce.cache[0].imr, tc.base + ATMEL_TC_IER(tce.channels[0]));
|
||||
+
|
||||
+ /* Start the clock if it was used */
|
||||
+ if (tce.cache[0].clken)
|
||||
+ writel(ATMEL_TC_CCR_CLKEN | ATMEL_TC_CCR_SWTRG,
|
||||
+ tc.base + ATMEL_TC_CCR(tce.channels[0]));
|
||||
+}
|
||||
+
|
||||
+static int __init tc_clkevt_register(struct device_node *node,
|
||||
+ struct regmap *regmap, void __iomem *base,
|
||||
+ int channel, int irq, int bits)
|
||||
+{
|
||||
+ int ret;
|
||||
+ struct clk *slow_clk;
|
||||
+
|
||||
+ tce.regmap = regmap;
|
||||
+ tce.base = base;
|
||||
+ tce.channels[0] = channel;
|
||||
+ tce.irq = irq;
|
||||
+
|
||||
+ slow_clk = of_clk_get_by_name(node->parent, "slow_clk");
|
||||
+ if (IS_ERR(slow_clk))
|
||||
+ return PTR_ERR(slow_clk);
|
||||
+
|
||||
+ ret = clk_prepare_enable(slow_clk);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+
|
||||
+ tce.clk[0] = tcb_clk_get(node, tce.channels[0]);
|
||||
+ if (IS_ERR(tce.clk[0])) {
|
||||
+ ret = PTR_ERR(tce.clk[0]);
|
||||
+ goto err_slow;
|
||||
+ }
|
||||
+
|
||||
+ snprintf(tce.name, sizeof(tce.name), "%s:%d",
|
||||
+ kbasename(node->parent->full_name), channel);
|
||||
+ tce.clkevt.cpumask = cpumask_of(0);
|
||||
+ tce.clkevt.name = tce.name;
|
||||
+ tce.clkevt.set_next_event = tc_clkevt2_next_event,
|
||||
+ tce.clkevt.set_state_shutdown = tc_clkevt2_shutdown,
|
||||
+ tce.clkevt.set_state_periodic = tc_clkevt2_set_periodic,
|
||||
+ tce.clkevt.set_state_oneshot = tc_clkevt2_set_oneshot,
|
||||
+ tce.clkevt.suspend = tc_clkevt2_suspend,
|
||||
+ tce.clkevt.resume = tc_clkevt2_resume,
|
||||
+ tce.clkevt.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
|
||||
+ tce.clkevt.rating = 140;
|
||||
+
|
||||
+ /* try to enable clk to avoid future errors in mode change */
|
||||
+ ret = clk_prepare_enable(tce.clk[0]);
|
||||
+ if (ret)
|
||||
+ goto err_slow;
|
||||
+ clk_disable(tce.clk[0]);
|
||||
+
|
||||
+ clockevents_config_and_register(&tce.clkevt, 32768, 1,
|
||||
+ CLOCKSOURCE_MASK(bits));
|
||||
+
|
||||
+ ret = request_irq(tce.irq, tc_clkevt2_irq, IRQF_TIMER | IRQF_SHARED,
|
||||
+ tce.clkevt.name, &tce);
|
||||
+ if (ret)
|
||||
+ goto err_clk;
|
||||
+
|
||||
+ tce.registered = true;
|
||||
+
|
||||
+ return 0;
|
||||
+
|
||||
+err_clk:
|
||||
+ clk_unprepare(tce.clk[0]);
|
||||
+err_slow:
|
||||
+ clk_disable_unprepare(slow_clk);
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
* Clocksource and clockevent using the same channel(s)
|
||||
*/
|
||||
static u64 tc_get_cycles(struct clocksource *cs)
|
||||
@@ -363,7 +560,7 @@ static int __init tcb_clksrc_init(struct
|
||||
int irq, err, chan1 = -1;
|
||||
unsigned bits;
|
||||
|
||||
- if (tc.registered)
|
||||
+ if (tc.registered && tce.registered)
|
||||
return -ENODEV;
|
||||
|
||||
/*
|
||||
@@ -395,12 +592,22 @@ static int __init tcb_clksrc_init(struct
|
||||
return irq;
|
||||
}
|
||||
|
||||
+ if (tc.registered)
|
||||
+ return tc_clkevt_register(node, regmap, tcb_base, channel, irq,
|
||||
+ bits);
|
||||
+
|
||||
if (bits == 16) {
|
||||
of_property_read_u32_index(node, "reg", 1, &chan1);
|
||||
if (chan1 == -1) {
|
||||
- pr_err("%s: clocksource needs two channels\n",
|
||||
- node->parent->full_name);
|
||||
- return -EINVAL;
|
||||
+ if (tce.registered) {
|
||||
+ pr_err("%s: clocksource needs two channels\n",
|
||||
+ node->parent->full_name);
|
||||
+ return -EINVAL;
|
||||
+ } else {
|
||||
+ return tc_clkevt_register(node, regmap,
|
||||
+ tcb_base, channel,
|
||||
+ irq, bits);
|
||||
+ }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,29 @@
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Thu, 13 Sep 2018 13:30:21 +0200
Subject: [PATCH 4/7] clocksource/drivers: atmel-pit: make option silent

To conform with the other option, make the ATMEL_PIT option silent so it
can be selected from the platform.

Tested-by: Alexander Dahl <ada@thorsis.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/clocksource/Kconfig | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)

--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -393,8 +393,11 @@ config ARMV7M_SYSTICK
This options enables support for the ARMv7M system timer unit

config ATMEL_PIT
+ bool "Microchip ARM Periodic Interval Timer (PIT)" if COMPILE_TEST
select TIMER_OF if OF
- def_bool SOC_AT91SAM9 || SOC_SAMA5
+ help
+ This enables build of clocksource and clockevent driver for
+ the integrated PIT in Microchip ARM SoCs.

config ATMEL_ST
bool "Atmel ST timer support" if COMPILE_TEST
@@ -0,0 +1,48 @@
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Thu, 13 Sep 2018 13:30:22 +0200
Subject: [PATCH 5/7] ARM: at91: Implement clocksource selection

Allow selecting and unselecting the PIT clocksource driver so it doesn't
have to be compiled when unused.

Tested-by: Alexander Dahl <ada@thorsis.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/mach-at91/Kconfig | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)

--- a/arch/arm/mach-at91/Kconfig
+++ b/arch/arm/mach-at91/Kconfig
@@ -107,6 +107,31 @@ config SOC_AT91SAM9
AT91SAM9X35
AT91SAM9XE

+comment "Clocksource driver selection"
+
+config ATMEL_CLOCKSOURCE_PIT
+ bool "Periodic Interval Timer (PIT) support"
+ depends on SOC_AT91SAM9 || SOC_SAMA5
+ default SOC_AT91SAM9 || SOC_SAMA5
+ select ATMEL_PIT
+ help
+ Select this to get a clocksource based on the Atmel Periodic Interval
+ Timer. It has a relatively low resolution and the TC Block clocksource
+ should be preferred.
+
+config ATMEL_CLOCKSOURCE_TCB
+ bool "Timer Counter Blocks (TCB) support"
+ depends on SOC_AT91RM9200 || SOC_AT91SAM9 || SOC_SAMA5 || COMPILE_TEST
+ default SOC_AT91RM9200 || SOC_AT91SAM9 || SOC_SAMA5
+ depends on !ATMEL_TCLIB
+ select ATMEL_ARM_TCB_CLKSRC
+ help
+ Select this to get a high precision clocksource based on a
+ TC block with a 5+ MHz base clock rate.
+ On platforms with 16-bit counters, two timer channels are combined
+ to make a single 32-bit timer.
+ It can also be used as a clock event device supporting oneshot mode.
+
config HAVE_AT91_UTMI
bool

@@ -0,0 +1,34 @@
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Thu, 13 Sep 2018 13:30:23 +0200
Subject: [PATCH 6/7] ARM: configs: at91: use new TCB timer driver

Unselecting ATMEL_TCLIB switches the TCB timer driver from tcb_clksrc to
timer-atmel-tcb.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/configs/at91_dt_defconfig | 1 -
arch/arm/configs/sama5_defconfig | 1 -
2 files changed, 2 deletions(-)

--- a/arch/arm/configs/at91_dt_defconfig
+++ b/arch/arm/configs/at91_dt_defconfig
@@ -64,7 +64,6 @@ CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_COUNT=4
CONFIG_BLK_DEV_RAM_SIZE=8192
-CONFIG_ATMEL_TCLIB=y
CONFIG_ATMEL_SSC=y
CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
--- a/arch/arm/configs/sama5_defconfig
+++ b/arch/arm/configs/sama5_defconfig
@@ -75,7 +75,6 @@ CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_COUNT=4
CONFIG_BLK_DEV_RAM_SIZE=8192
-CONFIG_ATMEL_TCLIB=y
CONFIG_ATMEL_SSC=y
CONFIG_EEPROM_AT24=y
CONFIG_SCSI=y
@@ -0,0 +1,35 @@
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Thu, 13 Sep 2018 13:30:24 +0200
Subject: [PATCH 7/7] ARM: configs: at91: unselect PIT

The PIT is not required anymore to successfully boot and may actually harm
in case preempt-rt is used because the PIT interrupt is shared.
Disable it so the TCB clocksource is used.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/configs/at91_dt_defconfig | 1 +
arch/arm/configs/sama5_defconfig | 1 +
2 files changed, 2 insertions(+)

--- a/arch/arm/configs/at91_dt_defconfig
+++ b/arch/arm/configs/at91_dt_defconfig
@@ -19,6 +19,7 @@ CONFIG_ARCH_MULTI_V5=y
CONFIG_ARCH_AT91=y
CONFIG_SOC_AT91RM9200=y
CONFIG_SOC_AT91SAM9=y
+# CONFIG_ATMEL_CLOCKSOURCE_PIT is not set
CONFIG_AEABI=y
CONFIG_UACCESS_WITH_MEMCPY=y
CONFIG_ZBOOT_ROM_TEXT=0x0
--- a/arch/arm/configs/sama5_defconfig
+++ b/arch/arm/configs/sama5_defconfig
@@ -20,6 +20,7 @@ CONFIG_ARCH_AT91=y
CONFIG_SOC_SAMA5D2=y
CONFIG_SOC_SAMA5D3=y
CONFIG_SOC_SAMA5D4=y
+# CONFIG_ATMEL_CLOCKSOURCE_PIT is not set
CONFIG_AEABI=y
CONFIG_UACCESS_WITH_MEMCPY=y
CONFIG_ZBOOT_ROM_TEXT=0x0
@@ -0,0 +1,162 @@
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 27 Jul 2018 13:38:54 +0100
Subject: [PATCH] irqchip/gic-v3-its: Move pending table allocation to init
time

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/irqchip/irq-gic-v3-its.c | 80 ++++++++++++++++++++++++-------------
include/linux/irqchip/arm-gic-v3.h | 1
2 files changed, 53 insertions(+), 28 deletions(-)

--- a/drivers/irqchip/irq-gic-v3-its.c
|
||||
+++ b/drivers/irqchip/irq-gic-v3-its.c
|
||||
@@ -179,6 +179,7 @@ static DEFINE_RAW_SPINLOCK(vmovp_lock);
|
||||
static DEFINE_IDA(its_vpeid_ida);
|
||||
|
||||
#define gic_data_rdist() (raw_cpu_ptr(gic_rdists->rdist))
|
||||
+#define gic_data_rdist_cpu(cpu) (per_cpu_ptr(gic_rdists->rdist, cpu))
|
||||
#define gic_data_rdist_rd_base() (gic_data_rdist()->rd_base)
|
||||
#define gic_data_rdist_vlpi_base() (gic_data_rdist_rd_base() + SZ_128K)
|
||||
|
||||
@@ -1628,7 +1629,7 @@ static void its_free_prop_table(struct p
|
||||
get_order(LPI_PROPBASE_SZ));
|
||||
}
|
||||
|
||||
-static int __init its_alloc_lpi_tables(void)
|
||||
+static int __init its_alloc_lpi_prop_table(void)
|
||||
{
|
||||
phys_addr_t paddr;
|
||||
|
||||
@@ -1951,30 +1952,47 @@ static void its_free_pending_table(struc
|
||||
get_order(max_t(u32, LPI_PENDBASE_SZ, SZ_64K)));
|
||||
}
|
||||
|
||||
-static void its_cpu_init_lpis(void)
|
||||
+static int __init allocate_lpi_tables(void)
|
||||
{
|
||||
- void __iomem *rbase = gic_data_rdist_rd_base();
|
||||
- struct page *pend_page;
|
||||
- u64 val, tmp;
|
||||
+ int err, cpu;
|
||||
|
||||
- /* If we didn't allocate the pending table yet, do it now */
|
||||
- pend_page = gic_data_rdist()->pend_page;
|
||||
- if (!pend_page) {
|
||||
- phys_addr_t paddr;
|
||||
+ err = its_alloc_lpi_prop_table();
|
||||
+ if (err)
|
||||
+ return err;
|
||||
+
|
||||
+ /*
|
||||
+ * We allocate all the pending tables anyway, as we may have a
|
||||
+ * mix of RDs that have had LPIs enabled, and some that
|
||||
+ * don't. We'll free the unused ones as each CPU comes online.
|
||||
+ */
|
||||
+ for_each_possible_cpu(cpu) {
|
||||
+ struct page *pend_page;
|
||||
|
||||
pend_page = its_allocate_pending_table(GFP_NOWAIT);
|
||||
if (!pend_page) {
|
||||
- pr_err("Failed to allocate PENDBASE for CPU%d\n",
|
||||
- smp_processor_id());
|
||||
- return;
|
||||
+ pr_err("Failed to allocate PENDBASE for CPU%d\n", cpu);
|
||||
+ return -ENOMEM;
|
||||
}
|
||||
|
||||
- paddr = page_to_phys(pend_page);
|
||||
- pr_info("CPU%d: using LPI pending table @%pa\n",
|
||||
- smp_processor_id(), &paddr);
|
||||
- gic_data_rdist()->pend_page = pend_page;
|
||||
+ gic_data_rdist_cpu(cpu)->pend_page = pend_page;
|
||||
}
|
||||
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static void its_cpu_init_lpis(void)
|
||||
+{
|
||||
+ void __iomem *rbase = gic_data_rdist_rd_base();
|
||||
+ struct page *pend_page;
|
||||
+ phys_addr_t paddr;
|
||||
+ u64 val, tmp;
|
||||
+
|
||||
+ if (gic_data_rdist()->lpi_enabled)
|
||||
+ return;
|
||||
+
|
||||
+ pend_page = gic_data_rdist()->pend_page;
|
||||
+ paddr = page_to_phys(pend_page);
|
||||
+
|
||||
/* set PROPBASE */
|
||||
val = (page_to_phys(gic_rdists->prop_page) |
|
||||
GICR_PROPBASER_InnerShareable |
|
||||
@@ -2026,6 +2044,10 @@ static void its_cpu_init_lpis(void)
|
||||
|
||||
/* Make sure the GIC has seen the above */
|
||||
dsb(sy);
|
||||
+ gic_data_rdist()->lpi_enabled = true;
|
||||
+ pr_info("GICv3: CPU%d: using LPI pending table @%pa\n",
|
||||
+ smp_processor_id(),
|
||||
+ &paddr);
|
||||
}
|
||||
|
||||
static void its_cpu_init_collection(struct its_node *its)
|
||||
@@ -3521,16 +3543,6 @@ static int redist_disable_lpis(void)
|
||||
u64 timeout = USEC_PER_SEC;
|
||||
u64 val;
|
||||
|
||||
- /*
|
||||
- * If coming via a CPU hotplug event, we don't need to disable
|
||||
- * LPIs before trying to re-enable them. They are already
|
||||
- * configured and all is well in the world. Detect this case
|
||||
- * by checking the allocation of the pending table for the
|
||||
- * current CPU.
|
||||
- */
|
||||
- if (gic_data_rdist()->pend_page)
|
||||
- return 0;
|
||||
-
|
||||
if (!gic_rdists_supports_plpis()) {
|
||||
pr_info("CPU%d: LPIs not supported\n", smp_processor_id());
|
||||
return -ENXIO;
|
||||
@@ -3540,7 +3552,18 @@ static int redist_disable_lpis(void)
|
||||
if (!(val & GICR_CTLR_ENABLE_LPIS))
|
||||
return 0;
|
||||
|
||||
- pr_warn("CPU%d: Booted with LPIs enabled, memory probably corrupted\n",
|
||||
+ /*
|
||||
+ * If coming via a CPU hotplug event, we don't need to disable
|
||||
+ * LPIs before trying to re-enable them. They are already
|
||||
+ * configured and all is well in the world.
|
||||
+ */
|
||||
+ if (gic_data_rdist()->lpi_enabled)
|
||||
+ return 0;
|
||||
+
|
||||
+ /*
|
||||
+ * From that point on, we only try to do some damage control.
|
||||
+ */
|
||||
+ pr_warn("GICv3: CPU%d: Booted with LPIs enabled, memory probably corrupted\n",
|
||||
smp_processor_id());
|
||||
add_taint(TAINT_CRAP, LOCKDEP_STILL_OK);
|
||||
|
||||
@@ -3796,7 +3819,8 @@ int __init its_init(struct fwnode_handle
|
||||
}
|
||||
|
||||
gic_rdists = rdists;
|
||||
- err = its_alloc_lpi_tables();
|
||||
+
|
||||
+ err = allocate_lpi_tables();
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
--- a/include/linux/irqchip/arm-gic-v3.h
|
||||
+++ b/include/linux/irqchip/arm-gic-v3.h
|
||||
@@ -585,6 +585,7 @@ struct rdists {
|
||||
void __iomem *rd_base;
|
||||
struct page *pend_page;
|
||||
phys_addr_t phys_base;
|
||||
+ bool lpi_enabled;
|
||||
} __percpu *rdist;
|
||||
struct page *prop_page;
|
||||
u64 flags;
|
||||
@@ -0,0 +1,194 @@
From: Julia Cartwright <julia@ni.com>
Date: Fri, 28 Sep 2018 21:03:51 +0000
Subject: [PATCH] kthread: convert worker lock to raw spinlock

In order to enable the queuing of kthread work items from hardirq
context even when PREEMPT_RT_FULL is enabled, convert the worker
spin_lock to a raw_spin_lock.

This is only acceptable to do because the work performed under the lock
is well-bounded and minimal.

Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Guenter Roeck <linux@roeck-us.net>
Reported-and-tested-by: Steffen Trumtrar <s.trumtrar@pengutronix.de>
Reported-by: Tim Sander <tim@krieglstein.org>
Signed-off-by: Julia Cartwright <julia@ni.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/kthread.h | 2 +-
kernel/kthread.c | 42 +++++++++++++++++++++---------------------
2 files changed, 22 insertions(+), 22 deletions(-)

--- a/include/linux/kthread.h
|
||||
+++ b/include/linux/kthread.h
|
||||
@@ -85,7 +85,7 @@ enum {
|
||||
|
||||
struct kthread_worker {
|
||||
unsigned int flags;
|
||||
- spinlock_t lock;
|
||||
+ raw_spinlock_t lock;
|
||||
struct list_head work_list;
|
||||
struct list_head delayed_work_list;
|
||||
struct task_struct *task;
|
||||
--- a/kernel/kthread.c
|
||||
+++ b/kernel/kthread.c
|
||||
@@ -599,7 +599,7 @@ void __kthread_init_worker(struct kthrea
|
||||
struct lock_class_key *key)
|
||||
{
|
||||
memset(worker, 0, sizeof(struct kthread_worker));
|
||||
- spin_lock_init(&worker->lock);
|
||||
+ raw_spin_lock_init(&worker->lock);
|
||||
lockdep_set_class_and_name(&worker->lock, key, name);
|
||||
INIT_LIST_HEAD(&worker->work_list);
|
||||
INIT_LIST_HEAD(&worker->delayed_work_list);
|
||||
@@ -641,21 +641,21 @@ int kthread_worker_fn(void *worker_ptr)
|
||||
|
||||
if (kthread_should_stop()) {
|
||||
__set_current_state(TASK_RUNNING);
|
||||
- spin_lock_irq(&worker->lock);
|
||||
+ raw_spin_lock_irq(&worker->lock);
|
||||
worker->task = NULL;
|
||||
- spin_unlock_irq(&worker->lock);
|
||||
+ raw_spin_unlock_irq(&worker->lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
work = NULL;
|
||||
- spin_lock_irq(&worker->lock);
|
||||
+ raw_spin_lock_irq(&worker->lock);
|
||||
if (!list_empty(&worker->work_list)) {
|
||||
work = list_first_entry(&worker->work_list,
|
||||
struct kthread_work, node);
|
||||
list_del_init(&work->node);
|
||||
}
|
||||
worker->current_work = work;
|
||||
- spin_unlock_irq(&worker->lock);
|
||||
+ raw_spin_unlock_irq(&worker->lock);
|
||||
|
||||
if (work) {
|
||||
__set_current_state(TASK_RUNNING);
|
||||
@@ -812,12 +812,12 @@ bool kthread_queue_work(struct kthread_w
|
||||
bool ret = false;
|
||||
unsigned long flags;
|
||||
|
||||
- spin_lock_irqsave(&worker->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&worker->lock, flags);
|
||||
if (!queuing_blocked(worker, work)) {
|
||||
kthread_insert_work(worker, work, &worker->work_list);
|
||||
ret = true;
|
||||
}
|
||||
- spin_unlock_irqrestore(&worker->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&worker->lock, flags);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kthread_queue_work);
|
||||
@@ -843,7 +843,7 @@ void kthread_delayed_work_timer_fn(struc
|
||||
if (WARN_ON_ONCE(!worker))
|
||||
return;
|
||||
|
||||
- spin_lock(&worker->lock);
|
||||
+ raw_spin_lock(&worker->lock);
|
||||
/* Work must not be used with >1 worker, see kthread_queue_work(). */
|
||||
WARN_ON_ONCE(work->worker != worker);
|
||||
|
||||
@@ -852,7 +852,7 @@ void kthread_delayed_work_timer_fn(struc
|
||||
list_del_init(&work->node);
|
||||
kthread_insert_work(worker, work, &worker->work_list);
|
||||
|
||||
- spin_unlock(&worker->lock);
|
||||
+ raw_spin_unlock(&worker->lock);
|
||||
}
|
||||
EXPORT_SYMBOL(kthread_delayed_work_timer_fn);
|
||||
|
||||
@@ -908,14 +908,14 @@ bool kthread_queue_delayed_work(struct k
|
||||
unsigned long flags;
|
||||
bool ret = false;
|
||||
|
||||
- spin_lock_irqsave(&worker->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&worker->lock, flags);
|
||||
|
||||
if (!queuing_blocked(worker, work)) {
|
||||
__kthread_queue_delayed_work(worker, dwork, delay);
|
||||
ret = true;
|
||||
}
|
||||
|
||||
- spin_unlock_irqrestore(&worker->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&worker->lock, flags);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kthread_queue_delayed_work);
|
||||
@@ -951,7 +951,7 @@ void kthread_flush_work(struct kthread_w
|
||||
if (!worker)
|
||||
return;
|
||||
|
||||
- spin_lock_irq(&worker->lock);
|
||||
+ raw_spin_lock_irq(&worker->lock);
|
||||
/* Work must not be used with >1 worker, see kthread_queue_work(). */
|
||||
WARN_ON_ONCE(work->worker != worker);
|
||||
|
||||
@@ -963,7 +963,7 @@ void kthread_flush_work(struct kthread_w
|
||||
else
|
||||
noop = true;
|
||||
|
||||
- spin_unlock_irq(&worker->lock);
|
||||
+ raw_spin_unlock_irq(&worker->lock);
|
||||
|
||||
if (!noop)
|
||||
wait_for_completion(&fwork.done);
|
||||
@@ -996,9 +996,9 @@ static bool __kthread_cancel_work(struct
|
||||
* any queuing is blocked by setting the canceling counter.
|
||||
*/
|
||||
work->canceling++;
|
||||
- spin_unlock_irqrestore(&worker->lock, *flags);
|
||||
+ raw_spin_unlock_irqrestore(&worker->lock, *flags);
|
||||
del_timer_sync(&dwork->timer);
|
||||
- spin_lock_irqsave(&worker->lock, *flags);
|
||||
+ raw_spin_lock_irqsave(&worker->lock, *flags);
|
||||
work->canceling--;
|
||||
}
|
||||
|
||||
@@ -1045,7 +1045,7 @@ bool kthread_mod_delayed_work(struct kth
|
||||
unsigned long flags;
|
||||
int ret = false;
|
||||
|
||||
- spin_lock_irqsave(&worker->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&worker->lock, flags);
|
||||
|
||||
/* Do not bother with canceling when never queued. */
|
||||
if (!work->worker)
|
||||
@@ -1062,7 +1062,7 @@ bool kthread_mod_delayed_work(struct kth
|
||||
fast_queue:
|
||||
__kthread_queue_delayed_work(worker, dwork, delay);
|
||||
out:
|
||||
- spin_unlock_irqrestore(&worker->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&worker->lock, flags);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kthread_mod_delayed_work);
|
||||
@@ -1076,7 +1076,7 @@ static bool __kthread_cancel_work_sync(s
|
||||
if (!worker)
|
||||
goto out;
|
||||
|
||||
- spin_lock_irqsave(&worker->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&worker->lock, flags);
|
||||
/* Work must not be used with >1 worker, see kthread_queue_work(). */
|
||||
WARN_ON_ONCE(work->worker != worker);
|
||||
|
||||
@@ -1090,13 +1090,13 @@ static bool __kthread_cancel_work_sync(s
|
||||
* In the meantime, block any queuing by setting the canceling counter.
|
||||
*/
|
||||
work->canceling++;
|
||||
- spin_unlock_irqrestore(&worker->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&worker->lock, flags);
|
||||
kthread_flush_work(work);
|
||||
- spin_lock_irqsave(&worker->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&worker->lock, flags);
|
||||
work->canceling--;
|
||||
|
||||
out_fast:
|
||||
- spin_unlock_irqrestore(&worker->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&worker->lock, flags);
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
@@ -0,0 +1,131 @@
From: =?UTF-8?q?Horia=20Geant=C4=83?= <horia.geanta@nxp.com>
Date: Mon, 8 Oct 2018 14:09:37 +0300
Subject: [PATCH] crypto: caam/qi - simplify CGR allocation, freeing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[Upstream commit 29e83c757006fd751966bdc53392bb22d74179c6]

CGRs (Congestion Groups) have to be freed by the same CPU that
initialized them.
This is why currently the driver takes special measures; however, using
set_cpus_allowed_ptr() is incorrect - as reported by Sebastian.

Instead of the generic solution of replacing set_cpus_allowed_ptr() with
work_on_cpu_safe(), we use the qman_delete_cgr_safe() QBMan API instead
of qman_delete_cgr() - which internally takes care of proper CGR
deletion.

Link: https://lkml.kernel.org/r/20181005125443.dfhd2asqktm22ney@linutronix.de
Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
drivers/crypto/caam/qi.c | 43 ++++---------------------------------------
drivers/crypto/caam/qi.h | 2 +-
2 files changed, 5 insertions(+), 40 deletions(-)

--- a/drivers/crypto/caam/qi.c
|
||||
+++ b/drivers/crypto/caam/qi.c
|
||||
@@ -84,13 +84,6 @@ static u64 times_congested;
|
||||
#endif
|
||||
|
||||
/*
|
||||
- * CPU from where the module initialised. This is required because QMan driver
|
||||
- * requires CGRs to be removed from same CPU from where they were originally
|
||||
- * allocated.
|
||||
- */
|
||||
-static int mod_init_cpu;
|
||||
-
|
||||
-/*
|
||||
* This is a a cache of buffers, from which the users of CAAM QI driver
|
||||
* can allocate short (CAAM_QI_MEMCACHE_SIZE) buffers. It's faster than
|
||||
* doing malloc on the hotpath.
|
||||
@@ -492,12 +485,11 @@ void caam_drv_ctx_rel(struct caam_drv_ct
|
||||
}
|
||||
EXPORT_SYMBOL(caam_drv_ctx_rel);
|
||||
|
||||
-int caam_qi_shutdown(struct device *qidev)
|
||||
+void caam_qi_shutdown(struct device *qidev)
|
||||
{
|
||||
- int i, ret;
|
||||
+ int i;
|
||||
struct caam_qi_priv *priv = dev_get_drvdata(qidev);
|
||||
const cpumask_t *cpus = qman_affine_cpus();
|
||||
- struct cpumask old_cpumask = current->cpus_allowed;
|
||||
|
||||
for_each_cpu(i, cpus) {
|
||||
struct napi_struct *irqtask;
|
||||
@@ -510,26 +502,12 @@ int caam_qi_shutdown(struct device *qide
|
||||
dev_err(qidev, "Rsp FQ kill failed, cpu: %d\n", i);
|
||||
}
|
||||
|
||||
- /*
|
||||
- * QMan driver requires CGRs to be deleted from same CPU from where they
|
||||
- * were instantiated. Hence we get the module removal execute from the
|
||||
- * same CPU from where it was originally inserted.
|
||||
- */
|
||||
- set_cpus_allowed_ptr(current, get_cpu_mask(mod_init_cpu));
|
||||
-
|
||||
- ret = qman_delete_cgr(&priv->cgr);
|
||||
- if (ret)
|
||||
- dev_err(qidev, "Deletion of CGR failed: %d\n", ret);
|
||||
- else
|
||||
- qman_release_cgrid(priv->cgr.cgrid);
|
||||
+ qman_delete_cgr_safe(&priv->cgr);
|
||||
+ qman_release_cgrid(priv->cgr.cgrid);
|
||||
|
||||
kmem_cache_destroy(qi_cache);
|
||||
|
||||
- /* Now that we're done with the CGRs, restore the cpus allowed mask */
|
||||
- set_cpus_allowed_ptr(current, &old_cpumask);
|
||||
-
|
||||
platform_device_unregister(priv->qi_pdev);
|
||||
- return ret;
|
||||
}
|
||||
|
||||
static void cgr_cb(struct qman_portal *qm, struct qman_cgr *cgr, int congested)
|
||||
@@ -718,22 +696,11 @@ int caam_qi_init(struct platform_device
|
||||
struct device *ctrldev = &caam_pdev->dev, *qidev;
|
||||
struct caam_drv_private *ctrlpriv;
|
||||
const cpumask_t *cpus = qman_affine_cpus();
|
||||
- struct cpumask old_cpumask = current->cpus_allowed;
|
||||
static struct platform_device_info qi_pdev_info = {
|
||||
.name = "caam_qi",
|
||||
.id = PLATFORM_DEVID_NONE
|
||||
};
|
||||
|
||||
- /*
|
||||
- * QMAN requires CGRs to be removed from same CPU+portal from where it
|
||||
- * was originally allocated. Hence we need to note down the
|
||||
- * initialisation CPU and use the same CPU for module exit.
|
||||
- * We select the first CPU to from the list of portal owning CPUs.
|
||||
- * Then we pin module init to this CPU.
|
||||
- */
|
||||
- mod_init_cpu = cpumask_first(cpus);
|
||||
- set_cpus_allowed_ptr(current, get_cpu_mask(mod_init_cpu));
|
||||
-
|
||||
qi_pdev_info.parent = ctrldev;
|
||||
qi_pdev_info.dma_mask = dma_get_mask(ctrldev);
|
||||
qi_pdev = platform_device_register_full(&qi_pdev_info);
|
||||
@@ -795,8 +762,6 @@ int caam_qi_init(struct platform_device
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
- /* Done with the CGRs; restore the cpus allowed mask */
|
||||
- set_cpus_allowed_ptr(current, &old_cpumask);
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
debugfs_create_file("qi_congested", 0444, ctrlpriv->ctl,
|
||||
×_congested, &caam_fops_u64_ro);
|
||||
--- a/drivers/crypto/caam/qi.h
|
||||
+++ b/drivers/crypto/caam/qi.h
|
||||
@@ -174,7 +174,7 @@ int caam_drv_ctx_update(struct caam_drv_
|
||||
void caam_drv_ctx_rel(struct caam_drv_ctx *drv_ctx);
|
||||
|
||||
int caam_qi_init(struct platform_device *pdev);
|
||||
-int caam_qi_shutdown(struct device *dev);
|
||||
+void caam_qi_shutdown(struct device *dev);
|
||||
|
||||
/**
|
||||
* qi_cache_alloc - Allocate buffers from CAAM-QI cache
|
||||
@@ -0,0 +1,141 @@
|
||||
From: Peter Zijlstra <peterz@infradead.org>
|
||||
Date: Mon, 7 Jan 2019 13:52:31 +0100
|
||||
Subject: [PATCH] sched/fair: Robustify CFS-bandwidth timer locking
|
||||
|
||||
Traditionally hrtimer callbacks were run with IRQs disabled, but with
|
||||
the introduction of HRTIMER_MODE_SOFT it is possible they run from
|
||||
SoftIRQ context, which does _NOT_ have IRQs disabled.
|
||||
|
||||
Allow for the CFS bandwidth timers (period_timer and slack_timer) to
|
||||
be ran from SoftIRQ context; this entails removing the assumption that
|
||||
IRQs are already disabled from the locking.
|
||||
|
||||
While mainline doesn't strictly need this, -RT forces all timers not
|
||||
explicitly marked with MODE_HARD into MODE_SOFT and trips over this.
|
||||
And marking these timers as MODE_HARD doesn't make sense as they're
|
||||
not required for RT operation and can potentially be quite expensive.
|
||||
|
||||
Cc: Ingo Molnar <mingo@redhat.com>
|
||||
Cc: Thomas Gleixner <tglx@linutronix.de>
|
||||
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Reported-by: Tom Putzeys <tom.putzeys@be.atlascopco.com>
|
||||
Tested-by: Mike Galbraith <efault@gmx.de>
|
||||
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
|
||||
Link: https://lkml.kernel.org/r/20190107125231.GE14122@hirez.programming.kicks-ass.net
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/sched/fair.c | 30 ++++++++++++++++--------------
|
||||
1 file changed, 16 insertions(+), 14 deletions(-)
|
||||
|
||||
--- a/kernel/sched/fair.c
|
||||
+++ b/kernel/sched/fair.c
|
||||
@@ -4553,7 +4553,7 @@ static u64 distribute_cfs_runtime(struct
|
||||
struct rq *rq = rq_of(cfs_rq);
|
||||
struct rq_flags rf;
|
||||
|
||||
- rq_lock(rq, &rf);
|
||||
+ rq_lock_irqsave(rq, &rf);
|
||||
if (!cfs_rq_throttled(cfs_rq))
|
||||
goto next;
|
||||
|
||||
@@ -4570,7 +4570,7 @@ static u64 distribute_cfs_runtime(struct
|
||||
unthrottle_cfs_rq(cfs_rq);
|
||||
|
||||
next:
|
||||
- rq_unlock(rq, &rf);
|
||||
+ rq_unlock_irqrestore(rq, &rf);
|
||||
|
||||
if (!remaining)
|
||||
break;
|
||||
@@ -4586,7 +4586,7 @@ static u64 distribute_cfs_runtime(struct
|
||||
* period the timer is deactivated until scheduling resumes; cfs_b->idle is
|
||||
* used to track this state.
|
||||
*/
|
||||
-static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
|
||||
+static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun, unsigned long flags)
|
||||
{
|
||||
u64 runtime, runtime_expires;
|
||||
int throttled;
|
||||
@@ -4628,11 +4628,11 @@ static int do_sched_cfs_period_timer(str
|
||||
while (throttled && cfs_b->runtime > 0 && !cfs_b->distribute_running) {
|
||||
runtime = cfs_b->runtime;
|
||||
cfs_b->distribute_running = 1;
|
||||
- raw_spin_unlock(&cfs_b->lock);
|
||||
+ raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
|
||||
/* we can't nest cfs_b->lock while distributing bandwidth */
|
||||
runtime = distribute_cfs_runtime(cfs_b, runtime,
|
||||
runtime_expires);
|
||||
- raw_spin_lock(&cfs_b->lock);
|
||||
+ raw_spin_lock_irqsave(&cfs_b->lock, flags);
|
||||
|
||||
cfs_b->distribute_running = 0;
|
||||
throttled = !list_empty(&cfs_b->throttled_cfs_rq);
|
||||
@@ -4741,17 +4741,18 @@ static __always_inline void return_cfs_r
|
||||
static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
|
||||
{
|
||||
u64 runtime = 0, slice = sched_cfs_bandwidth_slice();
|
||||
+ unsigned long flags;
|
||||
u64 expires;
|
||||
|
||||
/* confirm we're still not at a refresh boundary */
|
||||
- raw_spin_lock(&cfs_b->lock);
|
||||
+ raw_spin_lock_irqsave(&cfs_b->lock, flags);
|
||||
if (cfs_b->distribute_running) {
|
||||
- raw_spin_unlock(&cfs_b->lock);
|
||||
+ raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
|
||||
return;
|
||||
}
|
||||
|
||||
if (runtime_refresh_within(cfs_b, min_bandwidth_expiration)) {
|
||||
- raw_spin_unlock(&cfs_b->lock);
|
||||
+ raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -4762,18 +4763,18 @@ static void do_sched_cfs_slack_timer(str
|
||||
if (runtime)
|
||||
cfs_b->distribute_running = 1;
|
||||
|
||||
- raw_spin_unlock(&cfs_b->lock);
|
||||
+ raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
|
||||
|
||||
if (!runtime)
|
||||
return;
|
||||
|
||||
runtime = distribute_cfs_runtime(cfs_b, runtime, expires);
|
||||
|
||||
- raw_spin_lock(&cfs_b->lock);
|
||||
+ raw_spin_lock_irqsave(&cfs_b->lock, flags);
|
||||
if (expires == cfs_b->runtime_expires)
|
||||
cfs_b->runtime -= min(runtime, cfs_b->runtime);
|
||||
cfs_b->distribute_running = 0;
|
||||
- raw_spin_unlock(&cfs_b->lock);
|
||||
+ raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -4851,20 +4852,21 @@ static enum hrtimer_restart sched_cfs_pe
|
||||
{
|
||||
struct cfs_bandwidth *cfs_b =
|
||||
container_of(timer, struct cfs_bandwidth, period_timer);
|
||||
+ unsigned long flags;
|
||||
int overrun;
|
||||
int idle = 0;
|
||||
|
||||
- raw_spin_lock(&cfs_b->lock);
|
||||
+ raw_spin_lock_irqsave(&cfs_b->lock, flags);
|
||||
for (;;) {
|
||||
overrun = hrtimer_forward_now(timer, cfs_b->period);
|
||||
if (!overrun)
|
||||
break;
|
||||
|
||||
- idle = do_sched_cfs_period_timer(cfs_b, overrun);
|
||||
+ idle = do_sched_cfs_period_timer(cfs_b, overrun, flags);
|
||||
}
|
||||
if (idle)
|
||||
cfs_b->period_active = 0;
|
||||
- raw_spin_unlock(&cfs_b->lock);
|
||||
+ raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
|
||||
|
||||
return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
|
||||
}
|
||||
411
kernel/patches-4.19.x-rt/0012-arm-convert-boot-lock-to-raw.patch
Normal file
411
kernel/patches-4.19.x-rt/0012-arm-convert-boot-lock-to-raw.patch
Normal file
@@ -0,0 +1,411 @@
|
||||
From: Frank Rowand <frank.rowand@am.sony.com>
|
||||
Date: Mon, 19 Sep 2011 14:51:14 -0700
|
||||
Subject: arm: Convert arm boot_lock to raw
|
||||
|
||||
The arm boot_lock is used by the secondary processor startup code. The locking
|
||||
task is the idle thread, which has idle->sched_class == &idle_sched_class.
|
||||
idle_sched_class->enqueue_task == NULL, so if the idle task blocks on the
|
||||
lock, the attempt to wake it when the lock becomes available will fail:
|
||||
|
||||
try_to_wake_up()
|
||||
...
|
||||
activate_task()
|
||||
enqueue_task()
|
||||
p->sched_class->enqueue_task(rq, p, flags)
|
||||
|
||||
Fix by converting boot_lock to a raw spin lock.
|
||||
|
||||
Signed-off-by: Frank Rowand <frank.rowand@am.sony.com>
|
||||
Link: http://lkml.kernel.org/r/4E77B952.3010606@am.sony.com
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Tested-by: Tony Lindgren <tony@atomide.com>
|
||||
Acked-by: Krzysztof Kozlowski <krzk@kernel.org>
|
||||
Tested-by: Krzysztof Kozlowski <krzk@kernel.org> [Exynos5422 Linaro PM-QA]
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
arch/arm/mach-exynos/platsmp.c | 12 ++++++------
|
||||
arch/arm/mach-hisi/platmcpm.c | 22 +++++++++++-----------
|
||||
arch/arm/mach-omap2/omap-smp.c | 10 +++++-----
|
||||
arch/arm/mach-prima2/platsmp.c | 10 +++++-----
|
||||
arch/arm/mach-qcom/platsmp.c | 10 +++++-----
|
||||
arch/arm/mach-spear/platsmp.c | 10 +++++-----
|
||||
arch/arm/mach-sti/platsmp.c | 10 +++++-----
|
||||
arch/arm/plat-versatile/platsmp.c | 10 +++++-----
|
||||
8 files changed, 47 insertions(+), 47 deletions(-)
|
||||
|
||||
--- a/arch/arm/mach-exynos/platsmp.c
|
||||
+++ b/arch/arm/mach-exynos/platsmp.c
|
||||
@@ -239,7 +239,7 @@ static void write_pen_release(int val)
|
||||
sync_cache_w(&pen_release);
|
||||
}
|
||||
|
||||
-static DEFINE_SPINLOCK(boot_lock);
|
||||
+static DEFINE_RAW_SPINLOCK(boot_lock);
|
||||
|
||||
static void exynos_secondary_init(unsigned int cpu)
|
||||
{
|
||||
@@ -252,8 +252,8 @@ static void exynos_secondary_init(unsign
|
||||
/*
|
||||
* Synchronise with the boot thread.
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
}
|
||||
|
||||
int exynos_set_boot_addr(u32 core_id, unsigned long boot_addr)
|
||||
@@ -317,7 +317,7 @@ static int exynos_boot_secondary(unsigne
|
||||
* Set synchronisation state between this boot processor
|
||||
* and the secondary one
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
|
||||
/*
|
||||
* The secondary processor is waiting to be released from
|
||||
@@ -344,7 +344,7 @@ static int exynos_boot_secondary(unsigne
|
||||
|
||||
if (timeout == 0) {
|
||||
printk(KERN_ERR "cpu1 power enable failed");
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
return -ETIMEDOUT;
|
||||
}
|
||||
}
|
||||
@@ -390,7 +390,7 @@ static int exynos_boot_secondary(unsigne
|
||||
* calibrations, then wait for it to finish
|
||||
*/
|
||||
fail:
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
|
||||
return pen_release != -1 ? ret : 0;
|
||||
}
|
||||
--- a/arch/arm/mach-hisi/platmcpm.c
|
||||
+++ b/arch/arm/mach-hisi/platmcpm.c
|
||||
@@ -61,7 +61,7 @@
|
||||
|
||||
static void __iomem *sysctrl, *fabric;
|
||||
static int hip04_cpu_table[HIP04_MAX_CLUSTERS][HIP04_MAX_CPUS_PER_CLUSTER];
|
||||
-static DEFINE_SPINLOCK(boot_lock);
|
||||
+static DEFINE_RAW_SPINLOCK(boot_lock);
|
||||
static u32 fabric_phys_addr;
|
||||
/*
|
||||
* [0]: bootwrapper physical address
|
||||
@@ -113,7 +113,7 @@ static int hip04_boot_secondary(unsigned
|
||||
if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER)
|
||||
return -EINVAL;
|
||||
|
||||
- spin_lock_irq(&boot_lock);
|
||||
+ raw_spin_lock_irq(&boot_lock);
|
||||
|
||||
if (hip04_cpu_table[cluster][cpu])
|
||||
goto out;
|
||||
@@ -147,7 +147,7 @@ static int hip04_boot_secondary(unsigned
|
||||
|
||||
out:
|
||||
hip04_cpu_table[cluster][cpu]++;
|
||||
- spin_unlock_irq(&boot_lock);
|
||||
+ raw_spin_unlock_irq(&boot_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -162,11 +162,11 @@ static void hip04_cpu_die(unsigned int l
|
||||
cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
|
||||
cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
|
||||
|
||||
- spin_lock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
hip04_cpu_table[cluster][cpu]--;
|
||||
if (hip04_cpu_table[cluster][cpu] == 1) {
|
||||
/* A power_up request went ahead of us. */
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
return;
|
||||
} else if (hip04_cpu_table[cluster][cpu] > 1) {
|
||||
pr_err("Cluster %d CPU%d boots multiple times\n", cluster, cpu);
|
||||
@@ -174,7 +174,7 @@ static void hip04_cpu_die(unsigned int l
|
||||
}
|
||||
|
||||
last_man = hip04_cluster_is_down(cluster);
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
if (last_man) {
|
||||
/* Since it's Cortex A15, disable L2 prefetching. */
|
||||
asm volatile(
|
||||
@@ -203,7 +203,7 @@ static int hip04_cpu_kill(unsigned int l
|
||||
cpu >= HIP04_MAX_CPUS_PER_CLUSTER);
|
||||
|
||||
count = TIMEOUT_MSEC / POLL_MSEC;
|
||||
- spin_lock_irq(&boot_lock);
|
||||
+ raw_spin_lock_irq(&boot_lock);
|
||||
for (tries = 0; tries < count; tries++) {
|
||||
if (hip04_cpu_table[cluster][cpu])
|
||||
goto err;
|
||||
@@ -211,10 +211,10 @@ static int hip04_cpu_kill(unsigned int l
|
||||
data = readl_relaxed(sysctrl + SC_CPU_RESET_STATUS(cluster));
|
||||
if (data & CORE_WFI_STATUS(cpu))
|
||||
break;
|
||||
- spin_unlock_irq(&boot_lock);
|
||||
+ raw_spin_unlock_irq(&boot_lock);
|
||||
/* Wait for clean L2 when the whole cluster is down. */
|
||||
msleep(POLL_MSEC);
|
||||
- spin_lock_irq(&boot_lock);
|
||||
+ raw_spin_lock_irq(&boot_lock);
|
||||
}
|
||||
if (tries >= count)
|
||||
goto err;
|
||||
@@ -231,10 +231,10 @@ static int hip04_cpu_kill(unsigned int l
|
||||
goto err;
|
||||
if (hip04_cluster_is_down(cluster))
|
||||
hip04_set_snoop_filter(cluster, 0);
|
||||
- spin_unlock_irq(&boot_lock);
|
||||
+ raw_spin_unlock_irq(&boot_lock);
|
||||
return 1;
|
||||
err:
|
||||
- spin_unlock_irq(&boot_lock);
|
||||
+ raw_spin_unlock_irq(&boot_lock);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
--- a/arch/arm/mach-omap2/omap-smp.c
|
||||
+++ b/arch/arm/mach-omap2/omap-smp.c
|
||||
@@ -69,7 +69,7 @@ static const struct omap_smp_config omap
|
||||
.startup_addr = omap5_secondary_startup,
|
||||
};
|
||||
|
||||
-static DEFINE_SPINLOCK(boot_lock);
|
||||
+static DEFINE_RAW_SPINLOCK(boot_lock);
|
||||
|
||||
void __iomem *omap4_get_scu_base(void)
|
||||
{
|
||||
@@ -177,8 +177,8 @@ static void omap4_secondary_init(unsigne
|
||||
/*
|
||||
* Synchronise with the boot thread.
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
}
|
||||
|
||||
static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle)
|
||||
@@ -191,7 +191,7 @@ static int omap4_boot_secondary(unsigned
|
||||
* Set synchronisation state between this boot processor
|
||||
* and the secondary one
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
|
||||
/*
|
||||
* Update the AuxCoreBoot0 with boot state for secondary core.
|
||||
@@ -270,7 +270,7 @@ static int omap4_boot_secondary(unsigned
|
||||
* Now the secondary core is starting up let it run its
|
||||
* calibrations, then wait for it to finish
|
||||
*/
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
--- a/arch/arm/mach-prima2/platsmp.c
|
||||
+++ b/arch/arm/mach-prima2/platsmp.c
|
||||
@@ -22,7 +22,7 @@
|
||||
|
||||
static void __iomem *clk_base;
|
||||
|
||||
-static DEFINE_SPINLOCK(boot_lock);
|
||||
+static DEFINE_RAW_SPINLOCK(boot_lock);
|
||||
|
||||
static void sirfsoc_secondary_init(unsigned int cpu)
|
||||
{
|
||||
@@ -36,8 +36,8 @@ static void sirfsoc_secondary_init(unsig
|
||||
/*
|
||||
* Synchronise with the boot thread.
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
}
|
||||
|
||||
static const struct of_device_id clk_ids[] = {
|
||||
@@ -75,7 +75,7 @@ static int sirfsoc_boot_secondary(unsign
|
||||
/* make sure write buffer is drained */
|
||||
mb();
|
||||
|
||||
- spin_lock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
|
||||
/*
|
||||
* The secondary processor is waiting to be released from
|
||||
@@ -107,7 +107,7 @@ static int sirfsoc_boot_secondary(unsign
|
||||
* now the secondary core is starting up let it run its
|
||||
* calibrations, then wait for it to finish
|
||||
*/
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
|
||||
return pen_release != -1 ? -ENOSYS : 0;
|
||||
}
|
||||
--- a/arch/arm/mach-qcom/platsmp.c
|
||||
+++ b/arch/arm/mach-qcom/platsmp.c
|
||||
@@ -46,7 +46,7 @@
|
||||
|
||||
extern void secondary_startup_arm(void);
|
||||
|
||||
-static DEFINE_SPINLOCK(boot_lock);
|
||||
+static DEFINE_RAW_SPINLOCK(boot_lock);
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
static void qcom_cpu_die(unsigned int cpu)
|
||||
@@ -60,8 +60,8 @@ static void qcom_secondary_init(unsigned
|
||||
/*
|
||||
* Synchronise with the boot thread.
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
}
|
||||
|
||||
static int scss_release_secondary(unsigned int cpu)
|
||||
@@ -284,7 +284,7 @@ static int qcom_boot_secondary(unsigned
|
||||
* set synchronisation state between this boot processor
|
||||
* and the secondary one
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
|
||||
/*
|
||||
* Send the secondary CPU a soft interrupt, thereby causing
|
||||
@@ -297,7 +297,7 @@ static int qcom_boot_secondary(unsigned
|
||||
* now the secondary core is starting up let it run its
|
||||
* calibrations, then wait for it to finish
|
||||
*/
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
--- a/arch/arm/mach-spear/platsmp.c
|
||||
+++ b/arch/arm/mach-spear/platsmp.c
|
||||
@@ -32,7 +32,7 @@ static void write_pen_release(int val)
|
||||
sync_cache_w(&pen_release);
|
||||
}
|
||||
|
||||
-static DEFINE_SPINLOCK(boot_lock);
|
||||
+static DEFINE_RAW_SPINLOCK(boot_lock);
|
||||
|
||||
static void __iomem *scu_base = IOMEM(VA_SCU_BASE);
|
||||
|
||||
@@ -47,8 +47,8 @@ static void spear13xx_secondary_init(uns
|
||||
/*
|
||||
* Synchronise with the boot thread.
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
}
|
||||
|
||||
static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle)
|
||||
@@ -59,7 +59,7 @@ static int spear13xx_boot_secondary(unsi
|
||||
* set synchronisation state between this boot processor
|
||||
* and the secondary one
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
|
||||
/*
|
||||
* The secondary processor is waiting to be released from
|
||||
@@ -84,7 +84,7 @@ static int spear13xx_boot_secondary(unsi
|
||||
* now the secondary core is starting up let it run its
|
||||
* calibrations, then wait for it to finish
|
||||
*/
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
|
||||
return pen_release != -1 ? -ENOSYS : 0;
|
||||
}
|
||||
--- a/arch/arm/mach-sti/platsmp.c
|
||||
+++ b/arch/arm/mach-sti/platsmp.c
|
||||
@@ -35,7 +35,7 @@ static void write_pen_release(int val)
|
||||
sync_cache_w(&pen_release);
|
||||
}
|
||||
|
||||
-static DEFINE_SPINLOCK(boot_lock);
|
||||
+static DEFINE_RAW_SPINLOCK(boot_lock);
|
||||
|
||||
static void sti_secondary_init(unsigned int cpu)
|
||||
{
|
||||
@@ -48,8 +48,8 @@ static void sti_secondary_init(unsigned
|
||||
/*
|
||||
* Synchronise with the boot thread.
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
}
|
||||
|
||||
static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle)
|
||||
@@ -60,7 +60,7 @@ static int sti_boot_secondary(unsigned i
|
||||
* set synchronisation state between this boot processor
|
||||
* and the secondary one
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
|
||||
/*
|
||||
* The secondary processor is waiting to be released from
|
||||
@@ -91,7 +91,7 @@ static int sti_boot_secondary(unsigned i
|
||||
* now the secondary core is starting up let it run its
|
||||
* calibrations, then wait for it to finish
|
||||
*/
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
|
||||
return pen_release != -1 ? -ENOSYS : 0;
|
||||
}
|
||||
--- a/arch/arm/plat-versatile/platsmp.c
|
||||
+++ b/arch/arm/plat-versatile/platsmp.c
|
||||
@@ -32,7 +32,7 @@ static void write_pen_release(int val)
|
||||
sync_cache_w(&pen_release);
|
||||
}
|
||||
|
||||
-static DEFINE_SPINLOCK(boot_lock);
|
||||
+static DEFINE_RAW_SPINLOCK(boot_lock);
|
||||
|
||||
void versatile_secondary_init(unsigned int cpu)
|
||||
{
|
||||
@@ -45,8 +45,8 @@ void versatile_secondary_init(unsigned i
|
||||
/*
|
||||
* Synchronise with the boot thread.
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
}
|
||||
|
||||
int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle)
|
||||
@@ -57,7 +57,7 @@ int versatile_boot_secondary(unsigned in
|
||||
* Set synchronisation state between this boot processor
|
||||
* and the secondary one
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
|
||||
/*
|
||||
* This is really belt and braces; we hold unintended secondary
|
||||
@@ -87,7 +87,7 @@ int versatile_boot_secondary(unsigned in
|
||||
* now the secondary core is starting up let it run its
|
||||
* calibrations, then wait for it to finish
|
||||
*/
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
|
||||
return pen_release != -1 ? -ENOSYS : 0;
|
||||
}
|
||||
@@ -0,0 +1,105 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Tue, 17 Jul 2018 18:25:31 +0200
|
||||
Subject: [PATCH] x86/ioapic: Don't let setaffinity unmask threaded EOI
|
||||
interrupt too early
|
||||
|
||||
There is an issue with threaded interrupts which are marked ONESHOT
|
||||
and using the fasteoi handler.
|
||||
|
||||
if (IS_ONESHOT())
|
||||
mask_irq();
|
||||
|
||||
....
|
||||
....
|
||||
|
||||
cond_unmask_eoi_irq()
|
||||
chip->irq_eoi();
|
||||
|
||||
So if setaffinity is pending then the interrupt will be moved and then
|
||||
unmasked, which is wrong as it should be kept masked up to the point where
|
||||
the threaded handler finished. It's not a real problem, the interrupt will
|
||||
just be able to fire before the threaded handler has finished, though the irq
|
||||
masked state will be wrong for a bit.
|
||||
|
||||
The patch below should cure the issue. It also renames the horribly
|
||||
misnomed functions so it becomes clear what they are supposed to do.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
[bigeasy: add the body of the patch, use the same functions in both
|
||||
ifdef paths (spotted by Andy Shevchenko)]
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
arch/x86/kernel/apic/io_apic.c | 23 +++++++++++++----------
|
||||
1 file changed, 13 insertions(+), 10 deletions(-)
|
||||
|
||||
--- a/arch/x86/kernel/apic/io_apic.c
|
||||
+++ b/arch/x86/kernel/apic/io_apic.c
|
||||
@@ -1722,19 +1722,20 @@ static bool io_apic_level_ack_pending(st
|
||||
return false;
|
||||
}
|
||||
|
||||
-static inline bool ioapic_irqd_mask(struct irq_data *data)
|
||||
+static inline bool ioapic_prepare_move(struct irq_data *data)
|
||||
{
|
||||
/* If we are moving the irq we need to mask it */
|
||||
if (unlikely(irqd_is_setaffinity_pending(data))) {
|
||||
- mask_ioapic_irq(data);
|
||||
+ if (!irqd_irq_masked(data))
|
||||
+ mask_ioapic_irq(data);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
-static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked)
|
||||
+static inline void ioapic_finish_move(struct irq_data *data, bool moveit)
|
||||
{
|
||||
- if (unlikely(masked)) {
|
||||
+ if (unlikely(moveit)) {
|
||||
/* Only migrate the irq if the ack has been received.
|
||||
*
|
||||
* On rare occasions the broadcast level triggered ack gets
|
||||
@@ -1763,15 +1764,17 @@ static inline void ioapic_irqd_unmask(st
|
||||
*/
|
||||
if (!io_apic_level_ack_pending(data->chip_data))
|
||||
irq_move_masked_irq(data);
|
||||
- unmask_ioapic_irq(data);
|
||||
+ /* If the irq is masked in the core, leave it */
|
||||
+ if (!irqd_irq_masked(data))
|
||||
+ unmask_ioapic_irq(data);
|
||||
}
|
||||
}
|
||||
#else
|
||||
-static inline bool ioapic_irqd_mask(struct irq_data *data)
|
||||
+static inline bool ioapic_prepare_move(struct irq_data *data)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
-static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked)
|
||||
+static inline void ioapic_finish_move(struct irq_data *data, bool moveit)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
@@ -1780,11 +1783,11 @@ static void ioapic_ack_level(struct irq_
|
||||
{
|
||||
struct irq_cfg *cfg = irqd_cfg(irq_data);
|
||||
unsigned long v;
|
||||
- bool masked;
|
||||
+ bool moveit;
|
||||
int i;
|
||||
|
||||
irq_complete_move(cfg);
|
||||
- masked = ioapic_irqd_mask(irq_data);
|
||||
+ moveit = ioapic_prepare_move(irq_data);
|
||||
|
||||
/*
|
||||
* It appears there is an erratum which affects at least version 0x11
|
||||
@@ -1839,7 +1842,7 @@ static void ioapic_ack_level(struct irq_
|
||||
eoi_ioapic_pin(cfg->vector, irq_data->chip_data);
|
||||
}
|
||||
|
||||
- ioapic_irqd_unmask(irq_data, masked);
|
||||
+ ioapic_finish_move(irq_data, moveit);
|
||||
}
|
||||
|
||||
static void ioapic_ir_ack_level(struct irq_data *irq_data)
|
||||
@@ -0,0 +1,69 @@
|
||||
From: Yang Shi <yang.shi@linaro.org>
|
||||
Date: Thu, 10 Nov 2016 16:17:55 -0800
|
||||
Subject: [PATCH] arm: kprobe: replace patch_lock to raw lock
|
||||
|
||||
When running kprobe on -rt kernel, the below bug is caught:
|
||||
|
||||
BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:931
|
||||
in_atomic(): 1, irqs_disabled(): 128, pid: 14, name: migration/0
|
||||
INFO: lockdep is turned off.
|
||||
irq event stamp: 238
|
||||
hardirqs last enabled at (237): [<80b5aecc>] _raw_spin_unlock_irqrestore+0x88/0x90
|
||||
hardirqs last disabled at (238): [<80b56d88>] __schedule+0xec/0x94c
|
||||
softirqs last enabled at (0): [<80225584>] copy_process.part.5+0x30c/0x1994
|
||||
softirqs last disabled at (0): [< (null)>] (null)
|
||||
Preemption disabled at:[<802f2b98>] cpu_stopper_thread+0xc0/0x140
|
||||
|
||||
CPU: 0 PID: 14 Comm: migration/0 Tainted: G O 4.8.3-rt2 #1
|
||||
Hardware name: Freescale LS1021A
|
||||
[<80212e7c>] (unwind_backtrace) from [<8020cd2c>] (show_stack+0x20/0x24)
|
||||
[<8020cd2c>] (show_stack) from [<80689e14>] (dump_stack+0xa0/0xcc)
|
||||
[<80689e14>] (dump_stack) from [<8025a43c>] (___might_sleep+0x1b8/0x2a4)
|
||||
[<8025a43c>] (___might_sleep) from [<80b5b324>] (rt_spin_lock+0x34/0x74)
|
||||
[<80b5b324>] (rt_spin_lock) from [<80b5c31c>] (__patch_text_real+0x70/0xe8)
|
||||
[<80b5c31c>] (__patch_text_real) from [<80b5c3ac>] (patch_text_stop_machine+0x18/0x20)
|
||||
[<80b5c3ac>] (patch_text_stop_machine) from [<802f2920>] (multi_cpu_stop+0xfc/0x134)
|
||||
[<802f2920>] (multi_cpu_stop) from [<802f2ba0>] (cpu_stopper_thread+0xc8/0x140)
|
||||
[<802f2ba0>] (cpu_stopper_thread) from [<802563a4>] (smpboot_thread_fn+0x1a4/0x354)
|
||||
[<802563a4>] (smpboot_thread_fn) from [<80251d38>] (kthread+0x104/0x11c)
|
||||
[<80251d38>] (kthread) from [<80207f70>] (ret_from_fork+0x14/0x24)
|
||||
|
||||
Since patch_text_stop_machine() is called in stop_machine() which disables IRQ,
|
||||
sleepable lock should be not used in this atomic context, so replace patch_lock
|
||||
to raw lock.
|
||||
|
||||
Signed-off-by: Yang Shi <yang.shi@linaro.org>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
arch/arm/kernel/patch.c | 6 +++---
|
||||
1 file changed, 3 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/arch/arm/kernel/patch.c
|
||||
+++ b/arch/arm/kernel/patch.c
|
||||
@@ -16,7 +16,7 @@ struct patch {
|
||||
unsigned int insn;
|
||||
};
|
||||
|
||||
-static DEFINE_SPINLOCK(patch_lock);
|
||||
+static DEFINE_RAW_SPINLOCK(patch_lock);
|
||||
|
||||
static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
|
||||
__acquires(&patch_lock)
|
||||
@@ -33,7 +33,7 @@ static void __kprobes *patch_map(void *a
|
||||
return addr;
|
||||
|
||||
if (flags)
|
||||
- spin_lock_irqsave(&patch_lock, *flags);
|
||||
+ raw_spin_lock_irqsave(&patch_lock, *flags);
|
||||
else
|
||||
__acquire(&patch_lock);
|
||||
|
||||
@@ -48,7 +48,7 @@ static void __kprobes patch_unmap(int fi
|
||||
clear_fixmap(fixmap);
|
||||
|
||||
if (flags)
|
||||
- spin_unlock_irqrestore(&patch_lock, *flags);
|
||||
+ raw_spin_unlock_irqrestore(&patch_lock, *flags);
|
||||
else
|
||||
__release(&patch_lock);
|
||||
}
|
||||
83
kernel/patches-4.19.x-rt/0015-arm-unwind-use_raw_lock.patch
Normal file
83
kernel/patches-4.19.x-rt/0015-arm-unwind-use_raw_lock.patch
Normal file
@@ -0,0 +1,83 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Fri, 20 Sep 2013 14:31:54 +0200
|
||||
Subject: arm/unwind: use a raw_spin_lock
|
||||
|
||||
Mostly unwind is done with irqs enabled however SLUB may call it with
|
||||
irqs disabled while creating a new SLUB cache.
|
||||
|
||||
I had system freeze while loading a module which called
|
||||
kmem_cache_create() on init. That means SLUB's __slab_alloc() disabled
|
||||
interrupts and then
|
||||
|
||||
->new_slab_objects()
|
||||
->new_slab()
|
||||
->setup_object()
|
||||
->setup_object_debug()
|
||||
->init_tracking()
|
||||
->set_track()
|
||||
->save_stack_trace()
|
||||
->save_stack_trace_tsk()
|
||||
->walk_stackframe()
|
||||
->unwind_frame()
|
||||
->unwind_find_idx()
|
||||
=>spin_lock_irqsave(&unwind_lock);
|
||||
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
arch/arm/kernel/unwind.c | 14 +++++++-------
|
||||
1 file changed, 7 insertions(+), 7 deletions(-)
|
||||
|
||||
--- a/arch/arm/kernel/unwind.c
|
||||
+++ b/arch/arm/kernel/unwind.c
|
||||
@@ -93,7 +93,7 @@ extern const struct unwind_idx __start_u
|
||||
static const struct unwind_idx *__origin_unwind_idx;
|
||||
extern const struct unwind_idx __stop_unwind_idx[];
|
||||
|
||||
-static DEFINE_SPINLOCK(unwind_lock);
|
||||
+static DEFINE_RAW_SPINLOCK(unwind_lock);
|
||||
static LIST_HEAD(unwind_tables);
|
||||
|
||||
/* Convert a prel31 symbol to an absolute address */
|
||||
@@ -201,7 +201,7 @@ static const struct unwind_idx *unwind_f
|
||||
/* module unwind tables */
|
||||
struct unwind_table *table;
|
||||
|
||||
- spin_lock_irqsave(&unwind_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&unwind_lock, flags);
|
||||
list_for_each_entry(table, &unwind_tables, list) {
|
||||
if (addr >= table->begin_addr &&
|
||||
addr < table->end_addr) {
|
||||
@@ -213,7 +213,7 @@ static const struct unwind_idx *unwind_f
|
||||
break;
|
||||
}
|
||||
}
|
||||
- spin_unlock_irqrestore(&unwind_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&unwind_lock, flags);
|
||||
}
|
||||
|
||||
pr_debug("%s: idx = %p\n", __func__, idx);
|
||||
@@ -529,9 +529,9 @@ struct unwind_table *unwind_table_add(un
|
||||
tab->begin_addr = text_addr;
|
||||
tab->end_addr = text_addr + text_size;
|
||||
|
||||
- spin_lock_irqsave(&unwind_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&unwind_lock, flags);
|
||||
list_add_tail(&tab->list, &unwind_tables);
|
||||
- spin_unlock_irqrestore(&unwind_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&unwind_lock, flags);
|
||||
|
||||
return tab;
|
||||
}
|
||||
@@ -543,9 +543,9 @@ void unwind_table_del(struct unwind_tabl
|
||||
if (!tab)
|
||||
return;
|
||||
|
||||
- spin_lock_irqsave(&unwind_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&unwind_lock, flags);
|
||||
list_del(&tab->list);
|
||||
- spin_unlock_irqrestore(&unwind_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&unwind_lock, flags);
|
||||
|
||||
kfree(tab);
|
||||
}
|
||||
@@ -0,0 +1,43 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Tue, 3 Jul 2018 18:19:48 +0200
|
||||
Subject: [PATCH] cgroup: use irqsave in cgroup_rstat_flush_locked()
|
||||
|
||||
All callers of cgroup_rstat_flush_locked() acquire cgroup_rstat_lock
|
||||
either with spin_lock_irq() or spin_lock_irqsave().
|
||||
cgroup_rstat_flush_locked() itself acquires cgroup_rstat_cpu_lock which
|
||||
is a raw_spin_lock. This lock is also acquired in cgroup_rstat_updated()
|
||||
in IRQ context and therefore requires _irqsave() locking suffix in
|
||||
cgroup_rstat_flush_locked().
|
||||
Since there is no difference between spin_lock_t and raw_spin_lock_t
|
||||
on !RT lockdep does not complain here. On RT lockdep complains because
|
||||
the interrupts were not disabled here and a deadlock is possible.
|
||||
|
||||
Acquire the raw_spin_lock_t with disabled interrupts.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/cgroup/rstat.c | 5 +++--
|
||||
1 file changed, 3 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/kernel/cgroup/rstat.c
|
||||
+++ b/kernel/cgroup/rstat.c
|
||||
@@ -157,8 +157,9 @@ static void cgroup_rstat_flush_locked(st
|
||||
raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock,
|
||||
cpu);
|
||||
struct cgroup *pos = NULL;
|
||||
+ unsigned long flags;
|
||||
|
||||
- raw_spin_lock(cpu_lock);
|
||||
+ raw_spin_lock_irqsave(cpu_lock, flags);
|
||||
while ((pos = cgroup_rstat_cpu_pop_updated(pos, cgrp, cpu))) {
|
||||
struct cgroup_subsys_state *css;
|
||||
|
||||
@@ -170,7 +171,7 @@ static void cgroup_rstat_flush_locked(st
|
||||
css->ss->css_rstat_flush(css, cpu);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
- raw_spin_unlock(cpu_lock);
|
||||
+ raw_spin_unlock_irqrestore(cpu_lock, flags);
|
||||
|
||||
/* if @may_sleep, play nice and yield if necessary */
|
||||
if (may_sleep && (need_resched() ||
|
||||
@@ -0,0 +1,53 @@
|
||||
From: Clark Williams <williams@redhat.com>
|
||||
Date: Tue, 3 Jul 2018 13:34:30 -0500
|
||||
Subject: [PATCH] fscache: initialize cookie hash table raw spinlocks
|
||||
|
||||
The fscache cookie mechanism uses a hash table of hlist_bl_head structures. The
|
||||
PREEMPT_RT patcheset adds a raw spinlock to this structure and so on PREEMPT_RT
|
||||
the structures get used uninitialized, causing warnings about bad magic numbers
|
||||
when spinlock debugging is turned on.
|
||||
|
||||
Use the init function for fscache cookies.
|
||||
|
||||
Signed-off-by: Clark Williams <williams@redhat.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
fs/fscache/cookie.c | 8 ++++++++
|
||||
fs/fscache/main.c | 1 +
|
||||
include/linux/fscache.h | 1 +
|
||||
3 files changed, 10 insertions(+)
|
||||
|
||||
--- a/fs/fscache/cookie.c
|
||||
+++ b/fs/fscache/cookie.c
|
||||
@@ -962,3 +962,11 @@ int __fscache_check_consistency(struct f
|
||||
return -ESTALE;
|
||||
}
|
||||
EXPORT_SYMBOL(__fscache_check_consistency);
|
||||
+
|
||||
+void __init fscache_cookie_init(void)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ for (i = 0; i < (1 << fscache_cookie_hash_shift) - 1; i++)
|
||||
+ INIT_HLIST_BL_HEAD(&fscache_cookie_hash[i]);
|
||||
+}
|
||||
--- a/fs/fscache/main.c
|
||||
+++ b/fs/fscache/main.c
|
||||
@@ -149,6 +149,7 @@ static int __init fscache_init(void)
|
||||
ret = -ENOMEM;
|
||||
goto error_cookie_jar;
|
||||
}
|
||||
+ fscache_cookie_init();
|
||||
|
||||
fscache_root = kobject_create_and_add("fscache", kernel_kobj);
|
||||
if (!fscache_root)
|
||||
--- a/include/linux/fscache.h
|
||||
+++ b/include/linux/fscache.h
|
||||
@@ -230,6 +230,7 @@ extern void __fscache_readpages_cancel(s
|
||||
extern void __fscache_disable_cookie(struct fscache_cookie *, const void *, bool);
|
||||
extern void __fscache_enable_cookie(struct fscache_cookie *, const void *, loff_t,
|
||||
bool (*)(void *), void *);
|
||||
+extern void fscache_cookie_init(void);
|
||||
|
||||
/**
|
||||
* fscache_register_netfs - Register a filesystem as desiring caching services
|
||||
@@ -0,0 +1,33 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Wed, 29 Aug 2018 21:59:04 +0200
|
||||
Subject: [PATCH] Drivers: hv: vmbus: include header for get_irq_regs()
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
On !RT the header file get_irq_regs() gets pulled in via other header files. On
|
||||
RT it does not and the build fails:
|
||||
|
||||
drivers/hv/vmbus_drv.c:975 implicit declaration of function ‘get_irq_regs’ [-Werror=implicit-function-declaration]
|
||||
drivers/hv/hv.c:115 implicit declaration of function ‘get_irq_regs’ [-Werror=implicit-function-declaration]
|
||||
|
||||
Add the header file for get_irq_regs() in a common header so it used by
|
||||
vmbus_drv.c by hv.c for their get_irq_regs() usage.
|
||||
|
||||
Reported-by: Bernhard Landauer <oberon@manjaro.org>
|
||||
Reported-by: Ralf Ramsauer <ralf.ramsauer@oth-regensburg.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
drivers/hv/hyperv_vmbus.h | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
--- a/drivers/hv/hyperv_vmbus.h
|
||||
+++ b/drivers/hv/hyperv_vmbus.h
|
||||
@@ -31,6 +31,7 @@
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/hyperv.h>
|
||||
#include <linux/interrupt.h>
|
||||
+#include <linux/irq.h>
|
||||
|
||||
#include "hv_trace.h"
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Thu, 11 Oct 2018 16:39:59 +0200
|
||||
Subject: [PATCH] percpu: include irqflags.h for raw_local_irq_save()
|
||||
|
||||
The header percpu.h header file is using raw_local_irq_save() but does
|
||||
not include irqflags.h for its definition. It compiles because the
|
||||
header file is included via an other header file.
|
||||
On -RT the build fails because raw_local_irq_save() is not defined.
|
||||
|
||||
Include irqflags.h in percpu.h.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/asm-generic/percpu.h | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
--- a/include/asm-generic/percpu.h
|
||||
+++ b/include/asm-generic/percpu.h
|
||||
@@ -5,6 +5,7 @@
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/threads.h>
|
||||
#include <linux/percpu-defs.h>
|
||||
+#include <linux/irqflags.h>
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
25
kernel/patches-4.19.x-rt/0020-efi-Allow-efi-runtime.patch
Normal file
25
kernel/patches-4.19.x-rt/0020-efi-Allow-efi-runtime.patch
Normal file
@@ -0,0 +1,25 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Thu, 26 Jul 2018 15:06:10 +0200
|
||||
Subject: [PATCH] efi: Allow efi=runtime
|
||||
|
||||
In case the option "efi=noruntime" is default at built-time, the user
|
||||
could overwrite its sate by `efi=runtime' and allow it again.
|
||||
|
||||
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
drivers/firmware/efi/efi.c | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
--- a/drivers/firmware/efi/efi.c
|
||||
+++ b/drivers/firmware/efi/efi.c
|
||||
@@ -113,6 +113,9 @@ static int __init parse_efi_cmdline(char
|
||||
if (parse_option_str(str, "noruntime"))
|
||||
disable_runtime = true;
|
||||
|
||||
+ if (parse_option_str(str, "runtime"))
|
||||
+ disable_runtime = false;
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
early_param("efi", parse_efi_cmdline);
|
||||
@@ -0,0 +1,48 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Tue, 24 Jul 2018 14:48:55 +0200
|
||||
Subject: [PATCH] x86/efi: drop task_lock() from efi_switch_mm()
|
||||
|
||||
efi_switch_mm() is a wrapper around switch_mm() which saves current's
|
||||
->active_mm, sets the requests mm as ->active_mm and invokes
|
||||
switch_mm().
|
||||
I don't think that task_lock() is required during that procedure. It
|
||||
protects ->mm which isn't changed here.
|
||||
|
||||
It needs to be mentioned that during the whole procedure (switch to
|
||||
EFI's mm and back) the preemption needs to be disabled. A context switch
|
||||
at this point would reset the cr3 value based on current->mm. Also, this
|
||||
function may not be invoked at the same time on a different CPU because
|
||||
it would overwrite the efi_scratch.prev_mm information.
|
||||
|
||||
Remove task_lock() and also update the comment to reflect it.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
arch/x86/platform/efi/efi_64.c | 10 ++++------
|
||||
1 file changed, 4 insertions(+), 6 deletions(-)
|
||||
|
||||
--- a/arch/x86/platform/efi/efi_64.c
|
||||
+++ b/arch/x86/platform/efi/efi_64.c
|
||||
@@ -619,18 +619,16 @@ void __init efi_dump_pagetable(void)
|
||||
|
||||
/*
|
||||
* Makes the calling thread switch to/from efi_mm context. Can be used
|
||||
- * for SetVirtualAddressMap() i.e. current->active_mm == init_mm as well
|
||||
- * as during efi runtime calls i.e current->active_mm == current_mm.
|
||||
- * We are not mm_dropping()/mm_grabbing() any mm, because we are not
|
||||
- * losing/creating any references.
|
||||
+ * in a kernel thread and user context. Preemption needs to remain disabled
|
||||
+ * while the EFI-mm is borrowed. mmgrab()/mmdrop() is not used because the mm
|
||||
+ * can not change under us.
|
||||
+ * It should be ensured that there are no concurent calls to this function.
|
||||
*/
|
||||
void efi_switch_mm(struct mm_struct *mm)
|
||||
{
|
||||
- task_lock(current);
|
||||
efi_scratch.prev_mm = current->active_mm;
|
||||
current->active_mm = mm;
|
||||
switch_mm(efi_scratch.prev_mm, mm, NULL);
|
||||
- task_unlock(current);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_EFI_MIXED
|
||||
@@ -0,0 +1,71 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Thu, 26 Jul 2018 09:13:42 +0200
|
||||
Subject: [PATCH] arm64: KVM: compute_layout before altenates are applied
|
||||
|
||||
compute_layout() is invoked as part of an alternative fixup under
|
||||
stop_machine() and needs a sleeping lock as part of get_random_long().
|
||||
|
||||
Invoke compute_layout() before the alternatives are applied.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
arch/arm64/include/asm/alternative.h | 6 ++++++
|
||||
arch/arm64/kernel/alternative.c | 1 +
|
||||
arch/arm64/kvm/va_layout.c | 7 +------
|
||||
3 files changed, 8 insertions(+), 6 deletions(-)
|
||||
|
||||
--- a/arch/arm64/include/asm/alternative.h
|
||||
+++ b/arch/arm64/include/asm/alternative.h
|
||||
@@ -35,6 +35,12 @@ void apply_alternatives_module(void *sta
|
||||
static inline void apply_alternatives_module(void *start, size_t length) { }
|
||||
#endif
|
||||
|
||||
+#ifdef CONFIG_KVM_ARM_HOST
|
||||
+void kvm_compute_layout(void);
|
||||
+#else
|
||||
+static inline void kvm_compute_layout(void) { }
|
||||
+#endif
|
||||
+
|
||||
#define ALTINSTR_ENTRY(feature,cb) \
|
||||
" .word 661b - .\n" /* label */ \
|
||||
" .if " __stringify(cb) " == 0\n" \
|
||||
--- a/arch/arm64/kernel/alternative.c
|
||||
+++ b/arch/arm64/kernel/alternative.c
|
||||
@@ -224,6 +224,7 @@ static int __apply_alternatives_multi_st
|
||||
void __init apply_alternatives_all(void)
|
||||
{
|
||||
/* better not try code patching on a live SMP system */
|
||||
+ kvm_compute_layout();
|
||||
stop_machine(__apply_alternatives_multi_stop, NULL, cpu_online_mask);
|
||||
}
|
||||
|
||||
--- a/arch/arm64/kvm/va_layout.c
|
||||
+++ b/arch/arm64/kvm/va_layout.c
|
||||
@@ -33,7 +33,7 @@ static u8 tag_lsb;
|
||||
static u64 tag_val;
|
||||
static u64 va_mask;
|
||||
|
||||
-static void compute_layout(void)
|
||||
+__init void kvm_compute_layout(void)
|
||||
{
|
||||
phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start);
|
||||
u64 hyp_va_msb;
|
||||
@@ -121,8 +121,6 @@ void __init kvm_update_va_mask(struct al
|
||||
|
||||
BUG_ON(nr_inst != 5);
|
||||
|
||||
- if (!has_vhe() && !va_mask)
|
||||
- compute_layout();
|
||||
|
||||
for (i = 0; i < nr_inst; i++) {
|
||||
u32 rd, rn, insn, oinsn;
|
||||
@@ -167,9 +165,6 @@ void kvm_patch_vector_branch(struct alt_
|
||||
return;
|
||||
}
|
||||
|
||||
- if (!va_mask)
|
||||
- compute_layout();
|
||||
-
|
||||
/*
|
||||
* Compute HYP VA by using the same computation as kern_hyp_va()
|
||||
*/
|
||||
@@ -0,0 +1,95 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Fri, 31 Aug 2018 14:16:30 +0200
|
||||
Subject: [PATCH] of: allocate / free phandle cache outside of the devtree_lock
|
||||
|
||||
The phandle cache code allocates memory while holding devtree_lock which
|
||||
is a raw_spinlock_t. Memory allocation (and free()) is not possible on
|
||||
RT while a raw_spinlock_t is held.
|
||||
Invoke the kfree() and kcalloc() while the lock is dropped.
|
||||
|
||||
Cc: Rob Herring <robh+dt@kernel.org>
|
||||
Cc: Frank Rowand <frowand.list@gmail.com>
|
||||
Cc: devicetree@vger.kernel.org
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
drivers/of/base.c | 19 +++++++++++++------
|
||||
1 file changed, 13 insertions(+), 6 deletions(-)
|
||||
|
||||
--- a/drivers/of/base.c
|
||||
+++ b/drivers/of/base.c
|
||||
@@ -130,31 +130,34 @@ static u32 phandle_cache_mask;
|
||||
/*
|
||||
* Caller must hold devtree_lock.
|
||||
*/
|
||||
-static void __of_free_phandle_cache(void)
|
||||
+static struct device_node** __of_free_phandle_cache(void)
|
||||
{
|
||||
u32 cache_entries = phandle_cache_mask + 1;
|
||||
u32 k;
|
||||
+ struct device_node **shadow;
|
||||
|
||||
if (!phandle_cache)
|
||||
- return;
|
||||
+ return NULL;
|
||||
|
||||
for (k = 0; k < cache_entries; k++)
|
||||
of_node_put(phandle_cache[k]);
|
||||
|
||||
- kfree(phandle_cache);
|
||||
+ shadow = phandle_cache;
|
||||
phandle_cache = NULL;
|
||||
+ return shadow;
|
||||
}
|
||||
|
||||
int of_free_phandle_cache(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
+ struct device_node **shadow;
|
||||
|
||||
raw_spin_lock_irqsave(&devtree_lock, flags);
|
||||
|
||||
- __of_free_phandle_cache();
|
||||
+ shadow = __of_free_phandle_cache();
|
||||
|
||||
raw_spin_unlock_irqrestore(&devtree_lock, flags);
|
||||
-
|
||||
+ kfree(shadow);
|
||||
return 0;
|
||||
}
|
||||
#if !defined(CONFIG_MODULES)
|
||||
@@ -189,10 +192,11 @@ void of_populate_phandle_cache(void)
|
||||
u32 cache_entries;
|
||||
struct device_node *np;
|
||||
u32 phandles = 0;
|
||||
+ struct device_node **shadow;
|
||||
|
||||
raw_spin_lock_irqsave(&devtree_lock, flags);
|
||||
|
||||
- __of_free_phandle_cache();
|
||||
+ shadow = __of_free_phandle_cache();
|
||||
|
||||
for_each_of_allnodes(np)
|
||||
if (np->phandle && np->phandle != OF_PHANDLE_ILLEGAL)
|
||||
@@ -200,12 +204,14 @@ void of_populate_phandle_cache(void)
|
||||
|
||||
if (!phandles)
|
||||
goto out;
|
||||
+ raw_spin_unlock_irqrestore(&devtree_lock, flags);
|
||||
|
||||
cache_entries = roundup_pow_of_two(phandles);
|
||||
phandle_cache_mask = cache_entries - 1;
|
||||
|
||||
phandle_cache = kcalloc(cache_entries, sizeof(*phandle_cache),
|
||||
GFP_ATOMIC);
|
||||
+ raw_spin_lock_irqsave(&devtree_lock, flags);
|
||||
if (!phandle_cache)
|
||||
goto out;
|
||||
|
||||
@@ -217,6 +223,7 @@ void of_populate_phandle_cache(void)
|
||||
|
||||
out:
|
||||
raw_spin_unlock_irqrestore(&devtree_lock, flags);
|
||||
+ kfree(shadow);
|
||||
}
|
||||
|
||||
void __init of_core_init(void)
|
||||
@@ -0,0 +1,91 @@
|
||||
From: Clark Williams <williams@redhat.com>
|
||||
Date: Tue, 18 Sep 2018 10:29:31 -0500
|
||||
Subject: [PATCH] mm/kasan: make quarantine_lock a raw_spinlock_t
|
||||
|
||||
The static lock quarantine_lock is used in quarantine.c to protect the
|
||||
quarantine queue datastructures. It is taken inside quarantine queue
|
||||
manipulation routines (quarantine_put(), quarantine_reduce() and
|
||||
quarantine_remove_cache()), with IRQs disabled.
|
||||
This is not a problem on a stock kernel but is problematic on an RT
|
||||
kernel where spin locks are sleeping spinlocks, which can sleep and can
|
||||
not be acquired with disabled interrupts.
|
||||
|
||||
Convert the quarantine_lock to a raw spinlock_t. The usage of
|
||||
quarantine_lock is confined to quarantine.c and the work performed while
|
||||
the lock is held is limited.
|
||||
|
||||
Signed-off-by: Clark Williams <williams@redhat.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
mm/kasan/quarantine.c | 18 +++++++++---------
|
||||
1 file changed, 9 insertions(+), 9 deletions(-)
|
||||
|
||||
--- a/mm/kasan/quarantine.c
|
||||
+++ b/mm/kasan/quarantine.c
|
||||
@@ -103,7 +103,7 @@ static int quarantine_head;
|
||||
static int quarantine_tail;
|
||||
/* Total size of all objects in global_quarantine across all batches. */
|
||||
static unsigned long quarantine_size;
|
||||
-static DEFINE_SPINLOCK(quarantine_lock);
|
||||
+static DEFINE_RAW_SPINLOCK(quarantine_lock);
|
||||
DEFINE_STATIC_SRCU(remove_cache_srcu);
|
||||
|
||||
/* Maximum size of the global queue. */
|
||||
@@ -190,7 +190,7 @@ void quarantine_put(struct kasan_free_me
|
||||
if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE)) {
|
||||
qlist_move_all(q, &temp);
|
||||
|
||||
- spin_lock(&quarantine_lock);
|
||||
+ raw_spin_lock(&quarantine_lock);
|
||||
WRITE_ONCE(quarantine_size, quarantine_size + temp.bytes);
|
||||
qlist_move_all(&temp, &global_quarantine[quarantine_tail]);
|
||||
if (global_quarantine[quarantine_tail].bytes >=
|
||||
@@ -203,7 +203,7 @@ void quarantine_put(struct kasan_free_me
|
||||
if (new_tail != quarantine_head)
|
||||
quarantine_tail = new_tail;
|
||||
}
|
||||
- spin_unlock(&quarantine_lock);
|
||||
+ raw_spin_unlock(&quarantine_lock);
|
||||
}
|
||||
|
||||
local_irq_restore(flags);
|
||||
@@ -230,7 +230,7 @@ void quarantine_reduce(void)
|
||||
* expected case).
|
||||
*/
|
||||
srcu_idx = srcu_read_lock(&remove_cache_srcu);
|
||||
- spin_lock_irqsave(&quarantine_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&quarantine_lock, flags);
|
||||
|
||||
/*
|
||||
* Update quarantine size in case of hotplug. Allocate a fraction of
|
||||
@@ -254,7 +254,7 @@ void quarantine_reduce(void)
|
||||
quarantine_head = 0;
|
||||
}
|
||||
|
||||
- spin_unlock_irqrestore(&quarantine_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&quarantine_lock, flags);
|
||||
|
||||
qlist_free_all(&to_free, NULL);
|
||||
srcu_read_unlock(&remove_cache_srcu, srcu_idx);
|
||||
@@ -310,17 +310,17 @@ void quarantine_remove_cache(struct kmem
|
||||
*/
|
||||
on_each_cpu(per_cpu_remove_cache, cache, 1);
|
||||
|
||||
- spin_lock_irqsave(&quarantine_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&quarantine_lock, flags);
|
||||
for (i = 0; i < QUARANTINE_BATCHES; i++) {
|
||||
if (qlist_empty(&global_quarantine[i]))
|
||||
continue;
|
||||
qlist_move_cache(&global_quarantine[i], &to_free, cache);
|
||||
/* Scanning whole quarantine can take a while. */
|
||||
- spin_unlock_irqrestore(&quarantine_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&quarantine_lock, flags);
|
||||
cond_resched();
|
||||
- spin_lock_irqsave(&quarantine_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&quarantine_lock, flags);
|
||||
}
|
||||
- spin_unlock_irqrestore(&quarantine_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&quarantine_lock, flags);
|
||||
|
||||
qlist_free_all(&to_free, cache);
|
||||
|
||||
@@ -0,0 +1,43 @@
|
||||
From: Paul E. McKenney <paulmck@linux.ibm.com>
|
||||
Date: Mon, 29 Oct 2018 11:53:01 +0100
|
||||
Subject: [PATCH] EXP rcu: Revert expedited GP parallelization cleverness
|
||||
|
||||
(Commit 258ba8e089db23f760139266c232f01bad73f85c from linux-rcu)
|
||||
|
||||
This commit reverts a series of commits starting with fcc635436501 ("rcu:
|
||||
Make expedited GPs handle CPU 0 being offline") and its successors, thus
|
||||
queueing each rcu_node structure's expedited grace-period initialization
|
||||
work on the first CPU of that rcu_node structure.
|
||||
|
||||
Suggested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/rcu/tree_exp.h | 9 +--------
|
||||
1 file changed, 1 insertion(+), 8 deletions(-)
|
||||
|
||||
--- a/kernel/rcu/tree_exp.h
|
||||
+++ b/kernel/rcu/tree_exp.h
|
||||
@@ -472,7 +472,6 @@ static void sync_rcu_exp_select_node_cpu
|
||||
static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
|
||||
smp_call_func_t func)
|
||||
{
|
||||
- int cpu;
|
||||
struct rcu_node *rnp;
|
||||
|
||||
trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("reset"));
|
||||
@@ -494,13 +493,7 @@ static void sync_rcu_exp_select_cpus(str
|
||||
continue;
|
||||
}
|
||||
INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus);
|
||||
- preempt_disable();
|
||||
- cpu = cpumask_next(rnp->grplo - 1, cpu_online_mask);
|
||||
- /* If all offline, queue the work on an unbound CPU. */
|
||||
- if (unlikely(cpu > rnp->grphi))
|
||||
- cpu = WORK_CPU_UNBOUND;
|
||||
- queue_work_on(cpu, rcu_par_gp_wq, &rnp->rew.rew_work);
|
||||
- preempt_enable();
|
||||
+ queue_work_on(rnp->grplo, rcu_par_gp_wq, &rnp->rew.rew_work);
|
||||
rnp->exp_need_flush = true;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,159 @@
|
||||
From: He Zhe <zhe.he@windriver.com>
|
||||
Date: Wed, 19 Dec 2018 16:30:57 +0100
|
||||
Subject: [PATCH] kmemleak: Turn kmemleak_lock to raw spinlock on RT
|
||||
|
||||
kmemleak_lock, as a rwlock on RT, can possibly be held in atomic context and
causes the following BUG.
|
||||
|
||||
BUG: scheduling while atomic: migration/15/132/0x00000002
|
||||
Preemption disabled at:
|
||||
[<ffffffff8c927c11>] cpu_stopper_thread+0x71/0x100
|
||||
CPU: 15 PID: 132 Comm: migration/15 Not tainted 4.19.0-rt1-preempt-rt #1
|
||||
Call Trace:
|
||||
schedule+0x3d/0xe0
|
||||
__rt_spin_lock+0x26/0x30
|
||||
__write_rt_lock+0x23/0x1a0
|
||||
rt_write_lock+0x2a/0x30
|
||||
find_and_remove_object+0x1e/0x80
|
||||
delete_object_full+0x10/0x20
|
||||
kmemleak_free+0x32/0x50
|
||||
kfree+0x104/0x1f0
|
||||
intel_pmu_cpu_dying+0x67/0x70
|
||||
x86_pmu_dying_cpu+0x1a/0x30
|
||||
cpuhp_invoke_callback+0x92/0x700
|
||||
take_cpu_down+0x70/0xa0
|
||||
multi_cpu_stop+0x62/0xc0
|
||||
cpu_stopper_thread+0x79/0x100
|
||||
smpboot_thread_fn+0x20f/0x2d0
|
||||
kthread+0x121/0x140
|
||||
|
||||
And on v4.18 stable tree the following call trace, caused by grabbing
|
||||
kmemleak_lock again, is also observed.
|
||||
|
||||
kernel BUG at kernel/locking/rtmutex.c:1048!
|
||||
CPU: 5 PID: 689 Comm: mkfs.ext4 Not tainted 4.18.16-rt9-preempt-rt #1
|
||||
Call Trace:
|
||||
rt_write_lock+0x2a/0x30
|
||||
create_object+0x17d/0x2b0
|
||||
kmemleak_alloc+0x34/0x50
|
||||
kmem_cache_alloc+0x146/0x220
|
||||
mempool_alloc_slab+0x15/0x20
|
||||
mempool_alloc+0x65/0x170
|
||||
sg_pool_alloc+0x21/0x60
|
||||
sg_alloc_table_chained+0x8b/0xb0
|
||||
…
|
||||
blk_flush_plug_list+0x204/0x230
|
||||
schedule+0x87/0xe0
|
||||
rt_write_lock+0x2a/0x30
|
||||
create_object+0x17d/0x2b0
|
||||
kmemleak_alloc+0x34/0x50
|
||||
__kmalloc_node+0x1cd/0x340
|
||||
alloc_request_size+0x30/0x70
|
||||
mempool_alloc+0x65/0x170
|
||||
get_request+0x4e3/0x8d0
|
||||
blk_queue_bio+0x153/0x470
|
||||
generic_make_request+0x1dc/0x3f0
|
||||
submit_bio+0x49/0x140
|
||||
…
|
||||
|
||||
kmemleak is an error-detecting feature; we would not expect as good performance
as without it. As there are no helpers for defining a raw rwlock, we turn
kmemleak_lock into a raw spinlock.
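
For reference, the pattern applied below in generic form: on PREEMPT_RT an
ordinary spinlock_t or rwlock_t becomes a sleeping lock, while a raw_spinlock_t
keeps spinning and stays usable in atomic context. A minimal sketch with
invented names (assumes <linux/spinlock.h>), not the kmemleak code itself:

	/* Sketch only: a raw spinlock that is safe to take in atomic context on RT. */
	static DEFINE_RAW_SPINLOCK(demo_lock);
	static unsigned long demo_count;

	static void demo_update(void)
	{
		unsigned long flags;

		raw_spin_lock_irqsave(&demo_lock, flags);	/* never sleeps, even on RT */
		demo_count++;
		raw_spin_unlock_irqrestore(&demo_lock, flags);
	}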
|
||||
|
||||
Signed-off-by: He Zhe <zhe.he@windriver.com>
|
||||
Cc: catalin.marinas@arm.com
|
||||
Cc: bigeasy@linutronix.de
|
||||
Cc: tglx@linutronix.de
|
||||
Cc: rostedt@goodmis.org
|
||||
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
|
||||
Link: https://lkml.kernel.org/r/1542877459-144382-1-git-send-email-zhe.he@windriver.com
|
||||
Link: https://lkml.kernel.org/r/20181218150744.GB20197@arrakis.emea.arm.com
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
mm/kmemleak.c | 20 ++++++++++----------
|
||||
1 file changed, 10 insertions(+), 10 deletions(-)
|
||||
|
||||
--- a/mm/kmemleak.c
|
||||
+++ b/mm/kmemleak.c
|
||||
@@ -26,7 +26,7 @@
|
||||
*
|
||||
* The following locks and mutexes are used by kmemleak:
|
||||
*
|
||||
- * - kmemleak_lock (rwlock): protects the object_list modifications and
|
||||
+ * - kmemleak_lock (raw spinlock): protects the object_list modifications and
|
||||
* accesses to the object_tree_root. The object_list is the main list
|
||||
* holding the metadata (struct kmemleak_object) for the allocated memory
|
||||
* blocks. The object_tree_root is a red black tree used to look-up
|
||||
@@ -197,7 +197,7 @@ static LIST_HEAD(gray_list);
|
||||
/* search tree for object boundaries */
|
||||
static struct rb_root object_tree_root = RB_ROOT;
|
||||
/* rw_lock protecting the access to object_list and object_tree_root */
|
||||
-static DEFINE_RWLOCK(kmemleak_lock);
|
||||
+static DEFINE_RAW_SPINLOCK(kmemleak_lock);
|
||||
|
||||
/* allocation caches for kmemleak internal data */
|
||||
static struct kmem_cache *object_cache;
|
||||
@@ -491,9 +491,9 @@ static struct kmemleak_object *find_and_
|
||||
struct kmemleak_object *object;
|
||||
|
||||
rcu_read_lock();
|
||||
- read_lock_irqsave(&kmemleak_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&kmemleak_lock, flags);
|
||||
object = lookup_object(ptr, alias);
|
||||
- read_unlock_irqrestore(&kmemleak_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&kmemleak_lock, flags);
|
||||
|
||||
/* check whether the object is still available */
|
||||
if (object && !get_object(object))
|
||||
@@ -513,13 +513,13 @@ static struct kmemleak_object *find_and_
|
||||
unsigned long flags;
|
||||
struct kmemleak_object *object;
|
||||
|
||||
- write_lock_irqsave(&kmemleak_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&kmemleak_lock, flags);
|
||||
object = lookup_object(ptr, alias);
|
||||
if (object) {
|
||||
rb_erase(&object->rb_node, &object_tree_root);
|
||||
list_del_rcu(&object->object_list);
|
||||
}
|
||||
- write_unlock_irqrestore(&kmemleak_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&kmemleak_lock, flags);
|
||||
|
||||
return object;
|
||||
}
|
||||
@@ -593,7 +593,7 @@ static struct kmemleak_object *create_ob
|
||||
/* kernel backtrace */
|
||||
object->trace_len = __save_stack_trace(object->trace);
|
||||
|
||||
- write_lock_irqsave(&kmemleak_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&kmemleak_lock, flags);
|
||||
|
||||
min_addr = min(min_addr, ptr);
|
||||
max_addr = max(max_addr, ptr + size);
|
||||
@@ -624,7 +624,7 @@ static struct kmemleak_object *create_ob
|
||||
|
||||
list_add_tail_rcu(&object->object_list, &object_list);
|
||||
out:
|
||||
- write_unlock_irqrestore(&kmemleak_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&kmemleak_lock, flags);
|
||||
return object;
|
||||
}
|
||||
|
||||
@@ -1310,7 +1310,7 @@ static void scan_block(void *_start, voi
|
||||
unsigned long *end = _end - (BYTES_PER_POINTER - 1);
|
||||
unsigned long flags;
|
||||
|
||||
- read_lock_irqsave(&kmemleak_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&kmemleak_lock, flags);
|
||||
for (ptr = start; ptr < end; ptr++) {
|
||||
struct kmemleak_object *object;
|
||||
unsigned long pointer;
|
||||
@@ -1367,7 +1367,7 @@ static void scan_block(void *_start, voi
|
||||
spin_unlock(&object->lock);
|
||||
}
|
||||
}
|
||||
- read_unlock_irqrestore(&kmemleak_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&kmemleak_lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -0,0 +1,127 @@
|
||||
Date: Fri, 28 Oct 2016 23:05:11 +0200
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
To: Trond Myklebust <trond.myklebust@primarydata.com>
|
||||
Cc: Anna Schumaker <anna.schumaker@netapp.com>,
|
||||
linux-nfs@vger.kernel.org, linux-kernel@vger.kernel.org,
|
||||
tglx@linutronix.de
|
||||
Subject: NFSv4: replace seqcount_t with a seqlock_t
|
||||
|
||||
The raw_write_seqcount_begin() in nfs4_reclaim_open_state() bugs me
because it maps to preempt_disable() in -RT which I can't have at this
point. So I took a look at the code.
The lockdep part was removed in commit abbec2da13f0 ("NFS: Use
raw_write_seqcount_begin/end int nfs4_reclaim_open_state") because
lockdep complained. The whole seqcount thing was introduced in commit
c137afabe330 ("NFSv4: Allow the state manager to mark an open_owner as
being recovered").
The recovery thread runs only once.
write_seqlock() does not work on !RT because it disables preemption and the
writer side is preemptible (it has to remain so despite the fact that it will
block readers).
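
For context, the read/retry and write sides of a seqlock_t look roughly like
the sketch below (made-up names, assumes <linux/seqlock.h>; this is not the NFS
code, just the shape of the API the patch switches to):

	static DEFINE_SEQLOCK(demo_seqlock);
	static int demo_value;

	static int demo_read(void)
	{
		unsigned int seq;
		int val;

		do {
			seq = read_seqbegin(&demo_seqlock);	/* snapshot the sequence */
			val = demo_value;			/* read protected data */
		} while (read_seqretry(&demo_seqlock, seq));	/* retry if a writer ran */

		return val;
	}

	static void demo_write(int val)
	{
		write_seqlock(&demo_seqlock);	/* serializes writers, bumps the sequence */
		demo_value = val;
		write_sequnlock(&demo_seqlock);
	}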
|
||||
|
||||
Reported-by: kernel test robot <xiaolong.ye@intel.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
fs/nfs/delegation.c | 4 ++--
|
||||
fs/nfs/nfs4_fs.h | 2 +-
|
||||
fs/nfs/nfs4proc.c | 4 ++--
|
||||
fs/nfs/nfs4state.c | 22 ++++++++++++++++------
|
||||
4 files changed, 21 insertions(+), 11 deletions(-)
|
||||
|
||||
--- a/fs/nfs/delegation.c
|
||||
+++ b/fs/nfs/delegation.c
|
||||
@@ -152,11 +152,11 @@ static int nfs_delegation_claim_opens(st
|
||||
sp = state->owner;
|
||||
/* Block nfs4_proc_unlck */
|
||||
mutex_lock(&sp->so_delegreturn_mutex);
|
||||
- seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
|
||||
+ seq = read_seqbegin(&sp->so_reclaim_seqlock);
|
||||
err = nfs4_open_delegation_recall(ctx, state, stateid, type);
|
||||
if (!err)
|
||||
err = nfs_delegation_claim_locks(ctx, state, stateid);
|
||||
- if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
|
||||
+ if (!err && read_seqretry(&sp->so_reclaim_seqlock, seq))
|
||||
err = -EAGAIN;
|
||||
mutex_unlock(&sp->so_delegreturn_mutex);
|
||||
put_nfs_open_context(ctx);
|
||||
--- a/fs/nfs/nfs4_fs.h
|
||||
+++ b/fs/nfs/nfs4_fs.h
|
||||
@@ -114,7 +114,7 @@ struct nfs4_state_owner {
|
||||
unsigned long so_flags;
|
||||
struct list_head so_states;
|
||||
struct nfs_seqid_counter so_seqid;
|
||||
- seqcount_t so_reclaim_seqcount;
|
||||
+ seqlock_t so_reclaim_seqlock;
|
||||
struct mutex so_delegreturn_mutex;
|
||||
};
|
||||
|
||||
--- a/fs/nfs/nfs4proc.c
|
||||
+++ b/fs/nfs/nfs4proc.c
|
||||
@@ -2859,7 +2859,7 @@ static int _nfs4_open_and_get_state(stru
|
||||
unsigned int seq;
|
||||
int ret;
|
||||
|
||||
- seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
|
||||
+ seq = raw_seqcount_begin(&sp->so_reclaim_seqlock.seqcount);
|
||||
|
||||
ret = _nfs4_proc_open(opendata, ctx);
|
||||
if (ret != 0)
|
||||
@@ -2900,7 +2900,7 @@ static int _nfs4_open_and_get_state(stru
|
||||
|
||||
if (d_inode(dentry) == state->inode) {
|
||||
nfs_inode_attach_open_context(ctx);
|
||||
- if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
|
||||
+ if (read_seqretry(&sp->so_reclaim_seqlock, seq))
|
||||
nfs4_schedule_stateid_recovery(server, state);
|
||||
}
|
||||
|
||||
--- a/fs/nfs/nfs4state.c
|
||||
+++ b/fs/nfs/nfs4state.c
|
||||
@@ -511,7 +511,7 @@ nfs4_alloc_state_owner(struct nfs_server
|
||||
nfs4_init_seqid_counter(&sp->so_seqid);
|
||||
atomic_set(&sp->so_count, 1);
|
||||
INIT_LIST_HEAD(&sp->so_lru);
|
||||
- seqcount_init(&sp->so_reclaim_seqcount);
|
||||
+ seqlock_init(&sp->so_reclaim_seqlock);
|
||||
mutex_init(&sp->so_delegreturn_mutex);
|
||||
return sp;
|
||||
}
|
||||
@@ -1564,8 +1564,12 @@ static int nfs4_reclaim_open_state(struc
|
||||
* recovering after a network partition or a reboot from a
|
||||
* server that doesn't support a grace period.
|
||||
*/
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+ write_seqlock(&sp->so_reclaim_seqlock);
|
||||
+#else
|
||||
+ write_seqcount_begin(&sp->so_reclaim_seqlock.seqcount);
|
||||
+#endif
|
||||
spin_lock(&sp->so_lock);
|
||||
- raw_write_seqcount_begin(&sp->so_reclaim_seqcount);
|
||||
restart:
|
||||
list_for_each_entry(state, &sp->so_states, open_states) {
|
||||
if (!test_and_clear_bit(ops->state_flag_bit, &state->flags))
|
||||
@@ -1652,14 +1656,20 @@ static int nfs4_reclaim_open_state(struc
|
||||
spin_lock(&sp->so_lock);
|
||||
goto restart;
|
||||
}
|
||||
- raw_write_seqcount_end(&sp->so_reclaim_seqcount);
|
||||
spin_unlock(&sp->so_lock);
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+ write_sequnlock(&sp->so_reclaim_seqlock);
|
||||
+#else
|
||||
+ write_seqcount_end(&sp->so_reclaim_seqlock.seqcount);
|
||||
+#endif
|
||||
return 0;
|
||||
out_err:
|
||||
nfs4_put_open_state(state);
|
||||
- spin_lock(&sp->so_lock);
|
||||
- raw_write_seqcount_end(&sp->so_reclaim_seqcount);
|
||||
- spin_unlock(&sp->so_lock);
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+ write_sequnlock(&sp->so_reclaim_seqlock);
|
||||
+#else
|
||||
+ write_seqcount_end(&sp->so_reclaim_seqlock.seqcount);
|
||||
+#endif
|
||||
return status;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,733 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Tue, 4 Apr 2017 12:50:16 +0200
|
||||
Subject: [PATCH] kernel: sched: Provide a pointer to the valid CPU mask
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
In commit 4b53a3412d66 ("sched/core: Remove the tsk_nr_cpus_allowed()
|
||||
wrapper") the tsk_nr_cpus_allowed() wrapper was removed. There was not
|
||||
much difference in !RT but in RT we used this to implement
|
||||
migrate_disable(). Within a migrate_disable() section the CPU mask is
restricted to a single CPU while the "normal" CPU mask remains untouched.
|
||||
|
||||
As an alternative implementation Ingo suggested to use
|
||||
struct task_struct {
|
||||
const cpumask_t *cpus_ptr;
|
||||
cpumask_t cpus_mask;
|
||||
};
|
||||
with
|
||||
t->cpus_allowed_ptr = &t->cpus_allowed;
|
||||
|
||||
In -RT we then can switch the cpus_ptr to
|
||||
t->cpus_allowed_ptr = &cpumask_of(task_cpu(p));
|
||||
|
||||
in a migration disabled region. The rules are simple:
|
||||
- Code that 'uses' ->cpus_allowed would use the pointer.
|
||||
- Code that 'modifies' ->cpus_allowed would use the direct mask.
|
||||
|
||||
While converting the existing users I tried to stick with the rules
above; however… well, mostly CPUFREQ tries to temporarily switch the CPU
mask to do something on a certain CPU and then switches the mask back to
its original value. So in theory `cpus_ptr' could or should be used.
|
||||
However if this is invoked in a migration disabled region (which is not
|
||||
the case because it would require something like preempt_disable() and
|
||||
set_cpus_allowed_ptr() might sleep so it can't be) then the "restore"
|
||||
part would restore the wrong mask. So it only looks strange and I go for
|
||||
the pointer…
|
||||
|
||||
Some drivers copy the cpumask without cpumask_copy() and others use
|
||||
cpumask_copy but without alloc_cpumask_var(). I did not fix those as
|
||||
part of this, could do this as a follow up…
|
||||
|
||||
So is this the way we want it?
|
||||
Is the usage of `cpus_ptr' vs `cpus_mask' for the set + restore part
|
||||
(see cpufreq users) what we want? At some point it looks like they
|
||||
should use a different interface for what they are doing. I am not sure why
switching to a certain CPU is important, but maybe it could be done via a
workqueue from the CPUFREQ core (so we have a comment describing why we are
doing this and a get_online_cpus() to ensure that the CPU does not go
offline too early).
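
As a rough illustration of the use-the-pointer / modify-the-mask rule above
(the demo_* helpers are invented; only the cpus_ptr, cpus_mask and
nr_cpus_allowed fields come from this patch):

	static bool demo_task_may_run_on(struct task_struct *p, int cpu)
	{
		/* readers go through the pointer, which migrate_disable() may narrow */
		return cpumask_test_cpu(cpu, p->cpus_ptr);
	}

	static void demo_set_allowed(struct task_struct *p, const struct cpumask *new_mask)
	{
		/* writers update the real mask; the pointer normally refers to it */
		cpumask_copy(&p->cpus_mask, new_mask);
		p->nr_cpus_allowed = cpumask_weight(new_mask);
	}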
|
||||
|
||||
Cc: Peter Zijlstra <peterz@infradead.org>
|
||||
Cc: Thomas Gleixner <tglx@linutronix.de>
|
||||
Cc: Mike Galbraith <efault@gmx.de>
|
||||
Cc: Ingo Molnar <mingo@elte.hu>
|
||||
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
arch/ia64/kernel/mca.c | 2 -
|
||||
arch/mips/include/asm/switch_to.h | 4 +-
|
||||
arch/mips/kernel/mips-mt-fpaff.c | 2 -
|
||||
arch/mips/kernel/traps.c | 6 ++--
|
||||
arch/powerpc/platforms/cell/spufs/sched.c | 2 -
|
||||
arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c | 2 -
|
||||
drivers/infiniband/hw/hfi1/affinity.c | 6 ++--
|
||||
drivers/infiniband/hw/hfi1/sdma.c | 3 --
|
||||
drivers/infiniband/hw/qib/qib_file_ops.c | 7 ++--
|
||||
fs/proc/array.c | 4 +-
|
||||
include/linux/sched.h | 5 ++-
|
||||
init/init_task.c | 3 +-
|
||||
kernel/cgroup/cpuset.c | 2 -
|
||||
kernel/fork.c | 2 +
|
||||
kernel/sched/core.c | 40 ++++++++++++++--------------
|
||||
kernel/sched/cpudeadline.c | 4 +-
|
||||
kernel/sched/cpupri.c | 4 +-
|
||||
kernel/sched/deadline.c | 6 ++--
|
||||
kernel/sched/fair.c | 32 +++++++++++-----------
|
||||
kernel/sched/rt.c | 4 +-
|
||||
kernel/trace/trace_hwlat.c | 2 -
|
||||
lib/smp_processor_id.c | 2 -
|
||||
samples/trace_events/trace-events-sample.c | 2 -
|
||||
23 files changed, 74 insertions(+), 72 deletions(-)
|
||||
|
||||
--- a/arch/ia64/kernel/mca.c
|
||||
+++ b/arch/ia64/kernel/mca.c
|
||||
@@ -1824,7 +1824,7 @@ format_mca_init_stack(void *mca_data, un
|
||||
ti->cpu = cpu;
|
||||
p->stack = ti;
|
||||
p->state = TASK_UNINTERRUPTIBLE;
|
||||
- cpumask_set_cpu(cpu, &p->cpus_allowed);
|
||||
+ cpumask_set_cpu(cpu, &p->cpus_mask);
|
||||
INIT_LIST_HEAD(&p->tasks);
|
||||
p->parent = p->real_parent = p->group_leader = p;
|
||||
INIT_LIST_HEAD(&p->children);
|
||||
--- a/arch/mips/include/asm/switch_to.h
|
||||
+++ b/arch/mips/include/asm/switch_to.h
|
||||
@@ -42,7 +42,7 @@ extern struct task_struct *ll_task;
|
||||
* inline to try to keep the overhead down. If we have been forced to run on
|
||||
* a "CPU" with an FPU because of a previous high level of FP computation,
|
||||
* but did not actually use the FPU during the most recent time-slice (CU1
|
||||
- * isn't set), we undo the restriction on cpus_allowed.
|
||||
+ * isn't set), we undo the restriction on cpus_mask.
|
||||
*
|
||||
* We're not calling set_cpus_allowed() here, because we have no need to
|
||||
* force prompt migration - we're already switching the current CPU to a
|
||||
@@ -57,7 +57,7 @@ do { \
|
||||
test_ti_thread_flag(__prev_ti, TIF_FPUBOUND) && \
|
||||
(!(KSTK_STATUS(prev) & ST0_CU1))) { \
|
||||
clear_ti_thread_flag(__prev_ti, TIF_FPUBOUND); \
|
||||
- prev->cpus_allowed = prev->thread.user_cpus_allowed; \
|
||||
+ prev->cpus_mask = prev->thread.user_cpus_allowed; \
|
||||
} \
|
||||
next->thread.emulated_fp = 0; \
|
||||
} while(0)
|
||||
--- a/arch/mips/kernel/mips-mt-fpaff.c
|
||||
+++ b/arch/mips/kernel/mips-mt-fpaff.c
|
||||
@@ -177,7 +177,7 @@ asmlinkage long mipsmt_sys_sched_getaffi
|
||||
if (retval)
|
||||
goto out_unlock;
|
||||
|
||||
- cpumask_or(&allowed, &p->thread.user_cpus_allowed, &p->cpus_allowed);
|
||||
+ cpumask_or(&allowed, &p->thread.user_cpus_allowed, p->cpus_ptr);
|
||||
cpumask_and(&mask, &allowed, cpu_active_mask);
|
||||
|
||||
out_unlock:
|
||||
--- a/arch/mips/kernel/traps.c
|
||||
+++ b/arch/mips/kernel/traps.c
|
||||
@@ -1174,12 +1174,12 @@ static void mt_ase_fp_affinity(void)
|
||||
* restricted the allowed set to exclude any CPUs with FPUs,
|
||||
* we'll skip the procedure.
|
||||
*/
|
||||
- if (cpumask_intersects(¤t->cpus_allowed, &mt_fpu_cpumask)) {
|
||||
+ if (cpumask_intersects(¤t->cpus_mask, &mt_fpu_cpumask)) {
|
||||
cpumask_t tmask;
|
||||
|
||||
current->thread.user_cpus_allowed
|
||||
- = current->cpus_allowed;
|
||||
- cpumask_and(&tmask, ¤t->cpus_allowed,
|
||||
+ = current->cpus_mask;
|
||||
+ cpumask_and(&tmask, ¤t->cpus_mask,
|
||||
&mt_fpu_cpumask);
|
||||
set_cpus_allowed_ptr(current, &tmask);
|
||||
set_thread_flag(TIF_FPUBOUND);
|
||||
--- a/arch/powerpc/platforms/cell/spufs/sched.c
|
||||
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
|
||||
@@ -141,7 +141,7 @@ void __spu_update_sched_info(struct spu_
|
||||
* runqueue. The context will be rescheduled on the proper node
|
||||
* if it is timesliced or preempted.
|
||||
*/
|
||||
- cpumask_copy(&ctx->cpus_allowed, ¤t->cpus_allowed);
|
||||
+ cpumask_copy(&ctx->cpus_allowed, current->cpus_ptr);
|
||||
|
||||
/* Save the current cpu id for spu interrupt routing. */
|
||||
ctx->last_ran = raw_smp_processor_id();
|
||||
--- a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
|
||||
+++ b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
|
||||
@@ -1435,7 +1435,7 @@ static int pseudo_lock_dev_mmap(struct f
|
||||
* may be scheduled elsewhere and invalidate entries in the
|
||||
* pseudo-locked region.
|
||||
*/
|
||||
- if (!cpumask_subset(¤t->cpus_allowed, &plr->d->cpu_mask)) {
|
||||
+ if (!cpumask_subset(current->cpus_ptr, &plr->d->cpu_mask)) {
|
||||
mutex_unlock(&rdtgroup_mutex);
|
||||
return -EINVAL;
|
||||
}
|
||||
--- a/drivers/infiniband/hw/hfi1/affinity.c
|
||||
+++ b/drivers/infiniband/hw/hfi1/affinity.c
|
||||
@@ -1037,7 +1037,7 @@ int hfi1_get_proc_affinity(int node)
|
||||
struct hfi1_affinity_node *entry;
|
||||
cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask;
|
||||
const struct cpumask *node_mask,
|
||||
- *proc_mask = ¤t->cpus_allowed;
|
||||
+ *proc_mask = current->cpus_ptr;
|
||||
struct hfi1_affinity_node_list *affinity = &node_affinity;
|
||||
struct cpu_mask_set *set = &affinity->proc;
|
||||
|
||||
@@ -1045,7 +1045,7 @@ int hfi1_get_proc_affinity(int node)
|
||||
* check whether process/context affinity has already
|
||||
* been set
|
||||
*/
|
||||
- if (cpumask_weight(proc_mask) == 1) {
|
||||
+ if (current->nr_cpus_allowed == 1) {
|
||||
hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl",
|
||||
current->pid, current->comm,
|
||||
cpumask_pr_args(proc_mask));
|
||||
@@ -1056,7 +1056,7 @@ int hfi1_get_proc_affinity(int node)
|
||||
cpu = cpumask_first(proc_mask);
|
||||
cpumask_set_cpu(cpu, &set->used);
|
||||
goto done;
|
||||
- } else if (cpumask_weight(proc_mask) < cpumask_weight(&set->mask)) {
|
||||
+ } else if (current->nr_cpus_allowed < cpumask_weight(&set->mask)) {
|
||||
hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl",
|
||||
current->pid, current->comm,
|
||||
cpumask_pr_args(proc_mask));
|
||||
--- a/drivers/infiniband/hw/hfi1/sdma.c
|
||||
+++ b/drivers/infiniband/hw/hfi1/sdma.c
|
||||
@@ -855,14 +855,13 @@ struct sdma_engine *sdma_select_user_eng
|
||||
{
|
||||
struct sdma_rht_node *rht_node;
|
||||
struct sdma_engine *sde = NULL;
|
||||
- const struct cpumask *current_mask = ¤t->cpus_allowed;
|
||||
unsigned long cpu_id;
|
||||
|
||||
/*
|
||||
* To ensure that always the same sdma engine(s) will be
|
||||
* selected make sure the process is pinned to this CPU only.
|
||||
*/
|
||||
- if (cpumask_weight(current_mask) != 1)
|
||||
+ if (current->nr_cpus_allowed != 1)
|
||||
goto out;
|
||||
|
||||
cpu_id = smp_processor_id();
|
||||
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
|
||||
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
|
||||
@@ -1142,7 +1142,7 @@ static __poll_t qib_poll(struct file *fp
|
||||
static void assign_ctxt_affinity(struct file *fp, struct qib_devdata *dd)
|
||||
{
|
||||
struct qib_filedata *fd = fp->private_data;
|
||||
- const unsigned int weight = cpumask_weight(¤t->cpus_allowed);
|
||||
+ const unsigned int weight = current->nr_cpus_allowed;
|
||||
const struct cpumask *local_mask = cpumask_of_pcibus(dd->pcidev->bus);
|
||||
int local_cpu;
|
||||
|
||||
@@ -1623,9 +1623,8 @@ static int qib_assign_ctxt(struct file *
|
||||
ret = find_free_ctxt(i_minor - 1, fp, uinfo);
|
||||
else {
|
||||
int unit;
|
||||
- const unsigned int cpu = cpumask_first(¤t->cpus_allowed);
|
||||
- const unsigned int weight =
|
||||
- cpumask_weight(¤t->cpus_allowed);
|
||||
+ const unsigned int cpu = cpumask_first(current->cpus_ptr);
|
||||
+ const unsigned int weight = current->nr_cpus_allowed;
|
||||
|
||||
if (weight == 1 && !test_bit(cpu, qib_cpulist))
|
||||
if (!find_hca(cpu, &unit) && unit >= 0)
|
||||
--- a/fs/proc/array.c
|
||||
+++ b/fs/proc/array.c
|
||||
@@ -381,9 +381,9 @@ static inline void task_context_switch_c
|
||||
static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
|
||||
{
|
||||
seq_printf(m, "Cpus_allowed:\t%*pb\n",
|
||||
- cpumask_pr_args(&task->cpus_allowed));
|
||||
+ cpumask_pr_args(task->cpus_ptr));
|
||||
seq_printf(m, "Cpus_allowed_list:\t%*pbl\n",
|
||||
- cpumask_pr_args(&task->cpus_allowed));
|
||||
+ cpumask_pr_args(task->cpus_ptr));
|
||||
}
|
||||
|
||||
static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm)
|
||||
--- a/include/linux/sched.h
|
||||
+++ b/include/linux/sched.h
|
||||
@@ -660,7 +660,8 @@ struct task_struct {
|
||||
|
||||
unsigned int policy;
|
||||
int nr_cpus_allowed;
|
||||
- cpumask_t cpus_allowed;
|
||||
+ const cpumask_t *cpus_ptr;
|
||||
+ cpumask_t cpus_mask;
|
||||
|
||||
#ifdef CONFIG_PREEMPT_RCU
|
||||
int rcu_read_lock_nesting;
|
||||
@@ -1390,7 +1391,7 @@ extern struct pid *cad_pid;
|
||||
#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
|
||||
#define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */
|
||||
#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
|
||||
-#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */
|
||||
+#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */
|
||||
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
|
||||
#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
|
||||
#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */
|
||||
--- a/init/init_task.c
|
||||
+++ b/init/init_task.c
|
||||
@@ -71,7 +71,8 @@ struct task_struct init_task
|
||||
.static_prio = MAX_PRIO - 20,
|
||||
.normal_prio = MAX_PRIO - 20,
|
||||
.policy = SCHED_NORMAL,
|
||||
- .cpus_allowed = CPU_MASK_ALL,
|
||||
+ .cpus_ptr = &init_task.cpus_mask,
|
||||
+ .cpus_mask = CPU_MASK_ALL,
|
||||
.nr_cpus_allowed= NR_CPUS,
|
||||
.mm = NULL,
|
||||
.active_mm = &init_mm,
|
||||
--- a/kernel/cgroup/cpuset.c
|
||||
+++ b/kernel/cgroup/cpuset.c
|
||||
@@ -2090,7 +2090,7 @@ static void cpuset_fork(struct task_stru
|
||||
if (task_css_is_root(task, cpuset_cgrp_id))
|
||||
return;
|
||||
|
||||
- set_cpus_allowed_ptr(task, ¤t->cpus_allowed);
|
||||
+ set_cpus_allowed_ptr(task, current->cpus_ptr);
|
||||
task->mems_allowed = current->mems_allowed;
|
||||
}
|
||||
|
||||
--- a/kernel/fork.c
|
||||
+++ b/kernel/fork.c
|
||||
@@ -845,6 +845,8 @@ static struct task_struct *dup_task_stru
|
||||
#ifdef CONFIG_STACKPROTECTOR
|
||||
tsk->stack_canary = get_random_canary();
|
||||
#endif
|
||||
+ if (orig->cpus_ptr == &orig->cpus_mask)
|
||||
+ tsk->cpus_ptr = &tsk->cpus_mask;
|
||||
|
||||
/*
|
||||
* One for us, one for whoever does the "release_task()" (usually
|
||||
--- a/kernel/sched/core.c
|
||||
+++ b/kernel/sched/core.c
|
||||
@@ -876,7 +876,7 @@ static inline bool is_per_cpu_kthread(st
|
||||
*/
|
||||
static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
|
||||
{
|
||||
- if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
|
||||
+ if (!cpumask_test_cpu(cpu, p->cpus_ptr))
|
||||
return false;
|
||||
|
||||
if (is_per_cpu_kthread(p))
|
||||
@@ -971,7 +971,7 @@ static int migration_cpu_stop(void *data
|
||||
local_irq_disable();
|
||||
/*
|
||||
* We need to explicitly wake pending tasks before running
|
||||
- * __migrate_task() such that we will not miss enforcing cpus_allowed
|
||||
+ * __migrate_task() such that we will not miss enforcing cpus_ptr
|
||||
* during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test.
|
||||
*/
|
||||
sched_ttwu_pending();
|
||||
@@ -1002,7 +1002,7 @@ static int migration_cpu_stop(void *data
|
||||
*/
|
||||
void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask)
|
||||
{
|
||||
- cpumask_copy(&p->cpus_allowed, new_mask);
|
||||
+ cpumask_copy(&p->cpus_mask, new_mask);
|
||||
p->nr_cpus_allowed = cpumask_weight(new_mask);
|
||||
}
|
||||
|
||||
@@ -1072,7 +1072,7 @@ static int __set_cpus_allowed_ptr(struct
|
||||
goto out;
|
||||
}
|
||||
|
||||
- if (cpumask_equal(&p->cpus_allowed, new_mask))
|
||||
+ if (cpumask_equal(p->cpus_ptr, new_mask))
|
||||
goto out;
|
||||
|
||||
if (!cpumask_intersects(new_mask, cpu_valid_mask)) {
|
||||
@@ -1235,10 +1235,10 @@ static int migrate_swap_stop(void *data)
|
||||
if (task_cpu(arg->src_task) != arg->src_cpu)
|
||||
goto unlock;
|
||||
|
||||
- if (!cpumask_test_cpu(arg->dst_cpu, &arg->src_task->cpus_allowed))
|
||||
+ if (!cpumask_test_cpu(arg->dst_cpu, arg->src_task->cpus_ptr))
|
||||
goto unlock;
|
||||
|
||||
- if (!cpumask_test_cpu(arg->src_cpu, &arg->dst_task->cpus_allowed))
|
||||
+ if (!cpumask_test_cpu(arg->src_cpu, arg->dst_task->cpus_ptr))
|
||||
goto unlock;
|
||||
|
||||
__migrate_swap_task(arg->src_task, arg->dst_cpu);
|
||||
@@ -1280,10 +1280,10 @@ int migrate_swap(struct task_struct *cur
|
||||
if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu))
|
||||
goto out;
|
||||
|
||||
- if (!cpumask_test_cpu(arg.dst_cpu, &arg.src_task->cpus_allowed))
|
||||
+ if (!cpumask_test_cpu(arg.dst_cpu, arg.src_task->cpus_ptr))
|
||||
goto out;
|
||||
|
||||
- if (!cpumask_test_cpu(arg.src_cpu, &arg.dst_task->cpus_allowed))
|
||||
+ if (!cpumask_test_cpu(arg.src_cpu, arg.dst_task->cpus_ptr))
|
||||
goto out;
|
||||
|
||||
trace_sched_swap_numa(cur, arg.src_cpu, p, arg.dst_cpu);
|
||||
@@ -1428,7 +1428,7 @@ void kick_process(struct task_struct *p)
|
||||
EXPORT_SYMBOL_GPL(kick_process);
|
||||
|
||||
/*
|
||||
- * ->cpus_allowed is protected by both rq->lock and p->pi_lock
|
||||
+ * ->cpus_ptr is protected by both rq->lock and p->pi_lock
|
||||
*
|
||||
* A few notes on cpu_active vs cpu_online:
|
||||
*
|
||||
@@ -1468,14 +1468,14 @@ static int select_fallback_rq(int cpu, s
|
||||
for_each_cpu(dest_cpu, nodemask) {
|
||||
if (!cpu_active(dest_cpu))
|
||||
continue;
|
||||
- if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
|
||||
+ if (cpumask_test_cpu(dest_cpu, p->cpus_ptr))
|
||||
return dest_cpu;
|
||||
}
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
/* Any allowed, online CPU? */
|
||||
- for_each_cpu(dest_cpu, &p->cpus_allowed) {
|
||||
+ for_each_cpu(dest_cpu, p->cpus_ptr) {
|
||||
if (!is_cpu_allowed(p, dest_cpu))
|
||||
continue;
|
||||
|
||||
@@ -1519,7 +1519,7 @@ static int select_fallback_rq(int cpu, s
|
||||
}
|
||||
|
||||
/*
|
||||
- * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable.
|
||||
+ * The caller (fork, wakeup) owns p->pi_lock, ->cpus_ptr is stable.
|
||||
*/
|
||||
static inline
|
||||
int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
|
||||
@@ -1529,11 +1529,11 @@ int select_task_rq(struct task_struct *p
|
||||
if (p->nr_cpus_allowed > 1)
|
||||
cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
|
||||
else
|
||||
- cpu = cpumask_any(&p->cpus_allowed);
|
||||
+ cpu = cpumask_any(p->cpus_ptr);
|
||||
|
||||
/*
|
||||
* In order not to call set_task_cpu() on a blocking task we need
|
||||
- * to rely on ttwu() to place the task on a valid ->cpus_allowed
|
||||
+ * to rely on ttwu() to place the task on a valid ->cpus_ptr
|
||||
* CPU.
|
||||
*
|
||||
* Since this is common to all placement strategies, this lives here.
|
||||
@@ -2400,7 +2400,7 @@ void wake_up_new_task(struct task_struct
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
* Fork balancing, do it here and not earlier because:
|
||||
- * - cpus_allowed can change in the fork path
|
||||
+ * - cpus_ptr can change in the fork path
|
||||
* - any previously selected CPU might disappear through hotplug
|
||||
*
|
||||
* Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq,
|
||||
@@ -4273,7 +4273,7 @@ static int __sched_setscheduler(struct t
|
||||
* the entire root_domain to become SCHED_DEADLINE. We
|
||||
* will also fail if there's no bandwidth available.
|
||||
*/
|
||||
- if (!cpumask_subset(span, &p->cpus_allowed) ||
|
||||
+ if (!cpumask_subset(span, p->cpus_ptr) ||
|
||||
rq->rd->dl_bw.bw == 0) {
|
||||
task_rq_unlock(rq, p, &rf);
|
||||
return -EPERM;
|
||||
@@ -4872,7 +4872,7 @@ long sched_getaffinity(pid_t pid, struct
|
||||
goto out_unlock;
|
||||
|
||||
raw_spin_lock_irqsave(&p->pi_lock, flags);
|
||||
- cpumask_and(mask, &p->cpus_allowed, cpu_active_mask);
|
||||
+ cpumask_and(mask, &p->cpus_mask, cpu_active_mask);
|
||||
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
|
||||
|
||||
out_unlock:
|
||||
@@ -5452,7 +5452,7 @@ int task_can_attach(struct task_struct *
|
||||
* allowed nodes is unnecessary. Thus, cpusets are not
|
||||
* applicable for such threads. This prevents checking for
|
||||
* success of set_cpus_allowed_ptr() on all attached tasks
|
||||
- * before cpus_allowed may be changed.
|
||||
+ * before cpus_mask may be changed.
|
||||
*/
|
||||
if (p->flags & PF_NO_SETAFFINITY) {
|
||||
ret = -EINVAL;
|
||||
@@ -5479,7 +5479,7 @@ int migrate_task_to(struct task_struct *
|
||||
if (curr_cpu == target_cpu)
|
||||
return 0;
|
||||
|
||||
- if (!cpumask_test_cpu(target_cpu, &p->cpus_allowed))
|
||||
+ if (!cpumask_test_cpu(target_cpu, p->cpus_ptr))
|
||||
return -EINVAL;
|
||||
|
||||
/* TODO: This is not properly updating schedstats */
|
||||
@@ -5617,7 +5617,7 @@ static void migrate_tasks(struct rq *dea
|
||||
put_prev_task(rq, next);
|
||||
|
||||
/*
|
||||
- * Rules for changing task_struct::cpus_allowed are holding
|
||||
+ * Rules for changing task_struct::cpus_mask are holding
|
||||
* both pi_lock and rq->lock, such that holding either
|
||||
* stabilizes the mask.
|
||||
*
|
||||
--- a/kernel/sched/cpudeadline.c
|
||||
+++ b/kernel/sched/cpudeadline.c
|
||||
@@ -124,14 +124,14 @@ int cpudl_find(struct cpudl *cp, struct
|
||||
const struct sched_dl_entity *dl_se = &p->dl;
|
||||
|
||||
if (later_mask &&
|
||||
- cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) {
|
||||
+ cpumask_and(later_mask, cp->free_cpus, p->cpus_ptr)) {
|
||||
return 1;
|
||||
} else {
|
||||
int best_cpu = cpudl_maximum(cp);
|
||||
|
||||
WARN_ON(best_cpu != -1 && !cpu_present(best_cpu));
|
||||
|
||||
- if (cpumask_test_cpu(best_cpu, &p->cpus_allowed) &&
|
||||
+ if (cpumask_test_cpu(best_cpu, p->cpus_ptr) &&
|
||||
dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
|
||||
if (later_mask)
|
||||
cpumask_set_cpu(best_cpu, later_mask);
|
||||
--- a/kernel/sched/cpupri.c
|
||||
+++ b/kernel/sched/cpupri.c
|
||||
@@ -98,11 +98,11 @@ int cpupri_find(struct cpupri *cp, struc
|
||||
if (skip)
|
||||
continue;
|
||||
|
||||
- if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
|
||||
+ if (cpumask_any_and(p->cpus_ptr, vec->mask) >= nr_cpu_ids)
|
||||
continue;
|
||||
|
||||
if (lowest_mask) {
|
||||
- cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
|
||||
+ cpumask_and(lowest_mask, p->cpus_ptr, vec->mask);
|
||||
|
||||
/*
|
||||
* We have to ensure that we have at least one bit
|
||||
--- a/kernel/sched/deadline.c
|
||||
+++ b/kernel/sched/deadline.c
|
||||
@@ -539,7 +539,7 @@ static struct rq *dl_task_offline_migrat
|
||||
* If we cannot preempt any rq, fall back to pick any
|
||||
* online CPU:
|
||||
*/
|
||||
- cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
|
||||
+ cpu = cpumask_any_and(cpu_active_mask, p->cpus_ptr);
|
||||
if (cpu >= nr_cpu_ids) {
|
||||
/*
|
||||
* Failed to find any suitable CPU.
|
||||
@@ -1824,7 +1824,7 @@ static void set_curr_task_dl(struct rq *
|
||||
static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
|
||||
{
|
||||
if (!task_running(rq, p) &&
|
||||
- cpumask_test_cpu(cpu, &p->cpus_allowed))
|
||||
+ cpumask_test_cpu(cpu, p->cpus_ptr))
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
@@ -1974,7 +1974,7 @@ static struct rq *find_lock_later_rq(str
|
||||
/* Retry if something changed. */
|
||||
if (double_lock_balance(rq, later_rq)) {
|
||||
if (unlikely(task_rq(task) != rq ||
|
||||
- !cpumask_test_cpu(later_rq->cpu, &task->cpus_allowed) ||
|
||||
+ !cpumask_test_cpu(later_rq->cpu, task->cpus_ptr) ||
|
||||
task_running(rq, task) ||
|
||||
!dl_task(task) ||
|
||||
!task_on_rq_queued(task))) {
|
||||
--- a/kernel/sched/fair.c
|
||||
+++ b/kernel/sched/fair.c
|
||||
@@ -1630,7 +1630,7 @@ static void task_numa_compare(struct tas
|
||||
* be incurred if the tasks were swapped.
|
||||
*/
|
||||
/* Skip this swap candidate if cannot move to the source cpu */
|
||||
- if (!cpumask_test_cpu(env->src_cpu, &cur->cpus_allowed))
|
||||
+ if (!cpumask_test_cpu(env->src_cpu, cur->cpus_ptr))
|
||||
goto unlock;
|
||||
|
||||
/*
|
||||
@@ -1727,7 +1727,7 @@ static void task_numa_find_cpu(struct ta
|
||||
|
||||
for_each_cpu(cpu, cpumask_of_node(env->dst_nid)) {
|
||||
/* Skip this CPU if the source task cannot migrate */
|
||||
- if (!cpumask_test_cpu(cpu, &env->p->cpus_allowed))
|
||||
+ if (!cpumask_test_cpu(cpu, env->p->cpus_ptr))
|
||||
continue;
|
||||
|
||||
env->dst_cpu = cpu;
|
||||
@@ -5712,7 +5712,7 @@ find_idlest_group(struct sched_domain *s
|
||||
|
||||
/* Skip over this group if it has no CPUs allowed */
|
||||
if (!cpumask_intersects(sched_group_span(group),
|
||||
- &p->cpus_allowed))
|
||||
+ p->cpus_ptr))
|
||||
continue;
|
||||
|
||||
local_group = cpumask_test_cpu(this_cpu,
|
||||
@@ -5844,7 +5844,7 @@ find_idlest_group_cpu(struct sched_group
|
||||
return cpumask_first(sched_group_span(group));
|
||||
|
||||
/* Traverse only the allowed CPUs */
|
||||
- for_each_cpu_and(i, sched_group_span(group), &p->cpus_allowed) {
|
||||
+ for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) {
|
||||
if (available_idle_cpu(i)) {
|
||||
struct rq *rq = cpu_rq(i);
|
||||
struct cpuidle_state *idle = idle_get_state(rq);
|
||||
@@ -5884,7 +5884,7 @@ static inline int find_idlest_cpu(struct
|
||||
{
|
||||
int new_cpu = cpu;
|
||||
|
||||
- if (!cpumask_intersects(sched_domain_span(sd), &p->cpus_allowed))
|
||||
+ if (!cpumask_intersects(sched_domain_span(sd), p->cpus_ptr))
|
||||
return prev_cpu;
|
||||
|
||||
/*
|
||||
@@ -6001,7 +6001,7 @@ static int select_idle_core(struct task_
|
||||
if (!test_idle_cores(target, false))
|
||||
return -1;
|
||||
|
||||
- cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed);
|
||||
+ cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
|
||||
|
||||
for_each_cpu_wrap(core, cpus, target) {
|
||||
bool idle = true;
|
||||
@@ -6035,7 +6035,7 @@ static int select_idle_smt(struct task_s
|
||||
return -1;
|
||||
|
||||
for_each_cpu(cpu, cpu_smt_mask(target)) {
|
||||
- if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
|
||||
+ if (!cpumask_test_cpu(cpu, p->cpus_ptr))
|
||||
continue;
|
||||
if (available_idle_cpu(cpu))
|
||||
return cpu;
|
||||
@@ -6098,7 +6098,7 @@ static int select_idle_cpu(struct task_s
|
||||
for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
|
||||
if (!--nr)
|
||||
return -1;
|
||||
- if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
|
||||
+ if (!cpumask_test_cpu(cpu, p->cpus_ptr))
|
||||
continue;
|
||||
if (available_idle_cpu(cpu))
|
||||
break;
|
||||
@@ -6135,7 +6135,7 @@ static int select_idle_sibling(struct ta
|
||||
recent_used_cpu != target &&
|
||||
cpus_share_cache(recent_used_cpu, target) &&
|
||||
available_idle_cpu(recent_used_cpu) &&
|
||||
- cpumask_test_cpu(p->recent_used_cpu, &p->cpus_allowed)) {
|
||||
+ cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr)) {
|
||||
/*
|
||||
* Replace recent_used_cpu with prev as it is a potential
|
||||
* candidate for the next wake:
|
||||
@@ -6353,7 +6353,7 @@ select_task_rq_fair(struct task_struct *
|
||||
if (sd_flag & SD_BALANCE_WAKE) {
|
||||
record_wakee(p);
|
||||
want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu)
|
||||
- && cpumask_test_cpu(cpu, &p->cpus_allowed);
|
||||
+ && cpumask_test_cpu(cpu, p->cpus_ptr);
|
||||
}
|
||||
|
||||
rcu_read_lock();
|
||||
@@ -7092,14 +7092,14 @@ int can_migrate_task(struct task_struct
|
||||
/*
|
||||
* We do not migrate tasks that are:
|
||||
* 1) throttled_lb_pair, or
|
||||
- * 2) cannot be migrated to this CPU due to cpus_allowed, or
|
||||
+ * 2) cannot be migrated to this CPU due to cpus_ptr, or
|
||||
* 3) running (obviously), or
|
||||
* 4) are cache-hot on their current CPU.
|
||||
*/
|
||||
if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
|
||||
return 0;
|
||||
|
||||
- if (!cpumask_test_cpu(env->dst_cpu, &p->cpus_allowed)) {
|
||||
+ if (!cpumask_test_cpu(env->dst_cpu, p->cpus_ptr)) {
|
||||
int cpu;
|
||||
|
||||
schedstat_inc(p->se.statistics.nr_failed_migrations_affine);
|
||||
@@ -7119,7 +7119,7 @@ int can_migrate_task(struct task_struct
|
||||
|
||||
/* Prevent to re-select dst_cpu via env's CPUs: */
|
||||
for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) {
|
||||
- if (cpumask_test_cpu(cpu, &p->cpus_allowed)) {
|
||||
+ if (cpumask_test_cpu(cpu, p->cpus_ptr)) {
|
||||
env->flags |= LBF_DST_PINNED;
|
||||
env->new_dst_cpu = cpu;
|
||||
break;
|
||||
@@ -7716,7 +7716,7 @@ check_cpu_capacity(struct rq *rq, struct
|
||||
|
||||
/*
|
||||
* Group imbalance indicates (and tries to solve) the problem where balancing
|
||||
- * groups is inadequate due to ->cpus_allowed constraints.
|
||||
+ * groups is inadequate due to ->cpus_ptr constraints.
|
||||
*
|
||||
* Imagine a situation of two groups of 4 CPUs each and 4 tasks each with a
|
||||
* cpumask covering 1 CPU of the first group and 3 CPUs of the second group.
|
||||
@@ -8331,7 +8331,7 @@ static struct sched_group *find_busiest_
|
||||
/*
|
||||
* If the busiest group is imbalanced the below checks don't
|
||||
* work because they assume all things are equal, which typically
|
||||
- * isn't true due to cpus_allowed constraints and the like.
|
||||
+ * isn't true due to cpus_ptr constraints and the like.
|
||||
*/
|
||||
if (busiest->group_type == group_imbalanced)
|
||||
goto force_balance;
|
||||
@@ -8727,7 +8727,7 @@ static int load_balance(int this_cpu, st
|
||||
* if the curr task on busiest CPU can't be
|
||||
* moved to this_cpu:
|
||||
*/
|
||||
- if (!cpumask_test_cpu(this_cpu, &busiest->curr->cpus_allowed)) {
|
||||
+ if (!cpumask_test_cpu(this_cpu, busiest->curr->cpus_ptr)) {
|
||||
raw_spin_unlock_irqrestore(&busiest->lock,
|
||||
flags);
|
||||
env.flags |= LBF_ALL_PINNED;
|
||||
--- a/kernel/sched/rt.c
|
||||
+++ b/kernel/sched/rt.c
|
||||
@@ -1611,7 +1611,7 @@ static void put_prev_task_rt(struct rq *
|
||||
static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
|
||||
{
|
||||
if (!task_running(rq, p) &&
|
||||
- cpumask_test_cpu(cpu, &p->cpus_allowed))
|
||||
+ cpumask_test_cpu(cpu, p->cpus_ptr))
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
@@ -1748,7 +1748,7 @@ static struct rq *find_lock_lowest_rq(st
|
||||
* Also make sure that it wasn't scheduled on its rq.
|
||||
*/
|
||||
if (unlikely(task_rq(task) != rq ||
|
||||
- !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_allowed) ||
|
||||
+ !cpumask_test_cpu(lowest_rq->cpu, task->cpus_ptr) ||
|
||||
task_running(rq, task) ||
|
||||
!rt_task(task) ||
|
||||
!task_on_rq_queued(task))) {
|
||||
--- a/kernel/trace/trace_hwlat.c
|
||||
+++ b/kernel/trace/trace_hwlat.c
|
||||
@@ -277,7 +277,7 @@ static void move_to_next_cpu(void)
|
||||
* of this thread, than stop migrating for the duration
|
||||
* of the current test.
|
||||
*/
|
||||
- if (!cpumask_equal(current_mask, ¤t->cpus_allowed))
|
||||
+ if (!cpumask_equal(current_mask, current->cpus_ptr))
|
||||
goto disable;
|
||||
|
||||
get_online_cpus();
|
||||
--- a/lib/smp_processor_id.c
|
||||
+++ b/lib/smp_processor_id.c
|
||||
@@ -22,7 +22,7 @@ notrace static unsigned int check_preemp
|
||||
* Kernel threads bound to a single CPU can safely use
|
||||
* smp_processor_id():
|
||||
*/
|
||||
- if (cpumask_equal(¤t->cpus_allowed, cpumask_of(this_cpu)))
|
||||
+ if (cpumask_equal(current->cpus_ptr, cpumask_of(this_cpu)))
|
||||
goto out;
|
||||
|
||||
/*
|
||||
--- a/samples/trace_events/trace-events-sample.c
|
||||
+++ b/samples/trace_events/trace-events-sample.c
|
||||
@@ -33,7 +33,7 @@ static void simple_thread_func(int cnt)
|
||||
|
||||
/* Silly tracepoints */
|
||||
trace_foo_bar("hello", cnt, array, random_strings[len],
|
||||
- ¤t->cpus_allowed);
|
||||
+ current->cpus_ptr);
|
||||
|
||||
trace_foo_with_template_simple("HELLO", cnt);
|
||||
|
||||
251
kernel/patches-4.19.x-rt/0029-add_migrate_disable.patch
Normal file
@@ -0,0 +1,251 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Sat, 27 May 2017 19:02:06 +0200
|
||||
Subject: kernel/sched/core: add migrate_disable()
|
||||
|
||||
---
|
||||
include/linux/preempt.h | 23 ++++++++
|
||||
include/linux/sched.h | 7 ++
|
||||
include/linux/smp.h | 3 +
|
||||
kernel/sched/core.c | 130 +++++++++++++++++++++++++++++++++++++++++++++++-
|
||||
kernel/sched/debug.c | 4 +
|
||||
5 files changed, 165 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/include/linux/preempt.h
|
||||
+++ b/include/linux/preempt.h
|
||||
@@ -185,6 +185,22 @@ do { \
|
||||
|
||||
#define preemptible() (preempt_count() == 0 && !irqs_disabled())
|
||||
|
||||
+#ifdef CONFIG_SMP
|
||||
+
|
||||
+extern void migrate_disable(void);
|
||||
+extern void migrate_enable(void);
|
||||
+
|
||||
+int __migrate_disabled(struct task_struct *p);
|
||||
+
|
||||
+#else
|
||||
+#define migrate_disable() barrier()
|
||||
+#define migrate_enable() barrier()
|
||||
+static inline int __migrate_disabled(struct task_struct *p)
|
||||
+{
|
||||
+ return 0;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
#ifdef CONFIG_PREEMPT
|
||||
#define preempt_enable() \
|
||||
do { \
|
||||
@@ -253,6 +269,13 @@ do { \
|
||||
#define preempt_enable_notrace() barrier()
|
||||
#define preemptible() 0
|
||||
|
||||
+#define migrate_disable() barrier()
|
||||
+#define migrate_enable() barrier()
|
||||
+
|
||||
+static inline int __migrate_disabled(struct task_struct *p)
|
||||
+{
|
||||
+ return 0;
|
||||
+}
|
||||
#endif /* CONFIG_PREEMPT_COUNT */
|
||||
|
||||
#ifdef MODULE
|
||||
--- a/include/linux/sched.h
|
||||
+++ b/include/linux/sched.h
|
||||
@@ -662,6 +662,13 @@ struct task_struct {
|
||||
int nr_cpus_allowed;
|
||||
const cpumask_t *cpus_ptr;
|
||||
cpumask_t cpus_mask;
|
||||
+#if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP)
|
||||
+ int migrate_disable;
|
||||
+ int migrate_disable_update;
|
||||
+# ifdef CONFIG_SCHED_DEBUG
|
||||
+ int migrate_disable_atomic;
|
||||
+# endif
|
||||
+#endif
|
||||
|
||||
#ifdef CONFIG_PREEMPT_RCU
|
||||
int rcu_read_lock_nesting;
|
||||
--- a/include/linux/smp.h
|
||||
+++ b/include/linux/smp.h
|
||||
@@ -202,6 +202,9 @@ static inline int get_boot_cpu_id(void)
|
||||
#define get_cpu() ({ preempt_disable(); smp_processor_id(); })
|
||||
#define put_cpu() preempt_enable()
|
||||
|
||||
+#define get_cpu_light() ({ migrate_disable(); smp_processor_id(); })
|
||||
+#define put_cpu_light() migrate_enable()
|
||||
+
|
||||
/*
|
||||
* Callback to arch code if there's nosmp or maxcpus=0 on the
|
||||
* boot command line:
|
||||
--- a/kernel/sched/core.c
|
||||
+++ b/kernel/sched/core.c
|
||||
@@ -1006,7 +1006,15 @@ void set_cpus_allowed_common(struct task
|
||||
p->nr_cpus_allowed = cpumask_weight(new_mask);
|
||||
}
|
||||
|
||||
-void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
|
||||
+#if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP)
|
||||
+int __migrate_disabled(struct task_struct *p)
|
||||
+{
|
||||
+ return p->migrate_disable;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+static void __do_set_cpus_allowed_tail(struct task_struct *p,
|
||||
+ const struct cpumask *new_mask)
|
||||
{
|
||||
struct rq *rq = task_rq(p);
|
||||
bool queued, running;
|
||||
@@ -1035,6 +1043,20 @@ void do_set_cpus_allowed(struct task_str
|
||||
set_curr_task(rq, p);
|
||||
}
|
||||
|
||||
+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
|
||||
+{
|
||||
+#if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP)
|
||||
+ if (__migrate_disabled(p)) {
|
||||
+ lockdep_assert_held(&p->pi_lock);
|
||||
+
|
||||
+ cpumask_copy(&p->cpus_mask, new_mask);
|
||||
+ p->migrate_disable_update = 1;
|
||||
+ return;
|
||||
+ }
|
||||
+#endif
|
||||
+ __do_set_cpus_allowed_tail(p, new_mask);
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Change a given task's CPU affinity. Migrate the thread to a
|
||||
* proper CPU and schedule it away if the CPU it's executing on
|
||||
@@ -1093,9 +1115,16 @@ static int __set_cpus_allowed_ptr(struct
|
||||
}
|
||||
|
||||
/* Can the task run on the task's current CPU? If so, we're done */
|
||||
- if (cpumask_test_cpu(task_cpu(p), new_mask))
|
||||
+ if (cpumask_test_cpu(task_cpu(p), new_mask) || __migrate_disabled(p))
|
||||
goto out;
|
||||
|
||||
+#if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP)
|
||||
+ if (__migrate_disabled(p)) {
|
||||
+ p->migrate_disable_update = 1;
|
||||
+ goto out;
|
||||
+ }
|
||||
+#endif
|
||||
+
|
||||
dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
|
||||
if (task_running(rq, p) || p->state == TASK_WAKING) {
|
||||
struct migration_arg arg = { p, dest_cpu };
|
||||
@@ -7058,3 +7087,100 @@ const u32 sched_prio_to_wmult[40] = {
|
||||
};
|
||||
|
||||
#undef CREATE_TRACE_POINTS
|
||||
+
|
||||
+#if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP)
|
||||
+
|
||||
+void migrate_disable(void)
|
||||
+{
|
||||
+ struct task_struct *p = current;
|
||||
+
|
||||
+ if (in_atomic() || irqs_disabled()) {
|
||||
+#ifdef CONFIG_SCHED_DEBUG
|
||||
+ p->migrate_disable_atomic++;
|
||||
+#endif
|
||||
+ return;
|
||||
+ }
|
||||
+#ifdef CONFIG_SCHED_DEBUG
|
||||
+ WARN_ON_ONCE(p->migrate_disable_atomic);
|
||||
+#endif
|
||||
+
|
||||
+ if (p->migrate_disable) {
|
||||
+ p->migrate_disable++;
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ preempt_disable();
|
||||
+ p->migrate_disable = 1;
|
||||
+
|
||||
+ p->cpus_ptr = cpumask_of(smp_processor_id());
|
||||
+ p->nr_cpus_allowed = 1;
|
||||
+
|
||||
+ preempt_enable();
|
||||
+}
|
||||
+EXPORT_SYMBOL(migrate_disable);
|
||||
+
|
||||
+void migrate_enable(void)
|
||||
+{
|
||||
+ struct task_struct *p = current;
|
||||
+
|
||||
+ if (in_atomic() || irqs_disabled()) {
|
||||
+#ifdef CONFIG_SCHED_DEBUG
|
||||
+ p->migrate_disable_atomic--;
|
||||
+#endif
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+#ifdef CONFIG_SCHED_DEBUG
|
||||
+ WARN_ON_ONCE(p->migrate_disable_atomic);
|
||||
+#endif
|
||||
+
|
||||
+ WARN_ON_ONCE(p->migrate_disable <= 0);
|
||||
+ if (p->migrate_disable > 1) {
|
||||
+ p->migrate_disable--;
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ preempt_disable();
|
||||
+
|
||||
+ p->cpus_ptr = &p->cpus_mask;
|
||||
+ p->nr_cpus_allowed = cpumask_weight(&p->cpus_mask);
|
||||
+ p->migrate_disable = 0;
|
||||
+
|
||||
+ if (p->migrate_disable_update) {
|
||||
+ struct rq *rq;
|
||||
+ struct rq_flags rf;
|
||||
+
|
||||
+ rq = task_rq_lock(p, &rf);
|
||||
+ update_rq_clock(rq);
|
||||
+
|
||||
+ __do_set_cpus_allowed_tail(p, &p->cpus_mask);
|
||||
+ task_rq_unlock(rq, p, &rf);
|
||||
+
|
||||
+ p->migrate_disable_update = 0;
|
||||
+
|
||||
+ WARN_ON(smp_processor_id() != task_cpu(p));
|
||||
+ if (!cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) {
|
||||
+ const struct cpumask *cpu_valid_mask = cpu_active_mask;
|
||||
+ struct migration_arg arg;
|
||||
+ unsigned int dest_cpu;
|
||||
+
|
||||
+ if (p->flags & PF_KTHREAD) {
|
||||
+ /*
|
||||
+ * Kernel threads are allowed on online && !active CPUs
|
||||
+ */
|
||||
+ cpu_valid_mask = cpu_online_mask;
|
||||
+ }
|
||||
+ dest_cpu = cpumask_any_and(cpu_valid_mask, &p->cpus_mask);
|
||||
+ arg.task = p;
|
||||
+ arg.dest_cpu = dest_cpu;
|
||||
+
|
||||
+ preempt_enable();
|
||||
+ stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
|
||||
+ tlb_migrate_finish(p->mm);
|
||||
+ return;
|
||||
+ }
|
||||
+ }
|
||||
+ preempt_enable();
|
||||
+}
|
||||
+EXPORT_SYMBOL(migrate_enable);
|
||||
+#endif
|
||||
--- a/kernel/sched/debug.c
|
||||
+++ b/kernel/sched/debug.c
|
||||
@@ -978,6 +978,10 @@ void proc_sched_show_task(struct task_st
|
||||
P(dl.runtime);
|
||||
P(dl.deadline);
|
||||
}
|
||||
+#if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP)
|
||||
+ P(migrate_disable);
|
||||
+#endif
|
||||
+ P(nr_cpus_allowed);
|
||||
#undef PN_SCHEDSTAT
|
||||
#undef PN
|
||||
#undef __PN
|
||||
@@ -0,0 +1,31 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Tue, 9 Oct 2018 17:34:50 +0200
|
||||
Subject: [PATCH] sched/migrate_disable: Add export_symbol_gpl for
|
||||
__migrate_disabled
|
||||
|
||||
Jonathan reported that lttng/modules can't use __migrate_disabled().
|
||||
This function is only used by sched/core itself and the tracing
infrastructure to report the migrate counter (lttng probably does the
same). Since the migrate_disable() rework it moved from sched.h to
preempt.h and became an exported function instead of a "static
inline" due to the header recursion of preempt vs sched.

Since the compiler inlines the function for sched/core usage, add an
EXPORT_SYMBOL_GPL to allow the module/LTTNG usage.
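
A hypothetical out-of-tree GPL module using the export might look like the
sketch below (the module itself is made up; only __migrate_disabled() and its
preempt.h declaration come from the series):

	#include <linux/module.h>
	#include <linux/preempt.h>
	#include <linux/sched.h>

	static int __init demo_init(void)
	{
		/* report the current task's migrate_disable depth, as a tracer might */
		pr_info("migrate_disable depth: %d\n", __migrate_disabled(current));
		return 0;
	}

	static void __exit demo_exit(void)
	{
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");	/* required, since the symbol is EXPORT_SYMBOL_GPL */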
|
||||
|
||||
Reported-by: Jonathan Rajott <jonathan.rajotte-julien@efficios.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/sched/core.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
--- a/kernel/sched/core.c
|
||||
+++ b/kernel/sched/core.c
|
||||
@@ -1011,6 +1011,7 @@ int __migrate_disabled(struct task_struc
|
||||
{
|
||||
return p->migrate_disable;
|
||||
}
|
||||
+EXPORT_SYMBOL_GPL(__migrate_disabled);
|
||||
#endif
|
||||
|
||||
static void __do_set_cpus_allowed_tail(struct task_struct *p,
|
||||
@@ -0,0 +1,91 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Wed, 09 Mar 2016 10:51:06 +0100
|
||||
Subject: arm: at91: do not disable/enable clocks in a row
|
||||
|
||||
Currently the driver disables the clock and enables it again one line later
when switching from periodic mode to one-shot.
This can be avoided, and it causes a needless warning on -RT.
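
The change below makes enable/disable idempotent by tracking the clock state;
the same idea in miniature (a generic sketch assuming <linux/clk.h>, not the
tcb_clksrc code):

	struct demo_dev {
		struct clk *clk;
		bool clk_enabled;
	};

	static void demo_clk_enable(struct demo_dev *d)
	{
		if (d->clk_enabled)		/* already on: skip the redundant enable */
			return;
		clk_enable(d->clk);
		d->clk_enabled = true;
	}

	static void demo_clk_disable(struct demo_dev *d)
	{
		if (!d->clk_enabled)
			return;
		clk_disable(d->clk);
		d->clk_enabled = false;
	}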
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
drivers/clocksource/tcb_clksrc.c | 33 +++++++++++++++++++++++++++++----
|
||||
1 file changed, 29 insertions(+), 4 deletions(-)
|
||||
|
||||
--- a/drivers/clocksource/tcb_clksrc.c
|
||||
+++ b/drivers/clocksource/tcb_clksrc.c
|
||||
@@ -126,6 +126,7 @@ static struct clocksource clksrc = {
|
||||
struct tc_clkevt_device {
|
||||
struct clock_event_device clkevt;
|
||||
struct clk *clk;
|
||||
+ bool clk_enabled;
|
||||
void __iomem *regs;
|
||||
};
|
||||
|
||||
@@ -143,6 +144,24 @@ static struct tc_clkevt_device *to_tc_cl
|
||||
*/
|
||||
static u32 timer_clock;
|
||||
|
||||
+static void tc_clk_disable(struct clock_event_device *d)
|
||||
+{
|
||||
+ struct tc_clkevt_device *tcd = to_tc_clkevt(d);
|
||||
+
|
||||
+ clk_disable(tcd->clk);
|
||||
+ tcd->clk_enabled = false;
|
||||
+}
|
||||
+
|
||||
+static void tc_clk_enable(struct clock_event_device *d)
|
||||
+{
|
||||
+ struct tc_clkevt_device *tcd = to_tc_clkevt(d);
|
||||
+
|
||||
+ if (tcd->clk_enabled)
|
||||
+ return;
|
||||
+ clk_enable(tcd->clk);
|
||||
+ tcd->clk_enabled = true;
|
||||
+}
|
||||
+
|
||||
static int tc_shutdown(struct clock_event_device *d)
|
||||
{
|
||||
struct tc_clkevt_device *tcd = to_tc_clkevt(d);
|
||||
@@ -150,8 +169,14 @@ static int tc_shutdown(struct clock_even
|
||||
|
||||
writel(0xff, regs + ATMEL_TC_REG(2, IDR));
|
||||
writel(ATMEL_TC_CLKDIS, regs + ATMEL_TC_REG(2, CCR));
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int tc_shutdown_clk_off(struct clock_event_device *d)
|
||||
+{
|
||||
+ tc_shutdown(d);
|
||||
if (!clockevent_state_detached(d))
|
||||
- clk_disable(tcd->clk);
|
||||
+ tc_clk_disable(d);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -164,7 +189,7 @@ static int tc_set_oneshot(struct clock_e
|
||||
if (clockevent_state_oneshot(d) || clockevent_state_periodic(d))
|
||||
tc_shutdown(d);
|
||||
|
||||
- clk_enable(tcd->clk);
|
||||
+ tc_clk_enable(d);
|
||||
|
||||
/* slow clock, count up to RC, then irq and stop */
|
||||
writel(timer_clock | ATMEL_TC_CPCSTOP | ATMEL_TC_WAVE |
|
||||
@@ -186,7 +211,7 @@ static int tc_set_periodic(struct clock_
|
||||
/* By not making the gentime core emulate periodic mode on top
|
||||
* of oneshot, we get lower overhead and improved accuracy.
|
||||
*/
|
||||
- clk_enable(tcd->clk);
|
||||
+ tc_clk_enable(d);
|
||||
|
||||
/* slow clock, count up to RC, then irq and restart */
|
||||
writel(timer_clock | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO,
|
||||
@@ -220,7 +245,7 @@ static struct tc_clkevt_device clkevt =
|
||||
/* Should be lower than at91rm9200's system timer */
|
||||
.rating = 125,
|
||||
.set_next_event = tc_next_event,
|
||||
- .set_state_shutdown = tc_shutdown,
|
||||
+ .set_state_shutdown = tc_shutdown_clk_off,
|
||||
.set_state_periodic = tc_set_periodic,
|
||||
.set_state_oneshot = tc_set_oneshot,
|
||||
},
|
||||
@@ -0,0 +1,157 @@
|
||||
From: Benedikt Spranger <b.spranger@linutronix.de>
|
||||
Date: Mon, 8 Mar 2010 18:57:04 +0100
|
||||
Subject: clocksource: TCLIB: Allow higher clock rates for clock events
|
||||
|
||||
By default the TCLIB uses the 32 KiHz base clock rate for clock events.
Add a compile-time selection to allow higher clock resolution.
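
Back-of-the-envelope numbers (illustrative only, not taken from the patch): the
32768 Hz slow clock gives a tick of 1/32768 s, about 30.5 µs, while a divided
master clock of, say, 5 MHz gives about 0.2 µs per tick. With HZ=100 the
periodic reload value (freq + HZ/2) / HZ used below works out to
(32768 + 50) / 100 = 328 versus (5000000 + 50) / 100 = 50000.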
|
||||
|
||||
(fixed up by Sami Pietikäinen <Sami.Pietikainen@wapice.com>)
|
||||
|
||||
Signed-off-by: Benedikt Spranger <b.spranger@linutronix.de>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
drivers/clocksource/tcb_clksrc.c | 36 +++++++++++++++++++++---------------
|
||||
drivers/misc/Kconfig | 12 ++++++++++--
|
||||
2 files changed, 31 insertions(+), 17 deletions(-)
|
||||
|
||||
--- a/drivers/clocksource/tcb_clksrc.c
|
||||
+++ b/drivers/clocksource/tcb_clksrc.c
|
||||
@@ -25,8 +25,7 @@
|
||||
* this 32 bit free-running counter. the second channel is not used.
|
||||
*
|
||||
* - The third channel may be used to provide a 16-bit clockevent
|
||||
- * source, used in either periodic or oneshot mode. This runs
|
||||
- * at 32 KiHZ, and can handle delays of up to two seconds.
|
||||
+ * source, used in either periodic or oneshot mode.
|
||||
*
|
||||
* A boot clocksource and clockevent source are also currently needed,
|
||||
* unless the relevant platforms (ARM/AT91, AVR32/AT32) are changed so
|
||||
@@ -127,6 +126,7 @@ struct tc_clkevt_device {
|
||||
struct clock_event_device clkevt;
|
||||
struct clk *clk;
|
||||
bool clk_enabled;
|
||||
+ u32 freq;
|
||||
void __iomem *regs;
|
||||
};
|
||||
|
||||
@@ -135,13 +135,6 @@ static struct tc_clkevt_device *to_tc_cl
|
||||
return container_of(clkevt, struct tc_clkevt_device, clkevt);
|
||||
}
|
||||
|
||||
-/* For now, we always use the 32K clock ... this optimizes for NO_HZ,
|
||||
- * because using one of the divided clocks would usually mean the
|
||||
- * tick rate can never be less than several dozen Hz (vs 0.5 Hz).
|
||||
- *
|
||||
- * A divided clock could be good for high resolution timers, since
|
||||
- * 30.5 usec resolution can seem "low".
|
||||
- */
|
||||
static u32 timer_clock;
|
||||
|
||||
static void tc_clk_disable(struct clock_event_device *d)
|
||||
@@ -191,7 +184,7 @@ static int tc_set_oneshot(struct clock_e
|
||||
|
||||
tc_clk_enable(d);
|
||||
|
||||
- /* slow clock, count up to RC, then irq and stop */
|
||||
+ /* count up to RC, then irq and stop */
|
||||
writel(timer_clock | ATMEL_TC_CPCSTOP | ATMEL_TC_WAVE |
|
||||
ATMEL_TC_WAVESEL_UP_AUTO, regs + ATMEL_TC_REG(2, CMR));
|
||||
writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER));
|
||||
@@ -213,10 +206,10 @@ static int tc_set_periodic(struct clock_
|
||||
*/
|
||||
tc_clk_enable(d);
|
||||
|
||||
- /* slow clock, count up to RC, then irq and restart */
|
||||
+ /* count up to RC, then irq and restart */
|
||||
writel(timer_clock | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO,
|
||||
regs + ATMEL_TC_REG(2, CMR));
|
||||
- writel((32768 + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC));
|
||||
+ writel((tcd->freq + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC));
|
||||
|
||||
/* Enable clock and interrupts on RC compare */
|
||||
writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER));
|
||||
@@ -243,7 +236,11 @@ static struct tc_clkevt_device clkevt =
|
||||
.features = CLOCK_EVT_FEAT_PERIODIC |
|
||||
CLOCK_EVT_FEAT_ONESHOT,
|
||||
/* Should be lower than at91rm9200's system timer */
|
||||
+#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
|
||||
.rating = 125,
|
||||
+#else
|
||||
+ .rating = 200,
|
||||
+#endif
|
||||
.set_next_event = tc_next_event,
|
||||
.set_state_shutdown = tc_shutdown_clk_off,
|
||||
.set_state_periodic = tc_set_periodic,
|
||||
@@ -265,8 +262,9 @@ static irqreturn_t ch2_irq(int irq, void
|
||||
return IRQ_NONE;
|
||||
}
|
||||
|
||||
-static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx)
|
||||
+static int __init setup_clkevents(struct atmel_tc *tc, int divisor_idx)
|
||||
{
|
||||
+ unsigned divisor = atmel_tc_divisors[divisor_idx];
|
||||
int ret;
|
||||
struct clk *t2_clk = tc->clk[2];
|
||||
int irq = tc->irq[2];
|
||||
@@ -287,7 +285,11 @@ static int __init setup_clkevents(struct
|
||||
clkevt.regs = tc->regs;
|
||||
clkevt.clk = t2_clk;
|
||||
|
||||
- timer_clock = clk32k_divisor_idx;
|
||||
+ timer_clock = divisor_idx;
|
||||
+ if (!divisor)
|
||||
+ clkevt.freq = 32768;
|
||||
+ else
|
||||
+ clkevt.freq = clk_get_rate(t2_clk) / divisor;
|
||||
|
||||
clkevt.clkevt.cpumask = cpumask_of(0);
|
||||
|
||||
@@ -298,7 +300,7 @@ static int __init setup_clkevents(struct
|
||||
return ret;
|
||||
}
|
||||
|
||||
- clockevents_config_and_register(&clkevt.clkevt, 32768, 1, 0xffff);
|
||||
+ clockevents_config_and_register(&clkevt.clkevt, clkevt.freq, 1, 0xffff);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -435,7 +437,11 @@ static int __init tcb_clksrc_init(void)
|
||||
goto err_disable_t1;
|
||||
|
||||
/* channel 2: periodic and oneshot timer support */
|
||||
+#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
|
||||
ret = setup_clkevents(tc, clk32k_divisor_idx);
|
||||
+#else
|
||||
+ ret = setup_clkevents(tc, best_divisor_idx);
|
||||
+#endif
|
||||
if (ret)
|
||||
goto err_unregister_clksrc;
|
||||
|
||||
--- a/drivers/misc/Kconfig
|
||||
+++ b/drivers/misc/Kconfig
|
||||
@@ -69,8 +69,7 @@ config ATMEL_TCB_CLKSRC
|
||||
are combined to make a single 32-bit timer.
|
||||
|
||||
When GENERIC_CLOCKEVENTS is defined, the third timer channel
|
||||
- may be used as a clock event device supporting oneshot mode
|
||||
- (delays of up to two seconds) based on the 32 KiHz clock.
|
||||
+ may be used as a clock event device supporting oneshot mode.
|
||||
|
||||
config ATMEL_TCB_CLKSRC_BLOCK
|
||||
int
|
||||
@@ -83,6 +82,15 @@ config ATMEL_TCB_CLKSRC_BLOCK
|
||||
TC can be used for other purposes, such as PWM generation and
|
||||
interval timing.
|
||||
|
||||
+config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
|
||||
+ bool "TC Block use 32 KiHz clock"
|
||||
+ depends on ATMEL_TCB_CLKSRC
|
||||
+ default y
|
||||
+ help
|
||||
+ Select this to use 32 KiHz base clock rate as TC block clock
|
||||
+ source for clock events.
|
||||
+
|
||||
+
|
||||
config DUMMY_IRQ
|
||||
tristate "Dummy IRQ handler"
|
||||
default n
|
||||
@@ -0,0 +1,156 @@
|
||||
Subject: timekeeping: Split jiffies seqlock
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Thu, 14 Feb 2013 22:36:59 +0100
|
||||
|
||||
Replace the jiffies_lock seqlock with a plain seqcount and a raw spinlock so
it can be taken in atomic context on RT.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
kernel/time/jiffies.c | 7 ++++---
|
||||
kernel/time/tick-common.c | 10 ++++++----
|
||||
kernel/time/tick-sched.c | 19 ++++++++++++-------
|
||||
kernel/time/timekeeping.c | 6 ++++--
|
||||
kernel/time/timekeeping.h | 3 ++-
|
||||
5 files changed, 28 insertions(+), 17 deletions(-)
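Editor's sketch of the locking pattern this patch introduces: the writer serializes on a raw spinlock (which stays a real spinlock on RT) and publishes through a seqcount, while readers retry on the seqcount alone. Illustrative only, not the actual kernel code.

#include <linux/seqlock.h>
#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(example_lock);
static seqcount_t example_seq = SEQCNT_ZERO(example_seq);
static u64 example_value;

static void example_writer(void)
{
	raw_spin_lock(&example_lock);
	write_seqcount_begin(&example_seq);
	example_value++;
	write_seqcount_end(&example_seq);
	raw_spin_unlock(&example_lock);
}

static u64 example_reader(void)
{
	unsigned int seq;
	u64 val;

	do {
		seq = read_seqcount_begin(&example_seq);
		val = example_value;
	} while (read_seqcount_retry(&example_seq, seq));

	return val;
}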
|
||||
|
||||
--- a/kernel/time/jiffies.c
|
||||
+++ b/kernel/time/jiffies.c
|
||||
@@ -74,7 +74,8 @@ static struct clocksource clocksource_ji
|
||||
.max_cycles = 10,
|
||||
};
|
||||
|
||||
-__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);
|
||||
+__cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(jiffies_lock);
|
||||
+__cacheline_aligned_in_smp seqcount_t jiffies_seq;
|
||||
|
||||
#if (BITS_PER_LONG < 64)
|
||||
u64 get_jiffies_64(void)
|
||||
@@ -83,9 +84,9 @@ u64 get_jiffies_64(void)
|
||||
u64 ret;
|
||||
|
||||
do {
|
||||
- seq = read_seqbegin(&jiffies_lock);
|
||||
+ seq = read_seqcount_begin(&jiffies_seq);
|
||||
ret = jiffies_64;
|
||||
- } while (read_seqretry(&jiffies_lock, seq));
|
||||
+ } while (read_seqcount_retry(&jiffies_seq, seq));
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(get_jiffies_64);
|
||||
--- a/kernel/time/tick-common.c
|
||||
+++ b/kernel/time/tick-common.c
|
||||
@@ -79,13 +79,15 @@ int tick_is_oneshot_available(void)
|
||||
static void tick_periodic(int cpu)
|
||||
{
|
||||
if (tick_do_timer_cpu == cpu) {
|
||||
- write_seqlock(&jiffies_lock);
|
||||
+ raw_spin_lock(&jiffies_lock);
|
||||
+ write_seqcount_begin(&jiffies_seq);
|
||||
|
||||
/* Keep track of the next tick event */
|
||||
tick_next_period = ktime_add(tick_next_period, tick_period);
|
||||
|
||||
do_timer(1);
|
||||
- write_sequnlock(&jiffies_lock);
|
||||
+ write_seqcount_end(&jiffies_seq);
|
||||
+ raw_spin_unlock(&jiffies_lock);
|
||||
update_wall_time();
|
||||
}
|
||||
|
||||
@@ -157,9 +159,9 @@ void tick_setup_periodic(struct clock_ev
|
||||
ktime_t next;
|
||||
|
||||
do {
|
||||
- seq = read_seqbegin(&jiffies_lock);
|
||||
+ seq = read_seqcount_begin(&jiffies_seq);
|
||||
next = tick_next_period;
|
||||
- } while (read_seqretry(&jiffies_lock, seq));
|
||||
+ } while (read_seqcount_retry(&jiffies_seq, seq));
|
||||
|
||||
clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
|
||||
|
||||
--- a/kernel/time/tick-sched.c
|
||||
+++ b/kernel/time/tick-sched.c
|
||||
@@ -67,7 +67,8 @@ static void tick_do_update_jiffies64(kti
|
||||
return;
|
||||
|
||||
/* Reevaluate with jiffies_lock held */
|
||||
- write_seqlock(&jiffies_lock);
|
||||
+ raw_spin_lock(&jiffies_lock);
|
||||
+ write_seqcount_begin(&jiffies_seq);
|
||||
|
||||
delta = ktime_sub(now, last_jiffies_update);
|
||||
if (delta >= tick_period) {
|
||||
@@ -90,10 +91,12 @@ static void tick_do_update_jiffies64(kti
|
||||
/* Keep the tick_next_period variable up to date */
|
||||
tick_next_period = ktime_add(last_jiffies_update, tick_period);
|
||||
} else {
|
||||
- write_sequnlock(&jiffies_lock);
|
||||
+ write_seqcount_end(&jiffies_seq);
|
||||
+ raw_spin_unlock(&jiffies_lock);
|
||||
return;
|
||||
}
|
||||
- write_sequnlock(&jiffies_lock);
|
||||
+ write_seqcount_end(&jiffies_seq);
|
||||
+ raw_spin_unlock(&jiffies_lock);
|
||||
update_wall_time();
|
||||
}
|
||||
|
||||
@@ -104,12 +107,14 @@ static ktime_t tick_init_jiffy_update(vo
|
||||
{
|
||||
ktime_t period;
|
||||
|
||||
- write_seqlock(&jiffies_lock);
|
||||
+ raw_spin_lock(&jiffies_lock);
|
||||
+ write_seqcount_begin(&jiffies_seq);
|
||||
/* Did we start the jiffies update yet ? */
|
||||
if (last_jiffies_update == 0)
|
||||
last_jiffies_update = tick_next_period;
|
||||
period = last_jiffies_update;
|
||||
- write_sequnlock(&jiffies_lock);
|
||||
+ write_seqcount_end(&jiffies_seq);
|
||||
+ raw_spin_unlock(&jiffies_lock);
|
||||
return period;
|
||||
}
|
||||
|
||||
@@ -652,10 +657,10 @@ static ktime_t tick_nohz_next_event(stru
|
||||
|
||||
/* Read jiffies and the time when jiffies were updated last */
|
||||
do {
|
||||
- seq = read_seqbegin(&jiffies_lock);
|
||||
+ seq = read_seqcount_begin(&jiffies_seq);
|
||||
basemono = last_jiffies_update;
|
||||
basejiff = jiffies;
|
||||
- } while (read_seqretry(&jiffies_lock, seq));
|
||||
+ } while (read_seqcount_retry(&jiffies_seq, seq));
|
||||
ts->last_jiffies = basejiff;
|
||||
ts->timer_expires_base = basemono;
|
||||
|
||||
--- a/kernel/time/timekeeping.c
|
||||
+++ b/kernel/time/timekeeping.c
|
||||
@@ -2417,8 +2417,10 @@ EXPORT_SYMBOL(hardpps);
|
||||
*/
|
||||
void xtime_update(unsigned long ticks)
|
||||
{
|
||||
- write_seqlock(&jiffies_lock);
|
||||
+ raw_spin_lock(&jiffies_lock);
|
||||
+ write_seqcount_begin(&jiffies_seq);
|
||||
do_timer(ticks);
|
||||
- write_sequnlock(&jiffies_lock);
|
||||
+ write_seqcount_end(&jiffies_seq);
|
||||
+ raw_spin_unlock(&jiffies_lock);
|
||||
update_wall_time();
|
||||
}
|
||||
--- a/kernel/time/timekeeping.h
|
||||
+++ b/kernel/time/timekeeping.h
|
||||
@@ -18,7 +18,8 @@ extern void timekeeping_resume(void);
|
||||
extern void do_timer(unsigned long ticks);
|
||||
extern void update_wall_time(void);
|
||||
|
||||
-extern seqlock_t jiffies_lock;
|
||||
+extern raw_spinlock_t jiffies_lock;
|
||||
+extern seqcount_t jiffies_seq;
|
||||
|
||||
#define CS_NAME_LEN 32
|
||||
|
||||
@@ -0,0 +1,31 @@
|
||||
Subject: signal: Revert ptrace preempt magic
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Wed, 21 Sep 2011 19:57:12 +0200
|
||||
|
||||
Upstream commit '53da1d9456fe7f8 fix ptrace slowness' is nothing more
than a bandaid around the ptrace design trainwreck. It's not a
correctness issue, it's merely a cosmetic bandaid.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
kernel/signal.c | 8 --------
|
||||
1 file changed, 8 deletions(-)
|
||||
|
||||
--- a/kernel/signal.c
|
||||
+++ b/kernel/signal.c
|
||||
@@ -2094,15 +2094,7 @@ static void ptrace_stop(int exit_code, i
|
||||
if (gstop_done && ptrace_reparented(current))
|
||||
do_notify_parent_cldstop(current, false, why);
|
||||
|
||||
- /*
|
||||
- * Don't want to allow preemption here, because
|
||||
- * sys_ptrace() needs this task to be inactive.
|
||||
- *
|
||||
- * XXX: implement read_unlock_no_resched().
|
||||
- */
|
||||
- preempt_disable();
|
||||
read_unlock(&tasklist_lock);
|
||||
- preempt_enable_no_resched();
|
||||
freezable_schedule();
|
||||
} else {
|
||||
/*
|
||||
@@ -0,0 +1,57 @@
|
||||
From: Marc Kleine-Budde <mkl@pengutronix.de>
|
||||
Date: Wed, 5 Mar 2014 00:49:47 +0100
|
||||
Subject: net: sched: Use msleep() instead of yield()
|
||||
|
||||
On PREEMPT_RT enabled systems the interrupt handlers run as threads at prio 50
(by default). If a high priority userspace process tries to shut down a busy
network interface it might spin in a yield loop waiting for the device to
become idle. With the interrupt thread having a lower priority than the
looping process it might never be scheduled and so result in a deadlock on UP
systems.
|
||||
|
||||
With Magic SysRq the following backtrace can be produced:
|
||||
|
||||
> test_app R running 0 174 168 0x00000000
|
||||
> [<c02c7070>] (__schedule+0x220/0x3fc) from [<c02c7870>] (preempt_schedule_irq+0x48/0x80)
|
||||
> [<c02c7870>] (preempt_schedule_irq+0x48/0x80) from [<c0008fa8>] (svc_preempt+0x8/0x20)
|
||||
> [<c0008fa8>] (svc_preempt+0x8/0x20) from [<c001a984>] (local_bh_enable+0x18/0x88)
|
||||
> [<c001a984>] (local_bh_enable+0x18/0x88) from [<c025316c>] (dev_deactivate_many+0x220/0x264)
|
||||
> [<c025316c>] (dev_deactivate_many+0x220/0x264) from [<c023be04>] (__dev_close_many+0x64/0xd4)
|
||||
> [<c023be04>] (__dev_close_many+0x64/0xd4) from [<c023be9c>] (__dev_close+0x28/0x3c)
|
||||
> [<c023be9c>] (__dev_close+0x28/0x3c) from [<c023f7f0>] (__dev_change_flags+0x88/0x130)
|
||||
> [<c023f7f0>] (__dev_change_flags+0x88/0x130) from [<c023f904>] (dev_change_flags+0x10/0x48)
|
||||
> [<c023f904>] (dev_change_flags+0x10/0x48) from [<c024c140>] (do_setlink+0x370/0x7ec)
|
||||
> [<c024c140>] (do_setlink+0x370/0x7ec) from [<c024d2f0>] (rtnl_newlink+0x2b4/0x450)
|
||||
> [<c024d2f0>] (rtnl_newlink+0x2b4/0x450) from [<c024cfa0>] (rtnetlink_rcv_msg+0x158/0x1f4)
|
||||
> [<c024cfa0>] (rtnetlink_rcv_msg+0x158/0x1f4) from [<c0256740>] (netlink_rcv_skb+0xac/0xc0)
|
||||
> [<c0256740>] (netlink_rcv_skb+0xac/0xc0) from [<c024bbd8>] (rtnetlink_rcv+0x18/0x24)
|
||||
> [<c024bbd8>] (rtnetlink_rcv+0x18/0x24) from [<c02561b8>] (netlink_unicast+0x13c/0x198)
|
||||
> [<c02561b8>] (netlink_unicast+0x13c/0x198) from [<c025651c>] (netlink_sendmsg+0x264/0x2e0)
|
||||
> [<c025651c>] (netlink_sendmsg+0x264/0x2e0) from [<c022af98>] (sock_sendmsg+0x78/0x98)
|
||||
> [<c022af98>] (sock_sendmsg+0x78/0x98) from [<c022bb50>] (___sys_sendmsg.part.25+0x268/0x278)
|
||||
> [<c022bb50>] (___sys_sendmsg.part.25+0x268/0x278) from [<c022cf08>] (__sys_sendmsg+0x48/0x78)
|
||||
> [<c022cf08>] (__sys_sendmsg+0x48/0x78) from [<c0009320>] (ret_fast_syscall+0x0/0x2c)
|
||||
|
||||
This patch works around the problem by replacing yield() by msleep(1), giving
|
||||
the interrupt thread time to finish, similar to other changes contained in the
|
||||
rt patch set. Using wait_for_completion() instead would probably be a better
|
||||
solution.
|
||||
|
||||
|
||||
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
net/sched/sch_generic.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
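Editor's illustration of the change: with yield() a SCHED_FIFO caller on a uniprocessor can starve the threaded IRQ handler it is waiting for, while msleep(1) really sleeps and lets the lower-priority thread make progress. example_device_is_busy() is a stand-in for some_qdisc_is_busy(), not a real kernel helper.

#include <linux/delay.h>
#include <linux/netdevice.h>

static bool example_device_is_busy(struct net_device *dev);	/* stand-in */

static void example_wait_for_idle(struct net_device *dev)
{
	while (example_device_is_busy(dev))
		msleep(1);		/* was: yield() */
}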
|
||||
|
||||
--- a/net/sched/sch_generic.c
|
||||
+++ b/net/sched/sch_generic.c
|
||||
@@ -1184,7 +1184,7 @@ void dev_deactivate_many(struct list_hea
|
||||
/* Wait for outstanding qdisc_run calls. */
|
||||
list_for_each_entry(dev, head, close_list) {
|
||||
while (some_qdisc_is_busy(dev))
|
||||
- yield();
|
||||
+ msleep(1);
|
||||
/* The new qdisc is assigned at this point so we can safely
|
||||
* unwind stale skb lists and qdisc statistics
|
||||
*/
|
||||
@@ -0,0 +1,30 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Tue, 27 Mar 2018 16:24:15 +0200
|
||||
Subject: [PATCH] dm rq: remove BUG_ON(!irqs_disabled) check
|
||||
|
||||
In commit 052189a2ec95 ("dm: remove superfluous irq disablement in
|
||||
dm_request_fn") the spin_lock_irq() was replaced with spin_lock() + a
|
||||
check for disabled interrupts. Later the locking part was removed in
|
||||
commit 2eb6e1e3aa87 ("dm: submit stacked requests in irq enabled
|
||||
context") but the BUG_ON() check remained.
|
||||
|
||||
Since the original purpose for the "are-irqs-off" check is gone (the
|
||||
->queue_lock has been removed) remove it.
|
||||
|
||||
Cc: Keith Busch <keith.busch@intel.com>
|
||||
Cc: Mike Snitzer <snitzer@redhat.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
drivers/md/dm-rq.c | 1 -
|
||||
1 file changed, 1 deletion(-)
|
||||
|
||||
--- a/drivers/md/dm-rq.c
|
||||
+++ b/drivers/md/dm-rq.c
|
||||
@@ -688,7 +688,6 @@ static void dm_old_request_fn(struct req
|
||||
/* Establish tio->ti before queuing work (map_tio_request) */
|
||||
tio->ti = ti;
|
||||
kthread_queue_work(&md->kworker, &tio->work);
|
||||
- BUG_ON(!irqs_disabled());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Fri, 8 Nov 2013 17:34:54 +0100
|
||||
Subject: usb: do no disable interrupts in giveback
|
||||
|
||||
Since commit 94dfd7ed ("USB: HCD: support giveback of URB in tasklet
|
||||
context") the USB code disables interrupts before invoking the complete
|
||||
callback.
|
||||
This should not be required because the HCD completes the URBs either in hard-irq
context or in BH context. Lockdep may report false positives if one has two
HCDs (one completes in IRQ and the other in BH context) and is using the same
USB driver (device) with both HCDs. This is safe since the same URBs are never
mixed between those two HCDs.
Long term we should force all HCDs to complete in the same context.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
drivers/usb/core/hcd.c | 3 ---
|
||||
1 file changed, 3 deletions(-)
|
||||
|
||||
--- a/drivers/usb/core/hcd.c
|
||||
+++ b/drivers/usb/core/hcd.c
|
||||
@@ -1738,7 +1738,6 @@ static void __usb_hcd_giveback_urb(struc
|
||||
struct usb_hcd *hcd = bus_to_hcd(urb->dev->bus);
|
||||
struct usb_anchor *anchor = urb->anchor;
|
||||
int status = urb->unlinked;
|
||||
- unsigned long flags;
|
||||
|
||||
urb->hcpriv = NULL;
|
||||
if (unlikely((urb->transfer_flags & URB_SHORT_NOT_OK) &&
|
||||
@@ -1766,9 +1765,7 @@ static void __usb_hcd_giveback_urb(struc
|
||||
* and no one may trigger the above deadlock situation when
|
||||
* running complete() in tasklet.
|
||||
*/
|
||||
- local_irq_save(flags);
|
||||
urb->complete(urb);
|
||||
- local_irq_restore(flags);
|
||||
|
||||
usb_anchor_resume_wakeups(anchor);
|
||||
atomic_dec(&urb->use_count);
|
||||
57
kernel/patches-4.19.x-rt/0038-rt-preempt-base-config.patch
Normal file
@@ -0,0 +1,57 @@
|
||||
Subject: rt: Provide PREEMPT_RT_BASE config switch
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Fri, 17 Jun 2011 12:39:57 +0200
|
||||
|
||||
Introduce PREEMPT_RT_BASE which enables parts of
|
||||
PREEMPT_RT_FULL. Forces interrupt threading and enables some of the RT
|
||||
substitutions for testing.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
kernel/Kconfig.preempt | 21 ++++++++++++++++++---
|
||||
1 file changed, 18 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/kernel/Kconfig.preempt
|
||||
+++ b/kernel/Kconfig.preempt
|
||||
@@ -1,3 +1,10 @@
|
||||
+config PREEMPT
|
||||
+ bool
|
||||
+ select PREEMPT_COUNT
|
||||
+
|
||||
+config PREEMPT_RT_BASE
|
||||
+ bool
|
||||
+ select PREEMPT
|
||||
|
||||
choice
|
||||
prompt "Preemption Model"
|
||||
@@ -34,10 +41,10 @@ config PREEMPT_VOLUNTARY
|
||||
|
||||
Select this if you are building a kernel for a desktop system.
|
||||
|
||||
-config PREEMPT
|
||||
+config PREEMPT__LL
|
||||
bool "Preemptible Kernel (Low-Latency Desktop)"
|
||||
depends on !ARCH_NO_PREEMPT
|
||||
- select PREEMPT_COUNT
|
||||
+ select PREEMPT
|
||||
select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
|
||||
help
|
||||
This option reduces the latency of the kernel by making
|
||||
@@ -54,7 +61,15 @@ config PREEMPT
|
||||
embedded system with latency requirements in the milliseconds
|
||||
range.
|
||||
|
||||
+config PREEMPT_RTB
|
||||
+ bool "Preemptible Kernel (Basic RT)"
|
||||
+ select PREEMPT_RT_BASE
|
||||
+ help
|
||||
+ This option is basically the same as (Low-Latency Desktop) but
|
||||
+ enables changes which are preliminary for the full preemptible
|
||||
+ RT kernel.
|
||||
+
|
||||
endchoice
|
||||
|
||||
config PREEMPT_COUNT
|
||||
- bool
|
||||
\ No newline at end of file
|
||||
+ bool
|
||||
@@ -0,0 +1,67 @@
|
||||
Subject: cpumask: Disable CONFIG_CPUMASK_OFFSTACK for RT
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Wed, 14 Dec 2011 01:03:49 +0100
|
||||
|
||||
There are "valid" GFP_ATOMIC allocations such as
|
||||
|
||||
|BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:931
|
||||
|in_atomic(): 1, irqs_disabled(): 0, pid: 2130, name: tar
|
||||
|1 lock held by tar/2130:
|
||||
| #0: (&mm->mmap_sem){++++++}, at: [<ffffffff811d4e89>] SyS_brk+0x39/0x190
|
||||
|Preemption disabled at:[<ffffffff81063048>] flush_tlb_mm_range+0x28/0x350
|
||||
|
|
||||
|CPU: 1 PID: 2130 Comm: tar Tainted: G W 4.8.2-rt2+ #747
|
||||
|Call Trace:
|
||||
| [<ffffffff814d52dc>] dump_stack+0x86/0xca
|
||||
| [<ffffffff810a26fb>] ___might_sleep+0x14b/0x240
|
||||
| [<ffffffff819bc1d4>] rt_spin_lock+0x24/0x60
|
||||
| [<ffffffff81194fba>] get_page_from_freelist+0x83a/0x11b0
|
||||
| [<ffffffff81195e8b>] __alloc_pages_nodemask+0x15b/0x1190
|
||||
| [<ffffffff811f0b81>] alloc_pages_current+0xa1/0x1f0
|
||||
| [<ffffffff811f7df5>] new_slab+0x3e5/0x690
|
||||
| [<ffffffff811fb0d5>] ___slab_alloc+0x495/0x660
|
||||
| [<ffffffff811fb311>] __slab_alloc.isra.79+0x71/0xc0
|
||||
| [<ffffffff811fb447>] __kmalloc_node+0xe7/0x240
|
||||
| [<ffffffff814d4ee0>] alloc_cpumask_var_node+0x20/0x50
|
||||
| [<ffffffff814d4f3e>] alloc_cpumask_var+0xe/0x10
|
||||
| [<ffffffff810430c1>] native_send_call_func_ipi+0x21/0x130
|
||||
| [<ffffffff8111c13f>] smp_call_function_many+0x22f/0x370
|
||||
| [<ffffffff81062b64>] native_flush_tlb_others+0x1a4/0x3a0
|
||||
| [<ffffffff8106309b>] flush_tlb_mm_range+0x7b/0x350
|
||||
| [<ffffffff811c88e2>] tlb_flush_mmu_tlbonly+0x62/0xd0
|
||||
| [<ffffffff811c9af4>] tlb_finish_mmu+0x14/0x50
|
||||
| [<ffffffff811d1c84>] unmap_region+0xe4/0x110
|
||||
| [<ffffffff811d3db3>] do_munmap+0x293/0x470
|
||||
| [<ffffffff811d4f8c>] SyS_brk+0x13c/0x190
|
||||
| [<ffffffff810032e2>] do_fast_syscall_32+0xb2/0x2f0
|
||||
| [<ffffffff819be181>] entry_SYSENTER_compat+0x51/0x60
|
||||
|
||||
which forbid allocations at run-time.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
arch/x86/Kconfig | 2 +-
|
||||
lib/Kconfig | 1 +
|
||||
2 files changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/arch/x86/Kconfig
|
||||
+++ b/arch/x86/Kconfig
|
||||
@@ -934,7 +934,7 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT
|
||||
config MAXSMP
|
||||
bool "Enable Maximum number of SMP Processors and NUMA Nodes"
|
||||
depends on X86_64 && SMP && DEBUG_KERNEL
|
||||
- select CPUMASK_OFFSTACK
|
||||
+ select CPUMASK_OFFSTACK if !PREEMPT_RT_FULL
|
||||
---help---
|
||||
Enable maximum number of CPUS and NUMA Nodes for this architecture.
|
||||
If unsure, say N.
|
||||
--- a/lib/Kconfig
|
||||
+++ b/lib/Kconfig
|
||||
@@ -441,6 +441,7 @@ config CHECK_SIGNATURE
|
||||
|
||||
config CPUMASK_OFFSTACK
|
||||
bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS
|
||||
+ depends on !PREEMPT_RT_FULL
|
||||
help
|
||||
Use dynamic allocation for cpumask_var_t, instead of putting
|
||||
them on the stack. This is a bit more expensive, but avoids
|
||||
35
kernel/patches-4.19.x-rt/0040-jump-label-rt.patch
Normal file
@@ -0,0 +1,35 @@
|
||||
Subject: jump-label: disable if stop_machine() is used
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Wed, 08 Jul 2015 17:14:48 +0200
|
||||
|
||||
Some architectures are using stop_machine() while switching the opcode which
|
||||
leads to latency spikes.
|
||||
The architectures which use stop_machine() atm:
|
||||
- ARM stop machine
|
||||
- s390 stop machine
|
||||
|
||||
The architectures which use other sorcery:
|
||||
- MIPS
|
||||
- X86
|
||||
- powerpc
|
||||
- sparc
|
||||
- arm64
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
[bigeasy: only ARM for now]
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
arch/arm/Kconfig | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/arch/arm/Kconfig
|
||||
+++ b/arch/arm/Kconfig
|
||||
@@ -51,7 +51,7 @@ config ARM
|
||||
select HARDIRQS_SW_RESEND
|
||||
select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT)
|
||||
select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
|
||||
- select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
|
||||
+ select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT_BASE
|
||||
select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
|
||||
select HAVE_ARCH_MMAP_RND_BITS if MMU
|
||||
select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)
|
||||
@@ -0,0 +1,33 @@
|
||||
Subject: kconfig: Disable config options which are not RT compatible
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Sun, 24 Jul 2011 12:11:43 +0200
|
||||
|
||||
Disable stuff which is known to have issues on RT
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
arch/Kconfig | 1 +
|
||||
mm/Kconfig | 2 +-
|
||||
2 files changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/arch/Kconfig
|
||||
+++ b/arch/Kconfig
|
||||
@@ -28,6 +28,7 @@ config OPROFILE
|
||||
tristate "OProfile system profiling"
|
||||
depends on PROFILING
|
||||
depends on HAVE_OPROFILE
|
||||
+ depends on !PREEMPT_RT_FULL
|
||||
select RING_BUFFER
|
||||
select RING_BUFFER_ALLOW_SWAP
|
||||
help
|
||||
--- a/mm/Kconfig
|
||||
+++ b/mm/Kconfig
|
||||
@@ -377,7 +377,7 @@ config NOMMU_INITIAL_TRIM_EXCESS
|
||||
|
||||
config TRANSPARENT_HUGEPAGE
|
||||
bool "Transparent Hugepage Support"
|
||||
- depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE
|
||||
+ depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT_FULL
|
||||
select COMPACTION
|
||||
select RADIX_TREE_MULTIORDER
|
||||
help
|
||||
@@ -0,0 +1,28 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Tue, 17 Oct 2017 16:36:18 +0200
|
||||
Subject: [PATCH] lockdep: disable self-test
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
The self-test wasn't always 100% accurate for RT. We disabled a few
|
||||
tests which failed because they had a different semantic for RT. Some
|
||||
still reported false positives. Now the selftest locks up the system
|
||||
during boot and it needs to be investigated…
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
lib/Kconfig.debug | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/lib/Kconfig.debug
|
||||
+++ b/lib/Kconfig.debug
|
||||
@@ -1207,7 +1207,7 @@ config DEBUG_ATOMIC_SLEEP
|
||||
|
||||
config DEBUG_LOCKING_API_SELFTESTS
|
||||
bool "Locking API boot-time self-tests"
|
||||
- depends on DEBUG_KERNEL
|
||||
+ depends on DEBUG_KERNEL && !PREEMPT_RT_FULL
|
||||
help
|
||||
Say Y here if you want the kernel to run a short self-test during
|
||||
bootup. The self-test checks whether common types of locking bugs
|
||||
31
kernel/patches-4.19.x-rt/0043-mm-disable-sloub-rt.patch
Normal file
@@ -0,0 +1,31 @@
|
||||
From: Ingo Molnar <mingo@elte.hu>
|
||||
Date: Fri, 3 Jul 2009 08:44:03 -0500
|
||||
Subject: mm: Allow only slub on RT
|
||||
|
||||
Disable SLAB and SLOB on -RT. Only SLUB is adapted to -RT needs.
|
||||
|
||||
Signed-off-by: Ingo Molnar <mingo@elte.hu>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
|
||||
---
|
||||
init/Kconfig | 2 ++
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -1634,6 +1634,7 @@ choice
|
||||
|
||||
config SLAB
|
||||
bool "SLAB"
|
||||
+ depends on !PREEMPT_RT_FULL
|
||||
select HAVE_HARDENED_USERCOPY_ALLOCATOR
|
||||
help
|
||||
The regular slab allocator that is established and known to work
|
||||
@@ -1654,6 +1655,7 @@ config SLUB
|
||||
config SLOB
|
||||
depends on EXPERT
|
||||
bool "SLOB (Simple Allocator)"
|
||||
+ depends on !PREEMPT_RT_FULL
|
||||
help
|
||||
SLOB replaces the stock allocator with a drastically simpler
|
||||
allocator. SLOB is generally more space efficient but
|
||||
28
kernel/patches-4.19.x-rt/0044-mutex-no-spin-on-rt.patch
Normal file
@@ -0,0 +1,28 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Sun, 17 Jul 2011 21:51:45 +0200
|
||||
Subject: locking: Disable spin on owner for RT
|
||||
|
||||
Drop spin on owner for mutex / rwsem. We are most likely not using it
|
||||
but…
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
kernel/Kconfig.locks | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/kernel/Kconfig.locks
|
||||
+++ b/kernel/Kconfig.locks
|
||||
@@ -225,11 +225,11 @@ config ARCH_SUPPORTS_ATOMIC_RMW
|
||||
|
||||
config MUTEX_SPIN_ON_OWNER
|
||||
def_bool y
|
||||
- depends on SMP && ARCH_SUPPORTS_ATOMIC_RMW
|
||||
+ depends on SMP && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL
|
||||
|
||||
config RWSEM_SPIN_ON_OWNER
|
||||
def_bool y
|
||||
- depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW
|
||||
+ depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL
|
||||
|
||||
config LOCK_SPIN_ON_OWNER
|
||||
def_bool y
|
||||
@@ -0,0 +1,24 @@
|
||||
Subject: rcu: Disable RCU_FAST_NO_HZ on RT
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Sun, 28 Oct 2012 13:26:09 +0000
|
||||
|
||||
This uses a timer_list timer from the irq disabled guts of the idle
|
||||
code. Disable it for now to prevent wreckage.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
|
||||
---
|
||||
kernel/rcu/Kconfig | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/kernel/rcu/Kconfig
|
||||
+++ b/kernel/rcu/Kconfig
|
||||
@@ -172,7 +172,7 @@ config RCU_FANOUT_LEAF
|
||||
|
||||
config RCU_FAST_NO_HZ
|
||||
bool "Accelerate last non-dyntick-idle CPU's grace periods"
|
||||
- depends on NO_HZ_COMMON && SMP && RCU_EXPERT
|
||||
+ depends on NO_HZ_COMMON && SMP && RCU_EXPERT && !PREEMPT_RT_FULL
|
||||
default n
|
||||
help
|
||||
This option permits CPUs to enter dynticks-idle state even if
|
||||
@@ -0,0 +1,27 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Fri, 21 Mar 2014 20:19:05 +0100
|
||||
Subject: rcu: make RCU_BOOST default on RT
|
||||
|
||||
Since it is no longer invoked from the softirq, people run into OOM more
often if the priority of the RCU thread is too low. Making boosting the
default on RT should help in those cases and it can be switched off if
someone knows better.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/rcu/Kconfig | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/kernel/rcu/Kconfig
|
||||
+++ b/kernel/rcu/Kconfig
|
||||
@@ -190,8 +190,8 @@ config RCU_FAST_NO_HZ
|
||||
|
||||
config RCU_BOOST
|
||||
bool "Enable RCU priority boosting"
|
||||
- depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT
|
||||
- default n
|
||||
+ depends on (RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT) || PREEMPT_RT_FULL
|
||||
+ default y if PREEMPT_RT_FULL
|
||||
help
|
||||
This option boosts the priority of preempted RCU readers that
|
||||
block the current preemptible RCU grace period for too long.
|
||||
@@ -0,0 +1,28 @@
|
||||
Subject: sched: Disable CONFIG_RT_GROUP_SCHED on RT
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Mon, 18 Jul 2011 17:03:52 +0200
|
||||
|
||||
Carsten reported problems when running:
|
||||
|
||||
taskset 01 chrt -f 1 sleep 1
|
||||
|
||||
from within rc.local on an F15 machine. The task stays running and
never gets on the run queue because some of the run queues have
rt_throttled=1 which does not go away. It works fine from an ssh login
shell. Disabling CONFIG_RT_GROUP_SCHED solves that as well.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
init/Kconfig | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -781,6 +781,7 @@ config CFS_BANDWIDTH
|
||||
config RT_GROUP_SCHED
|
||||
bool "Group scheduling for SCHED_RR/FIFO"
|
||||
depends on CGROUP_SCHED
|
||||
+ depends on !PREEMPT_RT_FULL
|
||||
default n
|
||||
help
|
||||
This feature lets you explicitly allocate real CPU bandwidth
|
||||
@@ -0,0 +1,28 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Sat, 27 May 2017 19:02:06 +0200
|
||||
Subject: net/core: disable NET_RX_BUSY_POLL
|
||||
|
||||
sk_busy_loop() does preempt_disable() followed by a few operations which can
|
||||
take sleeping locks and may get long.
|
||||
I _think_ that we could use preempt_disable_nort() (in sk_busy_loop()) instead
but after a successful cmpxchg(&napi->state, …) we would gain the resource
and could be scheduled out. At this point nobody knows who (which context) owns
it and so it could take a while until the state is released and napi_poll()
could be invoked again.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
net/Kconfig | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/net/Kconfig
|
||||
+++ b/net/Kconfig
|
||||
@@ -275,7 +275,7 @@ config CGROUP_NET_CLASSID
|
||||
|
||||
config NET_RX_BUSY_POLL
|
||||
bool
|
||||
- default y
|
||||
+ default y if !PREEMPT_RT_FULL
|
||||
|
||||
config BQL
|
||||
bool
|
||||
@@ -0,0 +1,155 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Fri, 1 Dec 2017 10:42:03 +0100
|
||||
Subject: [PATCH] arm*: disable NEON in kernel mode
|
||||
|
||||
NEON in kernel mode is used by the crypto algorithms and raid6 code.
|
||||
While the raid6 code looks okay, the crypto algorithms do not: NEON
|
||||
is enabled on first invocation and may allocate/free/map memory before
|
||||
the NEON mode is disabled again.
|
||||
This needs to be changed before it can be enabled again.
On ARM, NEON in kernel mode can simply be disabled. On ARM64 it needs to
stay on due to possible EFI callbacks, so here I disable each algorithm.
|
||||
|
||||
Cc: stable-rt@vger.kernel.org
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
arch/arm/Kconfig | 2 +-
|
||||
arch/arm64/crypto/Kconfig | 28 ++++++++++++++--------------
|
||||
arch/arm64/crypto/crc32-ce-glue.c | 3 ++-
|
||||
3 files changed, 17 insertions(+), 16 deletions(-)
|
||||
|
||||
--- a/arch/arm/Kconfig
|
||||
+++ b/arch/arm/Kconfig
|
||||
@@ -2160,7 +2160,7 @@ config NEON
|
||||
|
||||
config KERNEL_MODE_NEON
|
||||
bool "Support for NEON in kernel mode"
|
||||
- depends on NEON && AEABI
|
||||
+ depends on NEON && AEABI && !PREEMPT_RT_BASE
|
||||
help
|
||||
Say Y to include support for NEON in kernel mode.
|
||||
|
||||
--- a/arch/arm64/crypto/Kconfig
|
||||
+++ b/arch/arm64/crypto/Kconfig
|
||||
@@ -19,43 +19,43 @@ config CRYPTO_SHA512_ARM64
|
||||
|
||||
config CRYPTO_SHA1_ARM64_CE
|
||||
tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)"
|
||||
- depends on KERNEL_MODE_NEON
|
||||
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
|
||||
select CRYPTO_HASH
|
||||
select CRYPTO_SHA1
|
||||
|
||||
config CRYPTO_SHA2_ARM64_CE
|
||||
tristate "SHA-224/SHA-256 digest algorithm (ARMv8 Crypto Extensions)"
|
||||
- depends on KERNEL_MODE_NEON
|
||||
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
|
||||
select CRYPTO_HASH
|
||||
select CRYPTO_SHA256_ARM64
|
||||
|
||||
config CRYPTO_SHA512_ARM64_CE
|
||||
tristate "SHA-384/SHA-512 digest algorithm (ARMv8 Crypto Extensions)"
|
||||
- depends on KERNEL_MODE_NEON
|
||||
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
|
||||
select CRYPTO_HASH
|
||||
select CRYPTO_SHA512_ARM64
|
||||
|
||||
config CRYPTO_SHA3_ARM64
|
||||
tristate "SHA3 digest algorithm (ARMv8.2 Crypto Extensions)"
|
||||
- depends on KERNEL_MODE_NEON
|
||||
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
|
||||
select CRYPTO_HASH
|
||||
select CRYPTO_SHA3
|
||||
|
||||
config CRYPTO_SM3_ARM64_CE
|
||||
tristate "SM3 digest algorithm (ARMv8.2 Crypto Extensions)"
|
||||
- depends on KERNEL_MODE_NEON
|
||||
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
|
||||
select CRYPTO_HASH
|
||||
select CRYPTO_SM3
|
||||
|
||||
config CRYPTO_SM4_ARM64_CE
|
||||
tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)"
|
||||
- depends on KERNEL_MODE_NEON
|
||||
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
|
||||
select CRYPTO_ALGAPI
|
||||
select CRYPTO_SM4
|
||||
|
||||
config CRYPTO_GHASH_ARM64_CE
|
||||
tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions"
|
||||
- depends on KERNEL_MODE_NEON
|
||||
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
|
||||
select CRYPTO_HASH
|
||||
select CRYPTO_GF128MUL
|
||||
select CRYPTO_AES
|
||||
@@ -63,7 +63,7 @@ config CRYPTO_GHASH_ARM64_CE
|
||||
|
||||
config CRYPTO_CRCT10DIF_ARM64_CE
|
||||
tristate "CRCT10DIF digest algorithm using PMULL instructions"
|
||||
- depends on KERNEL_MODE_NEON && CRC_T10DIF
|
||||
+ depends on KERNEL_MODE_NEON && CRC_T10DIF && !PREEMPT_RT_BASE
|
||||
select CRYPTO_HASH
|
||||
|
||||
config CRYPTO_CRC32_ARM64_CE
|
||||
@@ -77,13 +77,13 @@ config CRYPTO_AES_ARM64
|
||||
|
||||
config CRYPTO_AES_ARM64_CE
|
||||
tristate "AES core cipher using ARMv8 Crypto Extensions"
|
||||
- depends on ARM64 && KERNEL_MODE_NEON
|
||||
+ depends on ARM64 && KERNEL_MODE_NEON && !PREEMPT_RT_BASE
|
||||
select CRYPTO_ALGAPI
|
||||
select CRYPTO_AES_ARM64
|
||||
|
||||
config CRYPTO_AES_ARM64_CE_CCM
|
||||
tristate "AES in CCM mode using ARMv8 Crypto Extensions"
|
||||
- depends on ARM64 && KERNEL_MODE_NEON
|
||||
+ depends on ARM64 && KERNEL_MODE_NEON && !PREEMPT_RT_BASE
|
||||
select CRYPTO_ALGAPI
|
||||
select CRYPTO_AES_ARM64_CE
|
||||
select CRYPTO_AES_ARM64
|
||||
@@ -91,7 +91,7 @@ config CRYPTO_AES_ARM64_CE_CCM
|
||||
|
||||
config CRYPTO_AES_ARM64_CE_BLK
|
||||
tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions"
|
||||
- depends on KERNEL_MODE_NEON
|
||||
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
|
||||
select CRYPTO_BLKCIPHER
|
||||
select CRYPTO_AES_ARM64_CE
|
||||
select CRYPTO_AES_ARM64
|
||||
@@ -99,7 +99,7 @@ config CRYPTO_AES_ARM64_CE_BLK
|
||||
|
||||
config CRYPTO_AES_ARM64_NEON_BLK
|
||||
tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions"
|
||||
- depends on KERNEL_MODE_NEON
|
||||
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
|
||||
select CRYPTO_BLKCIPHER
|
||||
select CRYPTO_AES_ARM64
|
||||
select CRYPTO_AES
|
||||
@@ -107,13 +107,13 @@ config CRYPTO_AES_ARM64_NEON_BLK
|
||||
|
||||
config CRYPTO_CHACHA20_NEON
|
||||
tristate "NEON accelerated ChaCha20 symmetric cipher"
|
||||
- depends on KERNEL_MODE_NEON
|
||||
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
|
||||
select CRYPTO_BLKCIPHER
|
||||
select CRYPTO_CHACHA20
|
||||
|
||||
config CRYPTO_AES_ARM64_BS
|
||||
tristate "AES in ECB/CBC/CTR/XTS modes using bit-sliced NEON algorithm"
|
||||
- depends on KERNEL_MODE_NEON
|
||||
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
|
||||
select CRYPTO_BLKCIPHER
|
||||
select CRYPTO_AES_ARM64_NEON_BLK
|
||||
select CRYPTO_AES_ARM64
|
||||
--- a/arch/arm64/crypto/crc32-ce-glue.c
|
||||
+++ b/arch/arm64/crypto/crc32-ce-glue.c
|
||||
@@ -208,7 +208,8 @@ static struct shash_alg crc32_pmull_algs
|
||||
|
||||
static int __init crc32_pmull_mod_init(void)
|
||||
{
|
||||
- if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_PMULL)) {
|
||||
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
|
||||
+ !IS_ENABLED(CONFIG_PREEMPT_RT_BASE) && (elf_hwcap & HWCAP_PMULL)) {
|
||||
crc32_pmull_algs[0].update = crc32_pmull_update;
|
||||
crc32_pmull_algs[1].update = crc32c_pmull_update;
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Tue, 14 Jul 2015 14:26:34 +0200
|
||||
Subject: powerpc: Use generic rwsem on RT
|
||||
|
||||
Use generic code which uses rtmutex
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
arch/powerpc/Kconfig | 3 ++-
|
||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/arch/powerpc/Kconfig
|
||||
+++ b/arch/powerpc/Kconfig
|
||||
@@ -105,10 +105,11 @@ config LOCKDEP_SUPPORT
|
||||
|
||||
config RWSEM_GENERIC_SPINLOCK
|
||||
bool
|
||||
+ default y if PREEMPT_RT_FULL
|
||||
|
||||
config RWSEM_XCHGADD_ALGORITHM
|
||||
bool
|
||||
- default y
|
||||
+ default y if !PREEMPT_RT_FULL
|
||||
|
||||
config GENERIC_LOCKBREAK
|
||||
bool
|
||||
@@ -0,0 +1,37 @@
|
||||
From: Bogdan Purcareata <bogdan.purcareata@freescale.com>
|
||||
Date: Fri, 24 Apr 2015 15:53:13 +0000
|
||||
Subject: powerpc/kvm: Disable in-kernel MPIC emulation for PREEMPT_RT_FULL
|
||||
|
||||
While converting the openpic emulation code to use a raw_spinlock_t enables
|
||||
guests to run on RT, there's still a performance issue. For interrupts sent in
|
||||
directed delivery mode with a multiple CPU mask, the emulated openpic will loop
|
||||
through all of the VCPUs, and for each VCPU it calls IRQ_check, which will loop
through all the pending interrupts for that VCPU. This is done while holding the
raw_lock, meaning that in all this time the interrupts and preemption are
disabled on the host Linux. A malicious user app can max out both these numbers and
cause a DoS.
|
||||
|
||||
This temporary fix is sent for two reasons. First is so that users who want to
|
||||
use the in-kernel MPIC emulation are aware of the potential latencies, thus
|
||||
making sure that the hardware MPIC and their usage scenario does not involve
|
||||
interrupts sent in directed delivery mode, and the number of possible pending
|
||||
interrupts is kept small. Secondly, this should incentivize the development of a
|
||||
proper openpic emulation that would be better suited for RT.
|
||||
|
||||
Acked-by: Scott Wood <scottwood@freescale.com>
|
||||
Signed-off-by: Bogdan Purcareata <bogdan.purcareata@freescale.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
arch/powerpc/kvm/Kconfig | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
--- a/arch/powerpc/kvm/Kconfig
|
||||
+++ b/arch/powerpc/kvm/Kconfig
|
||||
@@ -178,6 +178,7 @@ config KVM_E500MC
|
||||
config KVM_MPIC
|
||||
bool "KVM in-kernel MPIC emulation"
|
||||
depends on KVM && E500
|
||||
+ depends on !PREEMPT_RT_FULL
|
||||
select HAVE_KVM_IRQCHIP
|
||||
select HAVE_KVM_IRQFD
|
||||
select HAVE_KVM_IRQ_ROUTING
|
||||
@@ -0,0 +1,22 @@
|
||||
Subject: powerpc: Disable highmem on RT
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Mon, 18 Jul 2011 17:08:34 +0200
|
||||
|
||||
The current highmem handling on -RT is not compatible and needs fixups.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
arch/powerpc/Kconfig | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/arch/powerpc/Kconfig
|
||||
+++ b/arch/powerpc/Kconfig
|
||||
@@ -398,7 +398,7 @@ menu "Kernel options"
|
||||
|
||||
config HIGHMEM
|
||||
bool "High memory support"
|
||||
- depends on PPC32
|
||||
+ depends on PPC32 && !PREEMPT_RT_FULL
|
||||
|
||||
source kernel/Kconfig.hz
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
Subject: mips: Disable highmem on RT
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Mon, 18 Jul 2011 17:10:12 +0200
|
||||
|
||||
The current highmem handling on -RT is not compatible and needs fixups.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
arch/mips/Kconfig | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/arch/mips/Kconfig
|
||||
+++ b/arch/mips/Kconfig
|
||||
@@ -2517,7 +2517,7 @@ config MIPS_CRC_SUPPORT
|
||||
#
|
||||
config HIGHMEM
|
||||
bool "High Memory Support"
|
||||
- depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA
|
||||
+ depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA && !PREEMPT_RT_FULL
|
||||
|
||||
config CPU_SUPPORTS_HIGHMEM
|
||||
bool
|
||||
@@ -0,0 +1,28 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Sun, 26 Jul 2009 02:21:32 +0200
|
||||
Subject: x86: Use generic rwsem_spinlocks on -rt
|
||||
|
||||
Simplifies the separation of anon_rw_semaphores and rw_semaphores for
|
||||
-rt.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
|
||||
---
|
||||
arch/x86/Kconfig | 5 ++++-
|
||||
1 file changed, 4 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/arch/x86/Kconfig
|
||||
+++ b/arch/x86/Kconfig
|
||||
@@ -264,8 +264,11 @@ config ARCH_MAY_HAVE_PC_FDC
|
||||
def_bool y
|
||||
depends on ISA_DMA_API
|
||||
|
||||
+config RWSEM_GENERIC_SPINLOCK
|
||||
+ def_bool PREEMPT_RT_FULL
|
||||
+
|
||||
config RWSEM_XCHGADD_ALGORITHM
|
||||
- def_bool y
|
||||
+ def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL
|
||||
|
||||
config GENERIC_CALIBRATE_DELAY
|
||||
def_bool y
|
||||
@@ -0,0 +1,34 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Thu, 23 Jan 2014 14:45:59 +0100
|
||||
Subject: leds: trigger: disable CPU trigger on -RT
|
||||
|
||||
as it triggers:
|
||||
|CPU: 0 PID: 0 Comm: swapper Not tainted 3.12.8-rt10 #141
|
||||
|[<c0014aa4>] (unwind_backtrace+0x0/0xf8) from [<c0012788>] (show_stack+0x1c/0x20)
|
||||
|[<c0012788>] (show_stack+0x1c/0x20) from [<c043c8dc>] (dump_stack+0x20/0x2c)
|
||||
|[<c043c8dc>] (dump_stack+0x20/0x2c) from [<c004c5e8>] (__might_sleep+0x13c/0x170)
|
||||
|[<c004c5e8>] (__might_sleep+0x13c/0x170) from [<c043f270>] (__rt_spin_lock+0x28/0x38)
|
||||
|[<c043f270>] (__rt_spin_lock+0x28/0x38) from [<c043fa00>] (rt_read_lock+0x68/0x7c)
|
||||
|[<c043fa00>] (rt_read_lock+0x68/0x7c) from [<c036cf74>] (led_trigger_event+0x2c/0x5c)
|
||||
|[<c036cf74>] (led_trigger_event+0x2c/0x5c) from [<c036e0bc>] (ledtrig_cpu+0x54/0x5c)
|
||||
|[<c036e0bc>] (ledtrig_cpu+0x54/0x5c) from [<c000ffd8>] (arch_cpu_idle_exit+0x18/0x1c)
|
||||
|[<c000ffd8>] (arch_cpu_idle_exit+0x18/0x1c) from [<c00590b8>] (cpu_startup_entry+0xa8/0x234)
|
||||
|[<c00590b8>] (cpu_startup_entry+0xa8/0x234) from [<c043b2cc>] (rest_init+0xb8/0xe0)
|
||||
|[<c043b2cc>] (rest_init+0xb8/0xe0) from [<c061ebe0>] (start_kernel+0x2c4/0x380)
|
||||
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
drivers/leds/trigger/Kconfig | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
--- a/drivers/leds/trigger/Kconfig
|
||||
+++ b/drivers/leds/trigger/Kconfig
|
||||
@@ -63,6 +63,7 @@ config LEDS_TRIGGER_BACKLIGHT
|
||||
|
||||
config LEDS_TRIGGER_CPU
|
||||
bool "LED CPU Trigger"
|
||||
+ depends on !PREEMPT_RT_BASE
|
||||
help
|
||||
This allows LEDs to be controlled by active CPUs. This shows
|
||||
the active CPUs across an array of LEDs so you can see which
|
||||
@@ -0,0 +1,32 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Thu, 9 Apr 2015 15:23:01 +0200
|
||||
Subject: cpufreq: drop K8's driver from being selected
|
||||
|
||||
Ralf posted a picture of a backtrace from
|
||||
|
||||
| powernowk8_target_fn() -> transition_frequency_fidvid() and then at the
|
||||
| end:
|
||||
| 932 policy = cpufreq_cpu_get(smp_processor_id());
|
||||
| 933 cpufreq_cpu_put(policy);
|
||||
|
||||
crashing the system on -RT. I assumed that policy was a NULL pointer but
that was ruled out. Since Ralf can't do any more investigation on this and
I have no machine with this, I simply switch it off.
|
||||
|
||||
Reported-by: Ralf Mardorf <ralf.mardorf@alice-dsl.net>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
drivers/cpufreq/Kconfig.x86 | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/cpufreq/Kconfig.x86
|
||||
+++ b/drivers/cpufreq/Kconfig.x86
|
||||
@@ -125,7 +125,7 @@ config X86_POWERNOW_K7_ACPI
|
||||
|
||||
config X86_POWERNOW_K8
|
||||
tristate "AMD Opteron/Athlon64 PowerNow!"
|
||||
- depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ
|
||||
+ depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ && !PREEMPT_RT_BASE
|
||||
help
|
||||
This adds the CPUFreq driver for K8/early Opteron/Athlon64 processors.
|
||||
Support for K10 and newer processors is now in acpi-cpufreq.
|
||||
31
kernel/patches-4.19.x-rt/0057-md-disable-bcache.patch
Normal file
@@ -0,0 +1,31 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Thu, 29 Aug 2013 11:48:57 +0200
|
||||
Subject: md: disable bcache
|
||||
|
||||
It uses anon semaphores
|
||||
|drivers/md/bcache/request.c: In function ‘cached_dev_write_complete’:
|
||||
|drivers/md/bcache/request.c:1007:2: error: implicit declaration of function ‘up_read_non_owner’ [-Werror=implicit-function-declaration]
|
||||
| up_read_non_owner(&dc->writeback_lock);
|
||||
| ^
|
||||
|drivers/md/bcache/request.c: In function ‘request_write’:
|
||||
|drivers/md/bcache/request.c:1033:2: error: implicit declaration of function ‘down_read_non_owner’ [-Werror=implicit-function-declaration]
|
||||
| down_read_non_owner(&dc->writeback_lock);
|
||||
| ^
|
||||
|
||||
either we get rid of those or we have to introduce them…
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
drivers/md/bcache/Kconfig | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
--- a/drivers/md/bcache/Kconfig
|
||||
+++ b/drivers/md/bcache/Kconfig
|
||||
@@ -1,6 +1,7 @@
|
||||
|
||||
config BCACHE
|
||||
tristate "Block device as cache"
|
||||
+ depends on !PREEMPT_RT_FULL
|
||||
select CRC64
|
||||
help
|
||||
Allows a block device to be used as cache for other devices; uses
|
||||
@@ -0,0 +1,39 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Thu, 26 Jul 2018 15:03:16 +0200
|
||||
Subject: [PATCH] efi: Disable runtime services on RT
|
||||
|
||||
Based on measurements the EFI functions get_variable /
get_next_variable take up to 2us which looks okay.
The functions get_time, set_time take around 10ms. Those 10ms are too
much. Even one ms would be too much.
Ard mentioned that SetVariable might even trigger larger latencies if
the firmware will erase flash blocks on NOR.

The time functions are used by efi-rtc and can be triggered during
runtime (either via explicit read/write or ntp sync).
|
||||
|
||||
The variable write could be used by pstore.
|
||||
These functions can be disabled without much of a loss. The poweroff /
|
||||
reboot hooks may be provided by PSCI.
|
||||
|
||||
Disable EFI's runtime wrappers.
|
||||
|
||||
This was observed on "EFI v2.60 by SoftIron Overdrive 1000".
|
||||
|
||||
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
drivers/firmware/efi/efi.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/firmware/efi/efi.c
|
||||
+++ b/drivers/firmware/efi/efi.c
|
||||
@@ -87,7 +87,7 @@ struct mm_struct efi_mm = {
|
||||
|
||||
struct workqueue_struct *efi_rts_wq;
|
||||
|
||||
-static bool disable_runtime;
|
||||
+static bool disable_runtime = IS_ENABLED(CONFIG_PREEMPT_RT_BASE);
|
||||
static int __init setup_noefi(char *arg)
|
||||
{
|
||||
disable_runtime = true;
|
||||
163
kernel/patches-4.19.x-rt/0059-printk-kill.patch
Normal file
@@ -0,0 +1,163 @@
|
||||
Subject: printk: Add a printk kill switch
|
||||
From: Ingo Molnar <mingo@elte.hu>
|
||||
Date: Fri, 22 Jul 2011 17:58:40 +0200
|
||||
|
||||
Add a printk kill switch. This is used from the (NMI) watchdog to ensure that
it does not deadlock with the early printk code.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
include/linux/printk.h | 2 +
|
||||
kernel/printk/printk.c | 79 ++++++++++++++++++++++++++++++++++++-------------
|
||||
kernel/watchdog_hld.c | 10 ++++++
|
||||
3 files changed, 71 insertions(+), 20 deletions(-)
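Editor's usage sketch (hypothetical caller, mirroring the watchdog_hld.c hunk below): a hard-lockup detector first kills regular printk so its own output cannot deadlock on the printk locks, then reports through the polling early console path.

#include <linux/printk.h>

static void example_report_hard_lockup(int cpu)
{
	printk_kill();		/* reroute further printk output to early_printk */
	pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n", cpu);
}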
|
||||
|
||||
--- a/include/linux/printk.h
|
||||
+++ b/include/linux/printk.h
|
||||
@@ -140,9 +140,11 @@ struct va_format {
|
||||
#ifdef CONFIG_EARLY_PRINTK
|
||||
extern asmlinkage __printf(1, 2)
|
||||
void early_printk(const char *fmt, ...);
|
||||
+extern void printk_kill(void);
|
||||
#else
|
||||
static inline __printf(1, 2) __cold
|
||||
void early_printk(const char *s, ...) { }
|
||||
+static inline void printk_kill(void) { }
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_PRINTK_NMI
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -405,6 +405,58 @@ DEFINE_RAW_SPINLOCK(logbuf_lock);
|
||||
printk_safe_exit_irqrestore(flags); \
|
||||
} while (0)
|
||||
|
||||
+#ifdef CONFIG_EARLY_PRINTK
|
||||
+struct console *early_console;
|
||||
+
|
||||
+static void early_vprintk(const char *fmt, va_list ap)
|
||||
+{
|
||||
+ if (early_console) {
|
||||
+ char buf[512];
|
||||
+ int n = vscnprintf(buf, sizeof(buf), fmt, ap);
|
||||
+
|
||||
+ early_console->write(early_console, buf, n);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+asmlinkage void early_printk(const char *fmt, ...)
|
||||
+{
|
||||
+ va_list ap;
|
||||
+
|
||||
+ va_start(ap, fmt);
|
||||
+ early_vprintk(fmt, ap);
|
||||
+ va_end(ap);
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * This is independent of any log levels - a global
|
||||
+ * kill switch that turns off all of printk.
|
||||
+ *
|
||||
+ * Used by the NMI watchdog if early-printk is enabled.
|
||||
+ */
|
||||
+static bool __read_mostly printk_killswitch;
|
||||
+
|
||||
+void printk_kill(void)
|
||||
+{
|
||||
+ printk_killswitch = true;
|
||||
+}
|
||||
+
|
||||
+#ifdef CONFIG_PRINTK
|
||||
+static int forced_early_printk(const char *fmt, va_list ap)
|
||||
+{
|
||||
+ if (!printk_killswitch)
|
||||
+ return 0;
|
||||
+ early_vprintk(fmt, ap);
|
||||
+ return 1;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+#else
|
||||
+static inline int forced_early_printk(const char *fmt, va_list ap)
|
||||
+{
|
||||
+ return 0;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
#ifdef CONFIG_PRINTK
|
||||
DECLARE_WAIT_QUEUE_HEAD(log_wait);
|
||||
/* the next printk record to read by syslog(READ) or /proc/kmsg */
|
||||
@@ -1897,6 +1949,13 @@ asmlinkage int vprintk_emit(int facility
|
||||
bool in_sched = false;
|
||||
unsigned long flags;
|
||||
|
||||
+ /*
|
||||
+ * Fall back to early_printk if a debugging subsystem has
|
||||
+ * killed printk output
|
||||
+ */
|
||||
+ if (unlikely(forced_early_printk(fmt, args)))
|
||||
+ return 1;
|
||||
+
|
||||
if (level == LOGLEVEL_SCHED) {
|
||||
level = LOGLEVEL_DEFAULT;
|
||||
in_sched = true;
|
||||
@@ -2037,26 +2096,6 @@ static bool suppress_message_printing(in
|
||||
|
||||
#endif /* CONFIG_PRINTK */
|
||||
|
||||
-#ifdef CONFIG_EARLY_PRINTK
|
||||
-struct console *early_console;
|
||||
-
|
||||
-asmlinkage __visible void early_printk(const char *fmt, ...)
|
||||
-{
|
||||
- va_list ap;
|
||||
- char buf[512];
|
||||
- int n;
|
||||
-
|
||||
- if (!early_console)
|
||||
- return;
|
||||
-
|
||||
- va_start(ap, fmt);
|
||||
- n = vscnprintf(buf, sizeof(buf), fmt, ap);
|
||||
- va_end(ap);
|
||||
-
|
||||
- early_console->write(early_console, buf, n);
|
||||
-}
|
||||
-#endif
|
||||
-
|
||||
static int __add_preferred_console(char *name, int idx, char *options,
|
||||
char *brl_options)
|
||||
{
|
||||
--- a/kernel/watchdog_hld.c
|
||||
+++ b/kernel/watchdog_hld.c
|
||||
@@ -24,6 +24,8 @@ static DEFINE_PER_CPU(bool, hard_watchdo
|
||||
static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
|
||||
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
|
||||
static DEFINE_PER_CPU(struct perf_event *, dead_event);
|
||||
+static DEFINE_RAW_SPINLOCK(watchdog_output_lock);
|
||||
+
|
||||
static struct cpumask dead_events_mask;
|
||||
|
||||
static unsigned long hardlockup_allcpu_dumped;
|
||||
@@ -134,6 +136,13 @@ static void watchdog_overflow_callback(s
|
||||
/* only print hardlockups once */
|
||||
if (__this_cpu_read(hard_watchdog_warn) == true)
|
||||
return;
|
||||
+ /*
|
||||
+ * If early-printk is enabled then make sure we do not
|
||||
+ * lock up in printk() and kill console logging:
|
||||
+ */
|
||||
+ printk_kill();
|
||||
+
|
||||
+ raw_spin_lock(&watchdog_output_lock);
|
||||
|
||||
pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
|
||||
print_modules();
|
||||
@@ -151,6 +160,7 @@ static void watchdog_overflow_callback(s
|
||||
!test_and_set_bit(0, &hardlockup_allcpu_dumped))
|
||||
trigger_allbutself_cpu_backtrace();
|
||||
|
||||
+ raw_spin_unlock(&watchdog_output_lock);
|
||||
if (hardlockup_panic)
|
||||
nmi_panic(regs, "Hard LOCKUP");
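
Taken together, the hunks above give NMI and debug paths a way to bypass the normal printk machinery: kill printk, then write through early_printk(). A minimal sketch of such a caller, assuming an early console has been registered; the function name below is invented for illustration and is not part of the patch:

/* Illustration only: a hypothetical debug path using the killswitch. */
#include <linux/printk.h>

static void demo_nmi_dump(const char *reason)
{
	/*
	 * Regular printk() may deadlock if the logbuf lock holder is the
	 * CPU that just hard-locked, so switch it off first.
	 */
	printk_kill();

	/*
	 * With the killswitch set, vprintk_emit() falls back to the
	 * early_printk() path, which writes synchronously to early_console.
	 */
	early_printk("hard lockup: %s\n", reason);
}
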
@@ -0,0 +1,31 @@
|
||||
Subject: printk: Add "force_early_printk" boot param to help with debugging
|
||||
From: Peter Zijlstra <peterz@infradead.org>
|
||||
Date: Fri, 02 Sep 2011 14:41:29 +0200
|
||||
|
||||
Gives me an option to screw printk and actually see what the machine
|
||||
says.
|
||||
|
||||
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
|
||||
Link: http://lkml.kernel.org/r/1314967289.1301.11.camel@twins
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Link: http://lkml.kernel.org/n/tip-ykb97nsfmobq44xketrxs977@git.kernel.org
|
||||
---
|
||||
kernel/printk/printk.c | 7 +++++++
|
||||
1 file changed, 7 insertions(+)
|
||||
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -435,6 +435,13 @@ asmlinkage void early_printk(const char
|
||||
*/
|
||||
static bool __read_mostly printk_killswitch;
|
||||
|
||||
+static int __init force_early_printk_setup(char *str)
|
||||
+{
|
||||
+ printk_killswitch = true;
|
||||
+ return 0;
|
||||
+}
|
||||
+early_param("force_early_printk", force_early_printk_setup);
|
||||
+
|
||||
void printk_kill(void)
|
||||
{
|
||||
printk_killswitch = true;
|
||||
47
kernel/patches-4.19.x-rt/0061-preempt-nort-rt-variants.patch
Normal file
@@ -0,0 +1,47 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Fri, 24 Jul 2009 12:38:56 +0200
|
||||
Subject: preempt: Provide preempt_*_(no)rt variants
|
||||
|
||||
RT needs a few preempt_disable/enable points which are not necessary
|
||||
otherwise. Implement variants to avoid #ifdeffery.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
|
||||
---
|
||||
include/linux/preempt.h | 18 +++++++++++++++++-
|
||||
1 file changed, 17 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/include/linux/preempt.h
|
||||
+++ b/include/linux/preempt.h
|
||||
@@ -181,7 +181,11 @@ do { \
|
||||
preempt_count_dec(); \
|
||||
} while (0)
|
||||
|
||||
-#define preempt_enable_no_resched() sched_preempt_enable_no_resched()
|
||||
+#ifdef CONFIG_PREEMPT_RT_BASE
|
||||
+# define preempt_enable_no_resched() sched_preempt_enable_no_resched()
|
||||
+#else
|
||||
+# define preempt_enable_no_resched() preempt_enable()
|
||||
+#endif
|
||||
|
||||
#define preemptible() (preempt_count() == 0 && !irqs_disabled())
|
||||
|
||||
@@ -298,6 +302,18 @@ do { \
|
||||
set_preempt_need_resched(); \
|
||||
} while (0)
|
||||
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+# define preempt_disable_rt() preempt_disable()
|
||||
+# define preempt_enable_rt() preempt_enable()
|
||||
+# define preempt_disable_nort() barrier()
|
||||
+# define preempt_enable_nort() barrier()
|
||||
+#else
|
||||
+# define preempt_disable_rt() barrier()
|
||||
+# define preempt_enable_rt() barrier()
|
||||
+# define preempt_disable_nort() preempt_disable()
|
||||
+# define preempt_enable_nort() preempt_enable()
|
||||
+#endif
|
||||
+
|
||||
#ifdef CONFIG_PREEMPT_NOTIFIERS
|
||||
|
||||
struct preempt_notifier;
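
A rough usage sketch of the variants introduced above, assuming a caller that only needs preemption disabled on !RT kernels; the per-CPU counter below is an invented example, not taken from the patch:

#include <linux/percpu.h>
#include <linux/preempt.h>

static DEFINE_PER_CPU(unsigned long, demo_events);

static void demo_count_event(void)
{
	/*
	 * On !RT this disables preemption around the per-CPU update; on RT
	 * it compiles to a barrier, on the assumption that this
	 * (hypothetical) path is already serialized by RT locking.
	 */
	preempt_disable_nort();
	__this_cpu_inc(demo_events);
	preempt_enable_nort();
}
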
@@ -0,0 +1,62 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Wed, 8 Mar 2017 14:23:35 +0100
|
||||
Subject: [PATCH] futex: workaround migrate_disable/enable in different context
|
||||
|
||||
migrate_disable()/migrate_enable() take a different path in atomic() vs
!atomic() context. These little hacks ensure that the migrate code counts
stay balanced while we lock the hb lock with interrupts enabled and unlock
it with interrupts disabled.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/futex.c | 19 +++++++++++++++++++
|
||||
1 file changed, 19 insertions(+)
|
||||
|
||||
--- a/kernel/futex.c
|
||||
+++ b/kernel/futex.c
|
||||
@@ -2856,6 +2856,14 @@ static int futex_lock_pi(u32 __user *uad
|
||||
* before __rt_mutex_start_proxy_lock() is done.
|
||||
*/
|
||||
raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
|
||||
+ /*
|
||||
+ * the migrate_disable() here disables migration in the in_atomic() fast
|
||||
+ * path which is enabled again in the following spin_unlock(). We have
|
||||
+ * one migrate_disable() pending in the slow-path which is reversed
|
||||
+ * after the raw_spin_unlock_irq() where we leave the atomic context.
|
||||
+ */
|
||||
+ migrate_disable();
|
||||
+
|
||||
spin_unlock(q.lock_ptr);
|
||||
/*
|
||||
* __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter
|
||||
@@ -2864,6 +2872,7 @@ static int futex_lock_pi(u32 __user *uad
|
||||
*/
|
||||
ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
|
||||
raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);
|
||||
+ migrate_enable();
|
||||
|
||||
if (ret) {
|
||||
if (ret == 1)
|
||||
@@ -3012,11 +3021,21 @@ static int futex_unlock_pi(u32 __user *u
|
||||
* rt_waiter. Also see the WARN in wake_futex_pi().
|
||||
*/
|
||||
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
|
||||
+ /*
|
||||
+ * Magic trickery for now to make the RT migrate disable
|
||||
+ * logic happy. The following spin_unlock() happens with
|
||||
+ * interrupts disabled so the internal migrate_enable()
|
||||
+ * won't undo the migrate_disable() which was issued when
|
||||
+ * locking hb->lock.
|
||||
+ */
|
||||
+ migrate_disable();
|
||||
spin_unlock(&hb->lock);
|
||||
|
||||
/* drops pi_state->pi_mutex.wait_lock */
|
||||
ret = wake_futex_pi(uaddr, uval, pi_state);
|
||||
|
||||
+ migrate_enable();
|
||||
+
|
||||
put_pi_state(pi_state);
|
||||
|
||||
/*
|
||||
330
kernel/patches-4.19.x-rt/0063-rt-local-irq-lock.patch
Normal file
@@ -0,0 +1,330 @@
|
||||
Subject: rt: Add local irq locks
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Mon, 20 Jun 2011 09:03:47 +0200
|
||||
|
||||
Introduce locallock. For !RT this maps to preempt_disable()/
local_irq_disable() so there is not much that changes. For RT this will
map to a spinlock. This makes preemption possible and the locked "resource"
gets the lockdep annotation it wouldn't have otherwise. The locks are
recursive for owner == current. Also, all locks use migrate_disable()
which ensures that the task is not migrated to another CPU while the lock
is held and the owner is preempted.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
include/linux/locallock.h | 271 ++++++++++++++++++++++++++++++++++++++++++++++
|
||||
include/linux/percpu.h | 29 ++++
|
||||
2 files changed, 300 insertions(+)
|
||||
|
||||
--- /dev/null
|
||||
+++ b/include/linux/locallock.h
|
||||
@@ -0,0 +1,271 @@
|
||||
+#ifndef _LINUX_LOCALLOCK_H
|
||||
+#define _LINUX_LOCALLOCK_H
|
||||
+
|
||||
+#include <linux/percpu.h>
|
||||
+#include <linux/spinlock.h>
|
||||
+
|
||||
+#ifdef CONFIG_PREEMPT_RT_BASE
|
||||
+
|
||||
+#ifdef CONFIG_DEBUG_SPINLOCK
|
||||
+# define LL_WARN(cond) WARN_ON(cond)
|
||||
+#else
|
||||
+# define LL_WARN(cond) do { } while (0)
|
||||
+#endif
|
||||
+
|
||||
+/*
|
||||
+ * per cpu lock based substitute for local_irq_*()
|
||||
+ */
|
||||
+struct local_irq_lock {
|
||||
+ spinlock_t lock;
|
||||
+ struct task_struct *owner;
|
||||
+ int nestcnt;
|
||||
+ unsigned long flags;
|
||||
+};
|
||||
+
|
||||
+#define DEFINE_LOCAL_IRQ_LOCK(lvar) \
|
||||
+ DEFINE_PER_CPU(struct local_irq_lock, lvar) = { \
|
||||
+ .lock = __SPIN_LOCK_UNLOCKED((lvar).lock) }
|
||||
+
|
||||
+#define DECLARE_LOCAL_IRQ_LOCK(lvar) \
|
||||
+ DECLARE_PER_CPU(struct local_irq_lock, lvar)
|
||||
+
|
||||
+#define local_irq_lock_init(lvar) \
|
||||
+ do { \
|
||||
+ int __cpu; \
|
||||
+ for_each_possible_cpu(__cpu) \
|
||||
+ spin_lock_init(&per_cpu(lvar, __cpu).lock); \
|
||||
+ } while (0)
|
||||
+
|
||||
+static inline void __local_lock(struct local_irq_lock *lv)
|
||||
+{
|
||||
+ if (lv->owner != current) {
|
||||
+ spin_lock(&lv->lock);
|
||||
+ LL_WARN(lv->owner);
|
||||
+ LL_WARN(lv->nestcnt);
|
||||
+ lv->owner = current;
|
||||
+ }
|
||||
+ lv->nestcnt++;
|
||||
+}
|
||||
+
|
||||
+#define local_lock(lvar) \
|
||||
+ do { __local_lock(&get_local_var(lvar)); } while (0)
|
||||
+
|
||||
+#define local_lock_on(lvar, cpu) \
|
||||
+ do { __local_lock(&per_cpu(lvar, cpu)); } while (0)
|
||||
+
|
||||
+static inline int __local_trylock(struct local_irq_lock *lv)
|
||||
+{
|
||||
+ if (lv->owner != current && spin_trylock(&lv->lock)) {
|
||||
+ LL_WARN(lv->owner);
|
||||
+ LL_WARN(lv->nestcnt);
|
||||
+ lv->owner = current;
|
||||
+ lv->nestcnt = 1;
|
||||
+ return 1;
|
||||
+ } else if (lv->owner == current) {
|
||||
+ lv->nestcnt++;
|
||||
+ return 1;
|
||||
+ }
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+#define local_trylock(lvar) \
|
||||
+ ({ \
|
||||
+ int __locked; \
|
||||
+ __locked = __local_trylock(&get_local_var(lvar)); \
|
||||
+ if (!__locked) \
|
||||
+ put_local_var(lvar); \
|
||||
+ __locked; \
|
||||
+ })
|
||||
+
|
||||
+static inline void __local_unlock(struct local_irq_lock *lv)
|
||||
+{
|
||||
+ LL_WARN(lv->nestcnt == 0);
|
||||
+ LL_WARN(lv->owner != current);
|
||||
+ if (--lv->nestcnt)
|
||||
+ return;
|
||||
+
|
||||
+ lv->owner = NULL;
|
||||
+ spin_unlock(&lv->lock);
|
||||
+}
|
||||
+
|
||||
+#define local_unlock(lvar) \
|
||||
+ do { \
|
||||
+ __local_unlock(this_cpu_ptr(&lvar)); \
|
||||
+ put_local_var(lvar); \
|
||||
+ } while (0)
|
||||
+
|
||||
+#define local_unlock_on(lvar, cpu) \
|
||||
+ do { __local_unlock(&per_cpu(lvar, cpu)); } while (0)
|
||||
+
|
||||
+static inline void __local_lock_irq(struct local_irq_lock *lv)
|
||||
+{
|
||||
+ spin_lock_irqsave(&lv->lock, lv->flags);
|
||||
+ LL_WARN(lv->owner);
|
||||
+ LL_WARN(lv->nestcnt);
|
||||
+ lv->owner = current;
|
||||
+ lv->nestcnt = 1;
|
||||
+}
|
||||
+
|
||||
+#define local_lock_irq(lvar) \
|
||||
+ do { __local_lock_irq(&get_local_var(lvar)); } while (0)
|
||||
+
|
||||
+#define local_lock_irq_on(lvar, cpu) \
|
||||
+ do { __local_lock_irq(&per_cpu(lvar, cpu)); } while (0)
|
||||
+
|
||||
+static inline void __local_unlock_irq(struct local_irq_lock *lv)
|
||||
+{
|
||||
+ LL_WARN(!lv->nestcnt);
|
||||
+ LL_WARN(lv->owner != current);
|
||||
+ lv->owner = NULL;
|
||||
+ lv->nestcnt = 0;
|
||||
+ spin_unlock_irq(&lv->lock);
|
||||
+}
|
||||
+
|
||||
+#define local_unlock_irq(lvar) \
|
||||
+ do { \
|
||||
+ __local_unlock_irq(this_cpu_ptr(&lvar)); \
|
||||
+ put_local_var(lvar); \
|
||||
+ } while (0)
|
||||
+
|
||||
+#define local_unlock_irq_on(lvar, cpu) \
|
||||
+ do { \
|
||||
+ __local_unlock_irq(&per_cpu(lvar, cpu)); \
|
||||
+ } while (0)
|
||||
+
|
||||
+static inline int __local_lock_irqsave(struct local_irq_lock *lv)
|
||||
+{
|
||||
+ if (lv->owner != current) {
|
||||
+ __local_lock_irq(lv);
|
||||
+ return 0;
|
||||
+ } else {
|
||||
+ lv->nestcnt++;
|
||||
+ return 1;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+#define local_lock_irqsave(lvar, _flags) \
|
||||
+ do { \
|
||||
+ if (__local_lock_irqsave(&get_local_var(lvar))) \
|
||||
+ put_local_var(lvar); \
|
||||
+ _flags = __this_cpu_read(lvar.flags); \
|
||||
+ } while (0)
|
||||
+
|
||||
+#define local_lock_irqsave_on(lvar, _flags, cpu) \
|
||||
+ do { \
|
||||
+ __local_lock_irqsave(&per_cpu(lvar, cpu)); \
|
||||
+ _flags = per_cpu(lvar, cpu).flags; \
|
||||
+ } while (0)
|
||||
+
|
||||
+static inline int __local_unlock_irqrestore(struct local_irq_lock *lv,
|
||||
+ unsigned long flags)
|
||||
+{
|
||||
+ LL_WARN(!lv->nestcnt);
|
||||
+ LL_WARN(lv->owner != current);
|
||||
+ if (--lv->nestcnt)
|
||||
+ return 0;
|
||||
+
|
||||
+ lv->owner = NULL;
|
||||
+ spin_unlock_irqrestore(&lv->lock, lv->flags);
|
||||
+ return 1;
|
||||
+}
|
||||
+
|
||||
+#define local_unlock_irqrestore(lvar, flags) \
|
||||
+ do { \
|
||||
+ if (__local_unlock_irqrestore(this_cpu_ptr(&lvar), flags)) \
|
||||
+ put_local_var(lvar); \
|
||||
+ } while (0)
|
||||
+
|
||||
+#define local_unlock_irqrestore_on(lvar, flags, cpu) \
|
||||
+ do { \
|
||||
+ __local_unlock_irqrestore(&per_cpu(lvar, cpu), flags); \
|
||||
+ } while (0)
|
||||
+
|
||||
+#define local_spin_trylock_irq(lvar, lock) \
|
||||
+ ({ \
|
||||
+ int __locked; \
|
||||
+ local_lock_irq(lvar); \
|
||||
+ __locked = spin_trylock(lock); \
|
||||
+ if (!__locked) \
|
||||
+ local_unlock_irq(lvar); \
|
||||
+ __locked; \
|
||||
+ })
|
||||
+
|
||||
+#define local_spin_lock_irq(lvar, lock) \
|
||||
+ do { \
|
||||
+ local_lock_irq(lvar); \
|
||||
+ spin_lock(lock); \
|
||||
+ } while (0)
|
||||
+
|
||||
+#define local_spin_unlock_irq(lvar, lock) \
|
||||
+ do { \
|
||||
+ spin_unlock(lock); \
|
||||
+ local_unlock_irq(lvar); \
|
||||
+ } while (0)
|
||||
+
|
||||
+#define local_spin_lock_irqsave(lvar, lock, flags) \
|
||||
+ do { \
|
||||
+ local_lock_irqsave(lvar, flags); \
|
||||
+ spin_lock(lock); \
|
||||
+ } while (0)
|
||||
+
|
||||
+#define local_spin_unlock_irqrestore(lvar, lock, flags) \
|
||||
+ do { \
|
||||
+ spin_unlock(lock); \
|
||||
+ local_unlock_irqrestore(lvar, flags); \
|
||||
+ } while (0)
|
||||
+
|
||||
+#define get_locked_var(lvar, var) \
|
||||
+ (*({ \
|
||||
+ local_lock(lvar); \
|
||||
+ this_cpu_ptr(&var); \
|
||||
+ }))
|
||||
+
|
||||
+#define put_locked_var(lvar, var) local_unlock(lvar);
|
||||
+
|
||||
+#define local_lock_cpu(lvar) \
|
||||
+ ({ \
|
||||
+ local_lock(lvar); \
|
||||
+ smp_processor_id(); \
|
||||
+ })
|
||||
+
|
||||
+#define local_unlock_cpu(lvar) local_unlock(lvar)
|
||||
+
|
||||
+#else /* PREEMPT_RT_BASE */
|
||||
+
|
||||
+#define DEFINE_LOCAL_IRQ_LOCK(lvar) __typeof__(const int) lvar
|
||||
+#define DECLARE_LOCAL_IRQ_LOCK(lvar) extern __typeof__(const int) lvar
|
||||
+
|
||||
+static inline void local_irq_lock_init(int lvar) { }
|
||||
+
|
||||
+#define local_trylock(lvar) \
|
||||
+ ({ \
|
||||
+ preempt_disable(); \
|
||||
+ 1; \
|
||||
+ })
|
||||
+
|
||||
+#define local_lock(lvar) preempt_disable()
|
||||
+#define local_unlock(lvar) preempt_enable()
|
||||
+#define local_lock_irq(lvar) local_irq_disable()
|
||||
+#define local_lock_irq_on(lvar, cpu) local_irq_disable()
|
||||
+#define local_unlock_irq(lvar) local_irq_enable()
|
||||
+#define local_unlock_irq_on(lvar, cpu) local_irq_enable()
|
||||
+#define local_lock_irqsave(lvar, flags) local_irq_save(flags)
|
||||
+#define local_unlock_irqrestore(lvar, flags) local_irq_restore(flags)
|
||||
+
|
||||
+#define local_spin_trylock_irq(lvar, lock) spin_trylock_irq(lock)
|
||||
+#define local_spin_lock_irq(lvar, lock) spin_lock_irq(lock)
|
||||
+#define local_spin_unlock_irq(lvar, lock) spin_unlock_irq(lock)
|
||||
+#define local_spin_lock_irqsave(lvar, lock, flags) \
|
||||
+ spin_lock_irqsave(lock, flags)
|
||||
+#define local_spin_unlock_irqrestore(lvar, lock, flags) \
|
||||
+ spin_unlock_irqrestore(lock, flags)
|
||||
+
|
||||
+#define get_locked_var(lvar, var) get_cpu_var(var)
|
||||
+#define put_locked_var(lvar, var) put_cpu_var(var)
|
||||
+
|
||||
+#define local_lock_cpu(lvar) get_cpu()
|
||||
+#define local_unlock_cpu(lvar) put_cpu()
|
||||
+
|
||||
+#endif
|
||||
+
|
||||
+#endif
|
||||
--- a/include/linux/percpu.h
|
||||
+++ b/include/linux/percpu.h
|
||||
@@ -19,6 +19,35 @@
|
||||
#define PERCPU_MODULE_RESERVE 0
|
||||
#endif
|
||||
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+
|
||||
+#define get_local_var(var) (*({ \
|
||||
+ migrate_disable(); \
|
||||
+ this_cpu_ptr(&var); }))
|
||||
+
|
||||
+#define put_local_var(var) do { \
|
||||
+ (void)&(var); \
|
||||
+ migrate_enable(); \
|
||||
+} while (0)
|
||||
+
|
||||
+# define get_local_ptr(var) ({ \
|
||||
+ migrate_disable(); \
|
||||
+ this_cpu_ptr(var); })
|
||||
+
|
||||
+# define put_local_ptr(var) do { \
|
||||
+ (void)(var); \
|
||||
+ migrate_enable(); \
|
||||
+} while (0)
|
||||
+
|
||||
+#else
|
||||
+
|
||||
+#define get_local_var(var) get_cpu_var(var)
|
||||
+#define put_local_var(var) put_cpu_var(var)
|
||||
+#define get_local_ptr(var) get_cpu_ptr(var)
|
||||
+#define put_local_ptr(var) put_cpu_ptr(var)
|
||||
+
|
||||
+#endif
|
||||
+
|
||||
/* minimum unit size, also is the maximum supported allocation size */
|
||||
#define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10)
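
A short usage sketch of the locallock API added above, assuming statically allocated per-CPU data; the names demo_lock and demo_buf are invented, and the list heads are assumed to be initialized elsewhere:

#include <linux/list.h>
#include <linux/locallock.h>
#include <linux/percpu.h>

static DEFINE_LOCAL_IRQ_LOCK(demo_lock);
static DEFINE_PER_CPU(struct list_head, demo_buf);

static void demo_add(struct list_head *entry)
{
	unsigned long flags;

	/*
	 * On !RT this is plain local_irq_save(); on RT it takes a per-CPU
	 * sleeping spinlock and disables migration, so the section stays
	 * preemptible but remains CPU-local and lockdep-annotated.
	 */
	local_lock_irqsave(demo_lock, flags);
	list_add(entry, this_cpu_ptr(&demo_buf));
	local_unlock_irqrestore(demo_lock, flags);
}
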
@@ -0,0 +1,42 @@
|
||||
From: Julia Cartwright <julia@ni.com>
|
||||
Date: Mon, 7 May 2018 08:58:56 -0500
|
||||
Subject: [PATCH] locallock: provide {get,put}_locked_ptr() variants
|
||||
|
||||
Provide a set of locallocked accessors for pointers to per-CPU data;
|
||||
this is useful for dynamically-allocated per-CPU regions, for example.
|
||||
|
||||
These are symmetric with the {get,put}_cpu_ptr() per-CPU accessor
|
||||
variants.
|
||||
|
||||
Signed-off-by: Julia Cartwright <julia@ni.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/locallock.h | 10 ++++++++++
|
||||
1 file changed, 10 insertions(+)
|
||||
|
||||
--- a/include/linux/locallock.h
|
||||
+++ b/include/linux/locallock.h
|
||||
@@ -222,6 +222,14 @@ static inline int __local_unlock_irqrest
|
||||
|
||||
#define put_locked_var(lvar, var) local_unlock(lvar);
|
||||
|
||||
+#define get_locked_ptr(lvar, var) \
|
||||
+ ({ \
|
||||
+ local_lock(lvar); \
|
||||
+ this_cpu_ptr(var); \
|
||||
+ })
|
||||
+
|
||||
+#define put_locked_ptr(lvar, var) local_unlock(lvar);
|
||||
+
|
||||
#define local_lock_cpu(lvar) \
|
||||
({ \
|
||||
local_lock(lvar); \
|
||||
@@ -262,6 +270,8 @@ static inline void local_irq_lock_init(i
|
||||
|
||||
#define get_locked_var(lvar, var) get_cpu_var(var)
|
||||
#define put_locked_var(lvar, var) put_cpu_var(var)
|
||||
+#define get_locked_ptr(lvar, var) get_cpu_ptr(var)
|
||||
+#define put_locked_ptr(lvar, var) put_cpu_ptr(var)
|
||||
|
||||
#define local_lock_cpu(lvar) get_cpu()
|
||||
#define local_unlock_cpu(lvar) put_cpu()
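
A small sketch of how the new accessors might be used with a dynamically allocated per-CPU region, as the changelog suggests; demo_lock, demo_stats and demo_pcpu_stats are invented names and the alloc_percpu() call is assumed to happen at init time:

#include <linux/locallock.h>
#include <linux/percpu.h>
#include <linux/types.h>

struct demo_stats {
	u64 packets;
};

static DEFINE_LOCAL_IRQ_LOCK(demo_lock);
static struct demo_stats __percpu *demo_pcpu_stats;	/* alloc_percpu() elsewhere */

static void demo_account_packet(void)
{
	struct demo_stats *s;

	/* Lock this CPU's locallock, then dereference this CPU's slot. */
	s = get_locked_ptr(demo_lock, demo_pcpu_stats);
	s->packets++;
	put_locked_ptr(demo_lock, demo_pcpu_stats);
}
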
@@ -0,0 +1,23 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Fri, 3 Jul 2009 08:44:34 -0500
|
||||
Subject: mm/scatterlist: Do not disable irqs on RT
|
||||
|
||||
For -RT it is enough to keep pagefault disabled (which is currently handled by
|
||||
kmap_atomic()).
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
lib/scatterlist.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/lib/scatterlist.c
|
||||
+++ b/lib/scatterlist.c
|
||||
@@ -776,7 +776,7 @@ void sg_miter_stop(struct sg_mapping_ite
|
||||
flush_kernel_dcache_page(miter->page);
|
||||
|
||||
if (miter->__flags & SG_MITER_ATOMIC) {
|
||||
- WARN_ON_ONCE(preemptible());
|
||||
+ WARN_ON_ONCE(!pagefault_disabled());
|
||||
kunmap_atomic(miter->addr);
|
||||
} else
|
||||
kunmap(miter->page);
|
||||
142
kernel/patches-4.19.x-rt/0066-oleg-signal-rt-fix.patch
Normal file
@@ -0,0 +1,142 @@
|
||||
From: Oleg Nesterov <oleg@redhat.com>
|
||||
Date: Tue, 14 Jul 2015 14:26:34 +0200
|
||||
Subject: signal/x86: Delay calling signals in atomic
|
||||
|
||||
On x86_64 we must disable preemption before we enable interrupts
for stack faults, int3 and debugging, because the current task is using
a per CPU debug stack defined by the IST. If we schedule out, another task
can come in and use the same stack and cause the stack to be corrupted
and crash the kernel on return.

When CONFIG_PREEMPT_RT_FULL is enabled, spin_locks become mutexes, and
one of these is the spin lock used in signal handling.

Some of the debug code (int3) causes do_trap() to send a signal.
This function takes a spin lock that has been converted to a mutex
and so may sleep. If this happens, the stack corruption described
above becomes possible.

Instead of calling the signal right away, for PREEMPT_RT and x86_64,
the signal information is stored in the task's task_struct and
TIF_NOTIFY_RESUME is set. Then on exit of the trap, the signal resume
code will send the signal when preemption is enabled.

[ rostedt: Switched from #ifdef CONFIG_PREEMPT_RT_FULL to
  ARCH_RT_DELAYS_SIGNAL_SEND and added comments to the code. ]
|
||||
|
||||
|
||||
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
|
||||
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
|
||||
arch/x86/entry/common.c | 7 +++++++
|
||||
arch/x86/include/asm/signal.h | 13 +++++++++++++
|
||||
include/linux/sched.h | 4 ++++
|
||||
kernel/signal.c | 37 +++++++++++++++++++++++++++++++++++--
|
||||
4 files changed, 59 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/arch/x86/entry/common.c
|
||||
+++ b/arch/x86/entry/common.c
|
||||
@@ -151,6 +151,13 @@ static void exit_to_usermode_loop(struct
|
||||
if (cached_flags & _TIF_NEED_RESCHED)
|
||||
schedule();
|
||||
|
||||
+#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
|
||||
+ if (unlikely(current->forced_info.si_signo)) {
|
||||
+ struct task_struct *t = current;
|
||||
+ force_sig_info(t->forced_info.si_signo, &t->forced_info, t);
|
||||
+ t->forced_info.si_signo = 0;
|
||||
+ }
|
||||
+#endif
|
||||
if (cached_flags & _TIF_UPROBE)
|
||||
uprobe_notify_resume(regs);
|
||||
|
||||
--- a/arch/x86/include/asm/signal.h
|
||||
+++ b/arch/x86/include/asm/signal.h
|
||||
@@ -28,6 +28,19 @@ typedef struct {
|
||||
#define SA_IA32_ABI 0x02000000u
|
||||
#define SA_X32_ABI 0x01000000u
|
||||
|
||||
+/*
|
||||
+ * Because some traps use the IST stack, we must keep preemption
|
||||
+ * disabled while calling do_trap(), but do_trap() may call
|
||||
+ * force_sig_info() which will grab the signal spin_locks for the
|
||||
+ * task, which in PREEMPT_RT_FULL are mutexes. By defining
|
||||
+ * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set
|
||||
+ * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the
|
||||
+ * trap.
|
||||
+ */
|
||||
+#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_X86_64)
|
||||
+#define ARCH_RT_DELAYS_SIGNAL_SEND
|
||||
+#endif
|
||||
+
|
||||
#ifndef CONFIG_COMPAT
|
||||
typedef sigset_t compat_sigset_t;
|
||||
#endif
|
||||
--- a/include/linux/sched.h
|
||||
+++ b/include/linux/sched.h
|
||||
@@ -881,6 +881,10 @@ struct task_struct {
|
||||
/* Restored if set_restore_sigmask() was used: */
|
||||
sigset_t saved_sigmask;
|
||||
struct sigpending pending;
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+ /* TODO: move me into ->restart_block ? */
|
||||
+ struct siginfo forced_info;
|
||||
+#endif
|
||||
unsigned long sas_ss_sp;
|
||||
size_t sas_ss_size;
|
||||
unsigned int sas_ss_flags;
|
||||
--- a/kernel/signal.c
|
||||
+++ b/kernel/signal.c
|
||||
@@ -1268,8 +1268,8 @@ int do_send_sig_info(int sig, struct sig
|
||||
* We don't want to have recursive SIGSEGV's etc, for example,
|
||||
* that is why we also clear SIGNAL_UNKILLABLE.
|
||||
*/
|
||||
-int
|
||||
-force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
|
||||
+static int
|
||||
+do_force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
|
||||
{
|
||||
unsigned long int flags;
|
||||
int ret, blocked, ignored;
|
||||
@@ -1298,6 +1298,39 @@ force_sig_info(int sig, struct siginfo *
|
||||
return ret;
|
||||
}
|
||||
|
||||
+int force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
|
||||
+{
|
||||
+/*
|
||||
+ * On some archs, PREEMPT_RT has to delay sending a signal from a trap
|
||||
+ * since it can not enable preemption, and the signal code's spin_locks
|
||||
+ * turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME which will
|
||||
+ * send the signal on exit of the trap.
|
||||
+ */
|
||||
+#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
|
||||
+ if (in_atomic()) {
|
||||
+ if (WARN_ON_ONCE(t != current))
|
||||
+ return 0;
|
||||
+ if (WARN_ON_ONCE(t->forced_info.si_signo))
|
||||
+ return 0;
|
||||
+
|
||||
+ if (is_si_special(info)) {
|
||||
+ WARN_ON_ONCE(info != SEND_SIG_PRIV);
|
||||
+ t->forced_info.si_signo = sig;
|
||||
+ t->forced_info.si_errno = 0;
|
||||
+ t->forced_info.si_code = SI_KERNEL;
|
||||
+ t->forced_info.si_pid = 0;
|
||||
+ t->forced_info.si_uid = 0;
|
||||
+ } else {
|
||||
+ t->forced_info = *info;
|
||||
+ }
|
||||
+
|
||||
+ set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
|
||||
+ return 0;
|
||||
+ }
|
||||
+#endif
|
||||
+ return do_force_sig_info(sig, info, t);
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Nuke all other threads in the group.
|
||||
*/
|
||||
@@ -0,0 +1,42 @@
|
||||
From: Yang Shi <yang.shi@linaro.org>
|
||||
Date: Thu, 10 Dec 2015 10:58:51 -0800
|
||||
Subject: x86/signal: delay calling signals on 32bit
|
||||
|
||||
When running some ptrace single step tests on x86-32 machine, the below problem
|
||||
is triggered:
|
||||
|
||||
BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:917
|
||||
in_atomic(): 1, irqs_disabled(): 0, pid: 1041, name: dummy2
|
||||
Preemption disabled at:[<c100326f>] do_debug+0x1f/0x1a0
|
||||
|
||||
CPU: 10 PID: 1041 Comm: dummy2 Tainted: G W 4.1.13-rt13 #1
|
||||
Call Trace:
|
||||
[<c1aa8306>] dump_stack+0x46/0x5c
|
||||
[<c1080517>] ___might_sleep+0x137/0x220
|
||||
[<c1ab0eff>] rt_spin_lock+0x1f/0x80
|
||||
[<c1064b5a>] do_force_sig_info+0x2a/0xc0
|
||||
[<c106567d>] force_sig_info+0xd/0x10
|
||||
[<c1010cff>] send_sigtrap+0x6f/0x80
|
||||
[<c10033b1>] do_debug+0x161/0x1a0
|
||||
[<c1ab2921>] debug_stack_correct+0x2e/0x35
|
||||
|
||||
This happens since 959274753857 ("x86, traps: Track entry into and exit
|
||||
from IST context") which was merged in v4.1-rc1.
|
||||
|
||||
Signed-off-by: Yang Shi <yang.shi@linaro.org>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
arch/x86/include/asm/signal.h | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/arch/x86/include/asm/signal.h
|
||||
+++ b/arch/x86/include/asm/signal.h
|
||||
@@ -37,7 +37,7 @@ typedef struct {
|
||||
* TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the
|
||||
* trap.
|
||||
*/
|
||||
-#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_X86_64)
|
||||
+#if defined(CONFIG_PREEMPT_RT_FULL)
|
||||
#define ARCH_RT_DELAYS_SIGNAL_SEND
|
||||
#endif
|
||||
|
||||
@@ -0,0 +1,184 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Fri, 18 Mar 2011 09:18:52 +0100
|
||||
Subject: buffer_head: Replace bh_uptodate_lock for -rt
|
||||
|
||||
Wrap the bit_spin_lock calls into a separate inline and add the RT
|
||||
replacements with a real spinlock.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
fs/buffer.c | 21 +++++++--------------
|
||||
fs/ext4/page-io.c | 6 ++----
|
||||
fs/ntfs/aops.c | 10 +++-------
|
||||
include/linux/buffer_head.h | 34 ++++++++++++++++++++++++++++++++++
|
||||
4 files changed, 46 insertions(+), 25 deletions(-)
|
||||
|
||||
--- a/fs/buffer.c
|
||||
+++ b/fs/buffer.c
|
||||
@@ -273,8 +273,7 @@ static void end_buffer_async_read(struct
|
||||
* decide that the page is now completely done.
|
||||
*/
|
||||
first = page_buffers(page);
|
||||
- local_irq_save(flags);
|
||||
- bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
|
||||
+ flags = bh_uptodate_lock_irqsave(first);
|
||||
clear_buffer_async_read(bh);
|
||||
unlock_buffer(bh);
|
||||
tmp = bh;
|
||||
@@ -287,8 +286,7 @@ static void end_buffer_async_read(struct
|
||||
}
|
||||
tmp = tmp->b_this_page;
|
||||
} while (tmp != bh);
|
||||
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
|
||||
- local_irq_restore(flags);
|
||||
+ bh_uptodate_unlock_irqrestore(first, flags);
|
||||
|
||||
/*
|
||||
* If none of the buffers had errors and they are all
|
||||
@@ -300,9 +298,7 @@ static void end_buffer_async_read(struct
|
||||
return;
|
||||
|
||||
still_busy:
|
||||
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
|
||||
- local_irq_restore(flags);
|
||||
- return;
|
||||
+ bh_uptodate_unlock_irqrestore(first, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -329,8 +325,7 @@ void end_buffer_async_write(struct buffe
|
||||
}
|
||||
|
||||
first = page_buffers(page);
|
||||
- local_irq_save(flags);
|
||||
- bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
|
||||
+ flags = bh_uptodate_lock_irqsave(first);
|
||||
|
||||
clear_buffer_async_write(bh);
|
||||
unlock_buffer(bh);
|
||||
@@ -342,15 +337,12 @@ void end_buffer_async_write(struct buffe
|
||||
}
|
||||
tmp = tmp->b_this_page;
|
||||
}
|
||||
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
|
||||
- local_irq_restore(flags);
|
||||
+ bh_uptodate_unlock_irqrestore(first, flags);
|
||||
end_page_writeback(page);
|
||||
return;
|
||||
|
||||
still_busy:
|
||||
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
|
||||
- local_irq_restore(flags);
|
||||
- return;
|
||||
+ bh_uptodate_unlock_irqrestore(first, flags);
|
||||
}
|
||||
EXPORT_SYMBOL(end_buffer_async_write);
|
||||
|
||||
@@ -3360,6 +3352,7 @@ struct buffer_head *alloc_buffer_head(gf
|
||||
struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
|
||||
if (ret) {
|
||||
INIT_LIST_HEAD(&ret->b_assoc_buffers);
|
||||
+ buffer_head_init_locks(ret);
|
||||
preempt_disable();
|
||||
__this_cpu_inc(bh_accounting.nr);
|
||||
recalc_bh_state();
|
||||
--- a/fs/ext4/page-io.c
|
||||
+++ b/fs/ext4/page-io.c
|
||||
@@ -95,8 +95,7 @@ static void ext4_finish_bio(struct bio *
|
||||
* We check all buffers in the page under BH_Uptodate_Lock
|
||||
* to avoid races with other end io clearing async_write flags
|
||||
*/
|
||||
- local_irq_save(flags);
|
||||
- bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
|
||||
+ flags = bh_uptodate_lock_irqsave(head);
|
||||
do {
|
||||
if (bh_offset(bh) < bio_start ||
|
||||
bh_offset(bh) + bh->b_size > bio_end) {
|
||||
@@ -108,8 +107,7 @@ static void ext4_finish_bio(struct bio *
|
||||
if (bio->bi_status)
|
||||
buffer_io_error(bh);
|
||||
} while ((bh = bh->b_this_page) != head);
|
||||
- bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
|
||||
- local_irq_restore(flags);
|
||||
+ bh_uptodate_unlock_irqrestore(head, flags);
|
||||
if (!under_io) {
|
||||
#ifdef CONFIG_EXT4_FS_ENCRYPTION
|
||||
if (data_page)
|
||||
--- a/fs/ntfs/aops.c
|
||||
+++ b/fs/ntfs/aops.c
|
||||
@@ -106,8 +106,7 @@ static void ntfs_end_buffer_async_read(s
|
||||
"0x%llx.", (unsigned long long)bh->b_blocknr);
|
||||
}
|
||||
first = page_buffers(page);
|
||||
- local_irq_save(flags);
|
||||
- bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
|
||||
+ flags = bh_uptodate_lock_irqsave(first);
|
||||
clear_buffer_async_read(bh);
|
||||
unlock_buffer(bh);
|
||||
tmp = bh;
|
||||
@@ -122,8 +121,7 @@ static void ntfs_end_buffer_async_read(s
|
||||
}
|
||||
tmp = tmp->b_this_page;
|
||||
} while (tmp != bh);
|
||||
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
|
||||
- local_irq_restore(flags);
|
||||
+ bh_uptodate_unlock_irqrestore(first, flags);
|
||||
/*
|
||||
* If none of the buffers had errors then we can set the page uptodate,
|
||||
* but we first have to perform the post read mst fixups, if the
|
||||
@@ -156,9 +154,7 @@ static void ntfs_end_buffer_async_read(s
|
||||
unlock_page(page);
|
||||
return;
|
||||
still_busy:
|
||||
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
|
||||
- local_irq_restore(flags);
|
||||
- return;
|
||||
+ bh_uptodate_unlock_irqrestore(first, flags);
|
||||
}
|
||||
|
||||
/**
|
||||
--- a/include/linux/buffer_head.h
|
||||
+++ b/include/linux/buffer_head.h
|
||||
@@ -76,8 +76,42 @@ struct buffer_head {
|
||||
struct address_space *b_assoc_map; /* mapping this buffer is
|
||||
associated with */
|
||||
atomic_t b_count; /* users using this buffer_head */
|
||||
+#ifdef CONFIG_PREEMPT_RT_BASE
|
||||
+ spinlock_t b_uptodate_lock;
|
||||
+#endif
|
||||
};
|
||||
|
||||
+static inline unsigned long bh_uptodate_lock_irqsave(struct buffer_head *bh)
|
||||
+{
|
||||
+ unsigned long flags;
|
||||
+
|
||||
+#ifndef CONFIG_PREEMPT_RT_BASE
|
||||
+ local_irq_save(flags);
|
||||
+ bit_spin_lock(BH_Uptodate_Lock, &bh->b_state);
|
||||
+#else
|
||||
+ spin_lock_irqsave(&bh->b_uptodate_lock, flags);
|
||||
+#endif
|
||||
+ return flags;
|
||||
+}
|
||||
+
|
||||
+static inline void
|
||||
+bh_uptodate_unlock_irqrestore(struct buffer_head *bh, unsigned long flags)
|
||||
+{
|
||||
+#ifndef CONFIG_PREEMPT_RT_BASE
|
||||
+ bit_spin_unlock(BH_Uptodate_Lock, &bh->b_state);
|
||||
+ local_irq_restore(flags);
|
||||
+#else
|
||||
+ spin_unlock_irqrestore(&bh->b_uptodate_lock, flags);
|
||||
+#endif
|
||||
+}
|
||||
+
|
||||
+static inline void buffer_head_init_locks(struct buffer_head *bh)
|
||||
+{
|
||||
+#ifdef CONFIG_PREEMPT_RT_BASE
|
||||
+ spin_lock_init(&bh->b_uptodate_lock);
|
||||
+#endif
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* macro tricks to expand the set_buffer_foo(), clear_buffer_foo()
|
||||
* and buffer_foo() functions.
|
||||
@@ -0,0 +1,96 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Fri, 18 Mar 2011 10:11:25 +0100
|
||||
Subject: fs: jbd/jbd2: Make state lock and journal head lock rt safe
|
||||
|
||||
bit_spin_locks break under RT.
|
||||
|
||||
Based on a previous patch from Steven Rostedt <rostedt@goodmis.org>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
--
|
||||
include/linux/buffer_head.h | 8 ++++++++
|
||||
include/linux/jbd2.h | 24 ++++++++++++++++++++++++
|
||||
2 files changed, 32 insertions(+)
|
||||
|
||||
--- a/include/linux/buffer_head.h
|
||||
+++ b/include/linux/buffer_head.h
|
||||
@@ -78,6 +78,10 @@ struct buffer_head {
|
||||
atomic_t b_count; /* users using this buffer_head */
|
||||
#ifdef CONFIG_PREEMPT_RT_BASE
|
||||
spinlock_t b_uptodate_lock;
|
||||
+#if IS_ENABLED(CONFIG_JBD2)
|
||||
+ spinlock_t b_state_lock;
|
||||
+ spinlock_t b_journal_head_lock;
|
||||
+#endif
|
||||
#endif
|
||||
};
|
||||
|
||||
@@ -109,6 +113,10 @@ static inline void buffer_head_init_lock
|
||||
{
|
||||
#ifdef CONFIG_PREEMPT_RT_BASE
|
||||
spin_lock_init(&bh->b_uptodate_lock);
|
||||
+#if IS_ENABLED(CONFIG_JBD2)
|
||||
+ spin_lock_init(&bh->b_state_lock);
|
||||
+ spin_lock_init(&bh->b_journal_head_lock);
|
||||
+#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
--- a/include/linux/jbd2.h
|
||||
+++ b/include/linux/jbd2.h
|
||||
@@ -347,32 +347,56 @@ static inline struct journal_head *bh2jh
|
||||
|
||||
static inline void jbd_lock_bh_state(struct buffer_head *bh)
|
||||
{
|
||||
+#ifndef CONFIG_PREEMPT_RT_BASE
|
||||
bit_spin_lock(BH_State, &bh->b_state);
|
||||
+#else
|
||||
+ spin_lock(&bh->b_state_lock);
|
||||
+#endif
|
||||
}
|
||||
|
||||
static inline int jbd_trylock_bh_state(struct buffer_head *bh)
|
||||
{
|
||||
+#ifndef CONFIG_PREEMPT_RT_BASE
|
||||
return bit_spin_trylock(BH_State, &bh->b_state);
|
||||
+#else
|
||||
+ return spin_trylock(&bh->b_state_lock);
|
||||
+#endif
|
||||
}
|
||||
|
||||
static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
|
||||
{
|
||||
+#ifndef CONFIG_PREEMPT_RT_BASE
|
||||
return bit_spin_is_locked(BH_State, &bh->b_state);
|
||||
+#else
|
||||
+ return spin_is_locked(&bh->b_state_lock);
|
||||
+#endif
|
||||
}
|
||||
|
||||
static inline void jbd_unlock_bh_state(struct buffer_head *bh)
|
||||
{
|
||||
+#ifndef CONFIG_PREEMPT_RT_BASE
|
||||
bit_spin_unlock(BH_State, &bh->b_state);
|
||||
+#else
|
||||
+ spin_unlock(&bh->b_state_lock);
|
||||
+#endif
|
||||
}
|
||||
|
||||
static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
|
||||
{
|
||||
+#ifndef CONFIG_PREEMPT_RT_BASE
|
||||
bit_spin_lock(BH_JournalHead, &bh->b_state);
|
||||
+#else
|
||||
+ spin_lock(&bh->b_journal_head_lock);
|
||||
+#endif
|
||||
}
|
||||
|
||||
static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
|
||||
{
|
||||
+#ifndef CONFIG_PREEMPT_RT_BASE
|
||||
bit_spin_unlock(BH_JournalHead, &bh->b_state);
|
||||
+#else
|
||||
+ spin_unlock(&bh->b_journal_head_lock);
|
||||
+#endif
|
||||
}
|
||||
|
||||
#define J_ASSERT(assert) BUG_ON(!(assert))
|
||||
@@ -0,0 +1,114 @@
|
||||
From: Paul Gortmaker <paul.gortmaker@windriver.com>
|
||||
Date: Fri, 21 Jun 2013 15:07:25 -0400
|
||||
Subject: list_bl: Make list head locking RT safe
|
||||
|
||||
As per changes in include/linux/jbd_common.h for avoiding the
|
||||
bit_spin_locks on RT ("fs: jbd/jbd2: Make state lock and journal
|
||||
head lock rt safe") we do the same thing here.
|
||||
|
||||
We use the non-atomic __set_bit and __clear_bit inside the scope of
the lock to preserve the ability of the existing LIST_DEBUG code to
use the zeroth bit in the sanity checks.
|
||||
|
||||
As a bit spinlock, we had no lockdep visibility into the usage
|
||||
of the list head locking. Now, if we were to implement it as a
|
||||
standard non-raw spinlock, we would see:
|
||||
|
||||
BUG: sleeping function called from invalid context at kernel/rtmutex.c:658
|
||||
in_atomic(): 1, irqs_disabled(): 0, pid: 122, name: udevd
|
||||
5 locks held by udevd/122:
|
||||
#0: (&sb->s_type->i_mutex_key#7/1){+.+.+.}, at: [<ffffffff811967e8>] lock_rename+0xe8/0xf0
|
||||
#1: (rename_lock){+.+...}, at: [<ffffffff811a277c>] d_move+0x2c/0x60
|
||||
#2: (&dentry->d_lock){+.+...}, at: [<ffffffff811a0763>] dentry_lock_for_move+0xf3/0x130
|
||||
#3: (&dentry->d_lock/2){+.+...}, at: [<ffffffff811a0734>] dentry_lock_for_move+0xc4/0x130
|
||||
#4: (&dentry->d_lock/3){+.+...}, at: [<ffffffff811a0747>] dentry_lock_for_move+0xd7/0x130
|
||||
Pid: 122, comm: udevd Not tainted 3.4.47-rt62 #7
|
||||
Call Trace:
|
||||
[<ffffffff810b9624>] __might_sleep+0x134/0x1f0
|
||||
[<ffffffff817a24d4>] rt_spin_lock+0x24/0x60
|
||||
[<ffffffff811a0c4c>] __d_shrink+0x5c/0xa0
|
||||
[<ffffffff811a1b2d>] __d_drop+0x1d/0x40
|
||||
[<ffffffff811a24be>] __d_move+0x8e/0x320
|
||||
[<ffffffff811a278e>] d_move+0x3e/0x60
|
||||
[<ffffffff81199598>] vfs_rename+0x198/0x4c0
|
||||
[<ffffffff8119b093>] sys_renameat+0x213/0x240
|
||||
[<ffffffff817a2de5>] ? _raw_spin_unlock+0x35/0x60
|
||||
[<ffffffff8107781c>] ? do_page_fault+0x1ec/0x4b0
|
||||
[<ffffffff817a32ca>] ? retint_swapgs+0xe/0x13
|
||||
[<ffffffff813eb0e6>] ? trace_hardirqs_on_thunk+0x3a/0x3f
|
||||
[<ffffffff8119b0db>] sys_rename+0x1b/0x20
|
||||
[<ffffffff817a3b96>] system_call_fastpath+0x1a/0x1f
|
||||
|
||||
Since we are only taking the lock during short-lived list operations,
let's assume for now that it being raw won't be a significant latency
concern.
|
||||
|
||||
|
||||
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/list_bl.h | 28 ++++++++++++++++++++++++++--
|
||||
1 file changed, 26 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/include/linux/list_bl.h
|
||||
+++ b/include/linux/list_bl.h
|
||||
@@ -3,6 +3,7 @@
|
||||
#define _LINUX_LIST_BL_H
|
||||
|
||||
#include <linux/list.h>
|
||||
+#include <linux/spinlock.h>
|
||||
#include <linux/bit_spinlock.h>
|
||||
|
||||
/*
|
||||
@@ -33,13 +34,22 @@
|
||||
|
||||
struct hlist_bl_head {
|
||||
struct hlist_bl_node *first;
|
||||
+#ifdef CONFIG_PREEMPT_RT_BASE
|
||||
+ raw_spinlock_t lock;
|
||||
+#endif
|
||||
};
|
||||
|
||||
struct hlist_bl_node {
|
||||
struct hlist_bl_node *next, **pprev;
|
||||
};
|
||||
-#define INIT_HLIST_BL_HEAD(ptr) \
|
||||
- ((ptr)->first = NULL)
|
||||
+
|
||||
+static inline void INIT_HLIST_BL_HEAD(struct hlist_bl_head *h)
|
||||
+{
|
||||
+ h->first = NULL;
|
||||
+#ifdef CONFIG_PREEMPT_RT_BASE
|
||||
+ raw_spin_lock_init(&h->lock);
|
||||
+#endif
|
||||
+}
|
||||
|
||||
static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h)
|
||||
{
|
||||
@@ -119,12 +129,26 @@ static inline void hlist_bl_del_init(str
|
||||
|
||||
static inline void hlist_bl_lock(struct hlist_bl_head *b)
|
||||
{
|
||||
+#ifndef CONFIG_PREEMPT_RT_BASE
|
||||
bit_spin_lock(0, (unsigned long *)b);
|
||||
+#else
|
||||
+ raw_spin_lock(&b->lock);
|
||||
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
|
||||
+ __set_bit(0, (unsigned long *)b);
|
||||
+#endif
|
||||
+#endif
|
||||
}
|
||||
|
||||
static inline void hlist_bl_unlock(struct hlist_bl_head *b)
|
||||
{
|
||||
+#ifndef CONFIG_PREEMPT_RT_BASE
|
||||
__bit_spin_unlock(0, (unsigned long *)b);
|
||||
+#else
|
||||
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
|
||||
+ __clear_bit(0, (unsigned long *)b);
|
||||
+#endif
|
||||
+ raw_spin_unlock(&b->lock);
|
||||
+#endif
|
||||
}
|
||||
|
||||
static inline bool hlist_bl_is_locked(struct hlist_bl_head *b)
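
For orientation, a minimal sketch of how a hash-table user takes the per-bucket lock; this mirrors existing callers such as the dcache, and the names below are illustrative only (the heads are assumed to be set up with INIT_HLIST_BL_HEAD() at init time):

#include <linux/list_bl.h>

static struct hlist_bl_head demo_hash[64];	/* INIT_HLIST_BL_HEAD() at init */

static void demo_hash_insert(struct hlist_bl_node *n, unsigned int idx)
{
	struct hlist_bl_head *b = &demo_hash[idx & 63];

	/*
	 * !RT: bit spinlock on bit 0 of b->first.
	 * RT:  the raw_spinlock_t added to hlist_bl_head above.
	 */
	hlist_bl_lock(b);
	hlist_bl_add_head(n, b);
	hlist_bl_unlock(b);
}
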
@@ -0,0 +1,97 @@
|
||||
From: Josh Cartwright <joshc@ni.com>
|
||||
Date: Thu, 31 Mar 2016 00:04:25 -0500
|
||||
Subject: [PATCH] list_bl: fixup bogus lockdep warning
|
||||
|
||||
At first glance, the use of 'static inline' seems appropriate for
|
||||
INIT_HLIST_BL_HEAD().
|
||||
|
||||
However, when a 'static inline' function invocation is inlined by gcc,
|
||||
all callers share any static local data declared within that inline
|
||||
function.
|
||||
|
||||
This presents a problem for how lockdep classes are setup. raw_spinlocks, for
|
||||
example, when CONFIG_DEBUG_SPINLOCK,
|
||||
|
||||
# define raw_spin_lock_init(lock) \
|
||||
do { \
|
||||
static struct lock_class_key __key; \
|
||||
\
|
||||
__raw_spin_lock_init((lock), #lock, &__key); \
|
||||
} while (0)
|
||||
|
||||
When this macro is expanded into a 'static inline' caller, like
|
||||
INIT_HLIST_BL_HEAD():
|
||||
|
||||
static inline INIT_HLIST_BL_HEAD(struct hlist_bl_head *h)
|
||||
{
|
||||
h->first = NULL;
|
||||
raw_spin_lock_init(&h->lock);
|
||||
}
|
||||
|
||||
...the static local lock_class_key object is made a function static.
|
||||
|
||||
For compilation units which invoke INIT_HLIST_BL_HEAD() more than once,
all of the invocations then share this same static local object.
|
||||
|
||||
This can lead to some very confusing lockdep splats (example below).
|
||||
Solve this problem by forcing the INIT_HLIST_BL_HEAD() to be a macro,
|
||||
which prevents the lockdep class object sharing.
|
||||
|
||||
=============================================
|
||||
[ INFO: possible recursive locking detected ]
|
||||
4.4.4-rt11 #4 Not tainted
|
||||
---------------------------------------------
|
||||
kswapd0/59 is trying to acquire lock:
|
||||
(&h->lock#2){+.+.-.}, at: mb_cache_shrink_scan
|
||||
|
||||
but task is already holding lock:
|
||||
(&h->lock#2){+.+.-.}, at: mb_cache_shrink_scan
|
||||
|
||||
other info that might help us debug this:
|
||||
Possible unsafe locking scenario:
|
||||
|
||||
CPU0
|
||||
----
|
||||
lock(&h->lock#2);
|
||||
lock(&h->lock#2);
|
||||
|
||||
*** DEADLOCK ***
|
||||
|
||||
May be due to missing lock nesting notation
|
||||
|
||||
2 locks held by kswapd0/59:
|
||||
#0: (shrinker_rwsem){+.+...}, at: rt_down_read_trylock
|
||||
#1: (&h->lock#2){+.+.-.}, at: mb_cache_shrink_scan
|
||||
|
||||
Reported-by: Luis Claudio R. Goncalves <lclaudio@uudg.org>
|
||||
Tested-by: Luis Claudio R. Goncalves <lclaudio@uudg.org>
|
||||
Signed-off-by: Josh Cartwright <joshc@ni.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/list_bl.h | 12 +++++++-----
|
||||
1 file changed, 7 insertions(+), 5 deletions(-)
|
||||
|
||||
--- a/include/linux/list_bl.h
|
||||
+++ b/include/linux/list_bl.h
|
||||
@@ -43,13 +43,15 @@ struct hlist_bl_node {
|
||||
struct hlist_bl_node *next, **pprev;
|
||||
};
|
||||
|
||||
-static inline void INIT_HLIST_BL_HEAD(struct hlist_bl_head *h)
|
||||
-{
|
||||
- h->first = NULL;
|
||||
#ifdef CONFIG_PREEMPT_RT_BASE
|
||||
- raw_spin_lock_init(&h->lock);
|
||||
+#define INIT_HLIST_BL_HEAD(h) \
|
||||
+do { \
|
||||
+ (h)->first = NULL; \
|
||||
+ raw_spin_lock_init(&(h)->lock); \
|
||||
+} while (0)
|
||||
+#else
|
||||
+#define INIT_HLIST_BL_HEAD(h) (h)->first = NULL
|
||||
#endif
|
||||
-}
|
||||
|
||||
static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h)
|
||||
{
|
||||
@@ -0,0 +1,37 @@
|
||||
From: Ingo Molnar <mingo@elte.hu>
|
||||
Date: Fri, 3 Jul 2009 08:29:57 -0500
|
||||
Subject: genirq: Disable irqpoll on -rt
|
||||
|
||||
Creates long latencies for no value
|
||||
|
||||
Signed-off-by: Ingo Molnar <mingo@elte.hu>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
|
||||
---
|
||||
kernel/irq/spurious.c | 8 ++++++++
|
||||
1 file changed, 8 insertions(+)
|
||||
|
||||
--- a/kernel/irq/spurious.c
|
||||
+++ b/kernel/irq/spurious.c
|
||||
@@ -442,6 +442,10 @@ MODULE_PARM_DESC(noirqdebug, "Disable ir
|
||||
|
||||
static int __init irqfixup_setup(char *str)
|
||||
{
|
||||
+#ifdef CONFIG_PREEMPT_RT_BASE
|
||||
+ pr_warn("irqfixup boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n");
|
||||
+ return 1;
|
||||
+#endif
|
||||
irqfixup = 1;
|
||||
printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
|
||||
printk(KERN_WARNING "This may impact system performance.\n");
|
||||
@@ -454,6 +458,10 @@ module_param(irqfixup, int, 0644);
|
||||
|
||||
static int __init irqpoll_setup(char *str)
|
||||
{
|
||||
+#ifdef CONFIG_PREEMPT_RT_BASE
|
||||
+ pr_warn("irqpoll boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n");
|
||||
+ return 1;
|
||||
+#endif
|
||||
irqfixup = 2;
|
||||
printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
|
||||
"enabled\n");
|
||||
45
kernel/patches-4.19.x-rt/0073-genirq-force-threading.patch
Normal file
@@ -0,0 +1,45 @@
|
||||
Subject: genirq: Force interrupt thread on RT
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Sun, 03 Apr 2011 11:57:29 +0200
|
||||
|
||||
Force threaded_irqs and optimize the code (force_irqthreads) in regard
|
||||
to this.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
include/linux/interrupt.h | 4 ++++
|
||||
kernel/irq/manage.c | 2 ++
|
||||
2 files changed, 6 insertions(+)
|
||||
|
||||
--- a/include/linux/interrupt.h
|
||||
+++ b/include/linux/interrupt.h
|
||||
@@ -427,7 +427,11 @@ extern int irq_set_irqchip_state(unsigne
|
||||
bool state);
|
||||
|
||||
#ifdef CONFIG_IRQ_FORCED_THREADING
|
||||
+# ifdef CONFIG_PREEMPT_RT_BASE
|
||||
+# define force_irqthreads (true)
|
||||
+# else
|
||||
extern bool force_irqthreads;
|
||||
+# endif
|
||||
#else
|
||||
#define force_irqthreads (0)
|
||||
#endif
|
||||
--- a/kernel/irq/manage.c
|
||||
+++ b/kernel/irq/manage.c
|
||||
@@ -23,6 +23,7 @@
|
||||
#include "internals.h"
|
||||
|
||||
#ifdef CONFIG_IRQ_FORCED_THREADING
|
||||
+# ifndef CONFIG_PREEMPT_RT_BASE
|
||||
__read_mostly bool force_irqthreads;
|
||||
EXPORT_SYMBOL_GPL(force_irqthreads);
|
||||
|
||||
@@ -32,6 +33,7 @@ static int __init setup_forced_irqthread
|
||||
return 0;
|
||||
}
|
||||
early_param("threadirqs", setup_forced_irqthreads);
|
||||
+# endif
|
||||
#endif
|
||||
|
||||
static void __synchronize_hardirq(struct irq_desc *desc)
|
||||
@@ -0,0 +1,166 @@
|
||||
From: Peter Zijlstra <peterz@infradead.org>
|
||||
Date: Mon, 28 May 2018 15:24:20 +0200
|
||||
Subject: [PATCH 1/4] Split IRQ-off and zone->lock while freeing pages from PCP
|
||||
list #1
|
||||
|
||||
Split the IRQ-off section for accessing the PCP list from the zone->lock
section while freeing pages.
Introduce isolate_pcp_pages() which moves the pages from the PCP
list onto a temporary list and then frees the temporary list via
free_pcppages_bulk().
|
||||
|
||||
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
mm/page_alloc.c | 82 +++++++++++++++++++++++++++++++++++---------------------
|
||||
1 file changed, 52 insertions(+), 30 deletions(-)
|
||||
|
||||
--- a/mm/page_alloc.c
|
||||
+++ b/mm/page_alloc.c
|
||||
@@ -1095,7 +1095,7 @@ static inline void prefetch_buddy(struct
|
||||
}
|
||||
|
||||
/*
|
||||
- * Frees a number of pages from the PCP lists
|
||||
+ * Frees a number of pages which have been collected from the pcp lists.
|
||||
* Assumes all pages on list are in same zone, and of same order.
|
||||
* count is the number of pages to free.
|
||||
*
|
||||
@@ -1106,14 +1106,41 @@ static inline void prefetch_buddy(struct
|
||||
* pinned" detection logic.
|
||||
*/
|
||||
static void free_pcppages_bulk(struct zone *zone, int count,
|
||||
- struct per_cpu_pages *pcp)
|
||||
+ struct list_head *head)
|
||||
+{
|
||||
+ bool isolated_pageblocks;
|
||||
+ struct page *page, *tmp;
|
||||
+ unsigned long flags;
|
||||
+
|
||||
+ spin_lock_irqsave(&zone->lock, flags);
|
||||
+ isolated_pageblocks = has_isolate_pageblock(zone);
|
||||
+
|
||||
+ /*
|
||||
+ * Use safe version since after __free_one_page(),
|
||||
+ * page->lru.next will not point to original list.
|
||||
+ */
|
||||
+ list_for_each_entry_safe(page, tmp, head, lru) {
|
||||
+ int mt = get_pcppage_migratetype(page);
|
||||
+ /* MIGRATE_ISOLATE page should not go to pcplists */
|
||||
+ VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
|
||||
+ /* Pageblock could have been isolated meanwhile */
|
||||
+ if (unlikely(isolated_pageblocks))
|
||||
+ mt = get_pageblock_migratetype(page);
|
||||
+
|
||||
+ __free_one_page(page, page_to_pfn(page), zone, 0, mt);
|
||||
+ trace_mm_page_pcpu_drain(page, 0, mt);
|
||||
+ }
|
||||
+ spin_unlock_irqrestore(&zone->lock, flags);
|
||||
+}
|
||||
+
|
||||
+static void isolate_pcp_pages(int count, struct per_cpu_pages *pcp,
|
||||
+ struct list_head *dst)
|
||||
+
|
||||
{
|
||||
int migratetype = 0;
|
||||
int batch_free = 0;
|
||||
int prefetch_nr = 0;
|
||||
- bool isolated_pageblocks;
|
||||
- struct page *page, *tmp;
|
||||
- LIST_HEAD(head);
|
||||
+ struct page *page;
|
||||
|
||||
while (count) {
|
||||
struct list_head *list;
|
||||
@@ -1145,7 +1172,7 @@ static void free_pcppages_bulk(struct zo
|
||||
if (bulkfree_pcp_prepare(page))
|
||||
continue;
|
||||
|
||||
- list_add_tail(&page->lru, &head);
|
||||
+ list_add_tail(&page->lru, dst);
|
||||
|
||||
/*
|
||||
* We are going to put the page back to the global
|
||||
@@ -1160,26 +1187,6 @@ static void free_pcppages_bulk(struct zo
|
||||
prefetch_buddy(page);
|
||||
} while (--count && --batch_free && !list_empty(list));
|
||||
}
|
||||
-
|
||||
- spin_lock(&zone->lock);
|
||||
- isolated_pageblocks = has_isolate_pageblock(zone);
|
||||
-
|
||||
- /*
|
||||
- * Use safe version since after __free_one_page(),
|
||||
- * page->lru.next will not point to original list.
|
||||
- */
|
||||
- list_for_each_entry_safe(page, tmp, &head, lru) {
|
||||
- int mt = get_pcppage_migratetype(page);
|
||||
- /* MIGRATE_ISOLATE page should not go to pcplists */
|
||||
- VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
|
||||
- /* Pageblock could have been isolated meanwhile */
|
||||
- if (unlikely(isolated_pageblocks))
|
||||
- mt = get_pageblock_migratetype(page);
|
||||
-
|
||||
- __free_one_page(page, page_to_pfn(page), zone, 0, mt);
|
||||
- trace_mm_page_pcpu_drain(page, 0, mt);
|
||||
- }
|
||||
- spin_unlock(&zone->lock);
|
||||
}
|
||||
|
||||
static void free_one_page(struct zone *zone,
|
||||
@@ -2536,13 +2543,18 @@ void drain_zone_pages(struct zone *zone,
|
||||
{
|
||||
unsigned long flags;
|
||||
int to_drain, batch;
|
||||
+ LIST_HEAD(dst);
|
||||
|
||||
local_irq_save(flags);
|
||||
batch = READ_ONCE(pcp->batch);
|
||||
to_drain = min(pcp->count, batch);
|
||||
if (to_drain > 0)
|
||||
- free_pcppages_bulk(zone, to_drain, pcp);
|
||||
+ isolate_pcp_pages(to_drain, pcp, &dst);
|
||||
+
|
||||
local_irq_restore(flags);
|
||||
+
|
||||
+ if (to_drain > 0)
|
||||
+ free_pcppages_bulk(zone, to_drain, &dst);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -2558,14 +2570,21 @@ static void drain_pages_zone(unsigned in
|
||||
unsigned long flags;
|
||||
struct per_cpu_pageset *pset;
|
||||
struct per_cpu_pages *pcp;
|
||||
+ LIST_HEAD(dst);
|
||||
+ int count;
|
||||
|
||||
local_irq_save(flags);
|
||||
pset = per_cpu_ptr(zone->pageset, cpu);
|
||||
|
||||
pcp = &pset->pcp;
|
||||
- if (pcp->count)
|
||||
- free_pcppages_bulk(zone, pcp->count, pcp);
|
||||
+ count = pcp->count;
|
||||
+ if (count)
|
||||
+ isolate_pcp_pages(count, pcp, &dst);
|
||||
+
|
||||
local_irq_restore(flags);
|
||||
+
|
||||
+ if (count)
|
||||
+ free_pcppages_bulk(zone, count, &dst);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2787,7 +2806,10 @@ static void free_unref_page_commit(struc
|
||||
pcp->count++;
|
||||
if (pcp->count >= pcp->high) {
|
||||
unsigned long batch = READ_ONCE(pcp->batch);
|
||||
- free_pcppages_bulk(zone, batch, pcp);
|
||||
+ LIST_HEAD(dst);
|
||||
+
|
||||
+ isolate_pcp_pages(batch, pcp, &dst);
|
||||
+ free_pcppages_bulk(zone, batch, &dst);
|
||||
}
|
||||
}
|
||||
|
||||
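
Stripped of the mm specifics, the shape of this change is a generic detach-then-process pattern: collect entries in the IRQ-off section, then hand the private batch to the second lock. The sketch below is illustrative only; demo_drain, consumer_lock and the list are invented, and the incoming list is assumed to be per-CPU data protected by disabled interrupts:

#include <linux/list.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(consumer_lock);		/* stands in for zone->lock */

static void demo_drain(struct list_head *pcp_list)
{
	LIST_HEAD(batch);
	unsigned long flags;

	/* Phase 1: detach entries with IRQs off, but do not process them yet. */
	local_irq_save(flags);
	list_splice_init(pcp_list, &batch);
	local_irq_restore(flags);

	/* Phase 2: consume the now-private batch under the second lock only. */
	spin_lock_irqsave(&consumer_lock, flags);
	/* ... free or process each entry on &batch here ... */
	spin_unlock_irqrestore(&consumer_lock, flags);
}
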
@@ -0,0 +1,165 @@
|
||||
From: Peter Zijlstra <peterz@infradead.org>
|
||||
Date: Mon, 28 May 2018 15:24:21 +0200
|
||||
Subject: [PATCH 2/4] Split IRQ-off and zone->lock while freeing pages from PCP
|
||||
list #2
|
||||
|
||||
Split the IRQ-off section for accessing the PCP list from the zone->lock
section while freeing pages.
Introduce isolate_pcp_pages() which moves the pages from the PCP
list onto a temporary list and then frees the temporary list via
free_pcppages_bulk().
|
||||
|
||||
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
mm/page_alloc.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++----------
|
||||
1 file changed, 50 insertions(+), 10 deletions(-)
|
||||
|
||||
--- a/mm/page_alloc.c
|
||||
+++ b/mm/page_alloc.c
|
||||
@@ -1105,8 +1105,8 @@ static inline void prefetch_buddy(struct
|
||||
* And clear the zone's pages_scanned counter, to hold off the "all pages are
|
||||
* pinned" detection logic.
|
||||
*/
|
||||
-static void free_pcppages_bulk(struct zone *zone, int count,
|
||||
- struct list_head *head)
|
||||
+static void free_pcppages_bulk(struct zone *zone, struct list_head *head,
|
||||
+ bool zone_retry)
|
||||
{
|
||||
bool isolated_pageblocks;
|
||||
struct page *page, *tmp;
|
||||
@@ -1121,12 +1121,27 @@ static void free_pcppages_bulk(struct zo
|
||||
*/
|
||||
list_for_each_entry_safe(page, tmp, head, lru) {
|
||||
int mt = get_pcppage_migratetype(page);
|
||||
+
|
||||
+ if (page_zone(page) != zone) {
|
||||
+ /*
|
||||
+ * free_unref_page_list() sorts pages by zone. If we end
|
||||
+ * up with pages from a different NUMA nodes belonging
|
||||
+ * to the same ZONE index then we need to redo with the
|
||||
+ * correct ZONE pointer. Skip the page for now, redo it
|
||||
+ * on the next iteration.
|
||||
+ */
|
||||
+ WARN_ON_ONCE(zone_retry == false);
|
||||
+ if (zone_retry)
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
/* MIGRATE_ISOLATE page should not go to pcplists */
|
||||
VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
|
||||
/* Pageblock could have been isolated meanwhile */
|
||||
if (unlikely(isolated_pageblocks))
|
||||
mt = get_pageblock_migratetype(page);
|
||||
|
||||
+ list_del(&page->lru);
|
||||
__free_one_page(page, page_to_pfn(page), zone, 0, mt);
|
||||
trace_mm_page_pcpu_drain(page, 0, mt);
|
||||
}
|
||||
@@ -2554,7 +2569,7 @@ void drain_zone_pages(struct zone *zone,
|
||||
local_irq_restore(flags);
|
||||
|
||||
if (to_drain > 0)
|
||||
- free_pcppages_bulk(zone, to_drain, &dst);
|
||||
+ free_pcppages_bulk(zone, &dst, false);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -2584,7 +2599,7 @@ static void drain_pages_zone(unsigned in
|
||||
local_irq_restore(flags);
|
||||
|
||||
if (count)
|
||||
- free_pcppages_bulk(zone, count, &dst);
|
||||
+ free_pcppages_bulk(zone, &dst, false);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2777,7 +2792,8 @@ static bool free_unref_page_prepare(stru
|
||||
return true;
|
||||
}
|
||||
|
||||
-static void free_unref_page_commit(struct page *page, unsigned long pfn)
|
||||
+static void free_unref_page_commit(struct page *page, unsigned long pfn,
|
||||
+ struct list_head *dst)
|
||||
{
|
||||
struct zone *zone = page_zone(page);
|
||||
struct per_cpu_pages *pcp;
|
||||
@@ -2806,10 +2822,8 @@ static void free_unref_page_commit(struc
|
||||
pcp->count++;
|
||||
if (pcp->count >= pcp->high) {
|
||||
unsigned long batch = READ_ONCE(pcp->batch);
|
||||
- LIST_HEAD(dst);
|
||||
|
||||
- isolate_pcp_pages(batch, pcp, &dst);
|
||||
- free_pcppages_bulk(zone, batch, &dst);
|
||||
+ isolate_pcp_pages(batch, pcp, dst);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2820,13 +2834,17 @@ void free_unref_page(struct page *page)
|
||||
{
|
||||
unsigned long flags;
|
||||
unsigned long pfn = page_to_pfn(page);
|
||||
+ struct zone *zone = page_zone(page);
|
||||
+ LIST_HEAD(dst);
|
||||
|
||||
if (!free_unref_page_prepare(page, pfn))
|
||||
return;
|
||||
|
||||
local_irq_save(flags);
|
||||
- free_unref_page_commit(page, pfn);
|
||||
+ free_unref_page_commit(page, pfn, &dst);
|
||||
local_irq_restore(flags);
|
||||
+ if (!list_empty(&dst))
|
||||
+ free_pcppages_bulk(zone, &dst, false);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2837,6 +2855,11 @@ void free_unref_page_list(struct list_he
|
||||
struct page *page, *next;
|
||||
unsigned long flags, pfn;
|
||||
int batch_count = 0;
|
||||
+ struct list_head dsts[__MAX_NR_ZONES];
|
||||
+ int i;
|
||||
+
|
||||
+ for (i = 0; i < __MAX_NR_ZONES; i++)
|
||||
+ INIT_LIST_HEAD(&dsts[i]);
|
||||
|
||||
/* Prepare pages for freeing */
|
||||
list_for_each_entry_safe(page, next, list, lru) {
|
||||
@@ -2849,10 +2872,12 @@ void free_unref_page_list(struct list_he
|
||||
local_irq_save(flags);
|
||||
list_for_each_entry_safe(page, next, list, lru) {
|
||||
unsigned long pfn = page_private(page);
|
||||
+ enum zone_type type;
|
||||
|
||||
set_page_private(page, 0);
|
||||
trace_mm_page_free_batched(page);
|
||||
- free_unref_page_commit(page, pfn);
|
||||
+ type = page_zonenum(page);
|
||||
+ free_unref_page_commit(page, pfn, &dsts[type]);
|
||||
|
||||
/*
|
||||
* Guard against excessive IRQ disabled times when we get
|
||||
@@ -2865,6 +2890,21 @@ void free_unref_page_list(struct list_he
|
||||
}
|
||||
}
|
||||
local_irq_restore(flags);
|
||||
+
|
||||
+ for (i = 0; i < __MAX_NR_ZONES; ) {
|
||||
+ struct page *page;
|
||||
+ struct zone *zone;
|
||||
+
|
||||
+ if (list_empty(&dsts[i])) {
|
||||
+ i++;
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ page = list_first_entry(&dsts[i], struct page, lru);
|
||||
+ zone = page_zone(page);
|
||||
+
|
||||
+ free_pcppages_bulk(zone, &dsts[i], true);
|
||||
+ }
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -0,0 +1,608 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Mon, 28 May 2018 15:24:22 +0200
|
||||
Subject: [PATCH 3/4] mm/SLxB: change list_lock to raw_spinlock_t
|
||||
|
||||
The list_lock is used with IRQs off on RT. Make it a raw_spinlock_t
|
||||
otherwise the interrupts won't be disabled on -RT. The locking rules remain
|
||||
the same on !RT.
|
||||
This patch changes it for SLAB and SLUB since both share the same header
|
||||
file for the struct kmem_cache_node definition.
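
For context: on PREEMPT_RT a plain spinlock_t is turned into a sleeping
rt_mutex, which must not be taken in a truly IRQ-off region, while
raw_spinlock_t keeps the classic spinning, IRQ-disabling behaviour on both RT
and !RT. The conversion is therefore mechanical; a minimal sketch with a
hypothetical struct name, mirroring the hunks below:

struct cache_node_sketch {
	raw_spinlock_t list_lock;	/* was: spinlock_t list_lock */
	struct list_head partial;
};

static void touch_partial_list_sketch(struct cache_node_sketch *n)
{
	unsigned long flags;

	/* Spins and keeps IRQs disabled on RT as well as !RT. */
	raw_spin_lock_irqsave(&n->list_lock, flags);
	/* ... walk or modify n->partial ... */
	raw_spin_unlock_irqrestore(&n->list_lock, flags);
}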
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
mm/slab.c | 94 +++++++++++++++++++++++++++++++-------------------------------
|
||||
mm/slab.h | 2 -
|
||||
mm/slub.c | 50 ++++++++++++++++----------------
|
||||
3 files changed, 73 insertions(+), 73 deletions(-)
|
||||
|
||||
--- a/mm/slab.c
|
||||
+++ b/mm/slab.c
|
||||
@@ -233,7 +233,7 @@ static void kmem_cache_node_init(struct
|
||||
parent->shared = NULL;
|
||||
parent->alien = NULL;
|
||||
parent->colour_next = 0;
|
||||
- spin_lock_init(&parent->list_lock);
|
||||
+ raw_spin_lock_init(&parent->list_lock);
|
||||
parent->free_objects = 0;
|
||||
parent->free_touched = 0;
|
||||
}
|
||||
@@ -600,9 +600,9 @@ static noinline void cache_free_pfmemall
|
||||
page_node = page_to_nid(page);
|
||||
n = get_node(cachep, page_node);
|
||||
|
||||
- spin_lock(&n->list_lock);
|
||||
+ raw_spin_lock(&n->list_lock);
|
||||
free_block(cachep, &objp, 1, page_node, &list);
|
||||
- spin_unlock(&n->list_lock);
|
||||
+ raw_spin_unlock(&n->list_lock);
|
||||
|
||||
slabs_destroy(cachep, &list);
|
||||
}
|
||||
@@ -730,7 +730,7 @@ static void __drain_alien_cache(struct k
|
||||
struct kmem_cache_node *n = get_node(cachep, node);
|
||||
|
||||
if (ac->avail) {
|
||||
- spin_lock(&n->list_lock);
|
||||
+ raw_spin_lock(&n->list_lock);
|
||||
/*
|
||||
* Stuff objects into the remote nodes shared array first.
|
||||
* That way we could avoid the overhead of putting the objects
|
||||
@@ -741,7 +741,7 @@ static void __drain_alien_cache(struct k
|
||||
|
||||
free_block(cachep, ac->entry, ac->avail, node, list);
|
||||
ac->avail = 0;
|
||||
- spin_unlock(&n->list_lock);
|
||||
+ raw_spin_unlock(&n->list_lock);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -814,9 +814,9 @@ static int __cache_free_alien(struct kme
|
||||
slabs_destroy(cachep, &list);
|
||||
} else {
|
||||
n = get_node(cachep, page_node);
|
||||
- spin_lock(&n->list_lock);
|
||||
+ raw_spin_lock(&n->list_lock);
|
||||
free_block(cachep, &objp, 1, page_node, &list);
|
||||
- spin_unlock(&n->list_lock);
|
||||
+ raw_spin_unlock(&n->list_lock);
|
||||
slabs_destroy(cachep, &list);
|
||||
}
|
||||
return 1;
|
||||
@@ -857,10 +857,10 @@ static int init_cache_node(struct kmem_c
|
||||
*/
|
||||
n = get_node(cachep, node);
|
||||
if (n) {
|
||||
- spin_lock_irq(&n->list_lock);
|
||||
+ raw_spin_lock_irq(&n->list_lock);
|
||||
n->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount +
|
||||
cachep->num;
|
||||
- spin_unlock_irq(&n->list_lock);
|
||||
+ raw_spin_unlock_irq(&n->list_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -939,7 +939,7 @@ static int setup_kmem_cache_node(struct
|
||||
goto fail;
|
||||
|
||||
n = get_node(cachep, node);
|
||||
- spin_lock_irq(&n->list_lock);
|
||||
+ raw_spin_lock_irq(&n->list_lock);
|
||||
if (n->shared && force_change) {
|
||||
free_block(cachep, n->shared->entry,
|
||||
n->shared->avail, node, &list);
|
||||
@@ -957,7 +957,7 @@ static int setup_kmem_cache_node(struct
|
||||
new_alien = NULL;
|
||||
}
|
||||
|
||||
- spin_unlock_irq(&n->list_lock);
|
||||
+ raw_spin_unlock_irq(&n->list_lock);
|
||||
slabs_destroy(cachep, &list);
|
||||
|
||||
/*
|
||||
@@ -996,7 +996,7 @@ static void cpuup_canceled(long cpu)
|
||||
if (!n)
|
||||
continue;
|
||||
|
||||
- spin_lock_irq(&n->list_lock);
|
||||
+ raw_spin_lock_irq(&n->list_lock);
|
||||
|
||||
/* Free limit for this kmem_cache_node */
|
||||
n->free_limit -= cachep->batchcount;
|
||||
@@ -1009,7 +1009,7 @@ static void cpuup_canceled(long cpu)
|
||||
}
|
||||
|
||||
if (!cpumask_empty(mask)) {
|
||||
- spin_unlock_irq(&n->list_lock);
|
||||
+ raw_spin_unlock_irq(&n->list_lock);
|
||||
goto free_slab;
|
||||
}
|
||||
|
||||
@@ -1023,7 +1023,7 @@ static void cpuup_canceled(long cpu)
|
||||
alien = n->alien;
|
||||
n->alien = NULL;
|
||||
|
||||
- spin_unlock_irq(&n->list_lock);
|
||||
+ raw_spin_unlock_irq(&n->list_lock);
|
||||
|
||||
kfree(shared);
|
||||
if (alien) {
|
||||
@@ -1207,7 +1207,7 @@ static void __init init_list(struct kmem
|
||||
/*
|
||||
* Do not assume that spinlocks can be initialized via memcpy:
|
||||
*/
|
||||
- spin_lock_init(&ptr->list_lock);
|
||||
+ raw_spin_lock_init(&ptr->list_lock);
|
||||
|
||||
MAKE_ALL_LISTS(cachep, ptr, nodeid);
|
||||
cachep->node[nodeid] = ptr;
|
||||
@@ -1378,11 +1378,11 @@ slab_out_of_memory(struct kmem_cache *ca
|
||||
for_each_kmem_cache_node(cachep, node, n) {
|
||||
unsigned long total_slabs, free_slabs, free_objs;
|
||||
|
||||
- spin_lock_irqsave(&n->list_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&n->list_lock, flags);
|
||||
total_slabs = n->total_slabs;
|
||||
free_slabs = n->free_slabs;
|
||||
free_objs = n->free_objects;
|
||||
- spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
|
||||
pr_warn(" node %d: slabs: %ld/%ld, objs: %ld/%ld\n",
|
||||
node, total_slabs - free_slabs, total_slabs,
|
||||
@@ -2175,7 +2175,7 @@ static void check_spinlock_acquired(stru
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
check_irq_off();
|
||||
- assert_spin_locked(&get_node(cachep, numa_mem_id())->list_lock);
|
||||
+ assert_raw_spin_locked(&get_node(cachep, numa_mem_id())->list_lock);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -2183,7 +2183,7 @@ static void check_spinlock_acquired_node
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
check_irq_off();
|
||||
- assert_spin_locked(&get_node(cachep, node)->list_lock);
|
||||
+ assert_raw_spin_locked(&get_node(cachep, node)->list_lock);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -2223,9 +2223,9 @@ static void do_drain(void *arg)
|
||||
check_irq_off();
|
||||
ac = cpu_cache_get(cachep);
|
||||
n = get_node(cachep, node);
|
||||
- spin_lock(&n->list_lock);
|
||||
+ raw_spin_lock(&n->list_lock);
|
||||
free_block(cachep, ac->entry, ac->avail, node, &list);
|
||||
- spin_unlock(&n->list_lock);
|
||||
+ raw_spin_unlock(&n->list_lock);
|
||||
slabs_destroy(cachep, &list);
|
||||
ac->avail = 0;
|
||||
}
|
||||
@@ -2243,9 +2243,9 @@ static void drain_cpu_caches(struct kmem
|
||||
drain_alien_cache(cachep, n->alien);
|
||||
|
||||
for_each_kmem_cache_node(cachep, node, n) {
|
||||
- spin_lock_irq(&n->list_lock);
|
||||
+ raw_spin_lock_irq(&n->list_lock);
|
||||
drain_array_locked(cachep, n->shared, node, true, &list);
|
||||
- spin_unlock_irq(&n->list_lock);
|
||||
+ raw_spin_unlock_irq(&n->list_lock);
|
||||
|
||||
slabs_destroy(cachep, &list);
|
||||
}
|
||||
@@ -2267,10 +2267,10 @@ static int drain_freelist(struct kmem_ca
|
||||
nr_freed = 0;
|
||||
while (nr_freed < tofree && !list_empty(&n->slabs_free)) {
|
||||
|
||||
- spin_lock_irq(&n->list_lock);
|
||||
+ raw_spin_lock_irq(&n->list_lock);
|
||||
p = n->slabs_free.prev;
|
||||
if (p == &n->slabs_free) {
|
||||
- spin_unlock_irq(&n->list_lock);
|
||||
+ raw_spin_unlock_irq(&n->list_lock);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -2283,7 +2283,7 @@ static int drain_freelist(struct kmem_ca
|
||||
* to the cache.
|
||||
*/
|
||||
n->free_objects -= cache->num;
|
||||
- spin_unlock_irq(&n->list_lock);
|
||||
+ raw_spin_unlock_irq(&n->list_lock);
|
||||
slab_destroy(cache, page);
|
||||
nr_freed++;
|
||||
}
|
||||
@@ -2731,7 +2731,7 @@ static void cache_grow_end(struct kmem_c
|
||||
INIT_LIST_HEAD(&page->lru);
|
||||
n = get_node(cachep, page_to_nid(page));
|
||||
|
||||
- spin_lock(&n->list_lock);
|
||||
+ raw_spin_lock(&n->list_lock);
|
||||
n->total_slabs++;
|
||||
if (!page->active) {
|
||||
list_add_tail(&page->lru, &(n->slabs_free));
|
||||
@@ -2741,7 +2741,7 @@ static void cache_grow_end(struct kmem_c
|
||||
|
||||
STATS_INC_GROWN(cachep);
|
||||
n->free_objects += cachep->num - page->active;
|
||||
- spin_unlock(&n->list_lock);
|
||||
+ raw_spin_unlock(&n->list_lock);
|
||||
|
||||
fixup_objfreelist_debug(cachep, &list);
|
||||
}
|
||||
@@ -2909,7 +2909,7 @@ static struct page *get_first_slab(struc
|
||||
{
|
||||
struct page *page;
|
||||
|
||||
- assert_spin_locked(&n->list_lock);
|
||||
+ assert_raw_spin_locked(&n->list_lock);
|
||||
page = list_first_entry_or_null(&n->slabs_partial, struct page, lru);
|
||||
if (!page) {
|
||||
n->free_touched = 1;
|
||||
@@ -2935,10 +2935,10 @@ static noinline void *cache_alloc_pfmema
|
||||
if (!gfp_pfmemalloc_allowed(flags))
|
||||
return NULL;
|
||||
|
||||
- spin_lock(&n->list_lock);
|
||||
+ raw_spin_lock(&n->list_lock);
|
||||
page = get_first_slab(n, true);
|
||||
if (!page) {
|
||||
- spin_unlock(&n->list_lock);
|
||||
+ raw_spin_unlock(&n->list_lock);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -2947,7 +2947,7 @@ static noinline void *cache_alloc_pfmema
|
||||
|
||||
fixup_slab_list(cachep, n, page, &list);
|
||||
|
||||
- spin_unlock(&n->list_lock);
|
||||
+ raw_spin_unlock(&n->list_lock);
|
||||
fixup_objfreelist_debug(cachep, &list);
|
||||
|
||||
return obj;
|
||||
@@ -3006,7 +3006,7 @@ static void *cache_alloc_refill(struct k
|
||||
if (!n->free_objects && (!shared || !shared->avail))
|
||||
goto direct_grow;
|
||||
|
||||
- spin_lock(&n->list_lock);
|
||||
+ raw_spin_lock(&n->list_lock);
|
||||
shared = READ_ONCE(n->shared);
|
||||
|
||||
/* See if we can refill from the shared array */
|
||||
@@ -3030,7 +3030,7 @@ static void *cache_alloc_refill(struct k
|
||||
must_grow:
|
||||
n->free_objects -= ac->avail;
|
||||
alloc_done:
|
||||
- spin_unlock(&n->list_lock);
|
||||
+ raw_spin_unlock(&n->list_lock);
|
||||
fixup_objfreelist_debug(cachep, &list);
|
||||
|
||||
direct_grow:
|
||||
@@ -3255,7 +3255,7 @@ static void *____cache_alloc_node(struct
|
||||
BUG_ON(!n);
|
||||
|
||||
check_irq_off();
|
||||
- spin_lock(&n->list_lock);
|
||||
+ raw_spin_lock(&n->list_lock);
|
||||
page = get_first_slab(n, false);
|
||||
if (!page)
|
||||
goto must_grow;
|
||||
@@ -3273,12 +3273,12 @@ static void *____cache_alloc_node(struct
|
||||
|
||||
fixup_slab_list(cachep, n, page, &list);
|
||||
|
||||
- spin_unlock(&n->list_lock);
|
||||
+ raw_spin_unlock(&n->list_lock);
|
||||
fixup_objfreelist_debug(cachep, &list);
|
||||
return obj;
|
||||
|
||||
must_grow:
|
||||
- spin_unlock(&n->list_lock);
|
||||
+ raw_spin_unlock(&n->list_lock);
|
||||
page = cache_grow_begin(cachep, gfp_exact_node(flags), nodeid);
|
||||
if (page) {
|
||||
/* This slab isn't counted yet so don't update free_objects */
|
||||
@@ -3454,7 +3454,7 @@ static void cache_flusharray(struct kmem
|
||||
|
||||
check_irq_off();
|
||||
n = get_node(cachep, node);
|
||||
- spin_lock(&n->list_lock);
|
||||
+ raw_spin_lock(&n->list_lock);
|
||||
if (n->shared) {
|
||||
struct array_cache *shared_array = n->shared;
|
||||
int max = shared_array->limit - shared_array->avail;
|
||||
@@ -3483,7 +3483,7 @@ static void cache_flusharray(struct kmem
|
||||
STATS_SET_FREEABLE(cachep, i);
|
||||
}
|
||||
#endif
|
||||
- spin_unlock(&n->list_lock);
|
||||
+ raw_spin_unlock(&n->list_lock);
|
||||
slabs_destroy(cachep, &list);
|
||||
ac->avail -= batchcount;
|
||||
memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
|
||||
@@ -3893,9 +3893,9 @@ static int __do_tune_cpucache(struct kme
|
||||
|
||||
node = cpu_to_mem(cpu);
|
||||
n = get_node(cachep, node);
|
||||
- spin_lock_irq(&n->list_lock);
|
||||
+ raw_spin_lock_irq(&n->list_lock);
|
||||
free_block(cachep, ac->entry, ac->avail, node, &list);
|
||||
- spin_unlock_irq(&n->list_lock);
|
||||
+ raw_spin_unlock_irq(&n->list_lock);
|
||||
slabs_destroy(cachep, &list);
|
||||
}
|
||||
free_percpu(prev);
|
||||
@@ -4020,9 +4020,9 @@ static void drain_array(struct kmem_cach
|
||||
return;
|
||||
}
|
||||
|
||||
- spin_lock_irq(&n->list_lock);
|
||||
+ raw_spin_lock_irq(&n->list_lock);
|
||||
drain_array_locked(cachep, ac, node, false, &list);
|
||||
- spin_unlock_irq(&n->list_lock);
|
||||
+ raw_spin_unlock_irq(&n->list_lock);
|
||||
|
||||
slabs_destroy(cachep, &list);
|
||||
}
|
||||
@@ -4106,7 +4106,7 @@ void get_slabinfo(struct kmem_cache *cac
|
||||
|
||||
for_each_kmem_cache_node(cachep, node, n) {
|
||||
check_irq_on();
|
||||
- spin_lock_irq(&n->list_lock);
|
||||
+ raw_spin_lock_irq(&n->list_lock);
|
||||
|
||||
total_slabs += n->total_slabs;
|
||||
free_slabs += n->free_slabs;
|
||||
@@ -4115,7 +4115,7 @@ void get_slabinfo(struct kmem_cache *cac
|
||||
if (n->shared)
|
||||
shared_avail += n->shared->avail;
|
||||
|
||||
- spin_unlock_irq(&n->list_lock);
|
||||
+ raw_spin_unlock_irq(&n->list_lock);
|
||||
}
|
||||
num_objs = total_slabs * cachep->num;
|
||||
active_slabs = total_slabs - free_slabs;
|
||||
@@ -4330,13 +4330,13 @@ static int leaks_show(struct seq_file *m
|
||||
for_each_kmem_cache_node(cachep, node, n) {
|
||||
|
||||
check_irq_on();
|
||||
- spin_lock_irq(&n->list_lock);
|
||||
+ raw_spin_lock_irq(&n->list_lock);
|
||||
|
||||
list_for_each_entry(page, &n->slabs_full, lru)
|
||||
handle_slab(x, cachep, page);
|
||||
list_for_each_entry(page, &n->slabs_partial, lru)
|
||||
handle_slab(x, cachep, page);
|
||||
- spin_unlock_irq(&n->list_lock);
|
||||
+ raw_spin_unlock_irq(&n->list_lock);
|
||||
}
|
||||
} while (!is_store_user_clean(cachep));
|
||||
|
||||
--- a/mm/slab.h
|
||||
+++ b/mm/slab.h
|
||||
@@ -453,7 +453,7 @@ static inline void slab_post_alloc_hook(
|
||||
* The slab lists for all objects.
|
||||
*/
|
||||
struct kmem_cache_node {
|
||||
- spinlock_t list_lock;
|
||||
+ raw_spinlock_t list_lock;
|
||||
|
||||
#ifdef CONFIG_SLAB
|
||||
struct list_head slabs_partial; /* partial list first, better asm code */
|
||||
--- a/mm/slub.c
|
||||
+++ b/mm/slub.c
|
||||
@@ -1167,7 +1167,7 @@ static noinline int free_debug_processin
|
||||
unsigned long uninitialized_var(flags);
|
||||
int ret = 0;
|
||||
|
||||
- spin_lock_irqsave(&n->list_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&n->list_lock, flags);
|
||||
slab_lock(page);
|
||||
|
||||
if (s->flags & SLAB_CONSISTENCY_CHECKS) {
|
||||
@@ -1202,7 +1202,7 @@ static noinline int free_debug_processin
|
||||
bulk_cnt, cnt);
|
||||
|
||||
slab_unlock(page);
|
||||
- spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
if (!ret)
|
||||
slab_fix(s, "Object at 0x%p not freed", object);
|
||||
return ret;
|
||||
@@ -1802,7 +1802,7 @@ static void *get_partial_node(struct kme
|
||||
if (!n || !n->nr_partial)
|
||||
return NULL;
|
||||
|
||||
- spin_lock(&n->list_lock);
|
||||
+ raw_spin_lock(&n->list_lock);
|
||||
list_for_each_entry_safe(page, page2, &n->partial, lru) {
|
||||
void *t;
|
||||
|
||||
@@ -1827,7 +1827,7 @@ static void *get_partial_node(struct kme
|
||||
break;
|
||||
|
||||
}
|
||||
- spin_unlock(&n->list_lock);
|
||||
+ raw_spin_unlock(&n->list_lock);
|
||||
return object;
|
||||
}
|
||||
|
||||
@@ -2073,7 +2073,7 @@ static void deactivate_slab(struct kmem_
|
||||
* that acquire_slab() will see a slab page that
|
||||
* is frozen
|
||||
*/
|
||||
- spin_lock(&n->list_lock);
|
||||
+ raw_spin_lock(&n->list_lock);
|
||||
}
|
||||
} else {
|
||||
m = M_FULL;
|
||||
@@ -2084,7 +2084,7 @@ static void deactivate_slab(struct kmem_
|
||||
* slabs from diagnostic functions will not see
|
||||
* any frozen slabs.
|
||||
*/
|
||||
- spin_lock(&n->list_lock);
|
||||
+ raw_spin_lock(&n->list_lock);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2119,7 +2119,7 @@ static void deactivate_slab(struct kmem_
|
||||
goto redo;
|
||||
|
||||
if (lock)
|
||||
- spin_unlock(&n->list_lock);
|
||||
+ raw_spin_unlock(&n->list_lock);
|
||||
|
||||
if (m == M_FREE) {
|
||||
stat(s, DEACTIVATE_EMPTY);
|
||||
@@ -2154,10 +2154,10 @@ static void unfreeze_partials(struct kme
|
||||
n2 = get_node(s, page_to_nid(page));
|
||||
if (n != n2) {
|
||||
if (n)
|
||||
- spin_unlock(&n->list_lock);
|
||||
+ raw_spin_unlock(&n->list_lock);
|
||||
|
||||
n = n2;
|
||||
- spin_lock(&n->list_lock);
|
||||
+ raw_spin_lock(&n->list_lock);
|
||||
}
|
||||
|
||||
do {
|
||||
@@ -2186,7 +2186,7 @@ static void unfreeze_partials(struct kme
|
||||
}
|
||||
|
||||
if (n)
|
||||
- spin_unlock(&n->list_lock);
|
||||
+ raw_spin_unlock(&n->list_lock);
|
||||
|
||||
while (discard_page) {
|
||||
page = discard_page;
|
||||
@@ -2355,10 +2355,10 @@ static unsigned long count_partial(struc
|
||||
unsigned long x = 0;
|
||||
struct page *page;
|
||||
|
||||
- spin_lock_irqsave(&n->list_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&n->list_lock, flags);
|
||||
list_for_each_entry(page, &n->partial, lru)
|
||||
x += get_count(page);
|
||||
- spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
return x;
|
||||
}
|
||||
#endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */
|
||||
@@ -2793,7 +2793,7 @@ static void __slab_free(struct kmem_cach
|
||||
|
||||
do {
|
||||
if (unlikely(n)) {
|
||||
- spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
n = NULL;
|
||||
}
|
||||
prior = page->freelist;
|
||||
@@ -2825,7 +2825,7 @@ static void __slab_free(struct kmem_cach
|
||||
* Otherwise the list_lock will synchronize with
|
||||
* other processors updating the list of slabs.
|
||||
*/
|
||||
- spin_lock_irqsave(&n->list_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&n->list_lock, flags);
|
||||
|
||||
}
|
||||
}
|
||||
@@ -2867,7 +2867,7 @@ static void __slab_free(struct kmem_cach
|
||||
add_partial(n, page, DEACTIVATE_TO_TAIL);
|
||||
stat(s, FREE_ADD_PARTIAL);
|
||||
}
|
||||
- spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
return;
|
||||
|
||||
slab_empty:
|
||||
@@ -2882,7 +2882,7 @@ static void __slab_free(struct kmem_cach
|
||||
remove_full(s, n, page);
|
||||
}
|
||||
|
||||
- spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
stat(s, FREE_SLAB);
|
||||
discard_slab(s, page);
|
||||
}
|
||||
@@ -3269,7 +3269,7 @@ static void
|
||||
init_kmem_cache_node(struct kmem_cache_node *n)
|
||||
{
|
||||
n->nr_partial = 0;
|
||||
- spin_lock_init(&n->list_lock);
|
||||
+ raw_spin_lock_init(&n->list_lock);
|
||||
INIT_LIST_HEAD(&n->partial);
|
||||
#ifdef CONFIG_SLUB_DEBUG
|
||||
atomic_long_set(&n->nr_slabs, 0);
|
||||
@@ -3653,7 +3653,7 @@ static void free_partial(struct kmem_cac
|
||||
struct page *page, *h;
|
||||
|
||||
BUG_ON(irqs_disabled());
|
||||
- spin_lock_irq(&n->list_lock);
|
||||
+ raw_spin_lock_irq(&n->list_lock);
|
||||
list_for_each_entry_safe(page, h, &n->partial, lru) {
|
||||
if (!page->inuse) {
|
||||
remove_partial(n, page);
|
||||
@@ -3663,7 +3663,7 @@ static void free_partial(struct kmem_cac
|
||||
"Objects remaining in %s on __kmem_cache_shutdown()");
|
||||
}
|
||||
}
|
||||
- spin_unlock_irq(&n->list_lock);
|
||||
+ raw_spin_unlock_irq(&n->list_lock);
|
||||
|
||||
list_for_each_entry_safe(page, h, &discard, lru)
|
||||
discard_slab(s, page);
|
||||
@@ -3936,7 +3936,7 @@ int __kmem_cache_shrink(struct kmem_cach
|
||||
for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
|
||||
INIT_LIST_HEAD(promote + i);
|
||||
|
||||
- spin_lock_irqsave(&n->list_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&n->list_lock, flags);
|
||||
|
||||
/*
|
||||
* Build lists of slabs to discard or promote.
|
||||
@@ -3967,7 +3967,7 @@ int __kmem_cache_shrink(struct kmem_cach
|
||||
for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
|
||||
list_splice(promote + i, &n->partial);
|
||||
|
||||
- spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
|
||||
/* Release empty slabs */
|
||||
list_for_each_entry_safe(page, t, &discard, lru)
|
||||
@@ -4381,7 +4381,7 @@ static int validate_slab_node(struct kme
|
||||
struct page *page;
|
||||
unsigned long flags;
|
||||
|
||||
- spin_lock_irqsave(&n->list_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&n->list_lock, flags);
|
||||
|
||||
list_for_each_entry(page, &n->partial, lru) {
|
||||
validate_slab_slab(s, page, map);
|
||||
@@ -4403,7 +4403,7 @@ static int validate_slab_node(struct kme
|
||||
s->name, count, atomic_long_read(&n->nr_slabs));
|
||||
|
||||
out:
|
||||
- spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
return count;
|
||||
}
|
||||
|
||||
@@ -4593,12 +4593,12 @@ static int list_locations(struct kmem_ca
|
||||
if (!atomic_long_read(&n->nr_slabs))
|
||||
continue;
|
||||
|
||||
- spin_lock_irqsave(&n->list_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&n->list_lock, flags);
|
||||
list_for_each_entry(page, &n->partial, lru)
|
||||
process_slab(&t, s, page, alloc, map);
|
||||
list_for_each_entry(page, &n->full, lru)
|
||||
process_slab(&t, s, page, alloc, map);
|
||||
- spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
|
||||
}
|
||||
|
||||
for (i = 0; i < t.count; i++) {
|
||||
@@ -0,0 +1,216 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Thu, 21 Jun 2018 17:29:19 +0200
|
||||
Subject: [PATCH 4/4] mm/SLUB: delay giving back empty slubs to IRQ enabled
|
||||
regions
|
||||
|
||||
__free_slab() is invoked with disabled interrupts, which increases the
|
||||
irq-off time while __free_pages() is doing the work.
|
||||
Allow __free_slab() to be invoked with enabled interrupts and move
|
||||
everything from interrupts-off invocations to a temporary per-CPU list
|
||||
so it can be processed later.
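
The mechanism, roughly: each CPU gets a lock-protected list on which
free_slab() parks pages whenever interrupts happen to be disabled; callers
that re-enable interrupts later splice that list off and free it there. A
simplified sketch following the names in the hunks below (the _sketch suffix
marks illustrative code, not the actual patch):

struct slub_free_list_sketch {
	raw_spinlock_t	 lock;
	struct list_head list;
};
static DEFINE_PER_CPU(struct slub_free_list_sketch, slub_free_list_sketch);

static void free_slab_sketch(struct kmem_cache *s, struct page *page)
{
	if (irqs_disabled()) {
		/* Too heavy to free with IRQs off: park it on the per-CPU list. */
		struct slub_free_list_sketch *f = this_cpu_ptr(&slub_free_list_sketch);

		raw_spin_lock(&f->lock);
		list_add(&page->lru, &f->list);
		raw_spin_unlock(&f->lock);
	} else {
		__free_slab(s, page);	/* existing SLUB path */
	}
}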
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
mm/slub.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
|
||||
1 file changed, 69 insertions(+), 5 deletions(-)
|
||||
|
||||
--- a/mm/slub.c
|
||||
+++ b/mm/slub.c
|
||||
@@ -1330,6 +1330,12 @@ static inline void dec_slabs_node(struct
|
||||
|
||||
#endif /* CONFIG_SLUB_DEBUG */
|
||||
|
||||
+struct slub_free_list {
|
||||
+ raw_spinlock_t lock;
|
||||
+ struct list_head list;
|
||||
+};
|
||||
+static DEFINE_PER_CPU(struct slub_free_list, slub_free_list);
|
||||
+
|
||||
/*
|
||||
* Hooks for other subsystems that check memory allocations. In a typical
|
||||
* production configuration these hooks all should produce no code at all.
|
||||
@@ -1684,6 +1690,16 @@ static void __free_slab(struct kmem_cach
|
||||
__free_pages(page, order);
|
||||
}
|
||||
|
||||
+static void free_delayed(struct list_head *h)
|
||||
+{
|
||||
+ while (!list_empty(h)) {
|
||||
+ struct page *page = list_first_entry(h, struct page, lru);
|
||||
+
|
||||
+ list_del(&page->lru);
|
||||
+ __free_slab(page->slab_cache, page);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static void rcu_free_slab(struct rcu_head *h)
|
||||
{
|
||||
struct page *page = container_of(h, struct page, rcu_head);
|
||||
@@ -1695,6 +1711,12 @@ static void free_slab(struct kmem_cache
|
||||
{
|
||||
if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
|
||||
call_rcu(&page->rcu_head, rcu_free_slab);
|
||||
+ } else if (irqs_disabled()) {
|
||||
+ struct slub_free_list *f = this_cpu_ptr(&slub_free_list);
|
||||
+
|
||||
+ raw_spin_lock(&f->lock);
|
||||
+ list_add(&page->lru, &f->list);
|
||||
+ raw_spin_unlock(&f->lock);
|
||||
} else
|
||||
__free_slab(s, page);
|
||||
}
|
||||
@@ -2223,14 +2245,21 @@ static void put_cpu_partial(struct kmem_
|
||||
pobjects = oldpage->pobjects;
|
||||
pages = oldpage->pages;
|
||||
if (drain && pobjects > s->cpu_partial) {
|
||||
+ struct slub_free_list *f;
|
||||
unsigned long flags;
|
||||
+ LIST_HEAD(tofree);
|
||||
/*
|
||||
* partial array is full. Move the existing
|
||||
* set to the per node partial list.
|
||||
*/
|
||||
local_irq_save(flags);
|
||||
unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
|
||||
+ f = this_cpu_ptr(&slub_free_list);
|
||||
+ raw_spin_lock(&f->lock);
|
||||
+ list_splice_init(&f->list, &tofree);
|
||||
+ raw_spin_unlock(&f->lock);
|
||||
local_irq_restore(flags);
|
||||
+ free_delayed(&tofree);
|
||||
oldpage = NULL;
|
||||
pobjects = 0;
|
||||
pages = 0;
|
||||
@@ -2300,7 +2329,22 @@ static bool has_cpu_slab(int cpu, void *
|
||||
|
||||
static void flush_all(struct kmem_cache *s)
|
||||
{
|
||||
+ LIST_HEAD(tofree);
|
||||
+ int cpu;
|
||||
+
|
||||
on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
|
||||
+ for_each_online_cpu(cpu) {
|
||||
+ struct slub_free_list *f;
|
||||
+
|
||||
+ if (!has_cpu_slab(cpu, s))
|
||||
+ continue;
|
||||
+
|
||||
+ f = &per_cpu(slub_free_list, cpu);
|
||||
+ raw_spin_lock_irq(&f->lock);
|
||||
+ list_splice_init(&f->list, &tofree);
|
||||
+ raw_spin_unlock_irq(&f->lock);
|
||||
+ free_delayed(&tofree);
|
||||
+ }
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2498,8 +2542,10 @@ static inline void *get_freelist(struct
|
||||
* already disabled (which is the case for bulk allocation).
|
||||
*/
|
||||
static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
|
||||
- unsigned long addr, struct kmem_cache_cpu *c)
|
||||
+ unsigned long addr, struct kmem_cache_cpu *c,
|
||||
+ struct list_head *to_free)
|
||||
{
|
||||
+ struct slub_free_list *f;
|
||||
void *freelist;
|
||||
struct page *page;
|
||||
|
||||
@@ -2555,6 +2601,13 @@ static void *___slab_alloc(struct kmem_c
|
||||
VM_BUG_ON(!c->page->frozen);
|
||||
c->freelist = get_freepointer(s, freelist);
|
||||
c->tid = next_tid(c->tid);
|
||||
+
|
||||
+out:
|
||||
+ f = this_cpu_ptr(&slub_free_list);
|
||||
+ raw_spin_lock(&f->lock);
|
||||
+ list_splice_init(&f->list, to_free);
|
||||
+ raw_spin_unlock(&f->lock);
|
||||
+
|
||||
return freelist;
|
||||
|
||||
new_slab:
|
||||
@@ -2570,7 +2623,7 @@ static void *___slab_alloc(struct kmem_c
|
||||
|
||||
if (unlikely(!freelist)) {
|
||||
slab_out_of_memory(s, gfpflags, node);
|
||||
- return NULL;
|
||||
+ goto out;
|
||||
}
|
||||
|
||||
page = c->page;
|
||||
@@ -2583,7 +2636,7 @@ static void *___slab_alloc(struct kmem_c
|
||||
goto new_slab; /* Slab failed checks. Next slab needed */
|
||||
|
||||
deactivate_slab(s, page, get_freepointer(s, freelist), c);
|
||||
- return freelist;
|
||||
+ goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2595,6 +2648,7 @@ static void *__slab_alloc(struct kmem_ca
|
||||
{
|
||||
void *p;
|
||||
unsigned long flags;
|
||||
+ LIST_HEAD(tofree);
|
||||
|
||||
local_irq_save(flags);
|
||||
#ifdef CONFIG_PREEMPT
|
||||
@@ -2606,8 +2660,9 @@ static void *__slab_alloc(struct kmem_ca
|
||||
c = this_cpu_ptr(s->cpu_slab);
|
||||
#endif
|
||||
|
||||
- p = ___slab_alloc(s, gfpflags, node, addr, c);
|
||||
+ p = ___slab_alloc(s, gfpflags, node, addr, c, &tofree);
|
||||
local_irq_restore(flags);
|
||||
+ free_delayed(&tofree);
|
||||
return p;
|
||||
}
|
||||
|
||||
@@ -3085,6 +3140,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca
|
||||
void **p)
|
||||
{
|
||||
struct kmem_cache_cpu *c;
|
||||
+ LIST_HEAD(to_free);
|
||||
int i;
|
||||
|
||||
/* memcg and kmem_cache debug support */
|
||||
@@ -3108,7 +3164,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca
|
||||
* of re-populating per CPU c->freelist
|
||||
*/
|
||||
p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
|
||||
- _RET_IP_, c);
|
||||
+ _RET_IP_, c, &to_free);
|
||||
if (unlikely(!p[i]))
|
||||
goto error;
|
||||
|
||||
@@ -3120,6 +3176,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca
|
||||
}
|
||||
c->tid = next_tid(c->tid);
|
||||
local_irq_enable();
|
||||
+ free_delayed(&to_free);
|
||||
|
||||
/* Clear memory outside IRQ disabled fastpath loop */
|
||||
if (unlikely(flags & __GFP_ZERO)) {
|
||||
@@ -3134,6 +3191,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca
|
||||
return i;
|
||||
error:
|
||||
local_irq_enable();
|
||||
+ free_delayed(&to_free);
|
||||
slab_post_alloc_hook(s, flags, i, p);
|
||||
__kmem_cache_free_bulk(s, i, p);
|
||||
return 0;
|
||||
@@ -4180,6 +4238,12 @@ void __init kmem_cache_init(void)
|
||||
{
|
||||
static __initdata struct kmem_cache boot_kmem_cache,
|
||||
boot_kmem_cache_node;
|
||||
+ int cpu;
|
||||
+
|
||||
+ for_each_possible_cpu(cpu) {
|
||||
+ raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock);
|
||||
+ INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list);
|
||||
+ }
|
||||
|
||||
if (debug_guardpage_minorder())
|
||||
slub_max_order = 0;
|
||||
@@ -0,0 +1,232 @@
|
||||
From: Ingo Molnar <mingo@elte.hu>
|
||||
Date: Fri, 3 Jul 2009 08:29:37 -0500
|
||||
Subject: mm: page_alloc: rt-friendly per-cpu pages
|
||||
|
||||
rt-friendly per-cpu pages: convert the irqs-off per-cpu locking
|
||||
method into a preemptible, explicit-per-cpu-locks method.
|
||||
|
||||
Contains fixes from:
|
||||
Peter Zijlstra <a.p.zijlstra@chello.nl>
|
||||
Thomas Gleixner <tglx@linutronix.de>
|
||||
|
||||
Signed-off-by: Ingo Molnar <mingo@elte.hu>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
mm/page_alloc.c | 63 ++++++++++++++++++++++++++++++++++++++------------------
|
||||
1 file changed, 43 insertions(+), 20 deletions(-)
|
||||
|
||||
--- a/mm/page_alloc.c
|
||||
+++ b/mm/page_alloc.c
|
||||
@@ -60,6 +60,7 @@
|
||||
#include <linux/hugetlb.h>
|
||||
#include <linux/sched/rt.h>
|
||||
#include <linux/sched/mm.h>
|
||||
+#include <linux/locallock.h>
|
||||
#include <linux/page_owner.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/memcontrol.h>
|
||||
@@ -291,6 +292,18 @@ EXPORT_SYMBOL(nr_node_ids);
|
||||
EXPORT_SYMBOL(nr_online_nodes);
|
||||
#endif
|
||||
|
||||
+static DEFINE_LOCAL_IRQ_LOCK(pa_lock);
|
||||
+
|
||||
+#ifdef CONFIG_PREEMPT_RT_BASE
|
||||
+# define cpu_lock_irqsave(cpu, flags) \
|
||||
+ local_lock_irqsave_on(pa_lock, flags, cpu)
|
||||
+# define cpu_unlock_irqrestore(cpu, flags) \
|
||||
+ local_unlock_irqrestore_on(pa_lock, flags, cpu)
|
||||
+#else
|
||||
+# define cpu_lock_irqsave(cpu, flags) local_irq_save(flags)
|
||||
+# define cpu_unlock_irqrestore(cpu, flags) local_irq_restore(flags)
|
||||
+#endif
|
||||
+
|
||||
int page_group_by_mobility_disabled __read_mostly;
|
||||
|
||||
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
|
||||
@@ -1296,10 +1309,10 @@ static void __free_pages_ok(struct page
|
||||
return;
|
||||
|
||||
migratetype = get_pfnblock_migratetype(page, pfn);
|
||||
- local_irq_save(flags);
|
||||
+ local_lock_irqsave(pa_lock, flags);
|
||||
__count_vm_events(PGFREE, 1 << order);
|
||||
free_one_page(page_zone(page), page, pfn, order, migratetype);
|
||||
- local_irq_restore(flags);
|
||||
+ local_unlock_irqrestore(pa_lock, flags);
|
||||
}
|
||||
|
||||
static void __init __free_pages_boot_core(struct page *page, unsigned int order)
|
||||
@@ -2560,13 +2573,13 @@ void drain_zone_pages(struct zone *zone,
|
||||
int to_drain, batch;
|
||||
LIST_HEAD(dst);
|
||||
|
||||
- local_irq_save(flags);
|
||||
+ local_lock_irqsave(pa_lock, flags);
|
||||
batch = READ_ONCE(pcp->batch);
|
||||
to_drain = min(pcp->count, batch);
|
||||
if (to_drain > 0)
|
||||
isolate_pcp_pages(to_drain, pcp, &dst);
|
||||
|
||||
- local_irq_restore(flags);
|
||||
+ local_unlock_irqrestore(pa_lock, flags);
|
||||
|
||||
if (to_drain > 0)
|
||||
free_pcppages_bulk(zone, &dst, false);
|
||||
@@ -2588,7 +2601,7 @@ static void drain_pages_zone(unsigned in
|
||||
LIST_HEAD(dst);
|
||||
int count;
|
||||
|
||||
- local_irq_save(flags);
|
||||
+ cpu_lock_irqsave(cpu, flags);
|
||||
pset = per_cpu_ptr(zone->pageset, cpu);
|
||||
|
||||
pcp = &pset->pcp;
|
||||
@@ -2596,7 +2609,7 @@ static void drain_pages_zone(unsigned in
|
||||
if (count)
|
||||
isolate_pcp_pages(count, pcp, &dst);
|
||||
|
||||
- local_irq_restore(flags);
|
||||
+ cpu_unlock_irqrestore(cpu, flags);
|
||||
|
||||
if (count)
|
||||
free_pcppages_bulk(zone, &dst, false);
|
||||
@@ -2634,6 +2647,7 @@ void drain_local_pages(struct zone *zone
|
||||
drain_pages(cpu);
|
||||
}
|
||||
|
||||
+#ifndef CONFIG_PREEMPT_RT_BASE
|
||||
static void drain_local_pages_wq(struct work_struct *work)
|
||||
{
|
||||
/*
|
||||
@@ -2647,6 +2661,7 @@ static void drain_local_pages_wq(struct
|
||||
drain_local_pages(NULL);
|
||||
preempt_enable();
|
||||
}
|
||||
+#endif
|
||||
|
||||
/*
|
||||
* Spill all the per-cpu pages from all CPUs back into the buddy allocator.
|
||||
@@ -2713,7 +2728,14 @@ void drain_all_pages(struct zone *zone)
|
||||
else
|
||||
cpumask_clear_cpu(cpu, &cpus_with_pcps);
|
||||
}
|
||||
-
|
||||
+#ifdef CONFIG_PREEMPT_RT_BASE
|
||||
+ for_each_cpu(cpu, &cpus_with_pcps) {
|
||||
+ if (zone)
|
||||
+ drain_pages_zone(cpu, zone);
|
||||
+ else
|
||||
+ drain_pages(cpu);
|
||||
+ }
|
||||
+#else
|
||||
for_each_cpu(cpu, &cpus_with_pcps) {
|
||||
struct work_struct *work = per_cpu_ptr(&pcpu_drain, cpu);
|
||||
INIT_WORK(work, drain_local_pages_wq);
|
||||
@@ -2721,6 +2743,7 @@ void drain_all_pages(struct zone *zone)
|
||||
}
|
||||
for_each_cpu(cpu, &cpus_with_pcps)
|
||||
flush_work(per_cpu_ptr(&pcpu_drain, cpu));
|
||||
+#endif
|
||||
|
||||
mutex_unlock(&pcpu_drain_mutex);
|
||||
}
|
||||
@@ -2840,9 +2863,9 @@ void free_unref_page(struct page *page)
|
||||
if (!free_unref_page_prepare(page, pfn))
|
||||
return;
|
||||
|
||||
- local_irq_save(flags);
|
||||
+ local_lock_irqsave(pa_lock, flags);
|
||||
free_unref_page_commit(page, pfn, &dst);
|
||||
- local_irq_restore(flags);
|
||||
+ local_unlock_irqrestore(pa_lock, flags);
|
||||
if (!list_empty(&dst))
|
||||
free_pcppages_bulk(zone, &dst, false);
|
||||
}
|
||||
@@ -2869,7 +2892,7 @@ void free_unref_page_list(struct list_he
|
||||
set_page_private(page, pfn);
|
||||
}
|
||||
|
||||
- local_irq_save(flags);
|
||||
+ local_lock_irqsave(pa_lock, flags);
|
||||
list_for_each_entry_safe(page, next, list, lru) {
|
||||
unsigned long pfn = page_private(page);
|
||||
enum zone_type type;
|
||||
@@ -2884,12 +2907,12 @@ void free_unref_page_list(struct list_he
|
||||
* a large list of pages to free.
|
||||
*/
|
||||
if (++batch_count == SWAP_CLUSTER_MAX) {
|
||||
- local_irq_restore(flags);
|
||||
+ local_unlock_irqrestore(pa_lock, flags);
|
||||
batch_count = 0;
|
||||
- local_irq_save(flags);
|
||||
+ local_lock_irqsave(pa_lock, flags);
|
||||
}
|
||||
}
|
||||
- local_irq_restore(flags);
|
||||
+ local_unlock_irqrestore(pa_lock, flags);
|
||||
|
||||
for (i = 0; i < __MAX_NR_ZONES; ) {
|
||||
struct page *page;
|
||||
@@ -3038,7 +3061,7 @@ static struct page *rmqueue_pcplist(stru
|
||||
struct page *page;
|
||||
unsigned long flags;
|
||||
|
||||
- local_irq_save(flags);
|
||||
+ local_lock_irqsave(pa_lock, flags);
|
||||
pcp = &this_cpu_ptr(zone->pageset)->pcp;
|
||||
list = &pcp->lists[migratetype];
|
||||
page = __rmqueue_pcplist(zone, migratetype, pcp, list);
|
||||
@@ -3046,7 +3069,7 @@ static struct page *rmqueue_pcplist(stru
|
||||
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
|
||||
zone_statistics(preferred_zone, zone);
|
||||
}
|
||||
- local_irq_restore(flags);
|
||||
+ local_unlock_irqrestore(pa_lock, flags);
|
||||
return page;
|
||||
}
|
||||
|
||||
@@ -3073,7 +3096,7 @@ struct page *rmqueue(struct zone *prefer
|
||||
* allocate greater than order-1 page units with __GFP_NOFAIL.
|
||||
*/
|
||||
WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
|
||||
- spin_lock_irqsave(&zone->lock, flags);
|
||||
+ local_spin_lock_irqsave(pa_lock, &zone->lock, flags);
|
||||
|
||||
do {
|
||||
page = NULL;
|
||||
@@ -3093,14 +3116,14 @@ struct page *rmqueue(struct zone *prefer
|
||||
|
||||
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
|
||||
zone_statistics(preferred_zone, zone);
|
||||
- local_irq_restore(flags);
|
||||
+ local_unlock_irqrestore(pa_lock, flags);
|
||||
|
||||
out:
|
||||
VM_BUG_ON_PAGE(page && bad_range(zone, page), page);
|
||||
return page;
|
||||
|
||||
failed:
|
||||
- local_irq_restore(flags);
|
||||
+ local_unlock_irqrestore(pa_lock, flags);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -8094,7 +8117,7 @@ void zone_pcp_reset(struct zone *zone)
|
||||
struct per_cpu_pageset *pset;
|
||||
|
||||
/* avoid races with drain_pages() */
|
||||
- local_irq_save(flags);
|
||||
+ local_lock_irqsave(pa_lock, flags);
|
||||
if (zone->pageset != &boot_pageset) {
|
||||
for_each_online_cpu(cpu) {
|
||||
pset = per_cpu_ptr(zone->pageset, cpu);
|
||||
@@ -8103,7 +8126,7 @@ void zone_pcp_reset(struct zone *zone)
|
||||
free_percpu(zone->pageset);
|
||||
zone->pageset = &boot_pageset;
|
||||
}
|
||||
- local_irq_restore(flags);
|
||||
+ local_unlock_irqrestore(pa_lock, flags);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTREMOVE
|
||||
@@ -0,0 +1,199 @@
|
||||
From: Ingo Molnar <mingo@elte.hu>
|
||||
Date: Fri, 3 Jul 2009 08:29:51 -0500
|
||||
Subject: mm/swap: Convert to percpu locked
|
||||
|
||||
Replace global locks (get_cpu + local_irq_save) with "local_locks()".
|
||||
Currently there is one for "rotate" and one for "swap".
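
The conversion pattern is the same in each case: the
get_cpu_var()/put_cpu_var() (or local_irq_save()) pair around a per-CPU
pagevec becomes get_locked_var()/put_locked_var() on the new local lock. A
sketch, assuming the locallock.h helpers from this series (_sketch names are
illustrative):

static DEFINE_LOCAL_IRQ_LOCK(swapvec_lock_sketch);
static DEFINE_PER_CPU(struct pagevec, lru_add_pvec_sketch);

static void lru_cache_add_sketch(struct page *page)
{
	/* Before: struct pagevec *pvec = &get_cpu_var(lru_add_pvec); */
	struct pagevec *pvec = &get_locked_var(swapvec_lock_sketch, lru_add_pvec_sketch);

	get_page(page);
	if (!pagevec_add(pvec, page) || PageCompound(page))
		__pagevec_lru_add(pvec);
	/* Before: put_cpu_var(lru_add_pvec); */
	put_locked_var(swapvec_lock_sketch, lru_add_pvec_sketch);
}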
|
||||
|
||||
Signed-off-by: Ingo Molnar <mingo@elte.hu>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
|
||||
---
|
||||
include/linux/swap.h | 2 ++
|
||||
mm/compaction.c | 6 ++++--
|
||||
mm/page_alloc.c | 3 ++-
|
||||
mm/swap.c | 38 ++++++++++++++++++++++----------------
|
||||
4 files changed, 30 insertions(+), 19 deletions(-)
|
||||
|
||||
--- a/include/linux/swap.h
|
||||
+++ b/include/linux/swap.h
|
||||
@@ -12,6 +12,7 @@
|
||||
#include <linux/fs.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/page-flags.h>
|
||||
+#include <linux/locallock.h>
|
||||
#include <asm/page.h>
|
||||
|
||||
struct notifier_block;
|
||||
@@ -331,6 +332,7 @@ extern unsigned long nr_free_pagecache_p
|
||||
|
||||
|
||||
/* linux/mm/swap.c */
|
||||
+DECLARE_LOCAL_IRQ_LOCK(swapvec_lock);
|
||||
extern void lru_cache_add(struct page *);
|
||||
extern void lru_cache_add_anon(struct page *page);
|
||||
extern void lru_cache_add_file(struct page *page);
|
||||
--- a/mm/compaction.c
|
||||
+++ b/mm/compaction.c
|
||||
@@ -1657,10 +1657,12 @@ static enum compact_result compact_zone(
|
||||
block_start_pfn(cc->migrate_pfn, cc->order);
|
||||
|
||||
if (cc->last_migrated_pfn < current_block_start) {
|
||||
- cpu = get_cpu();
|
||||
+ cpu = get_cpu_light();
|
||||
+ local_lock_irq(swapvec_lock);
|
||||
lru_add_drain_cpu(cpu);
|
||||
+ local_unlock_irq(swapvec_lock);
|
||||
drain_local_pages(zone);
|
||||
- put_cpu();
|
||||
+ put_cpu_light();
|
||||
/* No more flushing until we migrate again */
|
||||
cc->last_migrated_pfn = 0;
|
||||
}
|
||||
--- a/mm/page_alloc.c
|
||||
+++ b/mm/page_alloc.c
|
||||
@@ -7205,8 +7205,9 @@ void __init free_area_init(unsigned long
|
||||
|
||||
static int page_alloc_cpu_dead(unsigned int cpu)
|
||||
{
|
||||
-
|
||||
+ local_lock_irq_on(swapvec_lock, cpu);
|
||||
lru_add_drain_cpu(cpu);
|
||||
+ local_unlock_irq_on(swapvec_lock, cpu);
|
||||
drain_pages(cpu);
|
||||
|
||||
/*
|
||||
--- a/mm/swap.c
|
||||
+++ b/mm/swap.c
|
||||
@@ -33,6 +33,7 @@
|
||||
#include <linux/memcontrol.h>
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/uio.h>
|
||||
+#include <linux/locallock.h>
|
||||
#include <linux/hugetlb.h>
|
||||
#include <linux/page_idle.h>
|
||||
|
||||
@@ -51,6 +52,8 @@ static DEFINE_PER_CPU(struct pagevec, lr
|
||||
#ifdef CONFIG_SMP
|
||||
static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);
|
||||
#endif
|
||||
+static DEFINE_LOCAL_IRQ_LOCK(rotate_lock);
|
||||
+DEFINE_LOCAL_IRQ_LOCK(swapvec_lock);
|
||||
|
||||
/*
|
||||
* This path almost never happens for VM activity - pages are normally
|
||||
@@ -253,11 +256,11 @@ void rotate_reclaimable_page(struct page
|
||||
unsigned long flags;
|
||||
|
||||
get_page(page);
|
||||
- local_irq_save(flags);
|
||||
+ local_lock_irqsave(rotate_lock, flags);
|
||||
pvec = this_cpu_ptr(&lru_rotate_pvecs);
|
||||
if (!pagevec_add(pvec, page) || PageCompound(page))
|
||||
pagevec_move_tail(pvec);
|
||||
- local_irq_restore(flags);
|
||||
+ local_unlock_irqrestore(rotate_lock, flags);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -307,12 +310,13 @@ void activate_page(struct page *page)
|
||||
{
|
||||
page = compound_head(page);
|
||||
if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
|
||||
- struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
|
||||
+ struct pagevec *pvec = &get_locked_var(swapvec_lock,
|
||||
+ activate_page_pvecs);
|
||||
|
||||
get_page(page);
|
||||
if (!pagevec_add(pvec, page) || PageCompound(page))
|
||||
pagevec_lru_move_fn(pvec, __activate_page, NULL);
|
||||
- put_cpu_var(activate_page_pvecs);
|
||||
+ put_locked_var(swapvec_lock, activate_page_pvecs);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -339,7 +343,7 @@ void activate_page(struct page *page)
|
||||
|
||||
static void __lru_cache_activate_page(struct page *page)
|
||||
{
|
||||
- struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
|
||||
+ struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec);
|
||||
int i;
|
||||
|
||||
/*
|
||||
@@ -361,7 +365,7 @@ static void __lru_cache_activate_page(st
|
||||
}
|
||||
}
|
||||
|
||||
- put_cpu_var(lru_add_pvec);
|
||||
+ put_locked_var(swapvec_lock, lru_add_pvec);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -403,12 +407,12 @@ EXPORT_SYMBOL(mark_page_accessed);
|
||||
|
||||
static void __lru_cache_add(struct page *page)
|
||||
{
|
||||
- struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
|
||||
+ struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec);
|
||||
|
||||
get_page(page);
|
||||
if (!pagevec_add(pvec, page) || PageCompound(page))
|
||||
__pagevec_lru_add(pvec);
|
||||
- put_cpu_var(lru_add_pvec);
|
||||
+ put_locked_var(swapvec_lock, lru_add_pvec);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -586,9 +590,9 @@ void lru_add_drain_cpu(int cpu)
|
||||
unsigned long flags;
|
||||
|
||||
/* No harm done if a racing interrupt already did this */
|
||||
- local_irq_save(flags);
|
||||
+ local_lock_irqsave(rotate_lock, flags);
|
||||
pagevec_move_tail(pvec);
|
||||
- local_irq_restore(flags);
|
||||
+ local_unlock_irqrestore(rotate_lock, flags);
|
||||
}
|
||||
|
||||
pvec = &per_cpu(lru_deactivate_file_pvecs, cpu);
|
||||
@@ -620,11 +624,12 @@ void deactivate_file_page(struct page *p
|
||||
return;
|
||||
|
||||
if (likely(get_page_unless_zero(page))) {
|
||||
- struct pagevec *pvec = &get_cpu_var(lru_deactivate_file_pvecs);
|
||||
+ struct pagevec *pvec = &get_locked_var(swapvec_lock,
|
||||
+ lru_deactivate_file_pvecs);
|
||||
|
||||
if (!pagevec_add(pvec, page) || PageCompound(page))
|
||||
pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
|
||||
- put_cpu_var(lru_deactivate_file_pvecs);
|
||||
+ put_locked_var(swapvec_lock, lru_deactivate_file_pvecs);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -639,19 +644,20 @@ void mark_page_lazyfree(struct page *pag
|
||||
{
|
||||
if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) &&
|
||||
!PageSwapCache(page) && !PageUnevictable(page)) {
|
||||
- struct pagevec *pvec = &get_cpu_var(lru_lazyfree_pvecs);
|
||||
+ struct pagevec *pvec = &get_locked_var(swapvec_lock,
|
||||
+ lru_lazyfree_pvecs);
|
||||
|
||||
get_page(page);
|
||||
if (!pagevec_add(pvec, page) || PageCompound(page))
|
||||
pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL);
|
||||
- put_cpu_var(lru_lazyfree_pvecs);
|
||||
+ put_locked_var(swapvec_lock, lru_lazyfree_pvecs);
|
||||
}
|
||||
}
|
||||
|
||||
void lru_add_drain(void)
|
||||
{
|
||||
- lru_add_drain_cpu(get_cpu());
|
||||
- put_cpu();
|
||||
+ lru_add_drain_cpu(local_lock_cpu(swapvec_lock));
|
||||
+ local_unlock_cpu(swapvec_lock);
|
||||
}
|
||||
|
||||
static void lru_add_drain_per_cpu(struct work_struct *dummy)
|
||||
@@ -0,0 +1,102 @@
|
||||
From: Luiz Capitulino <lcapitulino@redhat.com>
|
||||
Date: Fri, 27 May 2016 15:03:28 +0200
|
||||
Subject: [PATCH] mm: perform lru_add_drain_all() remotely
|
||||
|
||||
lru_add_drain_all() works by scheduling lru_add_drain_cpu() to run
|
||||
on all CPUs that have non-empty LRU pagevecs and then waiting for
|
||||
the scheduled work to complete. However, workqueue threads may never
|
||||
have the chance to run on a CPU that's running a SCHED_FIFO task.
|
||||
This causes lru_add_drain_all() to block forever.
|
||||
|
||||
This commit solves this problem by changing lru_add_drain_all()
|
||||
to drain the LRU pagevecs of remote CPUs. This is done by grabbing
|
||||
swapvec_lock and calling lru_add_drain_cpu().
|
||||
|
||||
PS: This is based on an idea and initial implementation by
|
||||
Rik van Riel.
|
||||
|
||||
Signed-off-by: Rik van Riel <riel@redhat.com>
|
||||
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
mm/swap.c | 36 ++++++++++++++++++++++++++++++------
|
||||
1 file changed, 30 insertions(+), 6 deletions(-)
|
||||
|
||||
--- a/mm/swap.c
|
||||
+++ b/mm/swap.c
|
||||
@@ -590,9 +590,15 @@ void lru_add_drain_cpu(int cpu)
|
||||
unsigned long flags;
|
||||
|
||||
/* No harm done if a racing interrupt already did this */
|
||||
+#ifdef CONFIG_PREEMPT_RT_BASE
|
||||
+ local_lock_irqsave_on(rotate_lock, flags, cpu);
|
||||
+ pagevec_move_tail(pvec);
|
||||
+ local_unlock_irqrestore_on(rotate_lock, flags, cpu);
|
||||
+#else
|
||||
local_lock_irqsave(rotate_lock, flags);
|
||||
pagevec_move_tail(pvec);
|
||||
local_unlock_irqrestore(rotate_lock, flags);
|
||||
+#endif
|
||||
}
|
||||
|
||||
pvec = &per_cpu(lru_deactivate_file_pvecs, cpu);
|
||||
@@ -660,6 +666,16 @@ void lru_add_drain(void)
|
||||
local_unlock_cpu(swapvec_lock);
|
||||
}
|
||||
|
||||
+#ifdef CONFIG_PREEMPT_RT_BASE
|
||||
+static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work)
|
||||
+{
|
||||
+ local_lock_on(swapvec_lock, cpu);
|
||||
+ lru_add_drain_cpu(cpu);
|
||||
+ local_unlock_on(swapvec_lock, cpu);
|
||||
+}
|
||||
+
|
||||
+#else
|
||||
+
|
||||
static void lru_add_drain_per_cpu(struct work_struct *dummy)
|
||||
{
|
||||
lru_add_drain();
|
||||
@@ -667,6 +683,16 @@ static void lru_add_drain_per_cpu(struct
|
||||
|
||||
static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
|
||||
|
||||
+static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work)
|
||||
+{
|
||||
+ struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
|
||||
+
|
||||
+ INIT_WORK(work, lru_add_drain_per_cpu);
|
||||
+ queue_work_on(cpu, mm_percpu_wq, work);
|
||||
+ cpumask_set_cpu(cpu, has_work);
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
/*
|
||||
* Doesn't need any cpu hotplug locking because we do rely on per-cpu
|
||||
* kworkers being shut down before our page_alloc_cpu_dead callback is
|
||||
@@ -691,21 +717,19 @@ void lru_add_drain_all(void)
|
||||
cpumask_clear(&has_work);
|
||||
|
||||
for_each_online_cpu(cpu) {
|
||||
- struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
|
||||
|
||||
if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
|
||||
pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
|
||||
pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||
|
||||
pagevec_count(&per_cpu(lru_lazyfree_pvecs, cpu)) ||
|
||||
- need_activate_page_drain(cpu)) {
|
||||
- INIT_WORK(work, lru_add_drain_per_cpu);
|
||||
- queue_work_on(cpu, mm_percpu_wq, work);
|
||||
- cpumask_set_cpu(cpu, &has_work);
|
||||
- }
|
||||
+ need_activate_page_drain(cpu))
|
||||
+ remote_lru_add_drain(cpu, &has_work);
|
||||
}
|
||||
|
||||
+#ifndef CONFIG_PREEMPT_RT_BASE
|
||||
for_each_cpu(cpu, &has_work)
|
||||
flush_work(&per_cpu(lru_add_drain_work, cpu));
|
||||
+#endif
|
||||
|
||||
mutex_unlock(&lock);
|
||||
}
|
||||
136
kernel/patches-4.19.x-rt/0081-mm-make-vmstat-rt-aware.patch
Normal file
@@ -0,0 +1,136 @@
|
||||
From: Ingo Molnar <mingo@elte.hu>
|
||||
Date: Fri, 3 Jul 2009 08:30:13 -0500
|
||||
Subject: mm/vmstat: Protect per cpu variables with preempt disable on RT
|
||||
|
||||
Disable preemption on -RT for the vmstat code. On vanilla kernels the code runs in
|
||||
IRQ-off regions, while on -RT it does not. "preempt_disable" ensures that the
|
||||
same resources are not updated in parallel due to preemption.
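
For reference, preempt_disable_rt()/preempt_enable_rt() are helpers added
elsewhere in this series (not in this hunk); roughly, they expand to the
following, so the !RT fast path of the vmstat counters is unchanged:

/* Sketch of the helpers assumed by this patch; the !RT stubs are no-ops
 * apart from acting as compiler barriers. */
#ifdef CONFIG_PREEMPT_RT_BASE
# define preempt_disable_rt()		preempt_disable()
# define preempt_enable_rt()		preempt_enable()
#else
# define preempt_disable_rt()		barrier()
# define preempt_enable_rt()		barrier()
#endif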
|
||||
|
||||
Signed-off-by: Ingo Molnar <mingo@elte.hu>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
|
||||
---
|
||||
include/linux/vmstat.h | 4 ++++
|
||||
mm/vmstat.c | 12 ++++++++++++
|
||||
2 files changed, 16 insertions(+)
|
||||
|
||||
--- a/include/linux/vmstat.h
|
||||
+++ b/include/linux/vmstat.h
|
||||
@@ -54,7 +54,9 @@ DECLARE_PER_CPU(struct vm_event_state, v
|
||||
*/
|
||||
static inline void __count_vm_event(enum vm_event_item item)
|
||||
{
|
||||
+ preempt_disable_rt();
|
||||
raw_cpu_inc(vm_event_states.event[item]);
|
||||
+ preempt_enable_rt();
|
||||
}
|
||||
|
||||
static inline void count_vm_event(enum vm_event_item item)
|
||||
@@ -64,7 +66,9 @@ static inline void count_vm_event(enum v
|
||||
|
||||
static inline void __count_vm_events(enum vm_event_item item, long delta)
|
||||
{
|
||||
+ preempt_disable_rt();
|
||||
raw_cpu_add(vm_event_states.event[item], delta);
|
||||
+ preempt_enable_rt();
|
||||
}
|
||||
|
||||
static inline void count_vm_events(enum vm_event_item item, long delta)
|
||||
--- a/mm/vmstat.c
|
||||
+++ b/mm/vmstat.c
|
||||
@@ -320,6 +320,7 @@ void __mod_zone_page_state(struct zone *
|
||||
long x;
|
||||
long t;
|
||||
|
||||
+ preempt_disable_rt();
|
||||
x = delta + __this_cpu_read(*p);
|
||||
|
||||
t = __this_cpu_read(pcp->stat_threshold);
|
||||
@@ -329,6 +330,7 @@ void __mod_zone_page_state(struct zone *
|
||||
x = 0;
|
||||
}
|
||||
__this_cpu_write(*p, x);
|
||||
+ preempt_enable_rt();
|
||||
}
|
||||
EXPORT_SYMBOL(__mod_zone_page_state);
|
||||
|
||||
@@ -340,6 +342,7 @@ void __mod_node_page_state(struct pglist
|
||||
long x;
|
||||
long t;
|
||||
|
||||
+ preempt_disable_rt();
|
||||
x = delta + __this_cpu_read(*p);
|
||||
|
||||
t = __this_cpu_read(pcp->stat_threshold);
|
||||
@@ -349,6 +352,7 @@ void __mod_node_page_state(struct pglist
|
||||
x = 0;
|
||||
}
|
||||
__this_cpu_write(*p, x);
|
||||
+ preempt_enable_rt();
|
||||
}
|
||||
EXPORT_SYMBOL(__mod_node_page_state);
|
||||
|
||||
@@ -381,6 +385,7 @@ void __inc_zone_state(struct zone *zone,
|
||||
s8 __percpu *p = pcp->vm_stat_diff + item;
|
||||
s8 v, t;
|
||||
|
||||
+ preempt_disable_rt();
|
||||
v = __this_cpu_inc_return(*p);
|
||||
t = __this_cpu_read(pcp->stat_threshold);
|
||||
if (unlikely(v > t)) {
|
||||
@@ -389,6 +394,7 @@ void __inc_zone_state(struct zone *zone,
|
||||
zone_page_state_add(v + overstep, zone, item);
|
||||
__this_cpu_write(*p, -overstep);
|
||||
}
|
||||
+ preempt_enable_rt();
|
||||
}
|
||||
|
||||
void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
|
||||
@@ -397,6 +403,7 @@ void __inc_node_state(struct pglist_data
|
||||
s8 __percpu *p = pcp->vm_node_stat_diff + item;
|
||||
s8 v, t;
|
||||
|
||||
+ preempt_disable_rt();
|
||||
v = __this_cpu_inc_return(*p);
|
||||
t = __this_cpu_read(pcp->stat_threshold);
|
||||
if (unlikely(v > t)) {
|
||||
@@ -405,6 +412,7 @@ void __inc_node_state(struct pglist_data
|
||||
node_page_state_add(v + overstep, pgdat, item);
|
||||
__this_cpu_write(*p, -overstep);
|
||||
}
|
||||
+ preempt_enable_rt();
|
||||
}
|
||||
|
||||
void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
|
||||
@@ -425,6 +433,7 @@ void __dec_zone_state(struct zone *zone,
|
||||
s8 __percpu *p = pcp->vm_stat_diff + item;
|
||||
s8 v, t;
|
||||
|
||||
+ preempt_disable_rt();
|
||||
v = __this_cpu_dec_return(*p);
|
||||
t = __this_cpu_read(pcp->stat_threshold);
|
||||
if (unlikely(v < - t)) {
|
||||
@@ -433,6 +442,7 @@ void __dec_zone_state(struct zone *zone,
|
||||
zone_page_state_add(v - overstep, zone, item);
|
||||
__this_cpu_write(*p, overstep);
|
||||
}
|
||||
+ preempt_enable_rt();
|
||||
}
|
||||
|
||||
void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
|
||||
@@ -441,6 +451,7 @@ void __dec_node_state(struct pglist_data
|
||||
s8 __percpu *p = pcp->vm_node_stat_diff + item;
|
||||
s8 v, t;
|
||||
|
||||
+ preempt_disable_rt();
|
||||
v = __this_cpu_dec_return(*p);
|
||||
t = __this_cpu_read(pcp->stat_threshold);
|
||||
if (unlikely(v < - t)) {
|
||||
@@ -449,6 +460,7 @@ void __dec_node_state(struct pglist_data
|
||||
node_page_state_add(v - overstep, pgdat, item);
|
||||
__this_cpu_write(*p, overstep);
|
||||
}
|
||||
+ preempt_enable_rt();
|
||||
}
|
||||
|
||||
void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
|
||||
@@ -0,0 +1,68 @@
|
||||
Subject: ARM: Initialize split page table locks for vector page
|
||||
From: Frank Rowand <frank.rowand@am.sony.com>
|
||||
Date: Sat, 1 Oct 2011 18:58:13 -0700
|
||||
|
||||
Without this patch, ARM cannot use SPLIT_PTLOCK_CPUS if
|
||||
PREEMPT_RT_FULL=y because vectors_user_mapping() creates a
|
||||
VM_ALWAYSDUMP mapping of the vector page (address 0xffff0000), but no
|
||||
ptl->lock has been allocated for the page. An attempt to coredump
|
||||
that page will result in a kernel NULL pointer dereference when
|
||||
follow_page() attempts to lock the page.
|
||||
|
||||
The call tree to the NULL pointer dereference is:
|
||||
|
||||
do_notify_resume()
|
||||
get_signal_to_deliver()
|
||||
do_coredump()
|
||||
elf_core_dump()
|
||||
get_dump_page()
|
||||
__get_user_pages()
|
||||
follow_page()
|
||||
pte_offset_map_lock() <----- a #define
|
||||
...
|
||||
rt_spin_lock()
|
||||
|
||||
The underlying problem is exposed by mm-shrink-the-page-frame-to-rt-size.patch.
|
||||
|
||||
Signed-off-by: Frank Rowand <frank.rowand@am.sony.com>
|
||||
Cc: Frank <Frank_Rowand@sonyusa.com>
|
||||
Cc: Peter Zijlstra <peterz@infradead.org>
|
||||
Link: http://lkml.kernel.org/r/4E87C535.2030907@am.sony.com
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
arch/arm/kernel/process.c | 24 ++++++++++++++++++++++++
|
||||
1 file changed, 24 insertions(+)
|
||||
|
||||
--- a/arch/arm/kernel/process.c
|
||||
+++ b/arch/arm/kernel/process.c
|
||||
@@ -324,6 +324,30 @@ unsigned long arch_randomize_brk(struct
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
+/*
|
||||
+ * CONFIG_SPLIT_PTLOCK_CPUS results in a page->ptl lock. If the lock is not
|
||||
+ * initialized by pgtable_page_ctor() then a coredump of the vector page will
|
||||
+ * fail.
|
||||
+ */
|
||||
+static int __init vectors_user_mapping_init_page(void)
|
||||
+{
|
||||
+ struct page *page;
|
||||
+ unsigned long addr = 0xffff0000;
|
||||
+ pgd_t *pgd;
|
||||
+ pud_t *pud;
|
||||
+ pmd_t *pmd;
|
||||
+
|
||||
+ pgd = pgd_offset_k(addr);
|
||||
+ pud = pud_offset(pgd, addr);
|
||||
+ pmd = pmd_offset(pud, addr);
|
||||
+ page = pmd_page(*(pmd));
|
||||
+
|
||||
+ pgtable_page_ctor(page);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+late_initcall(vectors_user_mapping_init_page);
|
||||
+
|
||||
#ifdef CONFIG_KUSER_HELPERS
|
||||
/*
|
||||
* The vectors page is always readable from user space for the
|
||||
35
kernel/patches-4.19.x-rt/0083-mm-enable-slub.patch
Normal file
@@ -0,0 +1,35 @@
Subject: mm: Enable SLUB for RT
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 25 Oct 2012 10:32:35 +0100

Avoid the memory allocation in IRQ section

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
[bigeasy: factor out everything except the kcalloc() workaorund ]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
mm/slub.c | 6 ++++++
1 file changed, 6 insertions(+)

--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3677,6 +3677,11 @@ static void list_slab_objects(struct kme
const char *text)
{
#ifdef CONFIG_SLUB_DEBUG
+#ifdef CONFIG_PREEMPT_RT_BASE
+ /* XXX move out of irq-off section */
+ slab_err(s, page, text, s->name);
+#else
+
void *addr = page_address(page);
void *p;
unsigned long *map = kcalloc(BITS_TO_LONGS(page->objects),
@@ -3698,6 +3703,7 @@ static void list_slab_objects(struct kme
slab_unlock(page);
kfree(map);
#endif
+#endif
}

/*
@@ -0,0 +1,41 @@
|
||||
Subject: slub: Enable irqs for __GFP_WAIT
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Wed, 09 Jan 2013 12:08:15 +0100
|
||||
|
||||
SYSTEM_RUNNING might be too late for enabling interrupts. Allocations
|
||||
with GFP_WAIT can happen before that. So use this as an indicator.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
mm/slub.c | 9 ++++++++-
|
||||
1 file changed, 8 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/mm/slub.c
|
||||
+++ b/mm/slub.c
|
||||
@@ -1570,10 +1570,17 @@ static struct page *allocate_slab(struct
|
||||
void *start, *p;
|
||||
int idx, order;
|
||||
bool shuffle;
|
||||
+ bool enableirqs = false;
|
||||
|
||||
flags &= gfp_allowed_mask;
|
||||
|
||||
if (gfpflags_allow_blocking(flags))
|
||||
+ enableirqs = true;
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+ if (system_state > SYSTEM_BOOTING)
|
||||
+ enableirqs = true;
|
||||
+#endif
|
||||
+ if (enableirqs)
|
||||
local_irq_enable();
|
||||
|
||||
flags |= s->allocflags;
|
||||
@@ -1632,7 +1639,7 @@ static struct page *allocate_slab(struct
|
||||
page->frozen = 1;
|
||||
|
||||
out:
|
||||
- if (gfpflags_allow_blocking(flags))
|
||||
+ if (enableirqs)
|
||||
local_irq_disable();
|
||||
if (!page)
|
||||
return NULL;
|
||||
@@ -0,0 +1,47 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Wed, 15 Apr 2015 19:00:47 +0200
|
||||
Subject: slub: Disable SLUB_CPU_PARTIAL
|
||||
|
||||
|BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:915
|
||||
|in_atomic(): 1, irqs_disabled(): 0, pid: 87, name: rcuop/7
|
||||
|1 lock held by rcuop/7/87:
|
||||
| #0: (rcu_callback){......}, at: [<ffffffff8112c76a>] rcu_nocb_kthread+0x1ca/0x5d0
|
||||
|Preemption disabled at:[<ffffffff811eebd9>] put_cpu_partial+0x29/0x220
|
||||
|
|
||||
|CPU: 0 PID: 87 Comm: rcuop/7 Tainted: G W 4.0.0-rt0+ #477
|
||||
|Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014
|
||||
| 000000000007a9fc ffff88013987baf8 ffffffff817441c7 0000000000000007
|
||||
| 0000000000000000 ffff88013987bb18 ffffffff810eee51 0000000000000000
|
||||
| ffff88013fc10200 ffff88013987bb48 ffffffff8174a1c4 000000000007a9fc
|
||||
|Call Trace:
|
||||
| [<ffffffff817441c7>] dump_stack+0x4f/0x90
|
||||
| [<ffffffff810eee51>] ___might_sleep+0x121/0x1b0
|
||||
| [<ffffffff8174a1c4>] rt_spin_lock+0x24/0x60
|
||||
| [<ffffffff811a689a>] __free_pages_ok+0xaa/0x540
|
||||
| [<ffffffff811a729d>] __free_pages+0x1d/0x30
|
||||
| [<ffffffff811eddd5>] __free_slab+0xc5/0x1e0
|
||||
| [<ffffffff811edf46>] free_delayed+0x56/0x70
|
||||
| [<ffffffff811eecfd>] put_cpu_partial+0x14d/0x220
|
||||
| [<ffffffff811efc98>] __slab_free+0x158/0x2c0
|
||||
| [<ffffffff811f0021>] kmem_cache_free+0x221/0x2d0
|
||||
| [<ffffffff81204d0c>] file_free_rcu+0x2c/0x40
|
||||
| [<ffffffff8112c7e3>] rcu_nocb_kthread+0x243/0x5d0
|
||||
| [<ffffffff810e951c>] kthread+0xfc/0x120
|
||||
| [<ffffffff8174abc8>] ret_from_fork+0x58/0x90
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
init/Kconfig | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -1698,7 +1698,7 @@ config SLAB_FREELIST_HARDENED
|
||||
|
||||
config SLUB_CPU_PARTIAL
|
||||
default y
|
||||
- depends on SLUB && SMP
|
||||
+ depends on SLUB && SMP && !PREEMPT_RT_FULL
|
||||
bool "SLUB per cpu partial cache"
|
||||
help
|
||||
Per cpu partial caches accellerate objects allocation and freeing
|
||||
@@ -0,0 +1,68 @@
|
||||
From: Yang Shi <yang.shi@windriver.com>
|
||||
Subject: mm/memcontrol: Don't call schedule_work_on in preemption disabled context
|
||||
Date: Wed, 30 Oct 2013 11:48:33 -0700
|
||||
|
||||
The following trace is triggered when running ltp oom test cases:
|
||||
|
||||
BUG: sleeping function called from invalid context at kernel/rtmutex.c:659
|
||||
in_atomic(): 1, irqs_disabled(): 0, pid: 17188, name: oom03
|
||||
Preemption disabled at:[<ffffffff8112ba70>] mem_cgroup_reclaim+0x90/0xe0
|
||||
|
||||
CPU: 2 PID: 17188 Comm: oom03 Not tainted 3.10.10-rt3 #2
|
||||
Hardware name: Intel Corporation Calpella platform/MATXM-CORE-411-B, BIOS 4.6.3 08/18/2010
|
||||
ffff88007684d730 ffff880070df9b58 ffffffff8169918d ffff880070df9b70
|
||||
ffffffff8106db31 ffff88007688b4a0 ffff880070df9b88 ffffffff8169d9c0
|
||||
ffff88007688b4a0 ffff880070df9bc8 ffffffff81059da1 0000000170df9bb0
|
||||
Call Trace:
|
||||
[<ffffffff8169918d>] dump_stack+0x19/0x1b
|
||||
[<ffffffff8106db31>] __might_sleep+0xf1/0x170
|
||||
[<ffffffff8169d9c0>] rt_spin_lock+0x20/0x50
|
||||
[<ffffffff81059da1>] queue_work_on+0x61/0x100
|
||||
[<ffffffff8112b361>] drain_all_stock+0xe1/0x1c0
|
||||
[<ffffffff8112ba70>] mem_cgroup_reclaim+0x90/0xe0
|
||||
[<ffffffff8112beda>] __mem_cgroup_try_charge+0x41a/0xc40
|
||||
[<ffffffff810f1c91>] ? release_pages+0x1b1/0x1f0
|
||||
[<ffffffff8106f200>] ? sched_exec+0x40/0xb0
|
||||
[<ffffffff8112cc87>] mem_cgroup_charge_common+0x37/0x70
|
||||
[<ffffffff8112e2c6>] mem_cgroup_newpage_charge+0x26/0x30
|
||||
[<ffffffff8110af68>] handle_pte_fault+0x618/0x840
|
||||
[<ffffffff8103ecf6>] ? unpin_current_cpu+0x16/0x70
|
||||
[<ffffffff81070f94>] ? migrate_enable+0xd4/0x200
|
||||
[<ffffffff8110cde5>] handle_mm_fault+0x145/0x1e0
|
||||
[<ffffffff810301e1>] __do_page_fault+0x1a1/0x4c0
|
||||
[<ffffffff8169c9eb>] ? preempt_schedule_irq+0x4b/0x70
|
||||
[<ffffffff8169e3b7>] ? retint_kernel+0x37/0x40
|
||||
[<ffffffff8103053e>] do_page_fault+0xe/0x10
|
||||
[<ffffffff8169e4c2>] page_fault+0x22/0x30
|
||||
|
||||
So, to prevent schedule_work_on from being called in preempt disabled context,
|
||||
replace the pair of get/put_cpu() to get/put_cpu_light().
|
||||
|
||||
|
||||
Signed-off-by: Yang Shi <yang.shi@windriver.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
|
||||
mm/memcontrol.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/mm/memcontrol.c
|
||||
+++ b/mm/memcontrol.c
|
||||
@@ -2052,7 +2052,7 @@ static void drain_all_stock(struct mem_c
|
||||
* as well as workers from this path always operate on the local
|
||||
* per-cpu data. CPU up doesn't touch memcg_stock at all.
|
||||
*/
|
||||
- curcpu = get_cpu();
|
||||
+ curcpu = get_cpu_light();
|
||||
for_each_online_cpu(cpu) {
|
||||
struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
|
||||
struct mem_cgroup *memcg;
|
||||
@@ -2072,7 +2072,7 @@ static void drain_all_stock(struct mem_c
|
||||
}
|
||||
css_put(&memcg->css);
|
||||
}
|
||||
- put_cpu();
|
||||
+ put_cpu_light();
|
||||
mutex_unlock(&percpu_charge_mutex);
|
||||
}
|
||||
|
||||
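get_cpu_light()/put_cpu_light() are helpers from the RT series (they pin the task to the current CPU via migrate_disable() instead of disabling preemption), so sleeping locks such as the one inside queue_work_on() stay legal. A hedged sketch of the same pattern outside of memcg; the function and the per-CPU work array are illustrative:

    #include <linux/workqueue.h>
    #include <linux/cpumask.h>
    #include <linux/percpu.h>

    static void drain_all_example(struct work_struct __percpu *works)
    {
            int cpu, curcpu;

            curcpu = get_cpu_light();       /* stay on this CPU, stay preemptible */
            for_each_online_cpu(cpu) {
                    if (cpu == curcpu)
                            continue;       /* the local CPU is handled directly */
                    /* schedule_work_on() may take sleeping locks on RT, which is
                     * fine here because preemption was never disabled */
                    schedule_work_on(cpu, per_cpu_ptr(works, cpu));
            }
            put_cpu_light();
    }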
@@ -0,0 +1,116 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Subject: mm/memcontrol: Replace local_irq_disable with local locks
|
||||
Date: Wed, 28 Jan 2015 17:14:16 +0100
|
||||
|
||||
There are a few local_irq_disable() which then take sleeping locks. This
|
||||
patch converts them local locks.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
mm/memcontrol.c | 24 ++++++++++++++++--------
|
||||
1 file changed, 16 insertions(+), 8 deletions(-)
|
||||
|
||||
--- a/mm/memcontrol.c
|
||||
+++ b/mm/memcontrol.c
|
||||
@@ -69,6 +69,7 @@
|
||||
#include <net/sock.h>
|
||||
#include <net/ip.h>
|
||||
#include "slab.h"
|
||||
+#include <linux/locallock.h>
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
@@ -94,6 +95,8 @@ int do_swap_account __read_mostly;
|
||||
#define do_swap_account 0
|
||||
#endif
|
||||
|
||||
+static DEFINE_LOCAL_IRQ_LOCK(event_lock);
|
||||
+
|
||||
/* Whether legacy memory+swap accounting is active */
|
||||
static bool do_memsw_account(void)
|
||||
{
|
||||
@@ -4859,12 +4862,12 @@ static int mem_cgroup_move_account(struc
|
||||
|
||||
ret = 0;
|
||||
|
||||
- local_irq_disable();
|
||||
+ local_lock_irq(event_lock);
|
||||
mem_cgroup_charge_statistics(to, page, compound, nr_pages);
|
||||
memcg_check_events(to, page);
|
||||
mem_cgroup_charge_statistics(from, page, compound, -nr_pages);
|
||||
memcg_check_events(from, page);
|
||||
- local_irq_enable();
|
||||
+ local_unlock_irq(event_lock);
|
||||
out_unlock:
|
||||
unlock_page(page);
|
||||
out:
|
||||
@@ -5983,10 +5986,10 @@ void mem_cgroup_commit_charge(struct pag
|
||||
|
||||
commit_charge(page, memcg, lrucare);
|
||||
|
||||
- local_irq_disable();
|
||||
+ local_lock_irq(event_lock);
|
||||
mem_cgroup_charge_statistics(memcg, page, compound, nr_pages);
|
||||
memcg_check_events(memcg, page);
|
||||
- local_irq_enable();
|
||||
+ local_unlock_irq(event_lock);
|
||||
|
||||
if (do_memsw_account() && PageSwapCache(page)) {
|
||||
swp_entry_t entry = { .val = page_private(page) };
|
||||
@@ -6055,7 +6058,7 @@ static void uncharge_batch(const struct
|
||||
memcg_oom_recover(ug->memcg);
|
||||
}
|
||||
|
||||
- local_irq_save(flags);
|
||||
+ local_lock_irqsave(event_lock, flags);
|
||||
__mod_memcg_state(ug->memcg, MEMCG_RSS, -ug->nr_anon);
|
||||
__mod_memcg_state(ug->memcg, MEMCG_CACHE, -ug->nr_file);
|
||||
__mod_memcg_state(ug->memcg, MEMCG_RSS_HUGE, -ug->nr_huge);
|
||||
@@ -6063,7 +6066,7 @@ static void uncharge_batch(const struct
|
||||
__count_memcg_events(ug->memcg, PGPGOUT, ug->pgpgout);
|
||||
__this_cpu_add(ug->memcg->stat_cpu->nr_page_events, nr_pages);
|
||||
memcg_check_events(ug->memcg, ug->dummy_page);
|
||||
- local_irq_restore(flags);
|
||||
+ local_unlock_irqrestore(event_lock, flags);
|
||||
|
||||
if (!mem_cgroup_is_root(ug->memcg))
|
||||
css_put_many(&ug->memcg->css, nr_pages);
|
||||
@@ -6226,10 +6229,10 @@ void mem_cgroup_migrate(struct page *old
|
||||
|
||||
commit_charge(newpage, memcg, false);
|
||||
|
||||
- local_irq_save(flags);
|
||||
+ local_lock_irqsave(event_lock, flags);
|
||||
mem_cgroup_charge_statistics(memcg, newpage, compound, nr_pages);
|
||||
memcg_check_events(memcg, newpage);
|
||||
- local_irq_restore(flags);
|
||||
+ local_unlock_irqrestore(event_lock, flags);
|
||||
}
|
||||
|
||||
DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key);
|
||||
@@ -6421,6 +6424,7 @@ void mem_cgroup_swapout(struct page *pag
|
||||
struct mem_cgroup *memcg, *swap_memcg;
|
||||
unsigned int nr_entries;
|
||||
unsigned short oldid;
|
||||
+ unsigned long flags;
|
||||
|
||||
VM_BUG_ON_PAGE(PageLRU(page), page);
|
||||
VM_BUG_ON_PAGE(page_count(page), page);
|
||||
@@ -6466,13 +6470,17 @@ void mem_cgroup_swapout(struct page *pag
|
||||
* important here to have the interrupts disabled because it is the
|
||||
* only synchronisation we have for updating the per-CPU variables.
|
||||
*/
|
||||
+ local_lock_irqsave(event_lock, flags);
|
||||
+#ifndef CONFIG_PREEMPT_RT_BASE
|
||||
VM_BUG_ON(!irqs_disabled());
|
||||
+#endif
|
||||
mem_cgroup_charge_statistics(memcg, page, PageTransHuge(page),
|
||||
-nr_entries);
|
||||
memcg_check_events(memcg, page);
|
||||
|
||||
if (!mem_cgroup_is_root(memcg))
|
||||
css_put_many(&memcg->css, nr_entries);
|
||||
+ local_unlock_irqrestore(event_lock, flags);
|
||||
}
|
||||
|
||||
/**
|
||||
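DEFINE_LOCAL_IRQ_LOCK() and local_lock_irqsave() come from the RT-only <linux/locallock.h> added by this series: on a non-RT build they degrade to local_irq_save()/restore(), on RT they take a per-CPU sleeping lock, which keeps the per-CPU statistics consistent without disabling hard interrupts. A minimal usage sketch with an illustrative per-CPU counter standing in for the memcg statistics:

    #include <linux/locallock.h>    /* RT series only */
    #include <linux/percpu.h>

    static DEFINE_LOCAL_IRQ_LOCK(event_lock_example);
    static DEFINE_PER_CPU(unsigned long, nr_events);

    static void count_event(void)
    {
            unsigned long flags;

            /* !RT: local_irq_save(flags); RT: per-CPU lock, still preemptible */
            local_lock_irqsave(event_lock_example, flags);
            __this_cpu_inc(nr_events);      /* stands in for memcg_check_events() */
            local_unlock_irqrestore(event_lock_example, flags);
    }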
@@ -0,0 +1,196 @@
|
||||
From: Mike Galbraith <umgwanakikbuti@gmail.com>
|
||||
Date: Tue, 22 Mar 2016 11:16:09 +0100
|
||||
Subject: [PATCH] mm/zsmalloc: copy with get_cpu_var() and locking
|
||||
|
||||
get_cpu_var() disables preemption and triggers a might_sleep() splat later.
|
||||
This is replaced with get_locked_var().
|
||||
This bitspinlocks are replaced with a proper mutex which requires a slightly
|
||||
larger struct to allocate.
|
||||
|
||||
Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
|
||||
[bigeasy: replace the bitspin_lock() with a mutex, get_locked_var(). Mike then
|
||||
fixed the size magic]
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
mm/zsmalloc.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++-----
|
||||
1 file changed, 74 insertions(+), 6 deletions(-)
|
||||
|
||||
--- a/mm/zsmalloc.c
|
||||
+++ b/mm/zsmalloc.c
|
||||
@@ -55,6 +55,7 @@
|
||||
#include <linux/migrate.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/fs.h>
|
||||
+#include <linux/locallock.h>
|
||||
|
||||
#define ZSPAGE_MAGIC 0x58
|
||||
|
||||
@@ -72,9 +73,22 @@
|
||||
*/
|
||||
#define ZS_MAX_ZSPAGE_ORDER 2
|
||||
#define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER)
|
||||
-
|
||||
#define ZS_HANDLE_SIZE (sizeof(unsigned long))
|
||||
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+
|
||||
+struct zsmalloc_handle {
|
||||
+ unsigned long addr;
|
||||
+ struct mutex lock;
|
||||
+};
|
||||
+
|
||||
+#define ZS_HANDLE_ALLOC_SIZE (sizeof(struct zsmalloc_handle))
|
||||
+
|
||||
+#else
|
||||
+
|
||||
+#define ZS_HANDLE_ALLOC_SIZE (sizeof(unsigned long))
|
||||
+#endif
|
||||
+
|
||||
/*
|
||||
* Object location (<PFN>, <obj_idx>) is encoded as
|
||||
* as single (unsigned long) handle value.
|
||||
@@ -320,7 +334,7 @@ static void SetZsPageMovable(struct zs_p
|
||||
|
||||
static int create_cache(struct zs_pool *pool)
|
||||
{
|
||||
- pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE,
|
||||
+ pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_ALLOC_SIZE,
|
||||
0, 0, NULL);
|
||||
if (!pool->handle_cachep)
|
||||
return 1;
|
||||
@@ -344,10 +358,27 @@ static void destroy_cache(struct zs_pool
|
||||
|
||||
static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
|
||||
{
|
||||
- return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
|
||||
- gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
|
||||
+ void *p;
|
||||
+
|
||||
+ p = kmem_cache_alloc(pool->handle_cachep,
|
||||
+ gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+ if (p) {
|
||||
+ struct zsmalloc_handle *zh = p;
|
||||
+
|
||||
+ mutex_init(&zh->lock);
|
||||
+ }
|
||||
+#endif
|
||||
+ return (unsigned long)p;
|
||||
}
|
||||
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+static struct zsmalloc_handle *zs_get_pure_handle(unsigned long handle)
|
||||
+{
|
||||
+ return (void *)(handle &~((1 << OBJ_TAG_BITS) - 1));
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
|
||||
{
|
||||
kmem_cache_free(pool->handle_cachep, (void *)handle);
|
||||
@@ -366,12 +397,18 @@ static void cache_free_zspage(struct zs_
|
||||
|
||||
static void record_obj(unsigned long handle, unsigned long obj)
|
||||
{
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+ struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
|
||||
+
|
||||
+ WRITE_ONCE(zh->addr, obj);
|
||||
+#else
|
||||
/*
|
||||
* lsb of @obj represents handle lock while other bits
|
||||
* represent object value the handle is pointing so
|
||||
* updating shouldn't do store tearing.
|
||||
*/
|
||||
WRITE_ONCE(*(unsigned long *)handle, obj);
|
||||
+#endif
|
||||
}
|
||||
|
||||
/* zpool driver */
|
||||
@@ -453,6 +490,7 @@ MODULE_ALIAS("zpool-zsmalloc");
|
||||
|
||||
/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
|
||||
static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
|
||||
+static DEFINE_LOCAL_IRQ_LOCK(zs_map_area_lock);
|
||||
|
||||
static bool is_zspage_isolated(struct zspage *zspage)
|
||||
{
|
||||
@@ -882,7 +920,13 @@ static unsigned long location_to_obj(str
|
||||
|
||||
static unsigned long handle_to_obj(unsigned long handle)
|
||||
{
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+ struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
|
||||
+
|
||||
+ return zh->addr;
|
||||
+#else
|
||||
return *(unsigned long *)handle;
|
||||
+#endif
|
||||
}
|
||||
|
||||
static unsigned long obj_to_head(struct page *page, void *obj)
|
||||
@@ -896,22 +940,46 @@ static unsigned long obj_to_head(struct
|
||||
|
||||
static inline int testpin_tag(unsigned long handle)
|
||||
{
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+ struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
|
||||
+
|
||||
+ return mutex_is_locked(&zh->lock);
|
||||
+#else
|
||||
return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle);
|
||||
+#endif
|
||||
}
|
||||
|
||||
static inline int trypin_tag(unsigned long handle)
|
||||
{
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+ struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
|
||||
+
|
||||
+ return mutex_trylock(&zh->lock);
|
||||
+#else
|
||||
return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle);
|
||||
+#endif
|
||||
}
|
||||
|
||||
static void pin_tag(unsigned long handle)
|
||||
{
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+ struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
|
||||
+
|
||||
+ return mutex_lock(&zh->lock);
|
||||
+#else
|
||||
bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle);
|
||||
+#endif
|
||||
}
|
||||
|
||||
static void unpin_tag(unsigned long handle)
|
||||
{
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+ struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
|
||||
+
|
||||
+ return mutex_unlock(&zh->lock);
|
||||
+#else
|
||||
bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle);
|
||||
+#endif
|
||||
}
|
||||
|
||||
static void reset_page(struct page *page)
|
||||
@@ -1337,7 +1405,7 @@ void *zs_map_object(struct zs_pool *pool
|
||||
class = pool->size_class[class_idx];
|
||||
off = (class->size * obj_idx) & ~PAGE_MASK;
|
||||
|
||||
- area = &get_cpu_var(zs_map_area);
|
||||
+ area = &get_locked_var(zs_map_area_lock, zs_map_area);
|
||||
area->vm_mm = mm;
|
||||
if (off + class->size <= PAGE_SIZE) {
|
||||
/* this object is contained entirely within a page */
|
||||
@@ -1391,7 +1459,7 @@ void zs_unmap_object(struct zs_pool *poo
|
||||
|
||||
__zs_unmap_object(area, pages, off, class->size);
|
||||
}
|
||||
- put_cpu_var(zs_map_area);
|
||||
+ put_locked_var(zs_map_area_lock, zs_map_area);
|
||||
|
||||
migrate_read_unlock(zspage);
|
||||
unpin_tag(handle);
|
||||
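On RT a zsmalloc handle is therefore no longer a bare unsigned long pinned by a bit-spinlock; it becomes a small heap object that carries its own mutex. Illustrative types and pin helpers that mirror, rather than copy, the hunks above:

    #include <linux/mutex.h>

    struct zsmalloc_handle_example {
            unsigned long addr;     /* object location; previously the whole handle */
            struct mutex lock;      /* replaces the HANDLE_PIN_BIT bit-spinlock */
    };

    static void pin_handle(struct zsmalloc_handle_example *zh)
    {
            mutex_lock(&zh->lock);  /* may sleep under contention, which RT allows */
    }

    static void unpin_handle(struct zsmalloc_handle_example *zh)
    {
            mutex_unlock(&zh->lock);
    }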
@@ -0,0 +1,54 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Tue, 11 Dec 2018 21:53:43 +0100
|
||||
Subject: [PATCH] x86/mm/pat: disable preemption __split_large_page() after
|
||||
spin_lock()
|
||||
|
||||
Commit "x86/mm/pat: Disable preemption around __flush_tlb_all()" added a
|
||||
warning if __flush_tlb_all() is invoked in preemptible context. On !RT
|
||||
the warning does not trigger because a spin lock is acquired which
|
||||
disables preemption. On RT the spin lock does not disable preemption and
|
||||
so the warning is seen.
|
||||
|
||||
Disable preemption to avoid the warning __flush_tlb_all().
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
arch/x86/mm/pageattr.c | 8 ++++++++
|
||||
1 file changed, 8 insertions(+)
|
||||
|
||||
--- a/arch/x86/mm/pageattr.c
|
||||
+++ b/arch/x86/mm/pageattr.c
|
||||
@@ -688,11 +688,17 @@ static int
|
||||
|
||||
spin_lock(&pgd_lock);
|
||||
/*
|
||||
+ * Keep preemption disabled after __flush_tlb_all() which expects not be
|
||||
+ * preempted during the flush of the local TLB.
|
||||
+ */
|
||||
+ preempt_disable();
|
||||
+ /*
|
||||
* Check for races, another CPU might have split this page
|
||||
* up for us already:
|
||||
*/
|
||||
tmp = _lookup_address_cpa(cpa, address, &level);
|
||||
if (tmp != kpte) {
|
||||
+ preempt_enable();
|
||||
spin_unlock(&pgd_lock);
|
||||
return 1;
|
||||
}
|
||||
@@ -726,6 +732,7 @@ static int
|
||||
break;
|
||||
|
||||
default:
|
||||
+ preempt_enable();
|
||||
spin_unlock(&pgd_lock);
|
||||
return 1;
|
||||
}
|
||||
@@ -764,6 +771,7 @@ static int
|
||||
* going on.
|
||||
*/
|
||||
__flush_tlb_all();
|
||||
+ preempt_enable();
|
||||
spin_unlock(&pgd_lock);
|
||||
|
||||
return 0;
|
||||
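The shape of the resulting critical section, reduced to a sketch (the lock and function names here are placeholders; the real code is __split_large_page() under pgd_lock):

    #include <linux/spinlock.h>
    #include <linux/preempt.h>
    #include <asm/tlbflush.h>

    static DEFINE_SPINLOCK(example_pgd_lock);

    static void split_example(void)
    {
            spin_lock(&example_pgd_lock);   /* sleeping lock on RT: preemption stays on */
            preempt_disable();              /* give __flush_tlb_all() the
                                             * non-preemptible context it asserts */
            /* ... rewrite the large mapping ... */
            __flush_tlb_all();
            preempt_enable();
            spin_unlock(&example_pgd_lock);
    }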
165
kernel/patches-4.19.x-rt/0090-radix-tree-use-local-locks.patch
Normal file
@@ -0,0 +1,165 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Wed, 25 Jan 2017 16:34:27 +0100
|
||||
Subject: [PATCH] radix-tree: use local locks
|
||||
|
||||
The preload functionality uses per-CPU variables and preempt-disable to
|
||||
ensure that it does not switch CPUs during its usage. This patch adds
|
||||
local_locks() instead preempt_disable() for the same purpose and to
|
||||
remain preemptible on -RT.
|
||||
|
||||
Cc: stable-rt@vger.kernel.org
|
||||
Reported-and-debugged-by: Mike Galbraith <efault@gmx.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/idr.h | 5 +----
|
||||
include/linux/radix-tree.h | 7 ++-----
|
||||
lib/radix-tree.c | 32 +++++++++++++++++++++++---------
|
||||
3 files changed, 26 insertions(+), 18 deletions(-)
|
||||
|
||||
--- a/include/linux/idr.h
|
||||
+++ b/include/linux/idr.h
|
||||
@@ -169,10 +169,7 @@ static inline bool idr_is_empty(const st
|
||||
* Each idr_preload() should be matched with an invocation of this
|
||||
* function. See idr_preload() for details.
|
||||
*/
|
||||
-static inline void idr_preload_end(void)
|
||||
-{
|
||||
- preempt_enable();
|
||||
-}
|
||||
+void idr_preload_end(void);
|
||||
|
||||
/**
|
||||
* idr_for_each_entry() - Iterate over an IDR's elements of a given type.
|
||||
--- a/include/linux/radix-tree.h
|
||||
+++ b/include/linux/radix-tree.h
|
||||
@@ -330,6 +330,8 @@ unsigned int radix_tree_gang_lookup_slot
|
||||
int radix_tree_preload(gfp_t gfp_mask);
|
||||
int radix_tree_maybe_preload(gfp_t gfp_mask);
|
||||
int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order);
|
||||
+void radix_tree_preload_end(void);
|
||||
+
|
||||
void radix_tree_init(void);
|
||||
void *radix_tree_tag_set(struct radix_tree_root *,
|
||||
unsigned long index, unsigned int tag);
|
||||
@@ -349,11 +351,6 @@ unsigned int radix_tree_gang_lookup_tag_
|
||||
unsigned int max_items, unsigned int tag);
|
||||
int radix_tree_tagged(const struct radix_tree_root *, unsigned int tag);
|
||||
|
||||
-static inline void radix_tree_preload_end(void)
|
||||
-{
|
||||
- preempt_enable();
|
||||
-}
|
||||
-
|
||||
int radix_tree_split_preload(unsigned old_order, unsigned new_order, gfp_t);
|
||||
int radix_tree_split(struct radix_tree_root *, unsigned long index,
|
||||
unsigned new_order);
|
||||
--- a/lib/radix-tree.c
|
||||
+++ b/lib/radix-tree.c
|
||||
@@ -38,7 +38,7 @@
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/string.h>
|
||||
-
|
||||
+#include <linux/locallock.h>
|
||||
|
||||
/* Number of nodes in fully populated tree of given height */
|
||||
static unsigned long height_to_maxnodes[RADIX_TREE_MAX_PATH + 1] __read_mostly;
|
||||
@@ -87,6 +87,7 @@ struct radix_tree_preload {
|
||||
struct radix_tree_node *nodes;
|
||||
};
|
||||
static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
|
||||
+static DEFINE_LOCAL_IRQ_LOCK(radix_tree_preloads_lock);
|
||||
|
||||
static inline struct radix_tree_node *entry_to_node(void *ptr)
|
||||
{
|
||||
@@ -405,12 +406,13 @@ radix_tree_node_alloc(gfp_t gfp_mask, st
|
||||
* succeed in getting a node here (and never reach
|
||||
* kmem_cache_alloc)
|
||||
*/
|
||||
- rtp = this_cpu_ptr(&radix_tree_preloads);
|
||||
+ rtp = &get_locked_var(radix_tree_preloads_lock, radix_tree_preloads);
|
||||
if (rtp->nr) {
|
||||
ret = rtp->nodes;
|
||||
rtp->nodes = ret->parent;
|
||||
rtp->nr--;
|
||||
}
|
||||
+ put_locked_var(radix_tree_preloads_lock, radix_tree_preloads);
|
||||
/*
|
||||
* Update the allocation stack trace as this is more useful
|
||||
* for debugging.
|
||||
@@ -476,14 +478,14 @@ static __must_check int __radix_tree_pre
|
||||
*/
|
||||
gfp_mask &= ~__GFP_ACCOUNT;
|
||||
|
||||
- preempt_disable();
|
||||
+ local_lock(radix_tree_preloads_lock);
|
||||
rtp = this_cpu_ptr(&radix_tree_preloads);
|
||||
while (rtp->nr < nr) {
|
||||
- preempt_enable();
|
||||
+ local_unlock(radix_tree_preloads_lock);
|
||||
node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
|
||||
if (node == NULL)
|
||||
goto out;
|
||||
- preempt_disable();
|
||||
+ local_lock(radix_tree_preloads_lock);
|
||||
rtp = this_cpu_ptr(&radix_tree_preloads);
|
||||
if (rtp->nr < nr) {
|
||||
node->parent = rtp->nodes;
|
||||
@@ -525,7 +527,7 @@ int radix_tree_maybe_preload(gfp_t gfp_m
|
||||
if (gfpflags_allow_blocking(gfp_mask))
|
||||
return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
|
||||
/* Preloading doesn't help anything with this gfp mask, skip it */
|
||||
- preempt_disable();
|
||||
+ local_lock(radix_tree_preloads_lock);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(radix_tree_maybe_preload);
|
||||
@@ -563,7 +565,7 @@ int radix_tree_maybe_preload_order(gfp_t
|
||||
|
||||
/* Preloading doesn't help anything with this gfp mask, skip it */
|
||||
if (!gfpflags_allow_blocking(gfp_mask)) {
|
||||
- preempt_disable();
|
||||
+ local_lock(radix_tree_preloads_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -597,6 +599,12 @@ int radix_tree_maybe_preload_order(gfp_t
|
||||
return __radix_tree_preload(gfp_mask, nr_nodes);
|
||||
}
|
||||
|
||||
+void radix_tree_preload_end(void)
|
||||
+{
|
||||
+ local_unlock(radix_tree_preloads_lock);
|
||||
+}
|
||||
+EXPORT_SYMBOL(radix_tree_preload_end);
|
||||
+
|
||||
static unsigned radix_tree_load_root(const struct radix_tree_root *root,
|
||||
struct radix_tree_node **nodep, unsigned long *maxindex)
|
||||
{
|
||||
@@ -2102,10 +2110,16 @@ EXPORT_SYMBOL(radix_tree_tagged);
|
||||
void idr_preload(gfp_t gfp_mask)
|
||||
{
|
||||
if (__radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE))
|
||||
- preempt_disable();
|
||||
+ local_lock(radix_tree_preloads_lock);
|
||||
}
|
||||
EXPORT_SYMBOL(idr_preload);
|
||||
|
||||
+void idr_preload_end(void)
|
||||
+{
|
||||
+ local_unlock(radix_tree_preloads_lock);
|
||||
+}
|
||||
+EXPORT_SYMBOL(idr_preload_end);
|
||||
+
|
||||
int ida_pre_get(struct ida *ida, gfp_t gfp)
|
||||
{
|
||||
/*
|
||||
@@ -2114,7 +2128,7 @@ int ida_pre_get(struct ida *ida, gfp_t g
|
||||
* to return to the ida_pre_get() step.
|
||||
*/
|
||||
if (!__radix_tree_preload(gfp, IDA_PRELOAD_SIZE))
|
||||
- preempt_enable();
|
||||
+ local_unlock(radix_tree_preloads_lock);
|
||||
|
||||
if (!this_cpu_read(ida_bitmap)) {
|
||||
struct ida_bitmap *bitmap = kzalloc(sizeof(*bitmap), gfp);
|
||||
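The caller contract of the preload API is unchanged by this patch; only what radix_tree_preload()/radix_tree_preload_end() pin internally differs (preempt_disable() before, a local lock on RT). A typical caller for reference; tree_lock and the inserted item are illustrative:

    #include <linux/radix-tree.h>
    #include <linux/spinlock.h>
    #include <linux/gfp.h>

    static int insert_item(struct radix_tree_root *root, spinlock_t *tree_lock,
                           unsigned long index, void *item)
    {
            int err = radix_tree_preload(GFP_KERNEL);  /* may sleep, prealloc nodes */

            if (err)
                    return err;

            spin_lock(tree_lock);
            err = radix_tree_insert(root, index, item); /* no -ENOMEM while preloaded */
            spin_unlock(tree_lock);

            radix_tree_preload_end();  /* drops the per-CPU pin (local_unlock on RT) */
            return err;
    }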
@@ -0,0 +1,166 @@
|
||||
From: Ingo Molnar <mingo@elte.hu>
|
||||
Date: Fri, 3 Jul 2009 08:29:34 -0500
|
||||
Subject: timers: Prepare for full preemption
|
||||
|
||||
When softirqs can be preempted we need to make sure that cancelling
|
||||
the timer from the active thread can not deadlock vs. a running timer
|
||||
callback. Add a waitqueue to resolve that.
|
||||
|
||||
Signed-off-by: Ingo Molnar <mingo@elte.hu>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
|
||||
---
|
||||
include/linux/timer.h | 2 +-
|
||||
kernel/sched/core.c | 9 +++++++--
|
||||
kernel/time/timer.c | 45 +++++++++++++++++++++++++++++++++++++++++----
|
||||
3 files changed, 49 insertions(+), 7 deletions(-)
|
||||
|
||||
--- a/include/linux/timer.h
|
||||
+++ b/include/linux/timer.h
|
||||
@@ -172,7 +172,7 @@ extern void add_timer(struct timer_list
|
||||
|
||||
extern int try_to_del_timer_sync(struct timer_list *timer);
|
||||
|
||||
-#ifdef CONFIG_SMP
|
||||
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
|
||||
extern int del_timer_sync(struct timer_list *timer);
|
||||
#else
|
||||
# define del_timer_sync(t) del_timer(t)
|
||||
--- a/kernel/sched/core.c
|
||||
+++ b/kernel/sched/core.c
|
||||
@@ -496,11 +496,14 @@ void resched_cpu(int cpu)
|
||||
*/
|
||||
int get_nohz_timer_target(void)
|
||||
{
|
||||
- int i, cpu = smp_processor_id();
|
||||
+ int i, cpu;
|
||||
struct sched_domain *sd;
|
||||
|
||||
+ preempt_disable_rt();
|
||||
+ cpu = smp_processor_id();
|
||||
+
|
||||
if (!idle_cpu(cpu) && housekeeping_cpu(cpu, HK_FLAG_TIMER))
|
||||
- return cpu;
|
||||
+ goto preempt_en_rt;
|
||||
|
||||
rcu_read_lock();
|
||||
for_each_domain(cpu, sd) {
|
||||
@@ -519,6 +522,8 @@ int get_nohz_timer_target(void)
|
||||
cpu = housekeeping_any_cpu(HK_FLAG_TIMER);
|
||||
unlock:
|
||||
rcu_read_unlock();
|
||||
+preempt_en_rt:
|
||||
+ preempt_enable_rt();
|
||||
return cpu;
|
||||
}
|
||||
|
||||
--- a/kernel/time/timer.c
|
||||
+++ b/kernel/time/timer.c
|
||||
@@ -44,6 +44,7 @@
|
||||
#include <linux/sched/debug.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/compat.h>
|
||||
+#include <linux/swait.h>
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
#include <asm/unistd.h>
|
||||
@@ -197,6 +198,9 @@ EXPORT_SYMBOL(jiffies_64);
|
||||
struct timer_base {
|
||||
raw_spinlock_t lock;
|
||||
struct timer_list *running_timer;
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+ struct swait_queue_head wait_for_running_timer;
|
||||
+#endif
|
||||
unsigned long clk;
|
||||
unsigned long next_expiry;
|
||||
unsigned int cpu;
|
||||
@@ -1178,6 +1182,33 @@ void add_timer_on(struct timer_list *tim
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(add_timer_on);
|
||||
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+/*
|
||||
+ * Wait for a running timer
|
||||
+ */
|
||||
+static void wait_for_running_timer(struct timer_list *timer)
|
||||
+{
|
||||
+ struct timer_base *base;
|
||||
+ u32 tf = timer->flags;
|
||||
+
|
||||
+ if (tf & TIMER_MIGRATING)
|
||||
+ return;
|
||||
+
|
||||
+ base = get_timer_base(tf);
|
||||
+ swait_event_exclusive(base->wait_for_running_timer,
|
||||
+ base->running_timer != timer);
|
||||
+}
|
||||
+
|
||||
+# define wakeup_timer_waiters(b) swake_up_all(&(b)->wait_for_running_timer)
|
||||
+#else
|
||||
+static inline void wait_for_running_timer(struct timer_list *timer)
|
||||
+{
|
||||
+ cpu_relax();
|
||||
+}
|
||||
+
|
||||
+# define wakeup_timer_waiters(b) do { } while (0)
|
||||
+#endif
|
||||
+
|
||||
/**
|
||||
* del_timer - deactivate a timer.
|
||||
* @timer: the timer to be deactivated
|
||||
@@ -1233,7 +1264,7 @@ int try_to_del_timer_sync(struct timer_l
|
||||
}
|
||||
EXPORT_SYMBOL(try_to_del_timer_sync);
|
||||
|
||||
-#ifdef CONFIG_SMP
|
||||
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
|
||||
/**
|
||||
* del_timer_sync - deactivate a timer and wait for the handler to finish.
|
||||
* @timer: the timer to be deactivated
|
||||
@@ -1293,7 +1324,7 @@ int del_timer_sync(struct timer_list *ti
|
||||
int ret = try_to_del_timer_sync(timer);
|
||||
if (ret >= 0)
|
||||
return ret;
|
||||
- cpu_relax();
|
||||
+ wait_for_running_timer(timer);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(del_timer_sync);
|
||||
@@ -1354,13 +1385,16 @@ static void expire_timers(struct timer_b
|
||||
|
||||
fn = timer->function;
|
||||
|
||||
- if (timer->flags & TIMER_IRQSAFE) {
|
||||
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL) &&
|
||||
+ timer->flags & TIMER_IRQSAFE) {
|
||||
raw_spin_unlock(&base->lock);
|
||||
call_timer_fn(timer, fn);
|
||||
+ base->running_timer = NULL;
|
||||
raw_spin_lock(&base->lock);
|
||||
} else {
|
||||
raw_spin_unlock_irq(&base->lock);
|
||||
call_timer_fn(timer, fn);
|
||||
+ base->running_timer = NULL;
|
||||
raw_spin_lock_irq(&base->lock);
|
||||
}
|
||||
}
|
||||
@@ -1681,8 +1715,8 @@ static inline void __run_timers(struct t
|
||||
while (levels--)
|
||||
expire_timers(base, heads + levels);
|
||||
}
|
||||
- base->running_timer = NULL;
|
||||
raw_spin_unlock_irq(&base->lock);
|
||||
+ wakeup_timer_waiters(base);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1927,6 +1961,9 @@ static void __init init_timer_cpu(int cp
|
||||
base->cpu = cpu;
|
||||
raw_spin_lock_init(&base->lock);
|
||||
base->clk = jiffies;
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+ init_swait_queue_head(&base->wait_for_running_timer);
|
||||
+#endif
|
||||
}
|
||||
}
|
||||
|
||||
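From a driver's point of view nothing changes: del_timer_sync() still returns only after a concurrently running callback has finished, it just sleeps on the new swait queue instead of spinning. The usual caller-side rule also still applies (never call it from the callback itself, nor while holding a lock the callback takes). A minimal sketch with a hypothetical device structure:

    #include <linux/timer.h>

    struct my_dev {
            struct timer_list timeout;      /* illustrative device timer */
    };

    static void my_dev_shutdown(struct my_dev *dev)
    {
            /* blocks until a running timeout handler, if any, has completed;
             * on RT this now waits on base->wait_for_running_timer */
            del_timer_sync(&dev->timeout);
    }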
@@ -0,0 +1,30 @@
|
||||
Subject: x86: kvm Require const tsc for RT
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Sun, 06 Nov 2011 12:26:18 +0100
|
||||
|
||||
Non constant TSC is a nightmare on bare metal already, but with
|
||||
virtualization it becomes a complete disaster because the workarounds
|
||||
are horrible latency wise. That's also a preliminary for running RT in
|
||||
a guest on top of a RT host.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
arch/x86/kvm/x86.c | 7 +++++++
|
||||
1 file changed, 7 insertions(+)
|
||||
|
||||
--- a/arch/x86/kvm/x86.c
|
||||
+++ b/arch/x86/kvm/x86.c
|
||||
@@ -6698,6 +6698,13 @@ int kvm_arch_init(void *opaque)
|
||||
goto out;
|
||||
}
|
||||
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
|
||||
+ printk(KERN_ERR "RT requires X86_FEATURE_CONSTANT_TSC\n");
|
||||
+ return -EOPNOTSUPP;
|
||||
+ }
|
||||
+#endif
|
||||
+
|
||||
r = kvm_mmu_module_init();
|
||||
if (r)
|
||||
goto out_free_percpu;
|
||||
@@ -0,0 +1,108 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Wed, 4 Oct 2017 10:24:23 +0200
|
||||
Subject: [PATCH] pci/switchtec: Don't use completion's wait queue
|
||||
|
||||
The poll callback is using completion's wait_queue_head_t member and
|
||||
puts it in poll_wait() so the poll() caller gets a wakeup after command
|
||||
completed. This does not work on RT because we don't have a
|
||||
wait_queue_head_t in our completion implementation. Nobody in tree does
|
||||
like that in tree so this is the only driver that breaks.
|
||||
|
||||
Instead of using the completion here is waitqueue with a status flag as
|
||||
suggested by Logan.
|
||||
|
||||
I don't have the HW so I have no idea if it works as expected, so please
|
||||
test it.
|
||||
|
||||
Cc: Kurt Schwemmer <kurt.schwemmer@microsemi.com>
|
||||
Cc: Logan Gunthorpe <logang@deltatee.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
drivers/pci/switch/switchtec.c | 22 +++++++++++++---------
|
||||
1 file changed, 13 insertions(+), 9 deletions(-)
|
||||
|
||||
--- a/drivers/pci/switch/switchtec.c
|
||||
+++ b/drivers/pci/switch/switchtec.c
|
||||
@@ -43,10 +43,11 @@ struct switchtec_user {
|
||||
|
||||
enum mrpc_state state;
|
||||
|
||||
- struct completion comp;
|
||||
+ wait_queue_head_t cmd_comp;
|
||||
struct kref kref;
|
||||
struct list_head list;
|
||||
|
||||
+ bool cmd_done;
|
||||
u32 cmd;
|
||||
u32 status;
|
||||
u32 return_code;
|
||||
@@ -68,7 +69,7 @@ static struct switchtec_user *stuser_cre
|
||||
stuser->stdev = stdev;
|
||||
kref_init(&stuser->kref);
|
||||
INIT_LIST_HEAD(&stuser->list);
|
||||
- init_completion(&stuser->comp);
|
||||
+ init_waitqueue_head(&stuser->cmd_comp);
|
||||
stuser->event_cnt = atomic_read(&stdev->event_cnt);
|
||||
|
||||
dev_dbg(&stdev->dev, "%s: %p\n", __func__, stuser);
|
||||
@@ -151,7 +152,7 @@ static int mrpc_queue_cmd(struct switcht
|
||||
kref_get(&stuser->kref);
|
||||
stuser->read_len = sizeof(stuser->data);
|
||||
stuser_set_state(stuser, MRPC_QUEUED);
|
||||
- init_completion(&stuser->comp);
|
||||
+ stuser->cmd_done = false;
|
||||
list_add_tail(&stuser->list, &stdev->mrpc_queue);
|
||||
|
||||
mrpc_cmd_submit(stdev);
|
||||
@@ -188,7 +189,8 @@ static void mrpc_complete_cmd(struct swi
|
||||
stuser->read_len);
|
||||
|
||||
out:
|
||||
- complete_all(&stuser->comp);
|
||||
+ stuser->cmd_done = true;
|
||||
+ wake_up_interruptible(&stuser->cmd_comp);
|
||||
list_del_init(&stuser->list);
|
||||
stuser_put(stuser);
|
||||
stdev->mrpc_busy = 0;
|
||||
@@ -458,10 +460,11 @@ static ssize_t switchtec_dev_read(struct
|
||||
mutex_unlock(&stdev->mrpc_mutex);
|
||||
|
||||
if (filp->f_flags & O_NONBLOCK) {
|
||||
- if (!try_wait_for_completion(&stuser->comp))
|
||||
+ if (!READ_ONCE(stuser->cmd_done))
|
||||
return -EAGAIN;
|
||||
} else {
|
||||
- rc = wait_for_completion_interruptible(&stuser->comp);
|
||||
+ rc = wait_event_interruptible(stuser->cmd_comp,
|
||||
+ stuser->cmd_done);
|
||||
if (rc < 0)
|
||||
return rc;
|
||||
}
|
||||
@@ -509,7 +512,7 @@ static __poll_t switchtec_dev_poll(struc
|
||||
struct switchtec_dev *stdev = stuser->stdev;
|
||||
__poll_t ret = 0;
|
||||
|
||||
- poll_wait(filp, &stuser->comp.wait, wait);
|
||||
+ poll_wait(filp, &stuser->cmd_comp, wait);
|
||||
poll_wait(filp, &stdev->event_wq, wait);
|
||||
|
||||
if (lock_mutex_and_test_alive(stdev))
|
||||
@@ -517,7 +520,7 @@ static __poll_t switchtec_dev_poll(struc
|
||||
|
||||
mutex_unlock(&stdev->mrpc_mutex);
|
||||
|
||||
- if (try_wait_for_completion(&stuser->comp))
|
||||
+ if (READ_ONCE(stuser->cmd_done))
|
||||
ret |= EPOLLIN | EPOLLRDNORM;
|
||||
|
||||
if (stuser->event_cnt != atomic_read(&stdev->event_cnt))
|
||||
@@ -1041,7 +1044,8 @@ static void stdev_kill(struct switchtec_
|
||||
|
||||
/* Wake up and kill any users waiting on an MRPC request */
|
||||
list_for_each_entry_safe(stuser, tmpuser, &stdev->mrpc_queue, list) {
|
||||
- complete_all(&stuser->comp);
|
||||
+ stuser->cmd_done = true;
|
||||
+ wake_up_interruptible(&stuser->cmd_comp);
|
||||
list_del_init(&stuser->list);
|
||||
stuser_put(stuser);
|
||||
}
|
||||
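The conversion above is the generic "completion to waitqueue plus flag" pattern, worth spelling out because it is the standard fallback whenever poll() support is needed on RT (RT completions are swait based and expose no wait_queue_head_t). A self-contained sketch with hypothetical names:

    #include <linux/wait.h>
    #include <linux/poll.h>
    #include <linux/fs.h>

    struct cmd_ctx {
            wait_queue_head_t cmd_comp;
            bool cmd_done;
    };

    static void cmd_ctx_init(struct cmd_ctx *c)
    {
            init_waitqueue_head(&c->cmd_comp);
            c->cmd_done = false;
    }

    static void cmd_finish(struct cmd_ctx *c)       /* completion/irq path */
    {
            c->cmd_done = true;
            wake_up_interruptible(&c->cmd_comp);
    }

    static int cmd_wait(struct cmd_ctx *c)          /* blocking read() path */
    {
            return wait_event_interruptible(c->cmd_comp, READ_ONCE(c->cmd_done));
    }

    static __poll_t cmd_poll(struct file *filp, struct cmd_ctx *c, poll_table *wait)
    {
            poll_wait(filp, &c->cmd_comp, wait);
            return READ_ONCE(c->cmd_done) ? (EPOLLIN | EPOLLRDNORM) : 0;
    }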
32
kernel/patches-4.19.x-rt/0094-wait.h-include-atomic.h.patch
Normal file
@@ -0,0 +1,32 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 28 Oct 2013 12:19:57 +0100
Subject: wait.h: include atomic.h

| CC init/main.o
|In file included from include/linux/mmzone.h:9:0,
| from include/linux/gfp.h:4,
| from include/linux/kmod.h:22,
| from include/linux/module.h:13,
| from init/main.c:15:
|include/linux/wait.h: In function ‘wait_on_atomic_t’:
|include/linux/wait.h:982:2: error: implicit declaration of function ‘atomic_read’ [-Werror=implicit-function-declaration]
| if (atomic_read(val) == 0)
| ^

This pops up on ARM. Non-RT gets its atomic.h include from spinlock.h

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/wait.h | 1 +
1 file changed, 1 insertion(+)

--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -10,6 +10,7 @@

#include <asm/current.h>
#include <uapi/linux/wait.h>
+#include <linux/atomic.h>

typedef struct wait_queue_entry wait_queue_entry_t;
@@ -0,0 +1,231 @@
|
||||
From: Daniel Wagner <daniel.wagner@bmw-carit.de>
|
||||
Date: Fri, 11 Jul 2014 15:26:11 +0200
|
||||
Subject: work-simple: Simple work queue implemenation
|
||||
|
||||
Provides a framework for enqueuing callbacks from irq context
|
||||
PREEMPT_RT_FULL safe. The callbacks are executed in kthread context.
|
||||
|
||||
Bases on wait-simple.
|
||||
|
||||
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de>
|
||||
---
|
||||
include/linux/swork.h | 24 ++++++
|
||||
kernel/sched/Makefile | 2
|
||||
kernel/sched/swork.c | 173 ++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
3 files changed, 198 insertions(+), 1 deletion(-)
|
||||
|
||||
--- /dev/null
|
||||
+++ b/include/linux/swork.h
|
||||
@@ -0,0 +1,24 @@
|
||||
+#ifndef _LINUX_SWORK_H
|
||||
+#define _LINUX_SWORK_H
|
||||
+
|
||||
+#include <linux/list.h>
|
||||
+
|
||||
+struct swork_event {
|
||||
+ struct list_head item;
|
||||
+ unsigned long flags;
|
||||
+ void (*func)(struct swork_event *);
|
||||
+};
|
||||
+
|
||||
+static inline void INIT_SWORK(struct swork_event *event,
|
||||
+ void (*func)(struct swork_event *))
|
||||
+{
|
||||
+ event->flags = 0;
|
||||
+ event->func = func;
|
||||
+}
|
||||
+
|
||||
+bool swork_queue(struct swork_event *sev);
|
||||
+
|
||||
+int swork_get(void);
|
||||
+void swork_put(void);
|
||||
+
|
||||
+#endif /* _LINUX_SWORK_H */
|
||||
--- a/kernel/sched/Makefile
|
||||
+++ b/kernel/sched/Makefile
|
||||
@@ -18,7 +18,7 @@ endif
|
||||
|
||||
obj-y += core.o loadavg.o clock.o cputime.o
|
||||
obj-y += idle.o fair.o rt.o deadline.o
|
||||
-obj-y += wait.o wait_bit.o swait.o completion.o
|
||||
+obj-y += wait.o wait_bit.o swait.o swork.o completion.o
|
||||
|
||||
obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o
|
||||
obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
|
||||
--- /dev/null
|
||||
+++ b/kernel/sched/swork.c
|
||||
@@ -0,0 +1,173 @@
|
||||
+/*
|
||||
+ * Copyright (C) 2014 BMW Car IT GmbH, Daniel Wagner daniel.wagner@bmw-carit.de
|
||||
+ *
|
||||
+ * Provides a framework for enqueuing callbacks from irq context
|
||||
+ * PREEMPT_RT_FULL safe. The callbacks are executed in kthread context.
|
||||
+ */
|
||||
+
|
||||
+#include <linux/swait.h>
|
||||
+#include <linux/swork.h>
|
||||
+#include <linux/kthread.h>
|
||||
+#include <linux/slab.h>
|
||||
+#include <linux/spinlock.h>
|
||||
+#include <linux/export.h>
|
||||
+
|
||||
+#define SWORK_EVENT_PENDING (1 << 0)
|
||||
+
|
||||
+static DEFINE_MUTEX(worker_mutex);
|
||||
+static struct sworker *glob_worker;
|
||||
+
|
||||
+struct sworker {
|
||||
+ struct list_head events;
|
||||
+ struct swait_queue_head wq;
|
||||
+
|
||||
+ raw_spinlock_t lock;
|
||||
+
|
||||
+ struct task_struct *task;
|
||||
+ int refs;
|
||||
+};
|
||||
+
|
||||
+static bool swork_readable(struct sworker *worker)
|
||||
+{
|
||||
+ bool r;
|
||||
+
|
||||
+ if (kthread_should_stop())
|
||||
+ return true;
|
||||
+
|
||||
+ raw_spin_lock_irq(&worker->lock);
|
||||
+ r = !list_empty(&worker->events);
|
||||
+ raw_spin_unlock_irq(&worker->lock);
|
||||
+
|
||||
+ return r;
|
||||
+}
|
||||
+
|
||||
+static int swork_kthread(void *arg)
|
||||
+{
|
||||
+ struct sworker *worker = arg;
|
||||
+
|
||||
+ for (;;) {
|
||||
+ swait_event_interruptible_exclusive(worker->wq,
|
||||
+ swork_readable(worker));
|
||||
+ if (kthread_should_stop())
|
||||
+ break;
|
||||
+
|
||||
+ raw_spin_lock_irq(&worker->lock);
|
||||
+ while (!list_empty(&worker->events)) {
|
||||
+ struct swork_event *sev;
|
||||
+
|
||||
+ sev = list_first_entry(&worker->events,
|
||||
+ struct swork_event, item);
|
||||
+ list_del(&sev->item);
|
||||
+ raw_spin_unlock_irq(&worker->lock);
|
||||
+
|
||||
+ WARN_ON_ONCE(!test_and_clear_bit(SWORK_EVENT_PENDING,
|
||||
+ &sev->flags));
|
||||
+ sev->func(sev);
|
||||
+ raw_spin_lock_irq(&worker->lock);
|
||||
+ }
|
||||
+ raw_spin_unlock_irq(&worker->lock);
|
||||
+ }
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static struct sworker *swork_create(void)
|
||||
+{
|
||||
+ struct sworker *worker;
|
||||
+
|
||||
+ worker = kzalloc(sizeof(*worker), GFP_KERNEL);
|
||||
+ if (!worker)
|
||||
+ return ERR_PTR(-ENOMEM);
|
||||
+
|
||||
+ INIT_LIST_HEAD(&worker->events);
|
||||
+ raw_spin_lock_init(&worker->lock);
|
||||
+ init_swait_queue_head(&worker->wq);
|
||||
+
|
||||
+ worker->task = kthread_run(swork_kthread, worker, "kswork");
|
||||
+ if (IS_ERR(worker->task)) {
|
||||
+ kfree(worker);
|
||||
+ return ERR_PTR(-ENOMEM);
|
||||
+ }
|
||||
+
|
||||
+ return worker;
|
||||
+}
|
||||
+
|
||||
+static void swork_destroy(struct sworker *worker)
|
||||
+{
|
||||
+ kthread_stop(worker->task);
|
||||
+
|
||||
+ WARN_ON(!list_empty(&worker->events));
|
||||
+ kfree(worker);
|
||||
+}
|
||||
+
|
||||
+/**
|
||||
+ * swork_queue - queue swork
|
||||
+ *
|
||||
+ * Returns %false if @work was already on a queue, %true otherwise.
|
||||
+ *
|
||||
+ * The work is queued and processed on a random CPU
|
||||
+ */
|
||||
+bool swork_queue(struct swork_event *sev)
|
||||
+{
|
||||
+ unsigned long flags;
|
||||
+
|
||||
+ if (test_and_set_bit(SWORK_EVENT_PENDING, &sev->flags))
|
||||
+ return false;
|
||||
+
|
||||
+ raw_spin_lock_irqsave(&glob_worker->lock, flags);
|
||||
+ list_add_tail(&sev->item, &glob_worker->events);
|
||||
+ raw_spin_unlock_irqrestore(&glob_worker->lock, flags);
|
||||
+
|
||||
+ swake_up_one(&glob_worker->wq);
|
||||
+ return true;
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(swork_queue);
|
||||
+
|
||||
+/**
|
||||
+ * swork_get - get an instance of the sworker
|
||||
+ *
|
||||
+ * Returns an negative error code if the initialization if the worker did not
|
||||
+ * work, %0 otherwise.
|
||||
+ *
|
||||
+ */
|
||||
+int swork_get(void)
|
||||
+{
|
||||
+ struct sworker *worker;
|
||||
+
|
||||
+ mutex_lock(&worker_mutex);
|
||||
+ if (!glob_worker) {
|
||||
+ worker = swork_create();
|
||||
+ if (IS_ERR(worker)) {
|
||||
+ mutex_unlock(&worker_mutex);
|
||||
+ return -ENOMEM;
|
||||
+ }
|
||||
+
|
||||
+ glob_worker = worker;
|
||||
+ }
|
||||
+
|
||||
+ glob_worker->refs++;
|
||||
+ mutex_unlock(&worker_mutex);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(swork_get);
|
||||
+
|
||||
+/**
|
||||
+ * swork_put - puts an instance of the sworker
|
||||
+ *
|
||||
+ * Will destroy the sworker thread. This function must not be called until all
|
||||
+ * queued events have been completed.
|
||||
+ */
|
||||
+void swork_put(void)
|
||||
+{
|
||||
+ mutex_lock(&worker_mutex);
|
||||
+
|
||||
+ glob_worker->refs--;
|
||||
+ if (glob_worker->refs > 0)
|
||||
+ goto out;
|
||||
+
|
||||
+ swork_destroy(glob_worker);
|
||||
+ glob_worker = NULL;
|
||||
+out:
|
||||
+ mutex_unlock(&worker_mutex);
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(swork_put);
|
||||
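Typical usage of the swork API introduced above: take a reference on the worker at init time, initialize an event once, and queue it from hard-irq context; the callback then runs preemptibly in the kswork kthread. The driver functions below are hypothetical, only the swork calls come from the patch:

    #include <linux/swork.h>        /* added by this patch; RT trees only */
    #include <linux/interrupt.h>

    static struct swork_event my_event;

    static void my_event_fn(struct swork_event *sev)
    {
            /* runs in the "kswork" kthread: fully preemptible, may sleep */
    }

    static irqreturn_t my_irq_handler(int irq, void *data)
    {
            swork_queue(&my_event);         /* safe from hard-irq context */
            return IRQ_HANDLED;
    }

    static int my_driver_init(void)
    {
            int ret = swork_get();          /* create/refcount the kswork thread */

            if (ret)
                    return ret;
            INIT_SWORK(&my_event, my_event_fn);
            return 0;
    }

    static void my_driver_exit(void)
    {
            swork_put();    /* drop reference; thread is destroyed at zero */
    }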
@@ -0,0 +1,29 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 10 Sep 2018 18:00:31 +0200
Subject: [PATCH] work-simple: drop a shit statement in SWORK_EVENT_PENDING

Dan Carpenter reported
| smatch warnings:
|kernel/sched/swork.c:63 swork_kthread() warn: test_bit() takes a bit number

This is not a bug because we shift by zero (and use the same value in
both places).
Nevertheless I'm dropping that shift by zero to keep smatch quiet.

Cc: Daniel Wagner <daniel.wagner@siemens.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/sched/swork.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

--- a/kernel/sched/swork.c
+++ b/kernel/sched/swork.c
@@ -12,7 +12,7 @@
#include <linux/spinlock.h>
#include <linux/export.h>

-#define SWORK_EVENT_PENDING (1 << 0)
+#define SWORK_EVENT_PENDING 1

static DEFINE_MUTEX(worker_mutex);
static struct sworker *glob_worker;
Some files were not shown because too many files have changed in this diff.