From a93295a7e118b5c58391906e47183775b1eb7cb1 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Mon, 15 Aug 2016 10:47:39 +0300 Subject: mtd: nand: omap2: Don't call dma_release_channel() if dma_request_chan() failed dma_request_chan() can fail returning an error pointer. In this case prevent calling dma_release_channel() to prevent a ERR_PTR() dereference. As error path can be called even with no DMA configuration, info->dma can be NULL so don't call dma_release_channel() for that case either. Fixes: de3bfc4a1616: ("mtd: nand: omap2: fix return value check in omap_nand_probe()") Reported-by: Dan Carpenter Signed-off-by: Roger Quadros Acked-by: Boris Brezillon Signed-off-by: Brian Norris --- drivers/mtd/nand/omap2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c index a59361c36f40..5513bfd9cdc9 100644 --- a/drivers/mtd/nand/omap2.c +++ b/drivers/mtd/nand/omap2.c @@ -2169,7 +2169,7 @@ scan_tail: return 0; return_error: - if (info->dma) + if (!IS_ERR_OR_NULL(info->dma)) dma_release_channel(info->dma); if (nand_chip->ecc.priv) { nand_bch_free(nand_chip->ecc.priv); -- cgit v1.2.3 From f6d7c1b5598b6407c3f1da795dd54acf99c1990c Mon Sep 17 00:00:00 2001 From: Karl Beldan Date: Mon, 29 Aug 2016 07:45:49 +0000 Subject: mtd: nand: davinci: Reinitialize the HW ECC engine in 4bit hwctl This fixes subpage writes when using 4-bit HW ECC. There has been numerous reports about ECC errors with devices using this driver for a while. Also the 4-bit ECC has been reported as broken with subpages in [1] and with 16 bits NANDs in the driver and in mach* board files both in mainline and in the vendor BSPs. What I saw with 4-bit ECC on a 16bits NAND (on an LCDK) which got me to try reinitializing the ECC engine: - R/W on whole pages properly generates/checks RS code - try writing the 1st subpage only of a blank page, the subpage is well written and the RS code properly generated, re-reading the same page the HW detects some ECC error, reading the same page again no ECC error is detected Note that the ECC engine is already reinitialized in the 1-bit case. Tested on my LCDK with UBI+UBIFS using subpages. This could potentially get rid of the issue workarounded in [1]. [1] 28c015a9daab ("mtd: davinci-nand: disable subpage write for keystone-nand") Fixes: 6a4123e581b3 ("mtd: nand: davinci_nand, 4-bit ECC for smallpage") Cc: Signed-off-by: Karl Beldan Acked-by: Boris Brezillon Signed-off-by: Brian Norris --- drivers/mtd/nand/davinci_nand.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mtd/nand/davinci_nand.c b/drivers/mtd/nand/davinci_nand.c index cc07ba0f044d..27fa8b87cd5f 100644 --- a/drivers/mtd/nand/davinci_nand.c +++ b/drivers/mtd/nand/davinci_nand.c @@ -240,6 +240,9 @@ static void nand_davinci_hwctl_4bit(struct mtd_info *mtd, int mode) unsigned long flags; u32 val; + /* Reset ECC hardware */ + davinci_nand_readl(info, NAND_4BIT_ECC1_OFFSET); + spin_lock_irqsave(&davinci_nand_lock, flags); /* Start 4-bit ECC calculation for read/write */ -- cgit v1.2.3 From 43ba588346455dcc984dc98a49af1c2eb1e9aa75 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 7 Sep 2016 19:25:37 -0700 Subject: Input: silead_gsl1680 - document firmware-name, fix implementation The driver has supported touchscreen-fw-name to specify the firmware to load since it has been merged, but this was omitted from the dt-binding documentation. During review of adding touchscreen-fw-name to the binding documentation it was brought up that there is a standard property name called "firmware-name" for this, which should be used. Since there are no users of touchscreen-fw-name yet, this commit adds documentation of "firmware-name" to the dt-binding documentation and switches the driver over to use this. This commit also makes the driver add a "silead/" prefix to the firmware name from dt before calling request_firmware. That the firmware files are stored under /lib/firmware/silead under Linux is an implementation detail and does not belong in devicetree. Signed-off-by: Hans de Goede Acked-by: Rob Herring Signed-off-by: Dmitry Torokhov --- .../devicetree/bindings/input/touchscreen/silead_gsl1680.txt | 1 + drivers/input/touchscreen/silead.c | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/input/touchscreen/silead_gsl1680.txt b/Documentation/devicetree/bindings/input/touchscreen/silead_gsl1680.txt index 1112e0d794e1..820fee4b77b6 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/silead_gsl1680.txt +++ b/Documentation/devicetree/bindings/input/touchscreen/silead_gsl1680.txt @@ -13,6 +13,7 @@ Required properties: - touchscreen-size-y : See touchscreen.txt Optional properties: +- firmware-name : File basename (string) for board specific firmware - touchscreen-inverted-x : See touchscreen.txt - touchscreen-inverted-y : See touchscreen.txt - touchscreen-swapped-x-y : See touchscreen.txt diff --git a/drivers/input/touchscreen/silead.c b/drivers/input/touchscreen/silead.c index b2744a64e933..c7ab116a16b3 100644 --- a/drivers/input/touchscreen/silead.c +++ b/drivers/input/touchscreen/silead.c @@ -390,9 +390,10 @@ static void silead_ts_read_props(struct i2c_client *client) data->max_fingers = 5; /* Most devices handle up-to 5 fingers */ } - error = device_property_read_string(dev, "touchscreen-fw-name", &str); + error = device_property_read_string(dev, "firmware-name", &str); if (!error) - snprintf(data->fw_name, sizeof(data->fw_name), "%s", str); + snprintf(data->fw_name, sizeof(data->fw_name), + "silead/%s", str); else dev_dbg(dev, "Firmware file name read error. Using default."); } -- cgit v1.2.3 From 4af2ff91ec3f42b538a65cf12df5f9faf6aaa914 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 7 Sep 2016 19:32:14 -0700 Subject: Input: silead_gsl1680 - use "silead/" prefix for firmware loading The silead touch-controller ICs use a different firmware per digitizer / tablet model. So there are going to be quite a few of then and they really should be under a separate subdir. This commit prefixes the default firmware names with "silead/" just like we are already doing for devicetree specified firmware names. Signed-off-by: Hans de Goede Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/silead.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/input/touchscreen/silead.c b/drivers/input/touchscreen/silead.c index c7ab116a16b3..f502c8488be8 100644 --- a/drivers/input/touchscreen/silead.c +++ b/drivers/input/touchscreen/silead.c @@ -411,14 +411,14 @@ static int silead_ts_set_default_fw_name(struct silead_ts_data *data, if (!acpi_id) return -ENODEV; - snprintf(data->fw_name, sizeof(data->fw_name), "%s.fw", - acpi_id->id); + snprintf(data->fw_name, sizeof(data->fw_name), + "silead/%s.fw", acpi_id->id); for (i = 0; i < strlen(data->fw_name); i++) data->fw_name[i] = tolower(data->fw_name[i]); } else { - snprintf(data->fw_name, sizeof(data->fw_name), "%s.fw", - id->name); + snprintf(data->fw_name, sizeof(data->fw_name), + "silead/%s.fw", id->name); } return 0; @@ -427,7 +427,8 @@ static int silead_ts_set_default_fw_name(struct silead_ts_data *data, static int silead_ts_set_default_fw_name(struct silead_ts_data *data, const struct i2c_device_id *id) { - snprintf(data->fw_name, sizeof(data->fw_name), "%s.fw", id->name); + snprintf(data->fw_name, sizeof(data->fw_name), + "silead/%s.fw", id->name); return 0; } #endif -- cgit v1.2.3 From e3b23148fd8af277fd55068183ed3ce4c8f51aa5 Mon Sep 17 00:00:00 2001 From: Amitoj Kaur Chawla Date: Fri, 12 Aug 2016 08:36:54 +0530 Subject: MIPS: ath79: Fix test for error return of clk_register_fixed_factor(). clk_register_fixed_factor returns an ERR_PTR in case of an error and should have an IS_ERR check instead of a null check. The Coccinelle semantic patch used to find this issue is as follows: @@ expression e; statement S; @@ *e = clk_register_fixed_factor(...); if (!e) S Signed-off-by: Amitoj Kaur Chawla Cc: julia.lawall@lip6.fr Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/13894/ Signed-off-by: Ralf Baechle --- arch/mips/ath79/clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/ath79/clock.c b/arch/mips/ath79/clock.c index 2e7378467c5c..cc3a1e33a600 100644 --- a/arch/mips/ath79/clock.c +++ b/arch/mips/ath79/clock.c @@ -96,7 +96,7 @@ static struct clk * __init ath79_reg_ffclk(const char *name, struct clk *clk; clk = clk_register_fixed_factor(NULL, name, parent_name, 0, mult, div); - if (!clk) + if (IS_ERR(clk)) panic("failed to allocate %s clock structure", name); return clk; -- cgit v1.2.3 From 2809328f6ef2139115665dda1b83ee9303c52431 Mon Sep 17 00:00:00 2001 From: Marcin Nowakowski Date: Thu, 11 Aug 2016 09:02:30 +0200 Subject: MIPS: uprobes: fix incorrect uprobe brk handling When a uprobe-replacement breakpoint instruction is handled, a notifier is called with DIE_UPROBE argument, but a corresponding exception notify handler for MIPS attempts to handle DIE_BREAK instead. As a result the breakpoint instruction isn't handled by the uprobe code and the probed application terminates with SIGTRAP. Fix this by changing arch_uprobe_exception_notify code to handle DIE_UPROBE as a pre-singlestep condition instead of DIE_BREAK. Signed-off-by: Marcin Nowakowski Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/13884/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/uprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/uprobes.c b/arch/mips/kernel/uprobes.c index 8452d933a645..1149b30c9aeb 100644 --- a/arch/mips/kernel/uprobes.c +++ b/arch/mips/kernel/uprobes.c @@ -222,7 +222,7 @@ int arch_uprobe_exception_notify(struct notifier_block *self, return NOTIFY_DONE; switch (val) { - case DIE_BREAK: + case DIE_UPROBE: if (uprobe_pre_sstep_notifier(regs)) return NOTIFY_STOP; break; -- cgit v1.2.3 From 58cae9b0f0c1d9cc55de018d927e65549b24cf5b Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 9 Aug 2016 13:21:48 +0100 Subject: MIPS: Fix memory regions reaching top of physical Memory regions added with add_memory_region() at the top of the physical address space will have their end address overflow to 0. This causes them to be rejected as invalid, and would cause various other issues later on. This causes issues on Malta and Boston platforms when wanting to use all 2GB of RAM on a 32-bit kernel, either via highmem (using physical addresses 0x90000000..0xFFFFFFFF), or with the Malta Enhanced Virtual Addressing (EVA) layout which exposes the whole 0x80000000..0xFFFFFFFF physical address range to kernel mode at 0x00000000..0x7FFFFFFF. Due to the abundance of these non-overflow assumptions and the fact that memblock already avoids the arithmetic overflow by limiting the size of new memory regions without the arch code knowing it (in particular mem_init_free_highmem() will trigger a page dump due to nonzero mapcount on the last page), it is simpler and safer to just limit the size of the region in a similar way to memblock but at the arch level to allow most of the RAM to be used without arithmetic overflows. Therefore we detect this case specifically and reduce the size of the region slightly to avoid the arithmetic overflows and cause the last page to be ignored. Signed-off-by: James Hogan Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/13857/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/setup.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 36cf8d65c47d..3be0e6ba2797 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -87,6 +87,13 @@ void __init add_memory_region(phys_addr_t start, phys_addr_t size, long type) int x = boot_mem_map.nr_map; int i; + /* + * If the region reaches the top of the physical address space, adjust + * the size slightly so that (start + size) doesn't overflow + */ + if (start + size - 1 == (phys_addr_t)ULLONG_MAX) + --size; + /* Sanity check */ if (start + size < start) { pr_warn("Trying to add an invalid memory region, skipped\n"); -- cgit v1.2.3 From ac7e385f2bf1c39615cf58f7e58246fdd9da5bb9 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 9 Aug 2016 13:21:49 +0100 Subject: MIPS: MAAR: Fix address alignment The alignment of MIPS MAAR region addresses isn't quite right. - It rounds an already 64 KiB aligned start address up to the next 64 KiB boundary, e.g. 0x80000000 is rounded up to 0x80010000. - It assumes the end address is already on a 64 KiB boundary and doesn't round it down. Should that not be the case it will hit the second BUG_ON() in write_maar_pair(). Both cases are addressed by rounding up and down to 64 KiB boundaries in the more traditional way of adding 0xffff (for rounding up) and masking off the low 16 bits. Signed-off-by: James Hogan Cc: Paul Burton Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/13858/ Signed-off-by: Ralf Baechle --- arch/mips/mm/init.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index a5509e7dcad2..2c3749d98f04 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -261,7 +261,6 @@ unsigned __weak platform_maar_init(unsigned num_pairs) { struct maar_config cfg[BOOT_MEM_MAP_MAX]; unsigned i, num_configured, num_cfg = 0; - phys_addr_t skip; for (i = 0; i < boot_mem_map.nr_map; i++) { switch (boot_mem_map.map[i].type) { @@ -272,14 +271,14 @@ unsigned __weak platform_maar_init(unsigned num_pairs) continue; } - skip = 0x10000 - (boot_mem_map.map[i].addr & 0xffff); - + /* Round lower up */ cfg[num_cfg].lower = boot_mem_map.map[i].addr; - cfg[num_cfg].lower += skip; + cfg[num_cfg].lower = (cfg[num_cfg].lower + 0xffff) & ~0xffff; - cfg[num_cfg].upper = cfg[num_cfg].lower; - cfg[num_cfg].upper += boot_mem_map.map[i].size - 1; - cfg[num_cfg].upper -= skip; + /* Round upper down */ + cfg[num_cfg].upper = boot_mem_map.map[i].addr + + boot_mem_map.map[i].size; + cfg[num_cfg].upper = (cfg[num_cfg].upper & ~0xffff) - 1; cfg[num_cfg].attrs = MIPS_MAAR_S; num_cfg++; -- cgit v1.2.3 From b03c1e3b8eed9026733c473071d1f528358a0e50 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 12 Sep 2016 10:58:06 +0100 Subject: MIPS: Remove compact branch policy Kconfig entries Commit c1a0e9bc885d ("MIPS: Allow compact branch policy to be changed") added Kconfig entries allowing for the compact branch policy used by the compiler for MIPSr6 kernels to be specified. This can be useful for debugging, particularly in systems where compact branches have recently been introduced. Unfortunately mainline gcc 5.x supports MIPSr6 but not the -mcompact-branches compiler flag, leading to MIPSr6 kernels failing to build with gcc 5.x with errors such as: mipsel-linux-gnu-gcc: error: unrecognized command line option '-mcompact-branches=optimal' make[2]: *** [kernel/bounds.s] Error 1 Fixing this by hiding the Kconfig entry behind another seems to be more hassle than it's worth, as MIPSr6 & compact branches have been around for a while now and if policy does need to be set for debug it can be done easily enough with KCFLAGS. Therefore remove the compact branch policy Kconfig entries & their handling in the Makefile. This reverts commit c1a0e9bc885d ("MIPS: Allow compact branch policy to be changed"). Signed-off-by: Paul Burton Reported-by: kbuild test robot Fixes: c1a0e9bc885d ("MIPS: Allow compact branch policy to be changed") Cc: stable # v4.4+ Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14241/ Signed-off-by: Ralf Baechle --- arch/mips/Kconfig.debug | 36 ------------------------------------ arch/mips/Makefile | 4 ---- 2 files changed, 40 deletions(-) diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug index f0e314ceb8ba..7f975b20b20c 100644 --- a/arch/mips/Kconfig.debug +++ b/arch/mips/Kconfig.debug @@ -113,42 +113,6 @@ config SPINLOCK_TEST help Add several files to the debugfs to test spinlock speed. -if CPU_MIPSR6 - -choice - prompt "Compact branch policy" - default MIPS_COMPACT_BRANCHES_OPTIMAL - -config MIPS_COMPACT_BRANCHES_NEVER - bool "Never (force delay slot branches)" - help - Pass the -mcompact-branches=never flag to the compiler in order to - force it to always emit branches with delay slots, and make no use - of the compact branch instructions introduced by MIPSr6. This is - useful if you suspect there may be an issue with compact branches in - either the compiler or the CPU. - -config MIPS_COMPACT_BRANCHES_OPTIMAL - bool "Optimal (use where beneficial)" - help - Pass the -mcompact-branches=optimal flag to the compiler in order for - it to make use of compact branch instructions where it deems them - beneficial, and use branches with delay slots elsewhere. This is the - default compiler behaviour, and should be used unless you have a - reason to choose otherwise. - -config MIPS_COMPACT_BRANCHES_ALWAYS - bool "Always (force compact branches)" - help - Pass the -mcompact-branches=always flag to the compiler in order to - force it to always emit compact branches, making no use of branch - instructions with delay slots. This can result in more compact code - which may be beneficial in some scenarios. - -endchoice - -endif # CPU_MIPSR6 - config SCACHE_DEBUGFS bool "L2 cache debugfs entries" depends on DEBUG_FS diff --git a/arch/mips/Makefile b/arch/mips/Makefile index efd7a9dc93c4..598ab2930fce 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -203,10 +203,6 @@ endif toolchain-virt := $(call cc-option-yn,$(mips-cflags) -mvirt) cflags-$(toolchain-virt) += -DTOOLCHAIN_SUPPORTS_VIRT -cflags-$(CONFIG_MIPS_COMPACT_BRANCHES_NEVER) += -mcompact-branches=never -cflags-$(CONFIG_MIPS_COMPACT_BRANCHES_OPTIMAL) += -mcompact-branches=optimal -cflags-$(CONFIG_MIPS_COMPACT_BRANCHES_ALWAYS) += -mcompact-branches=always - # # Firmware support # -- cgit v1.2.3 From 951c39cd3bc0aedf67fbd8fb4b9380287e6205d1 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Mon, 5 Sep 2016 15:43:40 +0100 Subject: MIPS: paravirt: Fix undefined reference to smp_bootstrap If the paravirt machine is compiles without CONFIG_SMP, the following linker error occurs arch/mips/kernel/head.o: In function `kernel_entry': (.ref.text+0x10): undefined reference to `smp_bootstrap' due to the kernel entry macro always including SMP startup code. Wrap this code in CONFIG_SMP to fix the error. Signed-off-by: Matt Redfearn Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # 3.16+ Patchwork: https://patchwork.linux-mips.org/patch/14212/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/mach-paravirt/kernel-entry-init.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h b/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h index 2f82bfa3a773..c9f5769dfc8f 100644 --- a/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h +++ b/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h @@ -11,11 +11,13 @@ #define CP0_EBASE $15, 1 .macro kernel_entry_setup +#ifdef CONFIG_SMP mfc0 t0, CP0_EBASE andi t0, t0, 0x3ff # CPUNum beqz t0, 1f # CPUs other than zero goto smp_bootstrap j smp_bootstrap +#endif /* CONFIG_SMP */ 1: .endm -- cgit v1.2.3 From 3cbc6fc9c99f1709203711f125bc3b79487aba06 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 5 Sep 2016 08:48:03 +0800 Subject: MIPS: Add a missing ".set pop" in an early commit Commit 842dfc11ea9a21 ("MIPS: Fix build with binutils 2.24.51+") missing a ".set pop" in macro fpu_restore_16even, so add it. Signed-off-by: Huacai Chen Acked-by: Manuel Lauss Cc: Steven J . Hill Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # 3.18+ Patchwork: https://patchwork.linux-mips.org/patch/14210/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/asmmacro.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h index 56584a659183..83054f79f72a 100644 --- a/arch/mips/include/asm/asmmacro.h +++ b/arch/mips/include/asm/asmmacro.h @@ -157,6 +157,7 @@ ldc1 $f28, THREAD_FPR28(\thread) ldc1 $f30, THREAD_FPR30(\thread) ctc1 \tmp, fcr31 + .set pop .endm .macro fpu_restore_16odd thread -- cgit v1.2.3 From 28b89b9e6f7b6c8fef7b3af39828722bca20cfee Mon Sep 17 00:00:00 2001 From: Joonwoo Park Date: Sun, 11 Sep 2016 21:14:58 -0700 Subject: cpuset: handle race between CPU hotplug and cpuset_hotplug_work A discrepancy between cpu_online_mask and cpuset's effective_cpus mask is inevitable during hotplug since cpuset defers updating of effective_cpus mask using a workqueue, during which time nothing prevents the system from more hotplug operations. For that reason guarantee_online_cpus() walks up the cpuset hierarchy until it finds an intersection under the assumption that top cpuset's effective_cpus mask intersects with cpu_online_mask even with such a race occurring. However a sequence of CPU hotplugs can open a time window, during which none of the effective CPUs in the top cpuset intersect with cpu_online_mask. For example when there are 4 possible CPUs 0-3 and only CPU0 is online: ======================== =========================== cpu_online_mask top_cpuset.effective_cpus ======================== =========================== echo 1 > cpu2/online. CPU hotplug notifier woke up hotplug work but not yet scheduled. [0,2] [0] echo 0 > cpu0/online. The workqueue is still runnable. [2] [0] ======================== =========================== Now there is no intersection between cpu_online_mask and top_cpuset.effective_cpus. Thus invoking sys_sched_setaffinity() at this moment can cause following: Unable to handle kernel NULL pointer dereference at virtual address 000000d0 ------------[ cut here ]------------ Kernel BUG at ffffffc0001389b0 [verbose debug info unavailable] Internal error: Oops - BUG: 96000005 [#1] PREEMPT SMP Modules linked in: CPU: 2 PID: 1420 Comm: taskset Tainted: G W 4.4.8+ #98 task: ffffffc06a5c4880 ti: ffffffc06e124000 task.ti: ffffffc06e124000 PC is at guarantee_online_cpus+0x2c/0x58 LR is at cpuset_cpus_allowed+0x4c/0x6c Process taskset (pid: 1420, stack limit = 0xffffffc06e124020) Call trace: [] guarantee_online_cpus+0x2c/0x58 [] cpuset_cpus_allowed+0x4c/0x6c [] sched_setaffinity+0xc0/0x1ac [] SyS_sched_setaffinity+0x98/0xac [] el0_svc_naked+0x24/0x28 The top cpuset's effective_cpus are guaranteed to be identical to cpu_online_mask eventually. Hence fall back to cpu_online_mask when there is no intersection between top cpuset's effective_cpus and cpu_online_mask. Signed-off-by: Joonwoo Park Acked-by: Li Zefan Cc: Tejun Heo Cc: cgroups@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: # 3.17+ Signed-off-by: Tejun Heo --- kernel/cpuset.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index c27e53326bef..c08585ad996b 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -325,8 +325,7 @@ static struct file_system_type cpuset_fs_type = { /* * Return in pmask the portion of a cpusets's cpus_allowed that * are online. If none are online, walk up the cpuset hierarchy - * until we find one that does have some online cpus. The top - * cpuset always has some cpus online. + * until we find one that does have some online cpus. * * One way or another, we guarantee to return some non-empty subset * of cpu_online_mask. @@ -335,8 +334,20 @@ static struct file_system_type cpuset_fs_type = { */ static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask) { - while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask)) + while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask)) { cs = parent_cs(cs); + if (unlikely(!cs)) { + /* + * The top cpuset doesn't have any online cpu as a + * consequence of a race between cpuset_hotplug_work + * and cpu hotplug notifier. But we know the top + * cpuset's effective_cpus is on its way to to be + * identical to cpu_online_mask. + */ + cpumask_copy(pmask, cpu_online_mask); + return; + } + } cpumask_and(pmask, cs->effective_cpus, cpu_online_mask); } -- cgit v1.2.3 From 801f823dc2d585253f2f8dd17c4a46d9da560579 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 5 Sep 2016 15:24:54 +0100 Subject: MIPS: c-r4k: Fix size calc when avoiding IPIs for small icache flushes Commit f70ddc07b637 ("MIPS: c-r4k: Avoid small flush_icache_range SMP calls") adds checks to force use of hit-type cache ops for small icache flushes where they are globalised & index-type cache ops aren't, in order to avoid the overhead of IPIs in those cases. However it calculated the size of the region being flushed incorrectly, subtracting the end address from the start address rather than the reverse. This would have led to an overflow with size wrapping round to some large value, and likely to the special case for avoiding IPIs not actually being hit. Signed-off-by: Paul Burton Cc: James Hogan Fixes: f70ddc07b637 ("MIPS: c-r4k: Avoid small flush_icache_range SMP calls") Reviewed-by: James Hogan Reviewed-by: Florian Fainelli Cc: Huacai Chen Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14211/ Signed-off-by: Ralf Baechle --- arch/mips/mm/c-r4k.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index cd72805b64a7..fa7d8d3790bf 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -800,7 +800,7 @@ static void r4k_flush_icache_range(unsigned long start, unsigned long end) * If address-based cache ops don't require an SMP call, then * use them exclusively for small flushes. */ - size = start - end; + size = end - start; cache_size = icache_size; if (!cpu_has_ic_fills_f_dc) { size *= 2; -- cgit v1.2.3 From 42857cf512cb34c2c8cb50f1e766689d979d64e0 Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Thu, 15 Sep 2016 12:20:12 -0400 Subject: configfs: Return -EFBIG from configfs_write_bin_file. The check for writing more than cb_max_size bytes does not 'goto out' so it is a no-op which allows users to vmalloc an arbitrary amount. Fixes: 03607ace807b ("configfs: implement binary attributes") Cc: stable@kernel.org Signed-off-by: Phil Turnbull Signed-off-by: Christoph Hellwig --- fs/configfs/file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/configfs/file.c b/fs/configfs/file.c index c30cf49b69d2..2c6312db8516 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -333,6 +333,7 @@ configfs_write_bin_file(struct file *file, const char __user *buf, if (bin_attr->cb_max_size && *ppos + count > bin_attr->cb_max_size) { len = -EFBIG; + goto out; } tbuf = vmalloc(*ppos + count); -- cgit v1.2.3 From 8a15b81741879fa89601b03c0e50b0d780d65bc0 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 16 Sep 2016 13:02:37 +0000 Subject: cpuset: fix non static symbol warning Fixes the following sparse warning: kernel/cpuset.c:2088:6: warning: symbol 'cpuset_fork' was not declared. Should it be static? Signed-off-by: Wei Yongjun Signed-off-by: Tejun Heo --- kernel/cpuset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index c08585ad996b..2b4c20ab5bbe 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2085,7 +2085,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css) * which could have been changed by cpuset just after it inherits the * state from the parent and before it sits on the cgroup's task list. */ -void cpuset_fork(struct task_struct *task) +static void cpuset_fork(struct task_struct *task) { if (task_css_is_root(task, cpuset_cgrp_id)) return; -- cgit v1.2.3 From 1984e075915cbae65336a99b1879865080d8e55e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Sep 2016 09:49:27 +0100 Subject: genirq: Skip chained interrupt trigger setup if type is IRQ_TYPE_NONE There is no point in trying to configure the trigger of a chained interrupt if no trigger information has been configured. At best this is ignored, and at the worse this confuses the underlying irqchip (which is likely not to handle such a thing), and unnecessarily alarms the user. Only apply the configuration if type is not IRQ_TYPE_NONE. Fixes: 1e12c4a9393b ("genirq: Correctly configure the trigger on chained interrupts") Reported-and-tested-by: Geert Uytterhoeven Signed-off-by: Marc Zyngier Link: https://lkml.kernel.org/r/CAMuHMdVW1eTn20=EtYcJ8hkVwohaSuH_yQXrY2MGBEvZ8fpFOg@mail.gmail.com Link: http://lkml.kernel.org/r/1474274967-15984-1-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- kernel/irq/chip.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 637389088b3f..26ba5654d9d5 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -820,6 +820,8 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle, desc->name = name; if (handle != handle_bad_irq && is_chained) { + unsigned int type = irqd_get_trigger_type(&desc->irq_data); + /* * We're about to start this interrupt immediately, * hence the need to set the trigger configuration. @@ -828,8 +830,10 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle, * chained interrupt. Reset it immediately because we * do know better. */ - __irq_set_trigger(desc, irqd_get_trigger_type(&desc->irq_data)); - desc->handle_irq = handle; + if (type != IRQ_TYPE_NONE) { + __irq_set_trigger(desc, type); + desc->handle_irq = handle; + } irq_settings_set_noprobe(desc); irq_settings_set_norequest(desc); -- cgit v1.2.3 From 7a353289925f01cb188ebc6fc4f4a33456b7de44 Mon Sep 17 00:00:00 2001 From: RogerCC Lin Date: Mon, 19 Sep 2016 10:53:25 +0800 Subject: mtd: nand: fix generating over-boundary ECC data when writing When mtk_ecc_encode() is writing the ECC parity data to the OOB region,because each register is 4 bytes in length,but the len's unit is in bytes,the operation in the for loop will cross the ECC's boundary. Signed-off-by: RogerCC Lin Fixes: 1d6b1e464950 ("mtd: mediatek: driver for MTK Smart Device") Signed-off-by: Boris Brezillon --- drivers/mtd/nand/mtk_ecc.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/nand/mtk_ecc.c b/drivers/mtd/nand/mtk_ecc.c index 25a4fbd4d24a..d54f666417e1 100644 --- a/drivers/mtd/nand/mtk_ecc.c +++ b/drivers/mtd/nand/mtk_ecc.c @@ -366,7 +366,8 @@ int mtk_ecc_encode(struct mtk_ecc *ecc, struct mtk_ecc_config *config, u8 *data, u32 bytes) { dma_addr_t addr; - u32 *p, len, i; + u8 *p; + u32 len, i, val; int ret = 0; addr = dma_map_single(ecc->dev, data, bytes, DMA_TO_DEVICE); @@ -392,11 +393,14 @@ int mtk_ecc_encode(struct mtk_ecc *ecc, struct mtk_ecc_config *config, /* Program ECC bytes to OOB: per sector oob = FDM + ECC + SPARE */ len = (config->strength * ECC_PARITY_BITS + 7) >> 3; - p = (u32 *)(data + bytes); + p = data + bytes; /* write the parity bytes generated by the ECC back to the OOB region */ - for (i = 0; i < len; i++) - p[i] = readl(ecc->regs + ECC_ENCPAR(i)); + for (i = 0; i < len; i++) { + if ((i % 4) == 0) + val = readl(ecc->regs + ECC_ENCPAR(i / 4)); + p[i] = (val >> ((i % 4) * 8)) & 0xff; + } timeout: dma_unmap_single(ecc->dev, addr, bytes, DMA_TO_DEVICE); -- cgit v1.2.3 From 559e58e7ed2dadc310f174e609ead8a3e8acfc4e Mon Sep 17 00:00:00 2001 From: RogerCC Lin Date: Mon, 19 Sep 2016 10:53:26 +0800 Subject: mtd: nand: fix chances to create incomplete ECC data when writing When mtk_nfc_do_write_page() comparing the sector number,because the sector number field is at the 12th-bit position of NFI_BYTELEN register,the masked register should be shifted 12 bits before being compared.The result of this bug may cause the second subpage has incomplete ECC parity bytes. Signed-off-by: RogerCC Lin Fixes: 1d6b1e464950 ("mtd: mediatek: driver for MTK Smart Device") Signed-off-by: Boris Brezillon --- drivers/mtd/nand/mtk_nand.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/mtk_nand.c b/drivers/mtd/nand/mtk_nand.c index ddaa2acb9dd7..5223a2182ee4 100644 --- a/drivers/mtd/nand/mtk_nand.c +++ b/drivers/mtd/nand/mtk_nand.c @@ -93,6 +93,9 @@ #define NFI_FSM_MASK (0xf << 16) #define NFI_ADDRCNTR (0x70) #define CNTR_MASK GENMASK(16, 12) +#define ADDRCNTR_SEC_SHIFT (12) +#define ADDRCNTR_SEC(val) \ + (((val) & CNTR_MASK) >> ADDRCNTR_SEC_SHIFT) #define NFI_STRADDR (0x80) #define NFI_BYTELEN (0x84) #define NFI_CSEL (0x90) @@ -699,7 +702,7 @@ static int mtk_nfc_do_write_page(struct mtd_info *mtd, struct nand_chip *chip, } ret = readl_poll_timeout_atomic(nfc->regs + NFI_ADDRCNTR, reg, - (reg & CNTR_MASK) >= chip->ecc.steps, + ADDRCNTR_SEC(reg) >= chip->ecc.steps, 10, MTK_TIMEOUT); if (ret) dev_err(dev, "hwecc write timeout\n"); @@ -902,7 +905,7 @@ static int mtk_nfc_read_subpage(struct mtd_info *mtd, struct nand_chip *chip, dev_warn(nfc->dev, "read ahb/dma done timeout\n"); rc = readl_poll_timeout_atomic(nfc->regs + NFI_BYTELEN, reg, - (reg & CNTR_MASK) >= sectors, 10, + ADDRCNTR_SEC(reg) >= sectors, 10, MTK_TIMEOUT); if (rc < 0) { dev_err(nfc->dev, "subpage done timeout\n"); -- cgit v1.2.3 From 38178e7b88dcbe1ab384f27a7370074e774dda81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lothar=20Wa=C3=9Fmann?= Date: Mon, 19 Sep 2016 11:09:40 +0200 Subject: mtd: nand: mxc: fix obiwan error in mxc_nand_v[12]_ooblayout_free() functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a894cf6c5a82 ("mtd: nand: mxc: switch to mtd_ooblayout_ops") introduced a regression accessing the OOB area from the mxc_nand driver due to an Obiwan error in the mxc_nand_v[12]_ooblayout_free() functions. They report a bogus oobregion { 64, 7 } which leads to errors accessing bogus data when reading the oob area. Prior to the commit the mtd-oobtest module could be run without any errors. With the offending commit, this test fails with results like: |Running mtd-oobtest | |================================================= |mtd_oobtest: MTD device: 5 |mtd_oobtest: MTD device size 524288, eraseblock size 131072, page size 2048, count of eraseblocks 4, pages per eraseblock 64, OOB size 64 |mtd_test: scanning for bad eraseblocks |mtd_test: scanned 4 eraseblocks, 0 are bad |mtd_oobtest: test 1 of 5 |mtd_oobtest: writing OOBs of whole device |mtd_oobtest: written up to eraseblock 0 |mtd_oobtest: written 4 eraseblocks |mtd_oobtest: verifying all eraseblocks |mtd_oobtest: error @addr[0x0:0x19] 0x9a -> 0x78 diff 0xe2 |mtd_oobtest: error @addr[0x0:0x1a] 0xcc -> 0x0 diff 0xcc |mtd_oobtest: error @addr[0x0:0x1b] 0xe0 -> 0x85 diff 0x65 |mtd_oobtest: error @addr[0x0:0x1c] 0x60 -> 0x62 diff 0x2 |mtd_oobtest: error @addr[0x0:0x1d] 0x69 -> 0x45 diff 0x2c |mtd_oobtest: error @addr[0x0:0x1e] 0xcd -> 0xa0 diff 0x6d |mtd_oobtest: error @addr[0x0:0x1f] 0xf2 -> 0x60 diff 0x92 |mtd_oobtest: error: verify failed at 0x0 [...] Signed-off-by: Lothar Waßmann Fixes: a894cf6c5a82 ("mtd: nand: mxc: switch to mtd_ooblayout_ops") Cc: Signed-off-by: Boris Brezillon --- drivers/mtd/nand/mxc_nand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c index 5173fadc9a4e..57cbe2b83849 100644 --- a/drivers/mtd/nand/mxc_nand.c +++ b/drivers/mtd/nand/mxc_nand.c @@ -943,7 +943,7 @@ static int mxc_v2_ooblayout_free(struct mtd_info *mtd, int section, struct nand_chip *nand_chip = mtd_to_nand(mtd); int stepsize = nand_chip->ecc.bytes == 9 ? 16 : 26; - if (section > nand_chip->ecc.steps) + if (section >= nand_chip->ecc.steps) return -ERANGE; if (!section) { -- cgit v1.2.3 From b244614a60ab7ce54c12a9cbe15cfbf8d79d0967 Mon Sep 17 00:00:00 2001 From: Marcin Nowakowski Date: Wed, 31 Aug 2016 12:33:23 +0200 Subject: MIPS: Avoid a BUG warning during prctl(PR_SET_FP_MODE, ...) cpu_has_fpu macro uses smp_processor_id() and is currently executed with preemption enabled, that triggers the warning at runtime. It is assumed throughout the kernel that if any CPU has an FPU, then all CPUs would have an FPU as well, so it is safe to perform the check with preemption enabled - change the code to use raw_ variant of the check to avoid the warning. Signed-off-by: Marcin Nowakowski Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # 4.0+ Patchwork: https://patchwork.linux-mips.org/patch/14125/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/process.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 7429ad09fbe3..d2d061520a23 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -605,14 +605,14 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value) return -EOPNOTSUPP; /* Avoid inadvertently triggering emulation */ - if ((value & PR_FP_MODE_FR) && cpu_has_fpu && - !(current_cpu_data.fpu_id & MIPS_FPIR_F64)) + if ((value & PR_FP_MODE_FR) && raw_cpu_has_fpu && + !(raw_current_cpu_data.fpu_id & MIPS_FPIR_F64)) return -EOPNOTSUPP; - if ((value & PR_FP_MODE_FRE) && cpu_has_fpu && !cpu_has_fre) + if ((value & PR_FP_MODE_FRE) && raw_cpu_has_fpu && !cpu_has_fre) return -EOPNOTSUPP; /* FR = 0 not supported in MIPS R6 */ - if (!(value & PR_FP_MODE_FR) && cpu_has_fpu && cpu_has_mips_r6) + if (!(value & PR_FP_MODE_FR) && raw_cpu_has_fpu && cpu_has_mips_r6) return -EOPNOTSUPP; /* Proceed with the mode switch */ -- cgit v1.2.3 From 3312eca519ba3b68b1966705e27b9dd9134f092c Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Tue, 23 Aug 2016 01:07:35 +0300 Subject: MIPS: Octeon: mangle-port: fix build failure with VDSO code Commit 1685ddbe35cd ("MIPS: Octeon: Changes to support readq()/writeq() usage.") added bitwise shift operations that assume that unsigned long is always 64-bits. This broke the build of VDSO code, as it gets compiled also in "faked" 32-bit mode. Althought the failing inline functions are never executed in 32-bit mode, they still need to pass the compilation. Fix by using 64-bit types explicitly. The patch fixes the following build failure: CC arch/mips/vdso/gettimeofday-o32.o In file included from los/git/devel/linux/arch/mips/include/asm/io.h:32:0, from los/git/devel/linux/arch/mips/include/asm/page.h:194, from los/git/devel/linux/arch/mips/vdso/vdso.h:26, from los/git/devel/linux/arch/mips/vdso/gettimeofday.c:11: los/git/devel/linux/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h: In function '__should_swizzle_bits': los/git/devel/linux/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h:19:40: error: right shift count >= width of type [-Werror=shift-count-overflow] unsigned long did = ((unsigned long)a >> 40) & 0xff; ^~ Fixes: 1685ddbe35cd ("MIPS: Octeon: Changes to support readq()/writeq() usage.") Signed-off-by: Aaro Koskinen Acked-by: David Daney Cc: David Daney Cc: Steven J. Hill Cc: Alex Smith Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14039/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/mach-cavium-octeon/mangle-port.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h b/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h index 0cf5ac1f7245..8ff2cbdf2c3e 100644 --- a/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h +++ b/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h @@ -15,8 +15,8 @@ static inline bool __should_swizzle_bits(volatile void *a) { extern const bool octeon_should_swizzle_table[]; + u64 did = ((u64)(uintptr_t)a >> 40) & 0xff; - unsigned long did = ((unsigned long)a >> 40) & 0xff; return octeon_should_swizzle_table[did]; } @@ -29,7 +29,7 @@ static inline bool __should_swizzle_bits(volatile void *a) #define __should_swizzle_bits(a) false -static inline bool __should_swizzle_addr(unsigned long p) +static inline bool __should_swizzle_addr(u64 p) { /* boot bus? */ return ((p >> 40) & 0xff) == 0; -- cgit v1.2.3 From 8074d7829595b7c86beba914c04ed5839cf4b3d6 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Tue, 23 Aug 2016 21:39:43 +0300 Subject: MIPS: Octeon: Fix platform bus probing Commit 44a7185c2ae6 ("of/platform: Add common method to populate default bus") added new arch_initcall of_platform_default_populate_init() that will override device_initcall octeon_publish_devices(). This broke many OCTEON boards as important devices are not getting probed anymore (e.g. on EdgeRouter Lite the USB mass storage/rootfs is missing). Fix by changing octeon_publish_devices() to arch_initcall. Fixes: 44a7185c2ae6 ("of/platform: Add common method to populate default bus") Signed-off-by: Aaro Koskinen Acked-by: Rob Herring Cc: David Daney Cc: Kefeng Wang Cc: linux-mips@linux-mips.org Cc: devicetree@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14041/ Signed-off-by: Ralf Baechle --- arch/mips/cavium-octeon/octeon-platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/cavium-octeon/octeon-platform.c b/arch/mips/cavium-octeon/octeon-platform.c index b31fbc9d6eae..37a932d9148c 100644 --- a/arch/mips/cavium-octeon/octeon-platform.c +++ b/arch/mips/cavium-octeon/octeon-platform.c @@ -1059,7 +1059,7 @@ static int __init octeon_publish_devices(void) { return of_platform_bus_probe(NULL, octeon_ids, NULL); } -device_initcall(octeon_publish_devices); +arch_initcall(octeon_publish_devices); MODULE_AUTHOR("David Daney "); MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 480b6837aa579991c6acc113bccf838e6a90843c Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Mon, 19 Sep 2016 20:19:00 +1000 Subject: nvdimm: fix PHYS_PFN/PFN_PHYS mixup nd_activate_region() iomaps any hint addresses required when activating a region. To prevent duplicate mappings it checks the PFN of the hint to be mapped against the PFNs of the already mapped hints. Unfortunately it doesn't convert the PFN back into a physical address before passing it to devm_nvdimm_ioremap(). Instead it applies PHYS_PFN a second time which ends about as well as you would imagine. Signed-off-by: Oliver O'Halloran Signed-off-by: Dan Williams --- drivers/nvdimm/region_devs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index e8d5ba7b29af..4eef88eb5144 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -57,7 +57,7 @@ static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm, ndrd->flush_wpq[dimm][j] & PAGE_MASK); else flush_page = devm_nvdimm_ioremap(dev, - PHYS_PFN(pfn), PAGE_SIZE); + PFN_PHYS(pfn), PAGE_SIZE); if (!flush_page) return -ENXIO; ndrd->flush_wpq[dimm][i] = flush_page -- cgit v1.2.3 From 08bccf43627e1972035c1fed5b4f570bdbde1b1e Mon Sep 17 00:00:00 2001 From: Marcin Nowakowski Date: Fri, 2 Sep 2016 10:13:21 +0200 Subject: MIPS: Select HAVE_REGS_AND_STACK_ACCESS_API Add lost Kconfig symbol. This should have been part of 40e084a506eb ('MIPS: Add uprobes support.'). Fixes: 40e084a506eb ('MIPS: Add uprobes support.') Signed-off-by: Marcin Nowakowski Signed-off-by: Ralf Baechle --- arch/mips/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 26388562e300..212ff92920d2 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -65,6 +65,7 @@ config MIPS select ARCH_CLOCKSOURCE_DATA select HANDLE_DOMAIN_IRQ select HAVE_EXIT_THREAD + select HAVE_REGS_AND_STACK_ACCESS_API menu "Machine selection" -- cgit v1.2.3 From 9d15ce9caaf9ecbec74e3be156a4a57451ed16c2 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 19 Sep 2016 13:49:48 -0700 Subject: tools/testing/nvdimm: fix allocation range for mock flush hint tables Commit 480b6837aa57 "nvdimm: fix PHYS_PFN/PFN_PHYS mixup" identified that we were passing an invalid address to devm_nvdimm_ioremap(). With that fixed it exposed a bug in the memory reservation size for flush hint tables. Since we map a full page we need to mock a full page of memory to back the flush hint table entries. Cc: Oliver O'Halloran Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/nfit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index dd48f421844c..f64c57bf1d4b 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -603,7 +603,8 @@ static int nfit_test0_alloc(struct nfit_test *t) return -ENOMEM; sprintf(t->label[i], "label%d", i); - t->flush[i] = test_alloc(t, sizeof(u64) * NUM_HINTS, + t->flush[i] = test_alloc(t, max(PAGE_SIZE, + sizeof(u64) * NUM_HINTS), &t->flush_dma[i]); if (!t->flush[i]) return -ENOMEM; -- cgit v1.2.3 From 164c80ed84a7669114869d9347c0f3ea7f56ea89 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Mon, 19 Sep 2016 15:12:41 -0600 Subject: blk-throttle: Extend slice if throttle group is not empty Right now, if slice is expired, we start a new slice. If a bio is queued, we keep on extending slice by throtle_slice interval (100ms). This worked well as long as pending timer function got executed with-in few milli seconds of scheduled time. But looks like with recent changes in timer subsystem, slack can be much longer depending on the expiry time of the scheduled timer. commit 500462a9de65 ("timers: Switch to a non-cascading wheel") This means, by the time timer function gets executed, it is possible the delay from scheduled time is more than 100ms. That means current code will conclude that existing slice has expired and a new one needs to be started. New slice will be 100ms by default and that will not be sufficient to meet rate requirement of group given the bio size and bio will not be dispatched and we will start a new timer function to wait. And when that timer expires, same process will repeat and we will wait again and this can easily be an infinite loop. Solve this issue by starting a new slice only if throttle gropup is empty. If it is not empty, that means there should be an active slice going on. Ideally it should not be expired but given the slack, it is possible that it has expired. Reported-by: Hou Tao Signed-off-by: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index f1aba26f4719..a3ea8260c94c 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -780,9 +780,11 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio, /* * If previous slice expired, start a new one otherwise renew/extend * existing slice to make sure it is at least throtl_slice interval - * long since now. + * long since now. New slice is started only for empty throttle group. + * If there is queued bio, that means there should be an active + * slice and it should be extended instead. */ - if (throtl_slice_used(tg, rw)) + if (throtl_slice_used(tg, rw) && !(tg->service_queue.nr_queued[rw])) throtl_start_new_slice(tg, rw); else { if (time_before(tg->slice_end[rw], jiffies + throtl_slice)) -- cgit v1.2.3 From 727653d6ce7103b245eb8041f55dd5885f4c3289 Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 19 Sep 2016 18:29:15 +0100 Subject: irqchip/gicv3: Silence noisy DEBUG_PER_CPU_MAPS warning gic_raise_softirq() walks the list of cpus using for_each_cpu(), it calls gic_compute_target_list() which advances the iterator by the number of CPUs in the cluster. If gic_compute_target_list() reaches the last CPU it leaves the iterator pointing at the last CPU. This means the next time round the for_each_cpu() loop cpumask_next() will be called with an invalid CPU. This triggers a warning when built with CONFIG_DEBUG_PER_CPU_MAPS: [ 3.077738] GICv3: CPU1: found redistributor 1 region 0:0x000000002f120000 [ 3.077943] CPU1: Booted secondary processor [410fd0f0] [ 3.078542] ------------[ cut here ]------------ [ 3.078746] WARNING: CPU: 1 PID: 0 at ../include/linux/cpumask.h:121 gic_raise_softirq+0x12c/0x170 [ 3.078812] Modules linked in: [ 3.078869] [ 3.078930] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.8.0-rc5+ #5188 [ 3.078994] Hardware name: Foundation-v8A (DT) [ 3.079059] task: ffff80087a1a0080 task.stack: ffff80087a19c000 [ 3.079145] PC is at gic_raise_softirq+0x12c/0x170 [ 3.079226] LR is at gic_raise_softirq+0xa4/0x170 [ 3.079296] pc : [] lr : [] pstate: 200001c9 [ 3.081139] Call trace: [ 3.081202] Exception stack(0xffff80087a19fbe0 to 0xffff80087a19fd10) [ 3.082269] [] gic_raise_softirq+0x12c/0x170 [ 3.082354] [] smp_send_reschedule+0x34/0x40 [ 3.082433] [] resched_curr+0x50/0x88 [ 3.082512] [] check_preempt_curr+0x60/0xd0 [ 3.082593] [] ttwu_do_wakeup+0x20/0xe8 [ 3.082672] [] ttwu_do_activate+0x90/0xc0 [ 3.082753] [] try_to_wake_up+0x224/0x370 [ 3.082836] [] default_wake_function+0x10/0x18 [ 3.082920] [] __wake_up_common+0x5c/0xa0 [ 3.083003] [] __wake_up_locked+0x14/0x20 [ 3.083086] [] complete+0x40/0x60 [ 3.083168] [] secondary_start_kernel+0x15c/0x1d0 [ 3.083240] [<00000000808911a4>] 0x808911a4 [ 3.113401] Detected PIPT I-cache on CPU2 Avoid updating the iterator if the next call to cpumask_next() would cause the for_each_cpu() loop to exit. There is no change to gic_raise_softirq()'s behaviour, (cpumask_next()s eventual call to _find_next_bit() will return early as start >= nbits), this patch just silences the warning. Fixes: 021f653791ad ("irqchip: gic-v3: Initial support for GICv3") Signed-off-by: James Morse Acked-by: Marc Zyngier Cc: linux-arm-kernel@lists.infradead.org Cc: Jason Cooper Link: http://lkml.kernel.org/r/1474306155-3303-1-git-send-email-james.morse@arm.com Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-gic-v3.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index ede5672ab34d..da6c0ba61d4f 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -548,7 +548,7 @@ static int gic_starting_cpu(unsigned int cpu) static u16 gic_compute_target_list(int *base_cpu, const struct cpumask *mask, unsigned long cluster_id) { - int cpu = *base_cpu; + int next_cpu, cpu = *base_cpu; unsigned long mpidr = cpu_logical_map(cpu); u16 tlist = 0; @@ -562,9 +562,10 @@ static u16 gic_compute_target_list(int *base_cpu, const struct cpumask *mask, tlist |= 1 << (mpidr & 0xf); - cpu = cpumask_next(cpu, mask); - if (cpu >= nr_cpu_ids) + next_cpu = cpumask_next(cpu, mask); + if (next_cpu >= nr_cpu_ids) goto out; + cpu = next_cpu; mpidr = cpu_logical_map(cpu); -- cgit v1.2.3 From e535ec0899d1fe52ec3a84c9bc03457ac67ad6f7 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Tue, 20 Sep 2016 14:26:21 +0100 Subject: x86/mm/pat: Prevent hang during boot when mapping pages There's a mixture of signed 32-bit and unsigned 32-bit and 64-bit data types used for keeping track of how many pages have been mapped. This leads to hangs during boot when mapping large numbers of pages (multiple terabytes, as reported by Waiman) because those values are interpreted as being negative. commit 742563777e8d ("x86/mm/pat: Avoid truncation when converting cpa->numpages to address") fixed one of those bugs, but there is another lurking in __change_page_attr_set_clr(). Additionally, the return value type for the populate_*() functions can return negative values when a large number of pages have been mapped, triggering the error paths even though no error occurred. Consistently use 64-bit types on 64-bit platforms when counting pages. Even in the signed case this gives us room for regions 8PiB (pebibytes) in size whilst still allowing the usual negative value error checking idiom. Reported-by: Waiman Long Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Linus Torvalds CC: Theodore Ts'o Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Scott J Norton Cc: Douglas Hatch Signed-off-by: Matt Fleming --- arch/x86/mm/pageattr.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 849dc09fa4f0..e3353c97d086 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -917,11 +917,11 @@ static void populate_pte(struct cpa_data *cpa, } } -static int populate_pmd(struct cpa_data *cpa, - unsigned long start, unsigned long end, - unsigned num_pages, pud_t *pud, pgprot_t pgprot) +static long populate_pmd(struct cpa_data *cpa, + unsigned long start, unsigned long end, + unsigned num_pages, pud_t *pud, pgprot_t pgprot) { - unsigned int cur_pages = 0; + long cur_pages = 0; pmd_t *pmd; pgprot_t pmd_pgprot; @@ -991,12 +991,12 @@ static int populate_pmd(struct cpa_data *cpa, return num_pages; } -static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, - pgprot_t pgprot) +static long populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, + pgprot_t pgprot) { pud_t *pud; unsigned long end; - int cur_pages = 0; + long cur_pages = 0; pgprot_t pud_pgprot; end = start + (cpa->numpages << PAGE_SHIFT); @@ -1052,7 +1052,7 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, /* Map trailing leftover */ if (start < end) { - int tmp; + long tmp; pud = pud_offset(pgd, start); if (pud_none(*pud)) @@ -1078,7 +1078,7 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) pgprot_t pgprot = __pgprot(_KERNPG_TABLE); pud_t *pud = NULL; /* shut up gcc */ pgd_t *pgd_entry; - int ret; + long ret; pgd_entry = cpa->pgd + pgd_index(addr); @@ -1327,7 +1327,8 @@ static int cpa_process_alias(struct cpa_data *cpa) static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) { - int ret, numpages = cpa->numpages; + unsigned long numpages = cpa->numpages; + int ret; while (numpages) { /* -- cgit v1.2.3 From 1297667083d5442aafe3e337b9413bf02b114edb Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 19 Sep 2016 13:09:09 +0100 Subject: x86/efi: Only map RAM into EFI page tables if in mixed-mode Waiman reported that booting with CONFIG_EFI_MIXED enabled on his multi-terabyte HP machine results in boot crashes, because the EFI region mapping functions loop forever while trying to map those regions describing RAM. While this patch doesn't fix the underlying hang, there's really no reason to map EFI_CONVENTIONAL_MEMORY regions into the EFI page tables when mixed-mode is not in use at runtime. Reported-by: Waiman Long Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Linus Torvalds CC: Theodore Ts'o Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Scott J Norton Cc: Douglas Hatch Cc: # v4.6+ Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 677e29e29473..8dd3784eb075 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -245,7 +245,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) * text and allocate a new stack because we can't rely on the * stack pointer being < 4GB. */ - if (!IS_ENABLED(CONFIG_EFI_MIXED)) + if (!IS_ENABLED(CONFIG_EFI_MIXED) || efi_is_native()) return 0; /* -- cgit v1.2.3 From f1e1c9e5e357c05253affb13be29285c5cb56bf0 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 20 Sep 2016 15:12:21 +0200 Subject: perf/x86/intel/bts: Make sure debug store is valid Since commit 4d4c47412464 ("perf/x86/intel/bts: Fix BTS PMI detection") my box goes boom on boot: | .... node #0, CPUs: #1 #2 #3 #4 #5 #6 #7 | BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 | IP: [] intel_bts_interrupt+0x43/0x130 | Call Trace: | d [] intel_pmu_handle_irq+0x51/0x4b0 | [] perf_event_nmi_handler+0x27/0x40 This happens because the code introduced in this commit dereferences the debug store pointer unconditionally. The debug store is not guaranteed to be available, so a NULL pointer check as on other places is required. Fixes: 4d4c47412464 ("perf/x86/intel/bts: Fix BTS PMI detection") Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: vince@deater.net Cc: eranian@google.com Link: http://lkml.kernel.org/r/20160920131220.xg5pbdjtznszuyzb@breakpoint.cc Signed-off-by: Thomas Gleixner --- arch/x86/events/intel/bts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index bdcd6510992c..6ff66efa0feb 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -455,7 +455,7 @@ int intel_bts_interrupt(void) * The only surefire way of knowing if this NMI is ours is by checking * the write ptr against the PMI threshold. */ - if (ds->bts_index >= ds->bts_interrupt_threshold) + if (ds && (ds->bts_index >= ds->bts_interrupt_threshold)) handled = 1; /* -- cgit v1.2.3 From e875bd66dfb68f4e898e9a43ef42858c504a7f23 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 13 Sep 2016 17:53:35 +0100 Subject: irqchip/mips-gic: Fix local interrupts Since the device hierarchy domain was added by commit c98c1822ee13 ("irqchip/mips-gic: Add device hierarchy domain"), GIC local interrupts have been broken. Users attempting to setup a per-cpu local IRQ, for example the GIC timer clock events code in drivers/clocksource/mips-gic-timer.c, the setup_percpu_irq function would refuse with -EINVAL because the GIC irqchip driver never called irq_set_percpu_devid so the IRQ_PER_CPU_DEVID flag was never set for the IRQ. This happens because irq_set_percpu_devid was being called from the gic_irq_domain_map function which is no longer called. Doing only that runs into further problems because gic_dev_domain_alloc set the struct irq_chip for all interrupts, local or shared, to gic_level_irq_controller despite that only being suitable for shared interrupts. The typical outcome of this is that gic_level_irq_controller callback functions are called for local interrupts, and then hwirq number calculations overflow & the driver ends up attempting to access some invalid register with an address calculated from an invalid hwirq number. Best case scenario is that this then leads to a bus error. This is fixed by abstracting the setup of the hwirq & chip to a new function gic_setup_dev_chip which is used by both the root GIC IRQ domain & the device domain. Finally, decoding local interrupts failed because gic_dev_domain_alloc only called irq_domain_alloc_irqs_parent for shared interrupts. Local ones were therefore never associated with hwirqs in the root GIC IRQ domain and the virq in gic_handle_local_int would always be 0. This is fixed by calling irq_domain_alloc_irqs_parent unconditionally & having gic_irq_domain_alloc handle both local & shared interrupts, which is easy due to the aforementioned abstraction of chip setup into gic_setup_dev_chip. This fixes use of the MIPS GIC timer for clock events, which has been broken since c98c1822ee13 ("irqchip/mips-gic: Add device hierarchy domain") but hadn't been noticed due to a silent fallback to the MIPS coprocessor 0 count/compare clock events device. Fixes: c98c1822ee13 ("irqchip/mips-gic: Add device hierarchy domain") Signed-off-by: Paul Burton Cc: linux-mips@linux-mips.org Cc: Jason Cooper Cc: Qais Yousef Cc: stable@vger.kernel.org Cc: Marc Zyngier Link: http://lkml.kernel.org/r/20160913165335.31389-1-paul.burton@imgtec.com Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-mips-gic.c | 105 ++++++++++++++++++++--------------------- 1 file changed, 50 insertions(+), 55 deletions(-) diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index 83f498393a7f..6185696405d5 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -638,27 +638,6 @@ static int gic_local_irq_domain_map(struct irq_domain *d, unsigned int virq, if (!gic_local_irq_is_routable(intr)) return -EPERM; - /* - * HACK: These are all really percpu interrupts, but the rest - * of the MIPS kernel code does not use the percpu IRQ API for - * the CP0 timer and performance counter interrupts. - */ - switch (intr) { - case GIC_LOCAL_INT_TIMER: - case GIC_LOCAL_INT_PERFCTR: - case GIC_LOCAL_INT_FDC: - irq_set_chip_and_handler(virq, - &gic_all_vpes_local_irq_controller, - handle_percpu_irq); - break; - default: - irq_set_chip_and_handler(virq, - &gic_local_irq_controller, - handle_percpu_devid_irq); - irq_set_percpu_devid(virq); - break; - } - spin_lock_irqsave(&gic_lock, flags); for (i = 0; i < gic_vpes; i++) { u32 val = GIC_MAP_TO_PIN_MSK | gic_cpu_pin; @@ -724,16 +703,42 @@ static int gic_shared_irq_domain_map(struct irq_domain *d, unsigned int virq, return 0; } -static int gic_irq_domain_map(struct irq_domain *d, unsigned int virq, - irq_hw_number_t hw) +static int gic_setup_dev_chip(struct irq_domain *d, unsigned int virq, + unsigned int hwirq) { - if (GIC_HWIRQ_TO_LOCAL(hw) < GIC_NUM_LOCAL_INTRS) - return gic_local_irq_domain_map(d, virq, hw); + struct irq_chip *chip; + int err; + + if (hwirq >= GIC_SHARED_HWIRQ_BASE) { + err = irq_domain_set_hwirq_and_chip(d, virq, hwirq, + &gic_level_irq_controller, + NULL); + } else { + switch (GIC_HWIRQ_TO_LOCAL(hwirq)) { + case GIC_LOCAL_INT_TIMER: + case GIC_LOCAL_INT_PERFCTR: + case GIC_LOCAL_INT_FDC: + /* + * HACK: These are all really percpu interrupts, but + * the rest of the MIPS kernel code does not use the + * percpu IRQ API for them. + */ + chip = &gic_all_vpes_local_irq_controller; + irq_set_handler(virq, handle_percpu_irq); + break; + + default: + chip = &gic_local_irq_controller; + irq_set_handler(virq, handle_percpu_devid_irq); + irq_set_percpu_devid(virq); + break; + } - irq_set_chip_and_handler(virq, &gic_level_irq_controller, - handle_level_irq); + err = irq_domain_set_hwirq_and_chip(d, virq, hwirq, + chip, NULL); + } - return gic_shared_irq_domain_map(d, virq, hw, 0); + return err; } static int gic_irq_domain_alloc(struct irq_domain *d, unsigned int virq, @@ -744,15 +749,12 @@ static int gic_irq_domain_alloc(struct irq_domain *d, unsigned int virq, int cpu, ret, i; if (spec->type == GIC_DEVICE) { - /* verify that it doesn't conflict with an IPI irq */ - if (test_bit(spec->hwirq, ipi_resrv)) + /* verify that shared irqs don't conflict with an IPI irq */ + if ((spec->hwirq >= GIC_SHARED_HWIRQ_BASE) && + test_bit(GIC_HWIRQ_TO_SHARED(spec->hwirq), ipi_resrv)) return -EBUSY; - hwirq = GIC_SHARED_TO_HWIRQ(spec->hwirq); - - return irq_domain_set_hwirq_and_chip(d, virq, hwirq, - &gic_level_irq_controller, - NULL); + return gic_setup_dev_chip(d, virq, spec->hwirq); } else { base_hwirq = find_first_bit(ipi_resrv, gic_shared_intrs); if (base_hwirq == gic_shared_intrs) { @@ -821,7 +823,6 @@ int gic_irq_domain_match(struct irq_domain *d, struct device_node *node, } static const struct irq_domain_ops gic_irq_domain_ops = { - .map = gic_irq_domain_map, .alloc = gic_irq_domain_alloc, .free = gic_irq_domain_free, .match = gic_irq_domain_match, @@ -852,29 +853,20 @@ static int gic_dev_domain_alloc(struct irq_domain *d, unsigned int virq, struct irq_fwspec *fwspec = arg; struct gic_irq_spec spec = { .type = GIC_DEVICE, - .hwirq = fwspec->param[1], }; int i, ret; - bool is_shared = fwspec->param[0] == GIC_SHARED; - if (is_shared) { - ret = irq_domain_alloc_irqs_parent(d, virq, nr_irqs, &spec); - if (ret) - return ret; - } - - for (i = 0; i < nr_irqs; i++) { - irq_hw_number_t hwirq; + if (fwspec->param[0] == GIC_SHARED) + spec.hwirq = GIC_SHARED_TO_HWIRQ(fwspec->param[1]); + else + spec.hwirq = GIC_LOCAL_TO_HWIRQ(fwspec->param[1]); - if (is_shared) - hwirq = GIC_SHARED_TO_HWIRQ(spec.hwirq + i); - else - hwirq = GIC_LOCAL_TO_HWIRQ(spec.hwirq + i); + ret = irq_domain_alloc_irqs_parent(d, virq, nr_irqs, &spec); + if (ret) + return ret; - ret = irq_domain_set_hwirq_and_chip(d, virq + i, - hwirq, - &gic_level_irq_controller, - NULL); + for (i = 0; i < nr_irqs; i++) { + ret = gic_setup_dev_chip(d, virq + i, spec.hwirq + i); if (ret) goto error; } @@ -896,7 +888,10 @@ void gic_dev_domain_free(struct irq_domain *d, unsigned int virq, static void gic_dev_domain_activate(struct irq_domain *domain, struct irq_data *d) { - gic_shared_irq_domain_map(domain, d->irq, d->hwirq, 0); + if (GIC_HWIRQ_TO_LOCAL(d->hwirq) < GIC_NUM_LOCAL_INTRS) + gic_local_irq_domain_map(domain, d->irq, d->hwirq); + else + gic_shared_irq_domain_map(domain, d->irq, d->hwirq, 0); } static struct irq_domain_ops gic_dev_domain_ops = { -- cgit v1.2.3 From b79331a5eb9f96e4dfd216974581168ec4c8a4d4 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Wed, 14 Sep 2016 16:37:17 +1000 Subject: powerpc/powernv/pci: Fix m64 checks for SR-IOV and window alignment Commit 5958d19a143e checks for prefetchable m64 BARs by comparing the addresses instead of using resource flags. This broke SR-IOV as the m64 check in pnv_pci_ioda_fixup_iov_resources() fails. The condition in pnv_pci_window_alignment() also changed to checking only IORESOURCE_MEM_64 instead of both IORESOURCE_MEM_64 and IORESOURCE_PREFETCH. Revert these cases to the previous behaviour, adding a new helper function to do so. This is named pnv_pci_is_m64_flags() to make it clear this function is only looking at resource flags and should not be relied on for non-SRIOV resources. Fixes: 5958d19a143e ("Fix incorrect PE reservation attempt on some 64-bit BARs") Reported-by: Alexey Kardashevskiy Signed-off-by: Russell Currey Tested-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index bc0c91e84ca0..38a5c657ffd3 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -124,6 +124,13 @@ static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r) r->start < (phb->ioda.m64_base + phb->ioda.m64_size)); } +static inline bool pnv_pci_is_m64_flags(unsigned long resource_flags) +{ + unsigned long flags = (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH); + + return (resource_flags & flags) == flags; +} + static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no) { phb->ioda.pe_array[pe_no].phb = phb; @@ -2871,7 +2878,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) res = &pdev->resource[i + PCI_IOV_RESOURCES]; if (!res->flags || res->parent) continue; - if (!pnv_pci_is_m64(phb, res)) { + if (!pnv_pci_is_m64_flags(res->flags)) { dev_warn(&pdev->dev, "Don't support SR-IOV with" " non M64 VF BAR%d: %pR. \n", i, res); @@ -3096,7 +3103,7 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus, * alignment for any 64-bit resource, PCIe doesn't care and * bridges only do 64-bit prefetchable anyway. */ - if (phb->ioda.m64_segsize && (type & IORESOURCE_MEM_64)) + if (phb->ioda.m64_segsize && pnv_pci_is_m64_flags(type)) return phb->ioda.m64_segsize; if (type & IORESOURCE_MEM) return phb->ioda.m32_segsize; -- cgit v1.2.3 From 554af0c396380baf416f54c439b99b495180b2f4 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 7 Sep 2016 13:37:01 +0100 Subject: MIPS: vDSO: Fix Malta EVA mapping to vDSO page structs The page structures associated with the vDSO pages in the kernel image are calculated using virt_to_page(), which uses __pa() under the hood to find the pfn associated with the virtual address. The vDSO data pointers however point to kernel symbols, so __pa_symbol() should really be used instead. Since there is no equivalent to virt_to_page() which uses __pa_symbol(), fix init_vdso_image() to work directly with pfns, calculated with __phys_to_pfn(__pa_symbol(...)). This issue broke the Malta Enhanced Virtual Addressing (EVA) configuration which has a non-default implementation of __pa_symbol(). This is because it uses a physical alias so that the kernel executes from KSeg0 (VA 0x80000000 -> PA 0x00000000), while RAM is provided to the kernel in the KUSeg range (VA 0x00000000 -> PA 0x80000000) which uses the same underlying RAM. Since there are no page structures associated with the low physical address region, some arbitrary kernel memory would be interpreted as a page structure for the vDSO pages and badness ensues. Fixes: ebb5e78cc634 ("MIPS: Initial implementation of a VDSO") Signed-off-by: James Hogan Cc: Leonid Yegoshin Cc: linux-mips@linux-mips.org Cc: # 4.4.x- Patchwork: https://patchwork.linux-mips.org/patch/14229/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/vdso.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c index 9abe447a4b48..f9dbfb14af33 100644 --- a/arch/mips/kernel/vdso.c +++ b/arch/mips/kernel/vdso.c @@ -39,16 +39,16 @@ static struct vm_special_mapping vdso_vvar_mapping = { static void __init init_vdso_image(struct mips_vdso_image *image) { unsigned long num_pages, i; + unsigned long data_pfn; BUG_ON(!PAGE_ALIGNED(image->data)); BUG_ON(!PAGE_ALIGNED(image->size)); num_pages = image->size / PAGE_SIZE; - for (i = 0; i < num_pages; i++) { - image->mapping.pages[i] = - virt_to_page(image->data + (i * PAGE_SIZE)); - } + data_pfn = __phys_to_pfn(__pa_symbol(image->data)); + for (i = 0; i < num_pages; i++) + image->mapping.pages[i] = pfn_to_page(data_pfn + i); } static int __init init_vdso(void) -- cgit v1.2.3 From 371a015344b6e270e7e3632107d9554ec6d27a6b Mon Sep 17 00:00:00 2001 From: "Yadi.hu" Date: Sun, 18 Sep 2016 18:52:31 +0800 Subject: i2c-eg20t: fix race between i2c init and interrupt enable the eg20t driver call request_irq() function before the pch_base_address, base address of i2c controller's register, is assigned an effective value. there is one possible scenario that an interrupt which isn't inside eg20t arrives immediately after request_irq() is executed when i2c controller shares an interrupt number with others. since the interrupt handler pch_i2c_handler() has already active as shared action, it will be called and read its own register to determine if this interrupt is from itself. At that moment, since base address of i2c registers is not remapped in kernel space yet,so the INT handler will access an illegal address and then a error occurs. Signed-off-by: Yadi.hu Signed-off-by: Wolfram Sang Cc: stable@kernel.org --- drivers/i2c/busses/i2c-eg20t.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/i2c/busses/i2c-eg20t.c b/drivers/i2c/busses/i2c-eg20t.c index 137125b5eae7..5ce71ce7b6c4 100644 --- a/drivers/i2c/busses/i2c-eg20t.c +++ b/drivers/i2c/busses/i2c-eg20t.c @@ -773,13 +773,6 @@ static int pch_i2c_probe(struct pci_dev *pdev, /* Set the number of I2C channel instance */ adap_info->ch_num = id->driver_data; - ret = request_irq(pdev->irq, pch_i2c_handler, IRQF_SHARED, - KBUILD_MODNAME, adap_info); - if (ret) { - pch_pci_err(pdev, "request_irq FAILED\n"); - goto err_request_irq; - } - for (i = 0; i < adap_info->ch_num; i++) { pch_adap = &adap_info->pch_data[i].pch_adapter; adap_info->pch_i2c_suspended = false; @@ -797,6 +790,17 @@ static int pch_i2c_probe(struct pci_dev *pdev, pch_adap->dev.of_node = pdev->dev.of_node; pch_adap->dev.parent = &pdev->dev; + } + + ret = request_irq(pdev->irq, pch_i2c_handler, IRQF_SHARED, + KBUILD_MODNAME, adap_info); + if (ret) { + pch_pci_err(pdev, "request_irq FAILED\n"); + goto err_request_irq; + } + + for (i = 0; i < adap_info->ch_num; i++) { + pch_adap = &adap_info->pch_data[i].pch_adapter; pch_i2c_init(&adap_info->pch_data[i]); -- cgit v1.2.3 From ecfb6d8a041cc2ca80bc69ffc20c00067d190df5 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 21 Sep 2016 09:22:33 -0700 Subject: libnvdimm: fix devm_nvdimm_memremap() error path The internal alloc_nvdimm_map() helper might fail, particularly if the memory region is already busy. Report request_mem_region() failures and check for the failure. Reported-by: Ryan Chen Signed-off-by: Dan Williams --- drivers/nvdimm/core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 715583f69d28..4d7bbd2df5c0 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -99,8 +99,11 @@ static struct nvdimm_map *alloc_nvdimm_map(struct device *dev, nvdimm_map->size = size; kref_init(&nvdimm_map->kref); - if (!request_mem_region(offset, size, dev_name(&nvdimm_bus->dev))) + if (!request_mem_region(offset, size, dev_name(&nvdimm_bus->dev))) { + dev_err(&nvdimm_bus->dev, "failed to request %pa + %zd for %s\n", + &offset, size, dev_name(dev)); goto err_request_region; + } if (flags) nvdimm_map->mem = memremap(offset, size, flags); @@ -171,6 +174,9 @@ void *devm_nvdimm_memremap(struct device *dev, resource_size_t offset, kref_get(&nvdimm_map->kref); nvdimm_bus_unlock(dev); + if (!nvdimm_map) + return NULL; + if (devm_add_action_or_reset(dev, nvdimm_map_put, nvdimm_map)) return NULL; -- cgit v1.2.3 From 11294d63ac915230a36b0603c62134ef7b173d0a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 21 Sep 2016 09:21:26 -0700 Subject: nfit: fail DSMs that return non-zero status by default For the DSMs where the kernel knows the format of the output buffer and originates those DSMs from within the kernel, return -EIO for any non-zero status. If the BIOS is indicating a status that we do not know how to handle, fail the DSM. Cc: Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 48 ++++++++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 80cc7c089a15..e1d5ea6d5e40 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -94,54 +94,50 @@ static struct acpi_device *to_acpi_dev(struct acpi_nfit_desc *acpi_desc) return to_acpi_device(acpi_desc->dev); } -static int xlat_status(void *buf, unsigned int cmd) +static int xlat_status(void *buf, unsigned int cmd, u32 status) { struct nd_cmd_clear_error *clear_err; struct nd_cmd_ars_status *ars_status; - struct nd_cmd_ars_start *ars_start; - struct nd_cmd_ars_cap *ars_cap; u16 flags; switch (cmd) { case ND_CMD_ARS_CAP: - ars_cap = buf; - if ((ars_cap->status & 0xffff) == NFIT_ARS_CAP_NONE) + if ((status & 0xffff) == NFIT_ARS_CAP_NONE) return -ENOTTY; /* Command failed */ - if (ars_cap->status & 0xffff) + if (status & 0xffff) return -EIO; /* No supported scan types for this range */ flags = ND_ARS_PERSISTENT | ND_ARS_VOLATILE; - if ((ars_cap->status >> 16 & flags) == 0) + if ((status >> 16 & flags) == 0) return -ENOTTY; break; case ND_CMD_ARS_START: - ars_start = buf; /* ARS is in progress */ - if ((ars_start->status & 0xffff) == NFIT_ARS_START_BUSY) + if ((status & 0xffff) == NFIT_ARS_START_BUSY) return -EBUSY; /* Command failed */ - if (ars_start->status & 0xffff) + if (status & 0xffff) return -EIO; break; case ND_CMD_ARS_STATUS: ars_status = buf; /* Command failed */ - if (ars_status->status & 0xffff) + if (status & 0xffff) return -EIO; /* Check extended status (Upper two bytes) */ - if (ars_status->status == NFIT_ARS_STATUS_DONE) + if (status == NFIT_ARS_STATUS_DONE) return 0; /* ARS is in progress */ - if (ars_status->status == NFIT_ARS_STATUS_BUSY) + if (status == NFIT_ARS_STATUS_BUSY) return -EBUSY; /* No ARS performed for the current boot */ - if (ars_status->status == NFIT_ARS_STATUS_NONE) + if (status == NFIT_ARS_STATUS_NONE) return -EAGAIN; /* @@ -149,19 +145,19 @@ static int xlat_status(void *buf, unsigned int cmd) * agent wants the scan to stop. If we didn't overflow * then just continue with the returned results. */ - if (ars_status->status == NFIT_ARS_STATUS_INTR) { + if (status == NFIT_ARS_STATUS_INTR) { if (ars_status->flags & NFIT_ARS_F_OVERFLOW) return -ENOSPC; return 0; } /* Unknown status */ - if (ars_status->status >> 16) + if (status >> 16) return -EIO; break; case ND_CMD_CLEAR_ERROR: clear_err = buf; - if (clear_err->status & 0xffff) + if (status & 0xffff) return -EIO; if (!clear_err->cleared) return -EIO; @@ -172,6 +168,9 @@ static int xlat_status(void *buf, unsigned int cmd) break; } + /* all other non-zero status results in an error */ + if (status) + return -EIO; return 0; } @@ -186,10 +185,10 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nd_cmd_pkg *call_pkg = NULL; const char *cmd_name, *dimm_name; unsigned long cmd_mask, dsm_mask; + u32 offset, fw_status = 0; acpi_handle handle; unsigned int func; const u8 *uuid; - u32 offset; int rc, i; func = cmd; @@ -317,6 +316,15 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, out_obj->buffer.pointer + offset, out_size); offset += out_size; } + + /* + * Set fw_status for all the commands with a known format to be + * later interpreted by xlat_status(). + */ + if (i >= 1 && ((cmd >= ND_CMD_ARS_CAP && cmd <= ND_CMD_CLEAR_ERROR) + || (cmd >= ND_CMD_SMART && cmd <= ND_CMD_VENDOR))) + fw_status = *(u32 *) out_obj->buffer.pointer; + if (offset + in_buf.buffer.length < buf_len) { if (i >= 1) { /* @@ -325,7 +333,7 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, */ rc = buf_len - offset - in_buf.buffer.length; if (cmd_rc) - *cmd_rc = xlat_status(buf, cmd); + *cmd_rc = xlat_status(buf, cmd, fw_status); } else { dev_err(dev, "%s:%s underrun cmd: %s buf_len: %d out_len: %d\n", __func__, dimm_name, cmd_name, buf_len, @@ -335,7 +343,7 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, } else { rc = 0; if (cmd_rc) - *cmd_rc = xlat_status(buf, cmd); + *cmd_rc = xlat_status(buf, cmd, fw_status); } out: -- cgit v1.2.3 From 463e8f845cbf1c01e4cc8aeef1703212991d8e1e Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Wed, 14 Sep 2016 15:24:12 +0200 Subject: i2c: mux: pca954x: retry updating the mux selection on failure The cached value of the last selected channel prevents retries on the next call, even on failure to update the selected channel. Fix that. Signed-off-by: Peter Rosin Signed-off-by: Wolfram Sang Cc: stable@kernel.org --- drivers/i2c/muxes/i2c-mux-pca954x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/muxes/i2c-mux-pca954x.c b/drivers/i2c/muxes/i2c-mux-pca954x.c index 528e755c468f..3278ebf1cc5c 100644 --- a/drivers/i2c/muxes/i2c-mux-pca954x.c +++ b/drivers/i2c/muxes/i2c-mux-pca954x.c @@ -164,7 +164,7 @@ static int pca954x_select_chan(struct i2c_mux_core *muxc, u32 chan) /* Only select the channel if its different from the last channel */ if (data->last_chan != regval) { ret = pca954x_reg_write(muxc->parent, client, regval); - data->last_chan = regval; + data->last_chan = ret ? 0 : regval; } return ret; -- cgit v1.2.3 From 1e5ec2e709bd8c5588fdbdda909945e4e2be8d23 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 15 Sep 2016 14:57:48 -0400 Subject: Btrfs: handle quota reserve failure properly btrfs/022 was spitting a warning for the case that we exceed the quota. If we fail to make our quota reservation we need to clean up our data space reservation. Thanks, Signed-off-by: Josef Bacik Tested-by: Jeff Mahoney Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d09cf7aa083b..db76cc18c562 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4271,13 +4271,10 @@ int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len) if (ret < 0) return ret; - /* - * Use new btrfs_qgroup_reserve_data to reserve precious data space - * - * TODO: Find a good method to avoid reserve data space for NOCOW - * range, but don't impact performance on quota disable case. - */ + /* Use new btrfs_qgroup_reserve_data to reserve precious data space. */ ret = btrfs_qgroup_reserve_data(inode, start, len); + if (ret) + btrfs_free_reserved_data_space_noquota(inode, start, len); return ret; } -- cgit v1.2.3 From 325c50e3cebb9208009083e841550f98a863bfa0 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Wed, 21 Sep 2016 08:31:29 -0400 Subject: btrfs: ensure that file descriptor used with subvol ioctls is a dir If the subvol/snapshot create/destroy ioctls are passed a regular file with execute permissions set, we'll eventually Oops while trying to do inode->i_op->lookup via lookup_one_len. This patch ensures that the file descriptor refers to a directory. Fixes: cb8e70901d (Btrfs: Fix subvolume creation locking rules) Fixes: 76dda93c6a (Btrfs: add snapshot/subvolume destroy ioctl) Cc: #v2.6.29+ Signed-off-by: Jeff Mahoney Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index b2a2da5893af..7fd939bfbd99 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1634,6 +1634,9 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, int namelen; int ret = 0; + if (!S_ISDIR(file_inode(file)->i_mode)) + return -ENOTDIR; + ret = mnt_want_write_file(file); if (ret) goto out; @@ -1691,6 +1694,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, struct btrfs_ioctl_vol_args *vol_args; int ret; + if (!S_ISDIR(file_inode(file)->i_mode)) + return -ENOTDIR; + vol_args = memdup_user(arg, sizeof(*vol_args)); if (IS_ERR(vol_args)) return PTR_ERR(vol_args); @@ -1714,6 +1720,9 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, bool readonly = false; struct btrfs_qgroup_inherit *inherit = NULL; + if (!S_ISDIR(file_inode(file)->i_mode)) + return -ENOTDIR; + vol_args = memdup_user(arg, sizeof(*vol_args)); if (IS_ERR(vol_args)) return PTR_ERR(vol_args); @@ -2357,6 +2366,9 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, int ret; int err = 0; + if (!S_ISDIR(dir->i_mode)) + return -ENOTDIR; + vol_args = memdup_user(arg, sizeof(*vol_args)); if (IS_ERR(vol_args)) return PTR_ERR(vol_args); -- cgit v1.2.3 From 666ca3d8f19082f40745d75f3cc7cc0200ee87e3 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Fri, 9 Sep 2016 22:34:02 -0400 Subject: drm/nouveau/fifo/nv04: avoid ramht race against cookie insertion Signed-off-by: Ilia Mirkin Cc: stable@vger.kernel.org Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c index edec30fd3ecd..0a7b6ed5ed28 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c @@ -37,7 +37,10 @@ nv04_fifo_dma_object_dtor(struct nvkm_fifo_chan *base, int cookie) { struct nv04_fifo_chan *chan = nv04_fifo_chan(base); struct nvkm_instmem *imem = chan->fifo->base.engine.subdev.device->imem; + + mutex_lock(&chan->fifo->base.engine.subdev.mutex); nvkm_ramht_remove(imem->ramht, cookie); + mutex_unlock(&chan->fifo->base.engine.subdev.mutex); } static int -- cgit v1.2.3 From bad3d80fd001d3b74ab8bb3d561bc4d1b08797d3 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Sun, 18 Sep 2016 12:21:56 +0200 Subject: drm/nouveau: Revert "bus: remove cpu_coherent flag" This reverts commit aff51175cdbf345740ec9203eff88e772af88059. The commit caused fence timeouts within nvc0_screen_destroy and most likely other places as well. The most obvious effect is, that userspace processes take minutes to actually quit. Signed-off-by: Karol Herbst Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/include/nvkm/core/device.h | 1 + drivers/gpu/drm/nouveau/nouveau_bo.c | 3 ++- drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c | 1 + drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c | 1 + 4 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h index 7ea8aa7ca408..6bc712f32c8b 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h @@ -175,6 +175,7 @@ struct nvkm_device_func { void (*fini)(struct nvkm_device *, bool suspend); resource_size_t (*resource_addr)(struct nvkm_device *, unsigned bar); resource_size_t (*resource_size)(struct nvkm_device *, unsigned bar); + bool cpu_coherent; }; struct nvkm_device_quirk { diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 6190035edfea..864323b19cf7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -209,7 +209,8 @@ nouveau_bo_new(struct drm_device *dev, int size, int align, nvbo->tile_flags = tile_flags; nvbo->bo.bdev = &drm->ttm.bdev; - nvbo->force_coherent = flags & TTM_PL_FLAG_UNCACHED; + if (!nvxx_device(&drm->device)->func->cpu_coherent) + nvbo->force_coherent = flags & TTM_PL_FLAG_UNCACHED; nvbo->page_shift = 12; if (drm->client.vm) { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c index b1b693219db3..62ad0300cfa5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c @@ -1614,6 +1614,7 @@ nvkm_device_pci_func = { .fini = nvkm_device_pci_fini, .resource_addr = nvkm_device_pci_resource_addr, .resource_size = nvkm_device_pci_resource_size, + .cpu_coherent = !IS_ENABLED(CONFIG_ARM), }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c index 939682f18788..9b638bd905ff 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c @@ -245,6 +245,7 @@ nvkm_device_tegra_func = { .fini = nvkm_device_tegra_fini, .resource_addr = nvkm_device_tegra_resource_addr, .resource_size = nvkm_device_tegra_resource_size, + .cpu_coherent = false, }; int -- cgit v1.2.3 From 005d675aa1909ad70456dec8c5b0ba9b60b52d24 Mon Sep 17 00:00:00 2001 From: Jaehoon Chung Date: Thu, 22 Sep 2016 14:12:00 +0900 Subject: mmc: dw_mmc: fix the spamming log message When there is no Card which is set to "broken-cd", it's displayed a clock information continuously. Because it's polling for detecting card. This patch is fixed this problem. Fixes: 65257a0deed5 ("mmc: dw_mmc: remove UBSAN warning in dw_mci_setup_bus()") Reported-by: Tobias Jakobi Signed-off-by: Jaehoon Chung Signed-off-by: Ulf Hansson --- drivers/mmc/host/dw_mmc.c | 14 +++++++++----- drivers/mmc/host/dw_mmc.h | 3 +++ 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 32380d5d4f6b..767af2026f8b 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -1112,11 +1112,12 @@ static void dw_mci_setup_bus(struct dw_mci_slot *slot, bool force_clkinit) div = (host->bus_hz != clock) ? DIV_ROUND_UP(div, 2) : 0; - dev_info(&slot->mmc->class_dev, - "Bus speed (slot %d) = %dHz (slot req %dHz, actual %dHZ div = %d)\n", - slot->id, host->bus_hz, clock, - div ? ((host->bus_hz / div) >> 1) : - host->bus_hz, div); + if (clock != slot->__clk_old || force_clkinit) + dev_info(&slot->mmc->class_dev, + "Bus speed (slot %d) = %dHz (slot req %dHz, actual %dHZ div = %d)\n", + slot->id, host->bus_hz, clock, + div ? ((host->bus_hz / div) >> 1) : + host->bus_hz, div); /* disable clock */ mci_writel(host, CLKENA, 0); @@ -1139,6 +1140,9 @@ static void dw_mci_setup_bus(struct dw_mci_slot *slot, bool force_clkinit) /* inform CIU */ mci_send_cmd(slot, sdmmc_cmd_bits, 0); + + /* keep the last clock value that was requested from core */ + slot->__clk_old = clock; } host->current_speed = clock; diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h index 9e740bc232a8..e8cd2dec3263 100644 --- a/drivers/mmc/host/dw_mmc.h +++ b/drivers/mmc/host/dw_mmc.h @@ -249,6 +249,8 @@ extern int dw_mci_resume(struct dw_mci *host); * @queue_node: List node for placing this node in the @queue list of * &struct dw_mci. * @clock: Clock rate configured by set_ios(). Protected by host->lock. + * @__clk_old: The last clock value that was requested from core. + * Keeping track of this helps us to avoid spamming the console. * @flags: Random state bits associated with the slot. * @id: Number of this slot. * @sdio_id: Number of this slot in the SDIO interrupt registers. @@ -263,6 +265,7 @@ struct dw_mci_slot { struct list_head queue_node; unsigned int clock; + unsigned int __clk_old; unsigned long flags; #define DW_MMC_CARD_PRESENT 0 -- cgit v1.2.3 From 9abefcb1aaa58b9d5aa40a8bb12c87d02415e4c8 Mon Sep 17 00:00:00 2001 From: Sergei Miroshnichenko Date: Wed, 7 Sep 2016 16:51:12 +0300 Subject: can: dev: fix deadlock reported after bus-off A timer was used to restart after the bus-off state, leading to a relatively large can_restart() executed in an interrupt context, which in turn sets up pinctrl. When this happens during system boot, there is a high probability of grabbing the pinctrl_list_mutex, which is locked already by the probe() of other device, making the kernel suspect a deadlock condition [1]. To resolve this issue, the restart_timer is replaced by a delayed work. [1] https://github.com/victronenergy/venus/issues/24 Signed-off-by: Sergei Miroshnichenko Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev.c | 27 +++++++++++++++++---------- include/linux/can/dev.h | 3 ++- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index e21f7cc5ae4d..8d6208c0b400 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -501,9 +502,8 @@ EXPORT_SYMBOL_GPL(can_free_echo_skb); /* * CAN device restart for bus-off recovery */ -static void can_restart(unsigned long data) +static void can_restart(struct net_device *dev) { - struct net_device *dev = (struct net_device *)data; struct can_priv *priv = netdev_priv(dev); struct net_device_stats *stats = &dev->stats; struct sk_buff *skb; @@ -543,6 +543,14 @@ restart: netdev_err(dev, "Error %d during restart", err); } +static void can_restart_work(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct can_priv *priv = container_of(dwork, struct can_priv, restart_work); + + can_restart(priv->dev); +} + int can_restart_now(struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); @@ -556,8 +564,8 @@ int can_restart_now(struct net_device *dev) if (priv->state != CAN_STATE_BUS_OFF) return -EBUSY; - /* Runs as soon as possible in the timer context */ - mod_timer(&priv->restart_timer, jiffies); + cancel_delayed_work_sync(&priv->restart_work); + can_restart(dev); return 0; } @@ -578,8 +586,8 @@ void can_bus_off(struct net_device *dev) netif_carrier_off(dev); if (priv->restart_ms) - mod_timer(&priv->restart_timer, - jiffies + (priv->restart_ms * HZ) / 1000); + schedule_delayed_work(&priv->restart_work, + msecs_to_jiffies(priv->restart_ms)); } EXPORT_SYMBOL_GPL(can_bus_off); @@ -688,6 +696,7 @@ struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max) return NULL; priv = netdev_priv(dev); + priv->dev = dev; if (echo_skb_max) { priv->echo_skb_max = echo_skb_max; @@ -697,7 +706,7 @@ struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max) priv->state = CAN_STATE_STOPPED; - init_timer(&priv->restart_timer); + INIT_DELAYED_WORK(&priv->restart_work, can_restart_work); return dev; } @@ -778,8 +787,6 @@ int open_candev(struct net_device *dev) if (!netif_carrier_ok(dev)) netif_carrier_on(dev); - setup_timer(&priv->restart_timer, can_restart, (unsigned long)dev); - return 0; } EXPORT_SYMBOL_GPL(open_candev); @@ -794,7 +801,7 @@ void close_candev(struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); - del_timer_sync(&priv->restart_timer); + cancel_delayed_work_sync(&priv->restart_work); can_flush_echo_skb(dev); } EXPORT_SYMBOL_GPL(close_candev); diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 5261751f6bd4..5f5270941ba0 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -32,6 +32,7 @@ enum can_mode { * CAN common private data */ struct can_priv { + struct net_device *dev; struct can_device_stats can_stats; struct can_bittiming bittiming, data_bittiming; @@ -47,7 +48,7 @@ struct can_priv { u32 ctrlmode_static; /* static enabled options for driver/hardware */ int restart_ms; - struct timer_list restart_timer; + struct delayed_work restart_work; int (*do_set_bittiming)(struct net_device *dev); int (*do_set_data_bittiming)(struct net_device *dev); -- cgit v1.2.3 From 456bee986e0a372ad4beed5d3cedb3622633d9df Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 20 Sep 2016 20:35:55 +0800 Subject: KEYS: Fix skcipher IV clobbering The IV must not be modified by the skcipher operation so we need to duplicate it. Fixes: c3917fd9dfbc ("KEYS: Use skcipher") Cc: stable@vger.kernel.org Reported-by: Mimi Zohar Signed-off-by: Herbert Xu --- security/keys/encrypted-keys/encrypted.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index 5adbfc32242f..17a06105ccb6 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -478,6 +479,7 @@ static int derived_key_encrypt(struct encrypted_key_payload *epayload, struct crypto_skcipher *tfm; struct skcipher_request *req; unsigned int encrypted_datalen; + u8 iv[AES_BLOCK_SIZE]; unsigned int padlen; char pad[16]; int ret; @@ -500,8 +502,8 @@ static int derived_key_encrypt(struct encrypted_key_payload *epayload, sg_init_table(sg_out, 1); sg_set_buf(sg_out, epayload->encrypted_data, encrypted_datalen); - skcipher_request_set_crypt(req, sg_in, sg_out, encrypted_datalen, - epayload->iv); + memcpy(iv, epayload->iv, sizeof(iv)); + skcipher_request_set_crypt(req, sg_in, sg_out, encrypted_datalen, iv); ret = crypto_skcipher_encrypt(req); tfm = crypto_skcipher_reqtfm(req); skcipher_request_free(req); @@ -581,6 +583,7 @@ static int derived_key_decrypt(struct encrypted_key_payload *epayload, struct crypto_skcipher *tfm; struct skcipher_request *req; unsigned int encrypted_datalen; + u8 iv[AES_BLOCK_SIZE]; char pad[16]; int ret; @@ -599,8 +602,8 @@ static int derived_key_decrypt(struct encrypted_key_payload *epayload, epayload->decrypted_datalen); sg_set_buf(&sg_out[1], pad, sizeof pad); - skcipher_request_set_crypt(req, sg_in, sg_out, encrypted_datalen, - epayload->iv); + memcpy(iv, epayload->iv, sizeof(iv)); + skcipher_request_set_crypt(req, sg_in, sg_out, encrypted_datalen, iv); ret = crypto_skcipher_decrypt(req); tfm = crypto_skcipher_reqtfm(req); skcipher_request_free(req); -- cgit v1.2.3 From 0cf43f509f72128196e23f5ade7e512a72152cc6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 22 Sep 2016 17:04:57 +0800 Subject: crypto: rsa-pkcs1pad - Handle leading zero for decryption As the software RSA implementation now produces fixed-length output, we need to eliminate leading zeros in the calling code instead. This patch does just that for pkcs1pad decryption while signature verification was fixed in an earlier patch. Fixes: 9b45b7bba3d2 ("crypto: rsa - Generate fixed-length output") Reported-by: Mat Martineau Signed-off-by: Herbert Xu --- crypto/rsa-pkcs1pad.c | 41 ++++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c index 877019a6d3ea..8baab4307f7b 100644 --- a/crypto/rsa-pkcs1pad.c +++ b/crypto/rsa-pkcs1pad.c @@ -298,41 +298,48 @@ static int pkcs1pad_decrypt_complete(struct akcipher_request *req, int err) struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct pkcs1pad_ctx *ctx = akcipher_tfm_ctx(tfm); struct pkcs1pad_request *req_ctx = akcipher_request_ctx(req); + unsigned int dst_len; unsigned int pos; - - if (err == -EOVERFLOW) - /* Decrypted value had no leading 0 byte */ - err = -EINVAL; + u8 *out_buf; if (err) goto done; - if (req_ctx->child_req.dst_len != ctx->key_size - 1) { - err = -EINVAL; + err = -EINVAL; + dst_len = req_ctx->child_req.dst_len; + if (dst_len < ctx->key_size - 1) goto done; + + out_buf = req_ctx->out_buf; + if (dst_len == ctx->key_size) { + if (out_buf[0] != 0x00) + /* Decrypted value had no leading 0 byte */ + goto done; + + dst_len--; + out_buf++; } - if (req_ctx->out_buf[0] != 0x02) { - err = -EINVAL; + if (out_buf[0] != 0x02) goto done; - } - for (pos = 1; pos < req_ctx->child_req.dst_len; pos++) - if (req_ctx->out_buf[pos] == 0x00) + + for (pos = 1; pos < dst_len; pos++) + if (out_buf[pos] == 0x00) break; - if (pos < 9 || pos == req_ctx->child_req.dst_len) { - err = -EINVAL; + if (pos < 9 || pos == dst_len) goto done; - } pos++; - if (req->dst_len < req_ctx->child_req.dst_len - pos) + err = 0; + + if (req->dst_len < dst_len - pos) err = -EOVERFLOW; - req->dst_len = req_ctx->child_req.dst_len - pos; + req->dst_len = dst_len - pos; if (!err) sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst, req->dst_len), - req_ctx->out_buf + pos, req->dst_len); + out_buf + pos, req->dst_len); done: kzfree(req_ctx->out_buf); -- cgit v1.2.3 From f0aa1ce6259eb65f53f969b3250c1d0aac84f30b Mon Sep 17 00:00:00 2001 From: Nikita Yushchenko Date: Thu, 22 Sep 2016 12:02:25 +0300 Subject: regmap: fix deadlock on _regmap_raw_write() error path Commit 815806e39bf6 ("regmap: drop cache if the bus transfer error") added a call to regcache_drop_region() to error path in _regmap_raw_write(). However that path runs with regmap lock taken, and regcache_drop_region() tries to re-take it, causing a deadlock. Fix that by calling map->cache_ops->drop() directly. Signed-off-by: Nikita Yushchenko Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 25d26bb18970..e964d068874d 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1475,7 +1475,11 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg, kfree(buf); } else if (ret != 0 && !map->cache_bypass && map->format.parse_val) { - regcache_drop_region(map, reg, reg + 1); + /* regcache_drop_region() takes lock that we already have, + * thus call map->cache_ops->drop() directly + */ + if (map->cache_ops && map->cache_ops->drop) + map->cache_ops->drop(map, reg, reg + 1); } trace_regmap_hw_write_done(map, reg, val_len / map->format.val_bytes); -- cgit v1.2.3 From 9bf6ffdabdd6e70a0b69d032a0aff091afe1773e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 26 Aug 2016 15:06:04 +0200 Subject: locking/atomic, arch/sh: Fix ATOMIC_FETCH_OP() We cannot use the "z" constraint twice, since its a single register (r0). Change the one not used by movli.l/movco.l to "r". Reported-by: Fengguang Wu Tested-by: Fengguang Wu Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rich Felker Cc: Thomas Gleixner Cc: Yoshinori Sato Signed-off-by: Ingo Molnar --- arch/sh/include/asm/atomic-llsc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sh/include/asm/atomic-llsc.h b/arch/sh/include/asm/atomic-llsc.h index caea2c45f6c2..1d159ce50f5a 100644 --- a/arch/sh/include/asm/atomic-llsc.h +++ b/arch/sh/include/asm/atomic-llsc.h @@ -60,7 +60,7 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \ " movco.l %0, @%3 \n" \ " bf 1b \n" \ " synco \n" \ - : "=&z" (temp), "=&z" (res) \ + : "=&z" (temp), "=&r" (res) \ : "r" (i), "r" (&v->counter) \ : "t"); \ \ -- cgit v1.2.3 From 08b90f0655258411a1b41d856331e20e7ec8d55c Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 20 Sep 2016 18:48:10 +0300 Subject: perf/x86/intel/bts: Make it an exclusive PMU Just like intel_pt, intel_bts can only handle one event at a time, which is the reason we introduced PERF_PMU_CAP_EXCLUSIVE in the first place. However, at the moment one can have as many intel_bts events within the same context at the same time as one pleases. Only one of them, however, will get scheduled and receive the actual trace data. Fix this by making intel_bts an "exclusive" PMU. Signed-off-by: Alexander Shishkin Acked-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160920154811.3255-2-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/bts.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 6ff66efa0feb..982c9e31daca 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -584,7 +584,8 @@ static __init int bts_init(void) if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts) return -ENODEV; - bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE; + bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE | + PERF_PMU_CAP_EXCLUSIVE; bts_pmu.task_ctx_nr = perf_sw_context; bts_pmu.event_init = bts_event_init; bts_pmu.add = bts_event_add; -- cgit v1.2.3 From 3bf6215a1b30db7df6083c708caab3fe1a8e8abe Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 20 Sep 2016 18:48:11 +0300 Subject: perf/core: Limit matching exclusive events to one PMU An "exclusive" PMU is the one that can only have one event scheduled in at any given time. There may be more than one of such PMUs in a system, though, like Intel PT and BTS. It should be allowed to have one event for either of those inside the same context (there may be other constraints that may prevent this, but those would be hardware-specific). However, the exclusivity code is written so that only one event from any of the "exclusive" PMUs is allowed in a context. Fix this by making the exclusive event filter explicitly match two events' PMUs. Signed-off-by: Alexander Shishkin Acked-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160920154811.3255-3-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index a54f2c2cdb20..fc9bb2225291 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3929,7 +3929,7 @@ static void exclusive_event_destroy(struct perf_event *event) static bool exclusive_event_match(struct perf_event *e1, struct perf_event *e2) { - if ((e1->pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE) && + if ((e1->pmu == e2->pmu) && (e1->cpu == e2->cpu || e1->cpu == -1 || e2->cpu == -1)) -- cgit v1.2.3 From 331dcf421c34d227784d07943eb01e4023a42b0a Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 25 Aug 2016 12:23:39 +0100 Subject: i2c: qup: skip qup_i2c_suspend if the device is already runtime suspended If the i2c device is already runtime suspended, if qup_i2c_suspend is executed during suspend-to-idle or suspend-to-ram it will result in the following splat: WARNING: CPU: 3 PID: 1593 at drivers/clk/clk.c:476 clk_core_unprepare+0x80/0x90 Modules linked in: CPU: 3 PID: 1593 Comm: bash Tainted: G W 4.8.0-rc3 #14 Hardware name: Qualcomm Technologies, Inc. APQ 8016 SBC (DT) PC is at clk_core_unprepare+0x80/0x90 LR is at clk_unprepare+0x28/0x40 pc : [] lr : [] pstate: 60000145 Call trace: clk_core_unprepare+0x80/0x90 qup_i2c_disable_clocks+0x2c/0x68 qup_i2c_suspend+0x10/0x20 platform_pm_suspend+0x24/0x68 ... This patch fixes the issue by executing qup_i2c_pm_suspend_runtime conditionally in qup_i2c_suspend. Signed-off-by: Sudeep Holla Reviewed-by: Andy Gross Signed-off-by: Wolfram Sang Cc: stable@kernel.org --- drivers/i2c/busses/i2c-qup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-qup.c b/drivers/i2c/busses/i2c-qup.c index 501bd15cb78e..a8497cfdae6f 100644 --- a/drivers/i2c/busses/i2c-qup.c +++ b/drivers/i2c/busses/i2c-qup.c @@ -1599,7 +1599,8 @@ static int qup_i2c_pm_resume_runtime(struct device *device) #ifdef CONFIG_PM_SLEEP static int qup_i2c_suspend(struct device *device) { - qup_i2c_pm_suspend_runtime(device); + if (!pm_runtime_suspended(device)) + return qup_i2c_pm_suspend_runtime(device); return 0; } -- cgit v1.2.3 From 3b4ac78610690bd83fb33762ef97e8b8a89285ae Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 22 Sep 2016 19:58:17 -0600 Subject: nvme-rdma: only clear queue flags after successful connect Otherwise, nvme_rdma_stop_and_clear_queue() will incorrectly try to stop/free rdma qps/cm_ids that are already freed. Fixes: e89ca58f9c90 ("nvme-rdma: add DELETING queue flag") Reported-by: Steve Wise Tested-by: Steve Wise Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index c2c2c28e6eb5..fbdb2267e460 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -561,7 +561,6 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl, queue = &ctrl->queues[idx]; queue->ctrl = ctrl; - queue->flags = 0; init_completion(&queue->cm_done); if (idx > 0) @@ -595,6 +594,7 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl, goto out_destroy_cm_id; } + clear_bit(NVME_RDMA_Q_DELETING, &queue->flags); set_bit(NVME_RDMA_Q_CONNECTED, &queue->flags); return 0; -- cgit v1.2.3 From 96b03ab86d843524ec4aed7fe0ceef412c684c68 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 22 Sep 2016 16:55:13 -0400 Subject: locking/hung_task: Fix typo in CONFIG_DETECT_HUNG_TASK help text Fix the indefinitiley -> indefinitely typo in Kconfig.debug. Signed-off-by: Vivien Didelot Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160922205513.17821-1-vivien.didelot@savoirfairelinux.com Signed-off-by: Ingo Molnar --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 2e2cca509231..cab7405f48d2 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -821,7 +821,7 @@ config DETECT_HUNG_TASK help Say Y here to enable the kernel to detect "hung tasks", which are bugs that cause the task to be stuck in - uninterruptible "D" state indefinitiley. + uninterruptible "D" state indefinitely. When a hung task is detected, the kernel will print the current stack trace (which you should report), but the -- cgit v1.2.3 From c18df0adabf8400c1825b90382d06df5edc303fa Mon Sep 17 00:00:00 2001 From: David Daney Date: Tue, 20 Sep 2016 11:46:35 -0700 Subject: arm64: Call numa_store_cpu_info() earlier. The wq_numa_init() function makes a private CPU to node map by calling cpu_to_node() early in the boot process, before the non-boot CPUs are brought online. Since the default implementation of cpu_to_node() returns zero for CPUs that have never been brought online, the workqueue system's view is that *all* CPUs are on node zero. When the unbound workqueue for a non-zero node is created, the tsk_cpus_allowed() for the worker threads is the empty set because there are, in the view of the workqueue system, no CPUs on non-zero nodes. The code in try_to_wake_up() using this empty cpumask ends up using the cpumask empty set value of NR_CPUS as an index into the per-CPU area pointer array, and gets garbage as it is one past the end of the array. This results in: [ 0.881970] Unable to handle kernel paging request at virtual address fffffb1008b926a4 [ 1.970095] pgd = fffffc00094b0000 [ 1.973530] [fffffb1008b926a4] *pgd=0000000000000000, *pud=0000000000000000, *pmd=0000000000000000 [ 1.982610] Internal error: Oops: 96000004 [#1] SMP [ 1.987541] Modules linked in: [ 1.990631] CPU: 48 PID: 295 Comm: cpuhp/48 Tainted: G W 4.8.0-rc6-preempt-vol+ #9 [ 1.999435] Hardware name: Cavium ThunderX CN88XX board (DT) [ 2.005159] task: fffffe0fe89cc300 task.stack: fffffe0fe8b8c000 [ 2.011158] PC is at try_to_wake_up+0x194/0x34c [ 2.015737] LR is at try_to_wake_up+0x150/0x34c [ 2.020318] pc : [] lr : [] pstate: 600000c5 [ 2.027803] sp : fffffe0fe8b8fb10 [ 2.031149] x29: fffffe0fe8b8fb10 x28: 0000000000000000 [ 2.036522] x27: fffffc0008c63bc8 x26: 0000000000001000 [ 2.041896] x25: fffffc0008c63c80 x24: fffffc0008bfb200 [ 2.047270] x23: 00000000000000c0 x22: 0000000000000004 [ 2.052642] x21: fffffe0fe89d25bc x20: 0000000000001000 [ 2.058014] x19: fffffe0fe89d1d00 x18: 0000000000000000 [ 2.063386] x17: 0000000000000000 x16: 0000000000000000 [ 2.068760] x15: 0000000000000018 x14: 0000000000000000 [ 2.074133] x13: 0000000000000000 x12: 0000000000000000 [ 2.079505] x11: 0000000000000000 x10: 0000000000000000 [ 2.084879] x9 : 0000000000000000 x8 : 0000000000000000 [ 2.090251] x7 : 0000000000000040 x6 : 0000000000000000 [ 2.095621] x5 : ffffffffffffffff x4 : 0000000000000000 [ 2.100991] x3 : 0000000000000000 x2 : 0000000000000000 [ 2.106364] x1 : fffffc0008be4c24 x0 : ffffff0ffffada80 [ 2.111737] [ 2.113236] Process cpuhp/48 (pid: 295, stack limit = 0xfffffe0fe8b8c020) [ 2.120102] Stack: (0xfffffe0fe8b8fb10 to 0xfffffe0fe8b90000) [ 2.125914] fb00: fffffe0fe8b8fb80 fffffc00080e7648 . . . [ 2.442859] Call trace: [ 2.445327] Exception stack(0xfffffe0fe8b8f940 to 0xfffffe0fe8b8fa70) [ 2.451843] f940: fffffe0fe89d1d00 0000040000000000 fffffe0fe8b8fb10 fffffc00080e7468 [ 2.459767] f960: fffffe0fe8b8f980 fffffc00080e4958 ffffff0ff91ab200 fffffc00080e4b64 [ 2.467690] f980: fffffe0fe8b8f9d0 fffffc00080e515c fffffe0fe8b8fa80 0000000000000000 [ 2.475614] f9a0: fffffe0fe8b8f9d0 fffffc00080e58e4 fffffe0fe8b8fa80 0000000000000000 [ 2.483540] f9c0: fffffe0fe8d10000 0000000000000040 fffffe0fe8b8fa50 fffffc00080e5ac4 [ 2.491465] f9e0: ffffff0ffffada80 fffffc0008be4c24 0000000000000000 0000000000000000 [ 2.499387] fa00: 0000000000000000 ffffffffffffffff 0000000000000000 0000000000000040 [ 2.507309] fa20: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 2.515233] fa40: 0000000000000000 0000000000000000 0000000000000000 0000000000000018 [ 2.523156] fa60: 0000000000000000 0000000000000000 [ 2.528089] [] try_to_wake_up+0x194/0x34c [ 2.533723] [] wake_up_process+0x28/0x34 [ 2.539275] [] create_worker+0x110/0x19c [ 2.544824] [] alloc_unbound_pwq+0x3cc/0x4b0 [ 2.550724] [] wq_update_unbound_numa+0x10c/0x1e4 [ 2.557066] [] workqueue_online_cpu+0x220/0x28c [ 2.563234] [] cpuhp_invoke_callback+0x6c/0x168 [ 2.569398] [] cpuhp_up_callbacks+0x44/0xe4 [ 2.575210] [] cpuhp_thread_fun+0x13c/0x148 [ 2.581027] [] smpboot_thread_fn+0x19c/0x1a8 [ 2.586929] [] kthread+0xdc/0xf0 [ 2.591776] [] ret_from_fork+0x10/0x50 [ 2.597147] Code: b00057e1 91304021 91005021 b8626822 (b8606821) [ 2.603464] ---[ end trace 58c0cd36b88802bc ]--- [ 2.608138] Kernel panic - not syncing: Fatal exception Fix by moving call to numa_store_cpu_info() for all CPUs into smp_prepare_cpus(), which happens before wq_numa_init(). Since smp_store_cpu_info() now contains only a single function call, simplify by removing the function and out-lining its contents. Suggested-by: Robert Richter Fixes: 1a2db300348b ("arm64, numa: Add NUMA support for arm64 platforms.") Cc: # 4.7.x- Signed-off-by: David Daney Reviewed-by: Robert Richter Tested-by: Yisheng Xie Signed-off-by: Catalin Marinas --- arch/arm64/kernel/smp.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index d93d43352504..3ff173e92582 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -201,12 +201,6 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) return ret; } -static void smp_store_cpu_info(unsigned int cpuid) -{ - store_cpu_topology(cpuid); - numa_store_cpu_info(cpuid); -} - /* * This is the secondary CPU boot entry. We're using this CPUs * idle thread stack, but a set of temporary page tables. @@ -254,7 +248,7 @@ asmlinkage void secondary_start_kernel(void) */ notify_cpu_starting(cpu); - smp_store_cpu_info(cpu); + store_cpu_topology(cpu); /* * OK, now it's safe to let the boot CPU continue. Wait for @@ -689,10 +683,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus) { int err; unsigned int cpu; + unsigned int this_cpu; init_cpu_topology(); - smp_store_cpu_info(smp_processor_id()); + this_cpu = smp_processor_id(); + store_cpu_topology(this_cpu); + numa_store_cpu_info(this_cpu); /* * If UP is mandated by "nosmp" (which implies "maxcpus=0"), don't set @@ -719,6 +716,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) continue; set_cpu_present(cpu, true); + numa_store_cpu_info(cpu); } } -- cgit v1.2.3 From 67787b68ec48c239d5ec12f9bf5adaf5c459517a Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Fri, 23 Sep 2016 16:42:08 +0900 Subject: arm64: kgdb: handle read-only text / modules Handle read-only cases when CONFIG_DEBUG_RODATA (4.0) or CONFIG_DEBUG_SET_MODULE_RONX (3.18) are enabled by using aarch64_insn_write() instead of probe_kernel_write() as introduced by commit 2f896d586610 ("arm64: use fixmap for text patching") in 4.0. Fixes: 11d91a770f1f ("arm64: Add CONFIG_DEBUG_SET_MODULE_RONX support") Signed-off-by: AKASHI Takahiro Reviewed-by: Mark Rutland Cc: Will Deacon Cc: Jason Wessel Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/debug-monitors.h | 2 -- arch/arm64/kernel/kgdb.c | 36 ++++++++++++++++++++++----------- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 4b6b3f72a215..b71420a12f26 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -61,8 +61,6 @@ #define AARCH64_BREAK_KGDB_DYN_DBG \ (AARCH64_BREAK_MON | (KGDB_DYN_DBG_BRK_IMM << 5)) -#define KGDB_DYN_BRK_INS_BYTE(x) \ - ((AARCH64_BREAK_KGDB_DYN_DBG >> (8 * (x))) & 0xff) #define CACHE_FLUSH_IS_SAFE 1 diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index 8c57f6496e56..e017a9493b92 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -19,10 +19,13 @@ * along with this program. If not, see . */ +#include #include #include #include #include +#include +#include #include struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { @@ -338,15 +341,24 @@ void kgdb_arch_exit(void) unregister_die_notifier(&kgdb_notifier); } -/* - * ARM instructions are always in LE. - * Break instruction is encoded in LE format - */ -struct kgdb_arch arch_kgdb_ops = { - .gdb_bpt_instr = { - KGDB_DYN_BRK_INS_BYTE(0), - KGDB_DYN_BRK_INS_BYTE(1), - KGDB_DYN_BRK_INS_BYTE(2), - KGDB_DYN_BRK_INS_BYTE(3), - } -}; +struct kgdb_arch arch_kgdb_ops; + +int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) +{ + int err; + + BUILD_BUG_ON(AARCH64_INSN_SIZE != BREAK_INSTR_SIZE); + + err = aarch64_insn_read((void *)bpt->bpt_addr, (u32 *)bpt->saved_instr); + if (err) + return err; + + return aarch64_insn_write((void *)bpt->bpt_addr, + (u32)AARCH64_BREAK_KGDB_DYN_DBG); +} + +int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) +{ + return aarch64_insn_write((void *)bpt->bpt_addr, + *(u32 *)bpt->saved_instr); +} -- cgit v1.2.3 From 2ed5c3f09627f72a2e0e407a86b2ac05494190f9 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sun, 18 Sep 2016 16:22:47 -0700 Subject: sch_qfq: keep backlog updated with qlen Reported-by: Stas Nichiporovich Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/sch_qfq.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index f27ffee106f6..ca0516e6f743 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1153,6 +1153,7 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch) if (!skb) return NULL; + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; qdisc_bstats_update(sch, skb); @@ -1256,6 +1257,7 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, } bstats_update(&cl->bstats, skb); + qdisc_qstats_backlog_inc(sch, skb); ++sch->q.qlen; agg = cl->agg; @@ -1476,6 +1478,7 @@ static void qfq_reset_qdisc(struct Qdisc *sch) qdisc_reset(cl->qdisc); } } + sch->qstats.backlog = 0; sch->q.qlen = 0; } -- cgit v1.2.3 From 3d4357fba82b3cf19ebf0a04d1c9cb086af15d02 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sun, 18 Sep 2016 16:22:48 -0700 Subject: sch_sfb: keep backlog updated with qlen Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/sch_sfb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index add3cc7d37ec..20a350bd1b1d 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -400,6 +400,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, enqueue: ret = qdisc_enqueue(skb, child, to_free); if (likely(ret == NET_XMIT_SUCCESS)) { + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; increment_qlen(skb, q); } else if (net_xmit_drop_count(ret)) { @@ -428,6 +429,7 @@ static struct sk_buff *sfb_dequeue(struct Qdisc *sch) if (skb) { qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; decrement_qlen(skb, q); } @@ -450,6 +452,7 @@ static void sfb_reset(struct Qdisc *sch) struct sfb_sched_data *q = qdisc_priv(sch); qdisc_reset(q->qdisc); + sch->qstats.backlog = 0; sch->q.qlen = 0; q->slot = 0; q->double_buffering = false; -- cgit v1.2.3 From 2fe664f1fcf7c4da6891f95708a7a56d3c024354 Mon Sep 17 00:00:00 2001 From: Douglas Caetano dos Santos Date: Thu, 22 Sep 2016 15:52:04 -0300 Subject: tcp: fix wrong checksum calculation on MTU probing With TCP MTU probing enabled and offload TX checksumming disabled, tcp_mtu_probe() calculated the wrong checksum when a fragment being copied into the probe's SKB had an odd length. This was caused by the direct use of skb_copy_and_csum_bits() to calculate the checksum, as it pads the fragment being copied, if needed. When this fragment was not the last, a subsequent call used the previous checksum without considering this padding. The effect was a stale connection in one way, as even retransmissions wouldn't solve the problem, because the checksum was never recalculated for the full SKB length. Signed-off-by: Douglas Caetano dos Santos Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5288cec4a2b2..d48d5571e62a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1966,12 +1966,14 @@ static int tcp_mtu_probe(struct sock *sk) len = 0; tcp_for_write_queue_from_safe(skb, next, sk) { copy = min_t(int, skb->len, probe_size - len); - if (nskb->ip_summed) + if (nskb->ip_summed) { skb_copy_bits(skb, 0, skb_put(nskb, copy), copy); - else - nskb->csum = skb_copy_and_csum_bits(skb, 0, - skb_put(nskb, copy), - copy, nskb->csum); + } else { + __wsum csum = skb_copy_and_csum_bits(skb, 0, + skb_put(nskb, copy), + copy, 0); + nskb->csum = csum_block_add(nskb->csum, csum, len); + } if (skb->len <= copy) { /* We've eaten all the data from this skb. -- cgit v1.2.3 From 019b1c9fe32a2a32c1153e31375f87ec3e591273 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Sep 2016 17:54:00 -0700 Subject: tcp: fix a compile error in DBGUNDO() If DBGUNDO() is enabled (FASTRETRANS_DEBUG > 1), a compile error will happen, since inet6_sk(sk)->daddr became sk->sk_v6_daddr Fixes: efe4208f47f9 ("ipv6: make lookups simpler and faster") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 08323bd95f2a..a756b8749a26 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2329,10 +2329,9 @@ static void DBGUNDO(struct sock *sk, const char *msg) } #if IS_ENABLED(CONFIG_IPV6) else if (sk->sk_family == AF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", msg, - &np->daddr, ntohs(inet->inet_dport), + &sk->sk_v6_daddr, ntohs(inet->inet_dport), tp->snd_cwnd, tcp_left_out(tp), tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); -- cgit v1.2.3 From 7e956304eb8a285304a78582e4537e72c6365f20 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 23 Sep 2016 15:13:53 +0100 Subject: MIPS: Fix pre-r6 emulation FPU initialisation In the mipsr2_decoder() function, used to emulate pre-MIPSr6 instructions that were removed in MIPSr6, the init_fpu() function is called if a removed pre-MIPSr6 floating point instruction is the first floating point instruction used by the task. However, init_fpu() performs varous actions that rely upon not being migrated. For example in the most basic case it sets the coprocessor 0 Status.CU1 bit to enable the FPU & then loads FP register context into the FPU registers. If the task were to migrate during this time, it may end up attempting to load FP register context on a different CPU where it hasn't set the CU1 bit, leading to errors such as: do_cpu invoked from kernel context![#2]: CPU: 2 PID: 7338 Comm: fp-prctl Tainted: G D 4.7.0-00424-g49b0c82 #2 task: 838e4000 ti: 88d38000 task.ti: 88d38000 $ 0 : 00000000 00000001 ffffffff 88d3fef8 $ 4 : 838e4000 88d38004 00000000 00000001 $ 8 : 3400fc01 801f8020 808e9100 24000000 $12 : dbffffff 807b69d8 807b0000 00000000 $16 : 00000000 80786150 00400fc4 809c0398 $20 : 809c0338 0040273c 88d3ff28 808e9d30 $24 : 808e9d30 00400fb4 $28 : 88d38000 88d3fe88 00000000 8011a2ac Hi : 0040273c Lo : 88d3ff28 epc : 80114178 _restore_fp+0x10/0xa0 ra : 8011a2ac mipsr2_decoder+0xd5c/0x1660 Status: 1400fc03 KERNEL EXL IE Cause : 1080002c (ExcCode 0b) PrId : 0001a920 (MIPS I6400) Modules linked in: Process fp-prctl (pid: 7338, threadinfo=88d38000, task=838e4000, tls=766527d0) Stack : 00000000 00000000 00000000 88d3fe98 00000000 00000000 809c0398 809c0338 808e9100 00000000 88d3ff28 00400fc4 00400fc4 0040273c 7fb69e18 004a0000 004a0000 004a0000 7664add0 8010de18 00000000 00000000 88d3fef8 88d3ff28 808e9100 00000000 766527d0 8010e534 000c0000 85755000 8181d580 00000000 00000000 00000000 004a0000 00000000 766527d0 7fb69e18 004a0000 80105c20 ... Call Trace: [<80114178>] _restore_fp+0x10/0xa0 [<8011a2ac>] mipsr2_decoder+0xd5c/0x1660 [<8010de18>] do_ri+0x90/0x6b8 [<80105c20>] ret_from_exception+0x0/0x10 Fix this by disabling preemption around the call to init_fpu(), ensuring that it starts & completes on one CPU. Signed-off-by: Paul Burton Fixes: b0a668fb2038 ("MIPS: kernel: mips-r2-to-r6-emul: Add R2 emulator for MIPS R6") Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # v4.0+ Patchwork: https://patchwork.linux-mips.org/patch/14305/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/mips-r2-to-r6-emul.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c index c3372cac6db2..0a7e10b5f9e3 100644 --- a/arch/mips/kernel/mips-r2-to-r6-emul.c +++ b/arch/mips/kernel/mips-r2-to-r6-emul.c @@ -1164,7 +1164,9 @@ fpu_emul: regs->regs[31] = r31; regs->cp0_epc = epc; if (!used_math()) { /* First time FPU user. */ + preempt_disable(); err = init_fpu(); + preempt_enable(); set_used_math(); } lose_fpu(1); /* Save FPU state for the emulator. */ -- cgit v1.2.3 From c8712c6a674e3382fe4d26d108251ccfa55d08e0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 23 Sep 2016 10:25:48 -0600 Subject: blk-mq: skip unmapped queues in blk_mq_alloc_request_hctx This provides the caller a feedback that a given hctx is not mapped and thus no command can be sent on it. Signed-off-by: Christoph Hellwig Tested-by: Steve Wise Signed-off-by: Jens Axboe --- block/blk-mq.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 13f5a6c1de76..c207fa9870eb 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -296,17 +296,29 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw, if (ret) return ERR_PTR(ret); + /* + * Check if the hardware context is actually mapped to anything. + * If not tell the caller that it should skip this queue. + */ hctx = q->queue_hw_ctx[hctx_idx]; + if (!blk_mq_hw_queue_mapped(hctx)) { + ret = -EXDEV; + goto out_queue_exit; + } ctx = __blk_mq_get_ctx(q, cpumask_first(hctx->cpumask)); blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx); rq = __blk_mq_alloc_request(&alloc_data, rw, 0); if (!rq) { - blk_queue_exit(q); - return ERR_PTR(-EWOULDBLOCK); + ret = -EWOULDBLOCK; + goto out_queue_exit; } return rq; + +out_queue_exit: + blk_queue_exit(q); + return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx); -- cgit v1.2.3 From 9157056da8f8c4a6305f15619e269f164b63a6de Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 23 Sep 2016 16:55:49 -0400 Subject: cgroup: fix invalid controller enable rejections with cgroup namespace On the v2 hierarchy, "cgroup.subtree_control" rejects controller enables if the cgroup has processes in it. The enforcement of this logic assumes that the cgroup wouldn't have any css_sets associated with it if there are no tasks in the cgroup, which is no longer true since a79a908fd2b0 ("cgroup: introduce cgroup namespaces"). When a cgroup namespace is created, it pins the css_set of the creating task to use it as the root css_set of the namespace. This extra reference stays as long as the namespace is around and makes "cgroup.subtree_control" think that the namespace root cgroup is not empty even when it is and thus reject controller enables. Fix it by making cgroup_subtree_control() walk and test emptiness of each css_set instead of testing whether the list_head is empty. While at it, update the comment of cgroup_task_count() to indicate that the returned value may be higher than the number of tasks, which has always been true due to temporary references and doesn't break anything. Signed-off-by: Tejun Heo Reported-by: Evgeny Vereshchagin Cc: Serge E. Hallyn Cc: Aditya Kali Cc: Eric W. Biederman Cc: stable@vger.kernel.org # v4.6+ Fixes: a79a908fd2b0 ("cgroup: introduce cgroup namespaces") Link: https://github.com/systemd/systemd/pull/3589#issuecomment-249089541 --- kernel/cgroup.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d1c51b7f5221..0d4ee1ea5c31 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3446,9 +3446,28 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, * Except for the root, subtree_control must be zero for a cgroup * with tasks so that child cgroups don't compete against tasks. */ - if (enable && cgroup_parent(cgrp) && !list_empty(&cgrp->cset_links)) { - ret = -EBUSY; - goto out_unlock; + if (enable && cgroup_parent(cgrp)) { + struct cgrp_cset_link *link; + + /* + * Because namespaces pin csets too, @cgrp->cset_links + * might not be empty even when @cgrp is empty. Walk and + * verify each cset. + */ + spin_lock_irq(&css_set_lock); + + ret = 0; + list_for_each_entry(link, &cgrp->cset_links, cset_link) { + if (css_set_populated(link->cset)) { + ret = -EBUSY; + break; + } + } + + spin_unlock_irq(&css_set_lock); + + if (ret) + goto out_unlock; } /* save and update control masks and prepare csses */ @@ -3899,7 +3918,9 @@ void cgroup_file_notify(struct cgroup_file *cfile) * cgroup_task_count - count the number of tasks in a cgroup. * @cgrp: the cgroup in question * - * Return the number of tasks in the cgroup. + * Return the number of tasks in the cgroup. The returned number can be + * higher than the actual number of tasks due to css_set references from + * namespace roots and temporary usages. */ static int cgroup_task_count(const struct cgroup *cgrp) { -- cgit v1.2.3 From db32e4e49ce2b0e5fcc17803d011a401c0a637f6 Mon Sep 17 00:00:00 2001 From: Lance Richardson Date: Fri, 23 Sep 2016 15:50:29 -0400 Subject: ip6_gre: fix flowi6_proto value in ip6gre_xmit_other() Similar to commit 3be07244b733 ("ip6_gre: fix flowi6_proto value in xmit path"), set flowi6_proto to IPPROTO_GRE for output route lookup. Up until now, ip6gre_xmit_other() has set flowi6_proto to a bogus value. This affected output route lookup for packets sent on an ip6gretap device in cases where routing was dependent on the value of flowi6_proto. Since the correct proto is already set in the tunnel flowi6 template via commit 252f3f5a1189 ("ip6_gre: Set flowi6_proto as IPPROTO_GRE in xmit path."), simply delete the line setting the incorrect flowi6_proto value. Suggested-by: Jiri Benc Fixes: c12b395a4664 ("gre: Support GRE over IPv6") Reviewed-by: Shmulik Ladkani Signed-off-by: Lance Richardson Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 704274cbd495..edc3daab354e 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -648,7 +648,6 @@ static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev) encap_limit = t->parms.encap_limit; memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = skb->protocol; err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)); if (err) -- cgit v1.2.3 From 3089bf614c7e2fd441ee001e3ff3d18326f6f091 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Fri, 23 Sep 2016 20:21:56 -0700 Subject: shmem: fix tmpfs to handle the huge= option properly shmem_get_unmapped_area() checks SHMEM_SB(sb)->huge incorrectly, which leads to a reversed effect of "huge=" mount option. Fix the check in shmem_get_unmapped_area(). Note, the default value of SHMEM_SB(sb)->huge remains as SHMEM_HUGE_NEVER. User will need to specify "huge=" option to enable huge page mappings. Reported-by: Hillf Danton Signed-off-by: Toshi Kani Acked-by: Kirill A. Shutemov Reviewed-by: Aneesh Kumar K.V Signed-off-by: Hugh Dickins Signed-off-by: Linus Torvalds --- mm/shmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/shmem.c b/mm/shmem.c index fd8b2b5741b1..aec5b492d947 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1980,7 +1980,7 @@ unsigned long shmem_get_unmapped_area(struct file *file, return addr; sb = shm_mnt->mnt_sb; } - if (SHMEM_SB(sb)->huge != SHMEM_HUGE_NEVER) + if (SHMEM_SB(sb)->huge == SHMEM_HUGE_NEVER) return addr; } -- cgit v1.2.3 From 71664665c3e3ca5ff61ef5fc65480f82cd575eb2 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 23 Sep 2016 20:24:23 -0700 Subject: huge tmpfs: fix Committed_AS leak Under swapping load on huge tmpfs, /proc/meminfo's Committed_AS grows bigger and bigger: just a cosmetic issue for most users, but disabling for those who run without overcommit (/proc/sys/vm/overcommit_memory 2). shmem_uncharge() was forgetting to unaccount __vm_enough_memory's charge, and shmem_charge() was forgetting it on the filesystem-full error path. Fixes: 800d8c63b2e9 ("shmem: add huge pages support") Signed-off-by: Hugh Dickins Acked-by: Kirill A. Shutemov Signed-off-by: Linus Torvalds --- mm/shmem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/shmem.c b/mm/shmem.c index aec5b492d947..971fc83e6402 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -270,7 +270,7 @@ bool shmem_charge(struct inode *inode, long pages) info->alloced -= pages; shmem_recalc_inode(inode); spin_unlock_irqrestore(&info->lock, flags); - + shmem_unacct_blocks(info->flags, pages); return false; } percpu_counter_add(&sbinfo->used_blocks, pages); @@ -291,6 +291,7 @@ void shmem_uncharge(struct inode *inode, long pages) if (sbinfo->max_blocks) percpu_counter_sub(&sbinfo->used_blocks, pages); + shmem_unacct_blocks(info->flags, pages); } /* -- cgit v1.2.3 From b385d21f27d86426472f6ae92a231095f7de2a8d Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 23 Sep 2016 20:27:04 -0700 Subject: mm: delete unnecessary and unsafe init_tlb_ubc() init_tlb_ubc() looked unnecessary to me: tlb_ubc is statically initialized with zeroes in the init_task, and copied from parent to child while it is quiescent in arch_dup_task_struct(); so I went to delete it. But inserted temporary debug WARN_ONs in place of init_tlb_ubc() to check that it was always empty at that point, and found them firing: because memcg reclaim can recurse into global reclaim (when allocating biosets for swapout in my case), and arrive back at the init_tlb_ubc() in shrink_node_memcg(). Resetting tlb_ubc.flush_required at that point is wrong: if the upper level needs a deferred TLB flush, but the lower level turns out not to, we miss a TLB flush. But fortunately, that's the only part of the protocol that does not nest: with the initialization removed, cpumask collects bits from upper and lower levels, and flushes TLB when needed. Fixes: 72b252aed506 ("mm: send one IPI per CPU to TLB flush all entries after unmapping pages") Signed-off-by: Hugh Dickins Acked-by: Mel Gorman Cc: stable@vger.kernel.org # 4.3+ Signed-off-by: Linus Torvalds --- mm/vmscan.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index b1e12a1ea9cf..0fe8b7113868 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2303,23 +2303,6 @@ out: } } -#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH -static void init_tlb_ubc(void) -{ - /* - * This deliberately does not clear the cpumask as it's expensive - * and unnecessary. If there happens to be data in there then the - * first SWAP_CLUSTER_MAX pages will send an unnecessary IPI and - * then will be cleared. - */ - current->tlb_ubc.flush_required = false; -} -#else -static inline void init_tlb_ubc(void) -{ -} -#endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */ - /* * This is a basic per-node page freer. Used by both kswapd and direct reclaim. */ @@ -2355,8 +2338,6 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc scan_adjusted = (global_reclaim(sc) && !current_is_kswapd() && sc->priority == DEF_PRIORITY); - init_tlb_ubc(); - blk_start_plug(&plug); while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || nr[LRU_INACTIVE_FILE]) { -- cgit v1.2.3 From 595c73071e6641e59b83911fbb4026e767471000 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 23 Sep 2016 17:53:52 -0700 Subject: libnvdimm, region: fix flush hint table thinko The definition of the flush hint table as: void __iomem *flush_wpq[0][0]; ...passed the unit test, but is broken as flush_wpq[0][1] and flush_wpq[1][0] refer to the same entry. Fix this to use a helper that calculates a slot in the table based on the geometry of flush hints in the region. This is important to get right since virtualization solutions use this mechanism to trigger hypervisor flushes to platform persistence. Reported-by: Dave Jiang Tested-by: Dave Jiang Signed-off-by: Dan Williams --- drivers/nvdimm/nd.h | 22 ++++++++++++++++++++-- drivers/nvdimm/region_devs.c | 20 ++++++++++++-------- 2 files changed, 32 insertions(+), 10 deletions(-) diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 8024a0ef86d3..0b78a8211f4a 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -52,10 +52,28 @@ struct nvdimm_drvdata { struct nd_region_data { int ns_count; int ns_active; - unsigned int flush_mask; - void __iomem *flush_wpq[0][0]; + unsigned int hints_shift; + void __iomem *flush_wpq[0]; }; +static inline void __iomem *ndrd_get_flush_wpq(struct nd_region_data *ndrd, + int dimm, int hint) +{ + unsigned int num = 1 << ndrd->hints_shift; + unsigned int mask = num - 1; + + return ndrd->flush_wpq[dimm * num + (hint & mask)]; +} + +static inline void ndrd_set_flush_wpq(struct nd_region_data *ndrd, int dimm, + int hint, void __iomem *flush) +{ + unsigned int num = 1 << ndrd->hints_shift; + unsigned int mask = num - 1; + + ndrd->flush_wpq[dimm * num + (hint & mask)] = flush; +} + static inline struct nd_namespace_index *to_namespace_index( struct nvdimm_drvdata *ndd, int i) { diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 4eef88eb5144..4c0ac4abb629 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -38,7 +38,7 @@ static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm, dev_dbg(dev, "%s: map %d flush address%s\n", nvdimm_name(nvdimm), nvdimm->num_flush, nvdimm->num_flush == 1 ? "" : "es"); - for (i = 0; i < nvdimm->num_flush; i++) { + for (i = 0; i < (1 << ndrd->hints_shift); i++) { struct resource *res = &nvdimm->flush_wpq[i]; unsigned long pfn = PHYS_PFN(res->start); void __iomem *flush_page; @@ -54,14 +54,15 @@ static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm, if (j < i) flush_page = (void __iomem *) ((unsigned long) - ndrd->flush_wpq[dimm][j] & PAGE_MASK); + ndrd_get_flush_wpq(ndrd, dimm, j) + & PAGE_MASK); else flush_page = devm_nvdimm_ioremap(dev, PFN_PHYS(pfn), PAGE_SIZE); if (!flush_page) return -ENXIO; - ndrd->flush_wpq[dimm][i] = flush_page - + (res->start & ~PAGE_MASK); + ndrd_set_flush_wpq(ndrd, dimm, i, flush_page + + (res->start & ~PAGE_MASK)); } return 0; @@ -93,7 +94,10 @@ int nd_region_activate(struct nd_region *nd_region) return -ENOMEM; dev_set_drvdata(dev, ndrd); - ndrd->flush_mask = (1 << ilog2(num_flush)) - 1; + if (!num_flush) + return 0; + + ndrd->hints_shift = ilog2(num_flush); for (i = 0; i < nd_region->ndr_mappings; i++) { struct nd_mapping *nd_mapping = &nd_region->mapping[i]; struct nvdimm *nvdimm = nd_mapping->nvdimm; @@ -900,8 +904,8 @@ void nvdimm_flush(struct nd_region *nd_region) */ wmb(); for (i = 0; i < nd_region->ndr_mappings; i++) - if (ndrd->flush_wpq[i][0]) - writeq(1, ndrd->flush_wpq[i][idx & ndrd->flush_mask]); + if (ndrd_get_flush_wpq(ndrd, i, 0)) + writeq(1, ndrd_get_flush_wpq(ndrd, i, idx)); wmb(); } EXPORT_SYMBOL_GPL(nvdimm_flush); @@ -925,7 +929,7 @@ int nvdimm_has_flush(struct nd_region *nd_region) for (i = 0; i < nd_region->ndr_mappings; i++) /* flush hints present, flushing required */ - if (ndrd->flush_wpq[i][0]) + if (ndrd_get_flush_wpq(ndrd, i, 0)) return 1; /* -- cgit v1.2.3 From 8f46cca1e6c06a058374816887059bcc017b382f Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Thu, 22 Sep 2016 17:15:47 +0100 Subject: MIPS: SMP: Fix possibility of deadlock when bringing CPUs online This patch fixes the possibility of a deadlock when bringing up secondary CPUs. The deadlock occurs because the set_cpu_online() is called before synchronise_count_slave(). This can cause a deadlock if the boot CPU, having scheduled another thread, attempts to send an IPI to the secondary CPU, which it sees has been marked online. The secondary is blocked in synchronise_count_slave() waiting for the boot CPU to enter synchronise_count_master(), but the boot cpu is blocked in smp_call_function_many() waiting for the secondary to respond to it's IPI request. Fix this by marking the CPU online in cpu_callin_map and synchronising counters before declaring the CPU online and calculating the maps for IPIs. Signed-off-by: Matt Redfearn Reported-by: Justin Chen Tested-by: Justin Chen Cc: Florian Fainelli Cc: stable@vger.kernel.org # v4.1+ Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14302/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/smp.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index f95f094f36e4..b0baf48951fa 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -322,6 +322,9 @@ asmlinkage void start_secondary(void) cpumask_set_cpu(cpu, &cpu_coherent_mask); notify_cpu_starting(cpu); + cpumask_set_cpu(cpu, &cpu_callin_map); + synchronise_count_slave(cpu); + set_cpu_online(cpu, true); set_cpu_sibling_map(cpu); @@ -329,10 +332,6 @@ asmlinkage void start_secondary(void) calculate_cpu_foreign_map(); - cpumask_set_cpu(cpu, &cpu_callin_map); - - synchronise_count_slave(cpu); - /* * irq will be enabled in ->smp_finish(), enabling it too early * is dangerous. -- cgit v1.2.3 From 116e7111c8e3cc65ceef9664741bd593483e9517 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Thu, 22 Sep 2016 15:47:40 +0100 Subject: MIPS: Fix delay slot emulation count in debugfs Commit 432c6bacbd0c ("MIPS: Use per-mm page to execute branch delay slot instructions") accidentally removed use of the MIPS_FPU_EMU_INC_STATS macro from do_dsemulret, leading to the ds_emul file in debugfs always returning zero even though we perform delay slot emulations. Fix this by re-adding the use of the MIPS_FPU_EMU_INC_STATS macro. Signed-off-by: Paul Burton Fixes: 432c6bacbd0c ("MIPS: Use per-mm page to execute branch delay slot instructions") Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14301/ Signed-off-by: Ralf Baechle --- arch/mips/math-emu/dsemul.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/math-emu/dsemul.c b/arch/mips/math-emu/dsemul.c index 72a4642eee2c..4a094f7acb3d 100644 --- a/arch/mips/math-emu/dsemul.c +++ b/arch/mips/math-emu/dsemul.c @@ -298,5 +298,6 @@ bool do_dsemulret(struct pt_regs *xcp) /* Set EPC to return to post-branch instruction */ xcp->cp0_epc = current->thread.bd_emu_cont_pc; pr_debug("dsemulret to 0x%08lx\n", xcp->cp0_epc); + MIPS_FPU_EMU_INC_STATS(ds_emul); return true; } -- cgit v1.2.3 From 1245800c0f96eb6ebb368593e251d66c01e61022 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 23 Sep 2016 22:57:13 -0400 Subject: tracing: Move mutex to protect against resetting of seq data The iter->seq can be reset outside the protection of the mutex. So can reading of user data. Move the mutex up to the beginning of the function. Fixes: d7350c3f45694 ("tracing/core: make the read callbacks reentrants") Cc: stable@vger.kernel.org # 2.6.30+ Reported-by: Al Viro Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8a4bd6b68a0b..8fb4847b0450 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4890,19 +4890,20 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, struct trace_iterator *iter = filp->private_data; ssize_t sret; - /* return any leftover data */ - sret = trace_seq_to_user(&iter->seq, ubuf, cnt); - if (sret != -EBUSY) - return sret; - - trace_seq_init(&iter->seq); - /* * Avoid more than one consumer on a single file descriptor * This is just a matter of traces coherency, the ring buffer itself * is protected. */ mutex_lock(&iter->mutex); + + /* return any leftover data */ + sret = trace_seq_to_user(&iter->seq, ubuf, cnt); + if (sret != -EBUSY) + goto out; + + trace_seq_init(&iter->seq); + if (iter->trace->read) { sret = iter->trace->read(iter, filp, ubuf, cnt, ppos); if (sret) -- cgit v1.2.3 From 1ae2293dd6d2f5c823cf97e60b70d03631cd622f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 17 Sep 2016 18:31:46 -0400 Subject: fix memory leaks in tracing_buffers_splice_read() Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- kernel/trace/trace.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8fb4847b0450..77eeab2776ef 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5930,9 +5930,6 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, return -EBUSY; #endif - if (splice_grow_spd(pipe, &spd)) - return -ENOMEM; - if (*ppos & (PAGE_SIZE - 1)) return -EINVAL; @@ -5942,6 +5939,9 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, len &= PAGE_MASK; } + if (splice_grow_spd(pipe, &spd)) + return -ENOMEM; + again: trace_access_lock(iter->cpu_file); entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file); @@ -5999,19 +5999,21 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, /* did we read anything? */ if (!spd.nr_pages) { if (ret) - return ret; + goto out; + ret = -EAGAIN; if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) - return -EAGAIN; + goto out; ret = wait_on_pipe(iter, true); if (ret) - return ret; + goto out; goto again; } ret = splice_to_pipe(pipe, &spd); +out: splice_shrink_spd(&spd); return ret; -- cgit v1.2.3 From 62fd5258ebe3ea240371234955a6e2cc99e0b6c3 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 22 Sep 2016 11:53:34 -0700 Subject: radix tree test suite: Test radix_tree_replace_slot() for multiorder entries When we replace a multiorder entry, check that all indices reflect the new value. Also, compile the test suite with -O2, which shows other problems with the code due to some dodgy pointer operations in the radix tree code. Signed-off-by: Matthew Wilcox Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/Makefile | 2 +- tools/testing/radix-tree/multiorder.c | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index 3b530467148e..9d0919ed52a4 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -1,5 +1,5 @@ -CFLAGS += -I. -g -Wall -D_LGPL_SOURCE +CFLAGS += -I. -g -O2 -Wall -D_LGPL_SOURCE LDFLAGS += -lpthread -lurcu TARGETS = main OFILES = main.o radix-tree.o linux.o test.o tag_check.o find_next_bit.o \ diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 39d9b9568fe2..05d7bc488971 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -124,6 +124,8 @@ static void multiorder_check(unsigned long index, int order) unsigned long i; unsigned long min = index & ~((1UL << order) - 1); unsigned long max = min + (1UL << order); + void **slot; + struct item *item2 = item_create(min); RADIX_TREE(tree, GFP_KERNEL); printf("Multiorder index %ld, order %d\n", index, order); @@ -139,13 +141,19 @@ static void multiorder_check(unsigned long index, int order) item_check_absent(&tree, i); for (i = max; i < 2*max; i++) item_check_absent(&tree, i); + for (i = min; i < max; i++) + assert(radix_tree_insert(&tree, i, item2) == -EEXIST); + + slot = radix_tree_lookup_slot(&tree, index); + free(*slot); + radix_tree_replace_slot(slot, item2); for (i = min; i < max; i++) { - static void *entry = (void *) - (0xA0 | RADIX_TREE_EXCEPTIONAL_ENTRY); - assert(radix_tree_insert(&tree, i, entry) == -EEXIST); + struct item *item = item_lookup(&tree, i); + assert(item != 0); + assert(item->index == min); } - assert(item_delete(&tree, index) != 0); + assert(item_delete(&tree, min) != 0); for (i = 0; i < 2*max; i++) item_check_absent(&tree, i); -- cgit v1.2.3 From 8d2c0d36d6826ddc3114801c599619d3f2932f0a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 25 Sep 2016 13:32:46 -0700 Subject: radix tree: fix sibling entry handling in radix_tree_descend() The fixes to the radix tree test suite show that the multi-order case is broken. The basic reason is that the radix tree code uses tagged pointers with the "internal" bit in the low bits, and calculating the pointer indices was supposed to mask off those bits. But gcc will notice that we then use the index to re-create the pointer, and will avoid doing the arithmetic and use the tagged pointer directly. This cleans the code up, using the existing is_sibling_entry() helper to validate the sibling pointer range (instead of open-coding it), and using entry_to_node() to mask off the low tag bit from the pointer. And once you do that, you might as well just use the now cleaned-up pointer directly. [ Side note: the multi-order code isn't actually ever used in the kernel right now, and the only reason I didn't just delete all that code is that Kirill Shutemov piped up and said: "Well, my ext4-with-huge-pages patchset[1] uses multi-order entries. It also converts shmem-with-huge-pages and hugetlb to them. I'm okay with converting it to other mechanism, but I need something. (I looked into Konstantin's RFC patchset[2]. It looks okay, but I don't feel myself qualified to review it as I don't know much about radix-tree internals.)" [1] http://lkml.kernel.org/r/20160915115523.29737-1-kirill.shutemov@linux.intel.com [2] http://lkml.kernel.org/r/147230727479.9957.1087787722571077339.stgit@zurg ] Reported-by: Matthew Wilcox Cc: Andrew Morton Cc: Ross Zwisler Cc: Johannes Weiner Cc: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Cedric Blancher Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 1b7bf7314141..91f0727e3cad 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -105,10 +105,10 @@ static unsigned int radix_tree_descend(struct radix_tree_node *parent, #ifdef CONFIG_RADIX_TREE_MULTIORDER if (radix_tree_is_internal_node(entry)) { - unsigned long siboff = get_slot_offset(parent, entry); - if (siboff < RADIX_TREE_MAP_SIZE) { - offset = siboff; - entry = rcu_dereference_raw(parent->slots[offset]); + if (is_sibling_entry(parent, entry)) { + void **sibentry = (void **) entry_to_node(entry); + offset = get_slot_offset(parent, sibentry); + entry = rcu_dereference_raw(*sibentry); } } #endif -- cgit v1.2.3 From 38e088546522e1e86d2b8f401a1354ad3a9b3303 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Sun, 11 Sep 2016 23:54:25 +0100 Subject: mm: check VMA flags to avoid invalid PROT_NONE NUMA balancing The NUMA balancing logic uses an arch-specific PROT_NONE page table flag defined by pte_protnone() or pmd_protnone() to mark PTEs or huge page PMDs respectively as requiring balancing upon a subsequent page fault. User-defined PROT_NONE memory regions which also have this flag set will not normally invoke the NUMA balancing code as do_page_fault() will send a segfault to the process before handle_mm_fault() is even called. However if access_remote_vm() is invoked to access a PROT_NONE region of memory, handle_mm_fault() is called via faultin_page() and __get_user_pages() without any access checks being performed, meaning the NUMA balancing logic is incorrectly invoked on a non-NUMA memory region. A simple means of triggering this problem is to access PROT_NONE mmap'd memory using /proc/self/mem which reliably results in the NUMA handling functions being invoked when CONFIG_NUMA_BALANCING is set. This issue was reported in bugzilla (issue 99101) which includes some simple repro code. There are BUG_ON() checks in do_numa_page() and do_huge_pmd_numa_page() added at commit c0e7cad to avoid accidentally provoking strange behaviour by attempting to apply NUMA balancing to pages that are in fact PROT_NONE. The BUG_ON()'s are consistently triggered by the repro. This patch moves the PROT_NONE check into mm/memory.c rather than invoking BUG_ON() as faulting in these pages via faultin_page() is a valid reason for reaching the NUMA check with the PROT_NONE page table flag set and is therefore not always a bug. Link: https://bugzilla.kernel.org/show_bug.cgi?id=99101 Reported-by: Trevor Saunders Signed-off-by: Lorenzo Stoakes Acked-by: Rik van Riel Cc: Andrew Morton Cc: Mel Gorman Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 3 --- mm/memory.c | 12 +++++++----- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index a6abd76baa72..53ae6d00656a 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1138,9 +1138,6 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd) bool was_writable; int flags = 0; - /* A PROT_NONE fault should not end up here */ - BUG_ON(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))); - fe->ptl = pmd_lock(vma->vm_mm, fe->pmd); if (unlikely(!pmd_same(pmd, *fe->pmd))) goto out_unlock; diff --git a/mm/memory.c b/mm/memory.c index 83be99d9d8a1..793fe0f9841c 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3351,9 +3351,6 @@ static int do_numa_page(struct fault_env *fe, pte_t pte) bool was_writable = pte_write(pte); int flags = 0; - /* A PROT_NONE fault should not end up here */ - BUG_ON(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))); - /* * The "pte" at this point cannot be used safely without * validation through pte_unmap_same(). It's of NUMA type but @@ -3458,6 +3455,11 @@ static int wp_huge_pmd(struct fault_env *fe, pmd_t orig_pmd) return VM_FAULT_FALLBACK; } +static inline bool vma_is_accessible(struct vm_area_struct *vma) +{ + return vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE); +} + /* * These routines also need to handle stuff like marking pages dirty * and/or accessed for architectures that don't do it in hardware (most @@ -3524,7 +3526,7 @@ static int handle_pte_fault(struct fault_env *fe) if (!pte_present(entry)) return do_swap_page(fe, entry); - if (pte_protnone(entry)) + if (pte_protnone(entry) && vma_is_accessible(fe->vma)) return do_numa_page(fe, entry); fe->ptl = pte_lockptr(fe->vma->vm_mm, fe->pmd); @@ -3590,7 +3592,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, barrier(); if (pmd_trans_huge(orig_pmd) || pmd_devmap(orig_pmd)) { - if (pmd_protnone(orig_pmd)) + if (pmd_protnone(orig_pmd) && vma_is_accessible(vma)) return do_huge_pmd_numa_page(&fe, orig_pmd); if ((fe.flags & FAULT_FLAG_WRITE) && -- cgit v1.2.3 From 90b75db6498a19da96dac4b55c909ff3721f3045 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 26 Sep 2016 09:57:33 +1000 Subject: fault_in_multipages_readable() throws set-but-unused error When building XFS with -Werror, it now fails with: include/linux/pagemap.h: In function 'fault_in_multipages_readable': include/linux/pagemap.h:602:16: error: variable 'c' set but not used [-Werror=unused-but-set-variable] volatile char c; ^ This is a regression caused by commit e23d4159b109 ("fix fault_in_multipages_...() on architectures with no-op access_ok()"). Fix it by re-adding the "(void)c" trick taht was previously used to make the compiler think the variable is used. Signed-off-by: Dave Chinner Cc: Al Viro Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 7e3d53753612..01e84436cddf 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -620,6 +620,7 @@ static inline int fault_in_multipages_readable(const char __user *uaddr, return __get_user(c, end); } + (void)c; return 0; } -- cgit v1.2.3 From 08895a8b6b06ed2323cd97a36ee40a116b3db8ed Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 25 Sep 2016 18:47:13 -0700 Subject: Linux 4.8-rc8 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 74e22c2f408b..ce2ddb3fec98 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 8 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = -rc8 NAME = Psychotic Stoned Sheep # *DOCUMENTATION* -- cgit v1.2.3 From 2cf750704bb6d7ed8c7d732e071dd1bc890ea5e8 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sun, 25 Sep 2016 23:08:31 +0200 Subject: ipmr, ip6mr: fix scheduling while atomic and a deadlock with ipmr_get_route Since the commit below the ipmr/ip6mr rtnl_unicast() code uses the portid instead of the previous dst_pid which was copied from in_skb's portid. Since the skb is new the portid is 0 at that point so the packets are sent to the kernel and we get scheduling while atomic or a deadlock (depending on where it happens) by trying to acquire rtnl two times. Also since this is RTM_GETROUTE, it can be triggered by a normal user. Here's the sleeping while atomic trace: [ 7858.212557] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:620 [ 7858.212748] in_atomic(): 1, irqs_disabled(): 0, pid: 0, name: swapper/0 [ 7858.212881] 2 locks held by swapper/0/0: [ 7858.213013] #0: (((&mrt->ipmr_expire_timer))){+.-...}, at: [] call_timer_fn+0x5/0x350 [ 7858.213422] #1: (mfc_unres_lock){+.....}, at: [] ipmr_expire_process+0x25/0x130 [ 7858.213807] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.8.0-rc7+ #179 [ 7858.213934] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014 [ 7858.214108] 0000000000000000 ffff88005b403c50 ffffffff813a7804 0000000000000000 [ 7858.214412] ffffffff81a1338e ffff88005b403c78 ffffffff810a4a72 ffffffff81a1338e [ 7858.214716] 000000000000026c 0000000000000000 ffff88005b403ca8 ffffffff810a4b9f [ 7858.215251] Call Trace: [ 7858.215412] [] dump_stack+0x85/0xc1 [ 7858.215662] [] ___might_sleep+0x192/0x250 [ 7858.215868] [] __might_sleep+0x6f/0x100 [ 7858.216072] [] mutex_lock_nested+0x33/0x4d0 [ 7858.216279] [] ? netlink_lookup+0x25f/0x460 [ 7858.216487] [] rtnetlink_rcv+0x1b/0x40 [ 7858.216687] [] netlink_unicast+0x19c/0x260 [ 7858.216900] [] rtnl_unicast+0x20/0x30 [ 7858.217128] [] ipmr_destroy_unres+0xa9/0xf0 [ 7858.217351] [] ipmr_expire_process+0x8f/0x130 [ 7858.217581] [] ? ipmr_net_init+0x180/0x180 [ 7858.217785] [] ? ipmr_net_init+0x180/0x180 [ 7858.217990] [] call_timer_fn+0xa5/0x350 [ 7858.218192] [] ? call_timer_fn+0x5/0x350 [ 7858.218415] [] ? ipmr_net_init+0x180/0x180 [ 7858.218656] [] run_timer_softirq+0x260/0x640 [ 7858.218865] [] ? __do_softirq+0xbb/0x54f [ 7858.219068] [] __do_softirq+0xe8/0x54f [ 7858.219269] [] irq_exit+0xb8/0xc0 [ 7858.219463] [] smp_apic_timer_interrupt+0x42/0x50 [ 7858.219678] [] apic_timer_interrupt+0x8c/0xa0 [ 7858.219897] [] ? native_safe_halt+0x6/0x10 [ 7858.220165] [] ? trace_hardirqs_on+0xd/0x10 [ 7858.220373] [] default_idle+0x23/0x190 [ 7858.220574] [] arch_cpu_idle+0xf/0x20 [ 7858.220790] [] default_idle_call+0x4c/0x60 [ 7858.221016] [] cpu_startup_entry+0x39b/0x4d0 [ 7858.221257] [] rest_init+0x135/0x140 [ 7858.221469] [] start_kernel+0x50e/0x51b [ 7858.221670] [] ? early_idt_handler_array+0x120/0x120 [ 7858.221894] [] x86_64_start_reservations+0x2a/0x2c [ 7858.222113] [] x86_64_start_kernel+0x13b/0x14a Fixes: 2942e9005056 ("[RTNETLINK]: Use rtnl_unicast() for rtnetlink unicasts") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/mroute.h | 2 +- include/linux/mroute6.h | 2 +- net/ipv4/ipmr.c | 3 ++- net/ipv4/route.c | 3 ++- net/ipv6/ip6mr.c | 5 +++-- net/ipv6/route.c | 4 +++- 6 files changed, 12 insertions(+), 7 deletions(-) diff --git a/include/linux/mroute.h b/include/linux/mroute.h index d351fd3e1049..e5fb81376e92 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -120,5 +120,5 @@ struct mfc_cache { struct rtmsg; int ipmr_get_route(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr, - struct rtmsg *rtm, int nowait); + struct rtmsg *rtm, int nowait, u32 portid); #endif diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index 3987b64040c5..19a1c0c2993b 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -116,7 +116,7 @@ struct mfc6_cache { struct rtmsg; extern int ip6mr_get_route(struct net *net, struct sk_buff *skb, - struct rtmsg *rtm, int nowait); + struct rtmsg *rtm, int nowait, u32 portid); #ifdef CONFIG_IPV6_MROUTE extern struct sock *mroute6_socket(struct net *net, struct sk_buff *skb); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a87bcd2d4a94..5f006e13de56 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2123,7 +2123,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, int ipmr_get_route(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr, - struct rtmsg *rtm, int nowait) + struct rtmsg *rtm, int nowait, u32 portid) { struct mfc_cache *cache; struct mr_table *mrt; @@ -2168,6 +2168,7 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, return -ENOMEM; } + NETLINK_CB(skb2).portid = portid; skb_push(skb2, sizeof(struct iphdr)); skb_reset_network_header(skb2); iph = ip_hdr(skb2); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b5b47a26d4ec..62c3ed0b7556 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2503,7 +2503,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { int err = ipmr_get_route(net, skb, fl4->saddr, fl4->daddr, - r, nowait); + r, nowait, portid); + if (err <= 0) { if (!nowait) { if (err == 0) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index fccb5dd91902..7f4265b1649b 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2285,8 +2285,8 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, return 1; } -int ip6mr_get_route(struct net *net, - struct sk_buff *skb, struct rtmsg *rtm, int nowait) +int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, + int nowait, u32 portid) { int err; struct mr6_table *mrt; @@ -2331,6 +2331,7 @@ int ip6mr_get_route(struct net *net, return -ENOMEM; } + NETLINK_CB(skb2).portid = portid; skb_reset_transport_header(skb2); skb_put(skb2, sizeof(struct ipv6hdr)); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e3a224b97905..269218aacbea 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3202,7 +3202,9 @@ static int rt6_fill_node(struct net *net, if (iif) { #ifdef CONFIG_IPV6_MROUTE if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { - int err = ip6mr_get_route(net, skb, rtm, nowait); + int err = ip6mr_get_route(net, skb, rtm, nowait, + portid); + if (err <= 0) { if (!nowait) { if (err == 0) -- cgit v1.2.3 From 8d58881b995904bf8b150dae69be0829f832e7be Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 22 Sep 2016 14:20:54 -0700 Subject: scsi: Avoid that toggling use_blk_mq triggers a memory leak This patch avoids that the following memory leak is triggered if use_blk_mq is disabled after a SCSI host has been allocated by the ib_srp driver and before the same SCSI host is freed: unreferenced object 0xffff8803a168c568 (size 256): backtrace: [] kmemleak_alloc+0x45/0xa0 [] __kmalloc_node+0x1e4/0x400 [] blk_mq_alloc_tag_set+0xb4/0x230 [] scsi_mq_setup_tags+0xc7/0xd0 [] scsi_add_host_with_dma+0x216/0x2d0 [] srp_create_target+0xe55/0x13d0 [ib_srp] [] dev_attr_store+0x13/0x20 [] sysfs_kf_write+0x40/0x50 [] kernfs_fop_write+0x137/0x1c0 [] __vfs_write+0x23/0x140 [] vfs_write+0xb0/0x190 [] SyS_write+0x44/0xa0 [] entry_SYSCALL_64_fastpath+0x18/0xa8 Fixes: 9aa9cc4221f5 ("scsi: remove the disable_blk_mq host flag") Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Martin K. Petersen Cc: Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/hosts.c | 2 ++ drivers/scsi/scsi.c | 1 - drivers/scsi/scsi_priv.h | 1 + include/scsi/scsi_host.h | 5 +---- 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index ba9af4a2bd2a..ec6381e57eb7 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -486,6 +486,8 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize) else shost->dma_boundary = 0xffffffff; + shost->use_blk_mq = scsi_use_blk_mq; + device_initialize(&shost->shost_gendev); dev_set_name(&shost->shost_gendev, "host%d", shost->host_no); shost->shost_gendev.bus = &scsi_bus_type; diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 1f36aca44394..1deb6adc411f 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -1160,7 +1160,6 @@ bool scsi_use_blk_mq = true; bool scsi_use_blk_mq = false; #endif module_param_named(use_blk_mq, scsi_use_blk_mq, bool, S_IWUSR | S_IRUGO); -EXPORT_SYMBOL_GPL(scsi_use_blk_mq); static int __init init_scsi(void) { diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index 57a4b9973320..85c8a51bc563 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -29,6 +29,7 @@ extern int scsi_init_hosts(void); extern void scsi_exit_hosts(void); /* scsi.c */ +extern bool scsi_use_blk_mq; extern int scsi_setup_command_freelist(struct Scsi_Host *shost); extern void scsi_destroy_command_freelist(struct Scsi_Host *shost); #ifdef CONFIG_SCSI_LOGGING diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 0dee7afa93d6..7e4cd53139ed 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -771,12 +771,9 @@ static inline int scsi_host_in_recovery(struct Scsi_Host *shost) shost->tmf_in_progress; } -extern bool scsi_use_blk_mq; - static inline bool shost_use_blk_mq(struct Scsi_Host *shost) { - return scsi_use_blk_mq; - + return shost->use_blk_mq; } extern int scsi_queue_work(struct Scsi_Host *, struct work_struct *); -- cgit v1.2.3 From 9fb6de1b0bf4ec11573b76059da3c3b39ac7f2ff Mon Sep 17 00:00:00 2001 From: Ville Ranki Date: Thu, 15 Sep 2016 15:31:27 -0700 Subject: Input: joydev - recognize devices with Z axis as joysticks Current implementation of joydev's input_device_id table recognizes only devices with ABS_X, ABS_WHEEL or ABS_THROTTLE axes as joysticks. There are joystick devices that do not have those axes, for example TRC Rudder device. The device in question has ABS_Z, ABS_RX and ABS_RY axes causing it not being detected as joystick. This patch adds ABS_Z to the input_device_id list allowing devices with ABS_Z axis to be detected correctly. Signed-off-by: Ville Ranki Signed-off-by: Dmitry Torokhov --- drivers/input/joydev.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/input/joydev.c b/drivers/input/joydev.c index 5d11fea3c8ec..f3135ae22df4 100644 --- a/drivers/input/joydev.c +++ b/drivers/input/joydev.c @@ -946,6 +946,12 @@ static const struct input_device_id joydev_ids[] = { .evbit = { BIT_MASK(EV_ABS) }, .absbit = { BIT_MASK(ABS_X) }, }, + { + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | + INPUT_DEVICE_ID_MATCH_ABSBIT, + .evbit = { BIT_MASK(EV_ABS) }, + .absbit = { BIT_MASK(ABS_Z) }, + }, { .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_ABSBIT, -- cgit v1.2.3 From 653d37d8bca0be2c7c53c8578980b06840482427 Mon Sep 17 00:00:00 2001 From: Eric Nelson Date: Sat, 24 Sep 2016 07:42:17 -0700 Subject: net: fec: remove QUIRK_HAS_RACC from i.mx25 According to the i.MX25 reference manual, this SoC does not have support for the receive accelerator (RACC) register at offset 0x1C4. http://www.nxp.com/files/dsp/doc/ref_manual/IMX25RM.pdf Signed-off-by: Eric Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 01f7e811739b..3019f27b9ef8 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -89,7 +89,7 @@ static struct platform_device_id fec_devtype[] = { .driver_data = 0, }, { .name = "imx25-fec", - .driver_data = FEC_QUIRK_USE_GASKET | FEC_QUIRK_HAS_RACC, + .driver_data = FEC_QUIRK_USE_GASKET, }, { .name = "imx27-fec", .driver_data = FEC_QUIRK_HAS_RACC, -- cgit v1.2.3 From 97dc499c1ae396bc0d529f5b1aef3138de1ae7de Mon Sep 17 00:00:00 2001 From: Eric Nelson Date: Sat, 24 Sep 2016 07:42:18 -0700 Subject: net: fec: remove QUIRK_HAS_RACC from i.mx27 According to the i.MX27 reference manual, this SoC does not have support for the receive accelerator (RACC) register at offset 0x1C4. http://cache.nxp.com/files/32bit/doc/ref_manual/MCIMX27RM.pdf Signed-off-by: Eric Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 3019f27b9ef8..3fb11f929adc 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -92,7 +92,7 @@ static struct platform_device_id fec_devtype[] = { .driver_data = FEC_QUIRK_USE_GASKET, }, { .name = "imx27-fec", - .driver_data = FEC_QUIRK_HAS_RACC, + .driver_data = 0, }, { .name = "imx28-fec", .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME | -- cgit v1.2.3 From 3ac72b7b63d57b231dda1e8f8d13872e0d7e8603 Mon Sep 17 00:00:00 2001 From: Eric Nelson Date: Sat, 24 Sep 2016 07:42:19 -0700 Subject: net: fec: align IP header in hardware The FEC receive accelerator (RACC) supports shifting the data payload of received packets by 16-bits, which aligns the payload (IP header) on a 4-byte boundary, which is, if not required, at least strongly suggested by the Linux networking layer. Without this patch, a huge number of alignment faults will be taken by the IP stack, as seen in /proc/cpu/alignment: ~/$ cat /proc/cpu/alignment User: 0 System: 72645 (inet_gro_receive+0x104/0x27c) Skipped: 0 Half: 0 Word: 0 DWord: 0 Multi: 72645 User faults: 3 (fixup+warn) This patch was suggested by Andrew Lunn in this message to linux-netdev: http://marc.info/?l=linux-arm-kernel&m=147465452108384&w=2 and adapted from a patch by Russell King from 2014: http://git.arm.linux.org.uk/cgit/linux-arm.git/commit/?id=70d8a8a Signed-off-by: Eric Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 3fb11f929adc..692ee248e486 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -180,6 +180,7 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address"); /* FEC receive acceleration */ #define FEC_RACC_IPDIS (1 << 1) #define FEC_RACC_PRODIS (1 << 2) +#define FEC_RACC_SHIFT16 BIT(7) #define FEC_RACC_OPTIONS (FEC_RACC_IPDIS | FEC_RACC_PRODIS) /* @@ -945,9 +946,11 @@ fec_restart(struct net_device *ndev) #if !defined(CONFIG_M5272) if (fep->quirks & FEC_QUIRK_HAS_RACC) { - /* set RX checksum */ val = readl(fep->hwp + FEC_RACC); + /* align IP header */ + val |= FEC_RACC_SHIFT16; if (fep->csum_flags & FLAG_RX_CSUM_ENABLED) + /* set RX checksum */ val |= FEC_RACC_OPTIONS; else val &= ~FEC_RACC_OPTIONS; @@ -1428,6 +1431,12 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id) prefetch(skb->data - NET_IP_ALIGN); skb_put(skb, pkt_len - 4); data = skb->data; + +#if !defined(CONFIG_M5272) + if (fep->quirks & FEC_QUIRK_HAS_RACC) + data = skb_pull_inline(skb, 2); +#endif + if (!is_copybreak && need_swap) swap_buffer(data, pkt_len); -- cgit v1.2.3 From bf1a85a8381a0f749aa321d7881b456b36eb398d Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 24 Sep 2016 12:58:30 -0700 Subject: Revert "net: ethernet: bcmgenet: use phydev from struct net_device" This reverts commit 62469c76007e ("net: ethernet: bcmgenet: use phydev from struct net_device") because it causes GENETv1/2/3 adapters to expose the following behavior after an ifconfig down/up sequence: PING fainelli-linux (10.112.156.244): 56 data bytes 64 bytes from 10.112.156.244: seq=1 ttl=61 time=1.352 ms 64 bytes from 10.112.156.244: seq=1 ttl=61 time=1.472 ms (DUP!) 64 bytes from 10.112.156.244: seq=1 ttl=61 time=1.496 ms (DUP!) 64 bytes from 10.112.156.244: seq=1 ttl=61 time=1.517 ms (DUP!) 64 bytes from 10.112.156.244: seq=1 ttl=61 time=1.536 ms (DUP!) 64 bytes from 10.112.156.244: seq=1 ttl=61 time=1.557 ms (DUP!) 64 bytes from 10.112.156.244: seq=1 ttl=61 time=752.448 ms (DUP!) This was previously fixed by commit 5dbebbb44a6a ("net: bcmgenet: Software reset EPHY after power on") but the commit we are reverting was essentially making this previous commit void, here is why. Without commit 62469c76007e we would have the following scenario after an ifconfig down then up sequence: - bcmgenet_open() calls bcmgenet_power_up() to make sure the PHY is initialized *before* we get to initialize the UniMAC, this is critical to ensure the PHY is in a correct state, priv->phydev is valid, this code executes fine - second time from bcmgenet_mii_probe(), through the normal phy_init_hw() call (which arguably could be optimized out) Everything is fine in that case. With commit 62469c76007e, we would have the following scenario to happen after an ifconfig down then up sequence: - bcmgenet_close() calls phy_disonnect() which makes dev->phydev become NULL - when bcmgenet_open() executes again and calls bcmgenet_mii_reset() from bcmgenet_power_up() to initialize the internal PHY, the NULL check becomes true, so we do not reset the PHY, yet we keep going on and initialize the UniMAC, causing MAC activity to occur - we call bcmgenet_mii_reset() from bcmgenet_mii_probe(), but this is too late, the PHY is botched, and causes the above bogus pings/packets transmission/reception to occur Reported-by: Jaedon Shin Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 45 ++++++++++++++------------ drivers/net/ethernet/broadcom/genet/bcmgenet.h | 1 + drivers/net/ethernet/broadcom/genet/bcmmii.c | 24 +++++++------- 3 files changed, 39 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 8d4f8495dbb3..541456398dfb 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -453,25 +453,29 @@ static inline void bcmgenet_rdma_ring_writel(struct bcmgenet_priv *priv, static int bcmgenet_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { + struct bcmgenet_priv *priv = netdev_priv(dev); + if (!netif_running(dev)) return -EINVAL; - if (!dev->phydev) + if (!priv->phydev) return -ENODEV; - return phy_ethtool_gset(dev->phydev, cmd); + return phy_ethtool_gset(priv->phydev, cmd); } static int bcmgenet_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) { + struct bcmgenet_priv *priv = netdev_priv(dev); + if (!netif_running(dev)) return -EINVAL; - if (!dev->phydev) + if (!priv->phydev) return -ENODEV; - return phy_ethtool_sset(dev->phydev, cmd); + return phy_ethtool_sset(priv->phydev, cmd); } static int bcmgenet_set_rx_csum(struct net_device *dev, @@ -937,7 +941,7 @@ static int bcmgenet_get_eee(struct net_device *dev, struct ethtool_eee *e) e->eee_active = p->eee_active; e->tx_lpi_timer = bcmgenet_umac_readl(priv, UMAC_EEE_LPI_TIMER); - return phy_ethtool_get_eee(dev->phydev, e); + return phy_ethtool_get_eee(priv->phydev, e); } static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e) @@ -954,7 +958,7 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e) if (!p->eee_enabled) { bcmgenet_eee_enable_set(dev, false); } else { - ret = phy_init_eee(dev->phydev, 0); + ret = phy_init_eee(priv->phydev, 0); if (ret) { netif_err(priv, hw, dev, "EEE initialization failed\n"); return ret; @@ -964,12 +968,14 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e) bcmgenet_eee_enable_set(dev, true); } - return phy_ethtool_set_eee(dev->phydev, e); + return phy_ethtool_set_eee(priv->phydev, e); } static int bcmgenet_nway_reset(struct net_device *dev) { - return genphy_restart_aneg(dev->phydev); + struct bcmgenet_priv *priv = netdev_priv(dev); + + return genphy_restart_aneg(priv->phydev); } /* standard ethtool support functions. */ @@ -996,13 +1002,12 @@ static struct ethtool_ops bcmgenet_ethtool_ops = { static int bcmgenet_power_down(struct bcmgenet_priv *priv, enum bcmgenet_power_mode mode) { - struct net_device *ndev = priv->dev; int ret = 0; u32 reg; switch (mode) { case GENET_POWER_CABLE_SENSE: - phy_detach(ndev->phydev); + phy_detach(priv->phydev); break; case GENET_POWER_WOL_MAGIC: @@ -1063,6 +1068,7 @@ static void bcmgenet_power_up(struct bcmgenet_priv *priv, /* ioctl handle special commands that are not present in ethtool. */ static int bcmgenet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { + struct bcmgenet_priv *priv = netdev_priv(dev); int val = 0; if (!netif_running(dev)) @@ -1072,10 +1078,10 @@ static int bcmgenet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: - if (!dev->phydev) + if (!priv->phydev) val = -ENODEV; else - val = phy_mii_ioctl(dev->phydev, rq, cmd); + val = phy_mii_ioctl(priv->phydev, rq, cmd); break; default: @@ -2458,7 +2464,6 @@ static void bcmgenet_irq_task(struct work_struct *work) { struct bcmgenet_priv *priv = container_of( work, struct bcmgenet_priv, bcmgenet_irq_work); - struct net_device *ndev = priv->dev; netif_dbg(priv, intr, priv->dev, "%s\n", __func__); @@ -2471,7 +2476,7 @@ static void bcmgenet_irq_task(struct work_struct *work) /* Link UP/DOWN event */ if (priv->irq0_stat & UMAC_IRQ_LINK_EVENT) { - phy_mac_interrupt(ndev->phydev, + phy_mac_interrupt(priv->phydev, !!(priv->irq0_stat & UMAC_IRQ_LINK_UP)); priv->irq0_stat &= ~UMAC_IRQ_LINK_EVENT; } @@ -2833,7 +2838,7 @@ static void bcmgenet_netif_start(struct net_device *dev) /* Monitor link interrupts now */ bcmgenet_link_intr_enable(priv); - phy_start(dev->phydev); + phy_start(priv->phydev); } static int bcmgenet_open(struct net_device *dev) @@ -2932,7 +2937,7 @@ static void bcmgenet_netif_stop(struct net_device *dev) struct bcmgenet_priv *priv = netdev_priv(dev); netif_tx_stop_all_queues(dev); - phy_stop(dev->phydev); + phy_stop(priv->phydev); bcmgenet_intr_disable(priv); bcmgenet_disable_rx_napi(priv); bcmgenet_disable_tx_napi(priv); @@ -2958,7 +2963,7 @@ static int bcmgenet_close(struct net_device *dev) bcmgenet_netif_stop(dev); /* Really kill the PHY state machine and disconnect from it */ - phy_disconnect(dev->phydev); + phy_disconnect(priv->phydev); /* Disable MAC receive */ umac_enable_set(priv, CMD_RX_EN, false); @@ -3517,7 +3522,7 @@ static int bcmgenet_suspend(struct device *d) bcmgenet_netif_stop(dev); - phy_suspend(dev->phydev); + phy_suspend(priv->phydev); netif_device_detach(dev); @@ -3581,7 +3586,7 @@ static int bcmgenet_resume(struct device *d) if (priv->wolopts) clk_disable_unprepare(priv->clk_wol); - phy_init_hw(dev->phydev); + phy_init_hw(priv->phydev); /* Speed settings must be restored */ bcmgenet_mii_config(priv->dev); @@ -3614,7 +3619,7 @@ static int bcmgenet_resume(struct device *d) netif_device_attach(dev); - phy_resume(dev->phydev); + phy_resume(priv->phydev); if (priv->eee.eee_enabled) bcmgenet_eee_enable_set(dev, true); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 0f0868c56f05..1e2dc34d331a 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -597,6 +597,7 @@ struct bcmgenet_priv { /* MDIO bus variables */ wait_queue_head_t wq; + struct phy_device *phydev; bool internal_phy; struct device_node *phy_dn; struct device_node *mdio_dn; diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index e907acd81da9..457c3bc8cfff 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -86,7 +86,7 @@ static int bcmgenet_mii_write(struct mii_bus *bus, int phy_id, void bcmgenet_mii_setup(struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); - struct phy_device *phydev = dev->phydev; + struct phy_device *phydev = priv->phydev; u32 reg, cmd_bits = 0; bool status_changed = false; @@ -183,9 +183,9 @@ void bcmgenet_mii_reset(struct net_device *dev) if (GENET_IS_V4(priv)) return; - if (dev->phydev) { - phy_init_hw(dev->phydev); - phy_start_aneg(dev->phydev); + if (priv->phydev) { + phy_init_hw(priv->phydev); + phy_start_aneg(priv->phydev); } } @@ -236,7 +236,6 @@ static void bcmgenet_internal_phy_setup(struct net_device *dev) static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv) { - struct net_device *ndev = priv->dev; u32 reg; /* Speed settings are set in bcmgenet_mii_setup() */ @@ -245,14 +244,14 @@ static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv) bcmgenet_sys_writel(priv, reg, SYS_PORT_CTRL); if (priv->hw_params->flags & GENET_HAS_MOCA_LINK_DET) - fixed_phy_set_link_update(ndev->phydev, + fixed_phy_set_link_update(priv->phydev, bcmgenet_fixed_phy_link_update); } int bcmgenet_mii_config(struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); - struct phy_device *phydev = dev->phydev; + struct phy_device *phydev = priv->phydev; struct device *kdev = &priv->pdev->dev; const char *phy_name = NULL; u32 id_mode_dis = 0; @@ -303,7 +302,7 @@ int bcmgenet_mii_config(struct net_device *dev) * capabilities, use that knowledge to also configure the * Reverse MII interface correctly. */ - if ((phydev->supported & PHY_BASIC_FEATURES) == + if ((priv->phydev->supported & PHY_BASIC_FEATURES) == PHY_BASIC_FEATURES) port_ctrl = PORT_MODE_EXT_RVMII_25; else @@ -372,7 +371,7 @@ int bcmgenet_mii_probe(struct net_device *dev) return -ENODEV; } } else { - phydev = dev->phydev; + phydev = priv->phydev; phydev->dev_flags = phy_flags; ret = phy_connect_direct(dev, phydev, bcmgenet_mii_setup, @@ -383,6 +382,8 @@ int bcmgenet_mii_probe(struct net_device *dev) } } + priv->phydev = phydev; + /* Configure port multiplexer based on what the probed PHY device since * reading the 'max-speed' property determines the maximum supported * PHY speed which is needed for bcmgenet_mii_config() to configure @@ -390,7 +391,7 @@ int bcmgenet_mii_probe(struct net_device *dev) */ ret = bcmgenet_mii_config(dev); if (ret) { - phy_disconnect(phydev); + phy_disconnect(priv->phydev); return ret; } @@ -400,7 +401,7 @@ int bcmgenet_mii_probe(struct net_device *dev) * Ethernet MAC ISRs */ if (priv->internal_phy) - phydev->irq = PHY_IGNORE_INTERRUPT; + priv->phydev->irq = PHY_IGNORE_INTERRUPT; return 0; } @@ -605,6 +606,7 @@ static int bcmgenet_mii_pd_init(struct bcmgenet_priv *priv) } + priv->phydev = phydev; priv->phy_interface = pd->phy_interface; return 0; -- cgit v1.2.3 From 1190cfdb1a19d89561ae51cff7d9c2ead24b3ebe Mon Sep 17 00:00:00 2001 From: Jorgen Hansen Date: Mon, 26 Sep 2016 23:59:53 -0700 Subject: VSOCK: Don't dec ack backlog twice for rejected connections If a pending socket is marked as rejected, we will decrease the sk_ack_backlog twice. So don't decrement it for rejected sockets in vsock_pending_work(). Testing of the rejected socket path was done through code modifications. Reported-by: Stefan Hajnoczi Signed-off-by: Jorgen Hansen Reviewed-by: Adit Ranadive Reviewed-by: Aditya Sarwade Signed-off-by: David S. Miller --- net/vmw_vsock/af_vsock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 17dbbe64cd73..8a398b3fb532 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -465,6 +465,8 @@ void vsock_pending_work(struct work_struct *work) if (vsock_is_pending(sk)) { vsock_remove_pending(listener, sk); + + listener->sk_ack_backlog--; } else if (!vsk->rejected) { /* We are not on the pending list and accept() did not reject * us, so we must have been accepted by our user process. We @@ -475,8 +477,6 @@ void vsock_pending_work(struct work_struct *work) goto out; } - listener->sk_ack_backlog--; - /* We need to remove ourself from the global connected sockets list so * incoming packets can't find this socket, and to reduce the reference * count. @@ -2010,5 +2010,5 @@ EXPORT_SYMBOL_GPL(vsock_core_get_transport); MODULE_AUTHOR("VMware, Inc."); MODULE_DESCRIPTION("VMware Virtual Socket Family"); -MODULE_VERSION("1.0.1.0-k"); +MODULE_VERSION("1.0.2.0-k"); MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From 4b1d488a285a446329825d5fe91f987b7880e6e5 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Mon, 26 Sep 2016 13:45:25 +0300 Subject: act_ife: Fix external mac header on encode On ife encode side, external mac header is copied from the original packet and may be overridden if the user requests. Before, the mac header copy was done from memory region that might not be accessible anymore, as skb_cow_head might free it and copy the packet. This led to random values in the external mac header once the values were not set by user. This fix takes the internal mac header from the packet, after the call to skb_cow_head. Fixes: ef6980b6becb ("net sched: introduce IFE action") Acked-by: Jamal Hadi Salim Signed-off-by: Yotam Gigi Signed-off-by: David S. Miller --- net/sched/act_ife.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index e87cd81315e1..88ac9a8c9bbd 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -740,8 +740,6 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, return TC_ACT_SHOT; } - iethh = eth_hdr(skb); - err = skb_cow_head(skb, hdrm); if (unlikely(err)) { ife->tcf_qstats.drops++; @@ -752,6 +750,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, if (!(at & AT_EGRESS)) skb_push(skb, skb->dev->hard_header_len); + iethh = (struct ethhdr *)skb->data; __skb_push(skb, hdrm); memcpy(skb->data, iethh, skb->mac_len); skb_reset_mac_header(skb); -- cgit v1.2.3 From c006da0be033b6ddcd27ee603d0ee01491236642 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Mon, 26 Sep 2016 13:45:26 +0300 Subject: act_ife: Fix false encoding On ife encode side, the action stores the different tlvs inside the ife header, where each tlv length field should refer to the length of the whole tlv (without additional padding) and not just the data length. On ife decode side, the action iterates over the tlvs in the ife header and parses them one by one, where in each iteration the current pointer is advanced according to the tlv size. Before, the encoding encoded only the data length inside the tlv, which led to false parsing of ife the header. In addition, due to the fact that the loop counter was unsigned, it could lead to infinite parsing loop. This fix changes the loop counter to be signed and fixes the encoding to take into account the tlv type and size. Fixes: 28a10c426e81 ("net sched: fix encoding to use real length") Acked-by: Jamal Hadi Salim Signed-off-by: Yotam Gigi Signed-off-by: David S. Miller --- net/sched/act_ife.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 88ac9a8c9bbd..4a60cd5e1875 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -53,7 +53,7 @@ int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval) u32 *tlv = (u32 *)(skbdata); u16 totlen = nla_total_size(dlen); /*alignment + hdr */ char *dptr = (char *)tlv + NLA_HDRLEN; - u32 htlv = attrtype << 16 | dlen; + u32 htlv = attrtype << 16 | (dlen + NLA_HDRLEN); *tlv = htonl(htlv); memset(dptr, 0, totlen - NLA_HDRLEN); @@ -627,7 +627,7 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, struct tcf_ife_info *ife = to_ife(a); int action = ife->tcf_action; struct ifeheadr *ifehdr = (struct ifeheadr *)skb->data; - u16 ifehdrln = ifehdr->metalen; + int ifehdrln = (int)ifehdr->metalen; struct meta_tlvhdr *tlv = (struct meta_tlvhdr *)(ifehdr->tlv_data); spin_lock(&ife->tcf_lock); -- cgit v1.2.3 From a951ed85abd4615e98e36b536e3b3b07b22a88ac Mon Sep 17 00:00:00 2001 From: Grazvydas Ignotas Date: Sun, 25 Sep 2016 23:34:48 +0300 Subject: drm/amdgpu: disable CRTCs before teardown Some code called by drm_crtc_force_disable_all() wants to wait for all fences, so only do fence teardown after CRTCs are disabled. Fixes: 84b89bdcedf8 ("drm/amdgpu: Turn off CRTCs on driver unload") Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: Grazvydas Ignotas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index df7ab2458e50..39c01b942ee4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1708,11 +1708,11 @@ void amdgpu_device_fini(struct amdgpu_device *adev) DRM_INFO("amdgpu: finishing device.\n"); adev->shutdown = true; + drm_crtc_force_disable_all(adev->ddev); /* evict vram memory */ amdgpu_bo_evict_vram(adev); amdgpu_ib_pool_fini(adev); amdgpu_fence_driver_fini(adev); - drm_crtc_force_disable_all(adev->ddev); amdgpu_fbdev_fini(adev); r = amdgpu_fini(adev); kfree(adev->ip_block_status); -- cgit v1.2.3 From 670bb4fd21c966d0d2a59ad4a99bb4889f9a2987 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 26 Sep 2016 15:32:50 -0400 Subject: drm/radeon/si/dpm: add workaround for for Jet parts Add clock quirks for Jet parts. Reviewed-by: Sonny Jiang Tested-by: Sonny Jiang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/si_dpm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index e6abc09b67e3..1f78ec2548ec 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -3015,6 +3015,12 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, if (rdev->pdev->device == 0x6811 && rdev->pdev->revision == 0x81) max_mclk = 120000; + /* limit sclk/mclk on Jet parts for stability */ + if (rdev->pdev->device == 0x6665 && + rdev->pdev->revision == 0xc3) { + max_sclk = 75000; + max_mclk = 80000; + } if (rps->vce_active) { rps->evclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].evclk; -- cgit v1.2.3 From 90fd68dcf9a763f7e575c8467415bd8a66d073f4 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Fri, 23 Sep 2016 12:36:02 +0200 Subject: drm/udl: fix line iterator in damage handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The udl damage handler is supposed to render 'height' lines, but its iterator has an obvious typo that makes it miss most lines if the rectangle does not cover 0/0. Fix the damage handler to correctly render all lines. This is a fallout from: commit e375882406d0cc24030746638592004755ed4ae0 Author: Noralf Trønnes Date: Thu Apr 28 17:18:37 2016 +0200 drm/udl: Use drm_fb_helper deferred_io support Tested-by: poma Cc: stable@vger.kernel.org # 4.7+ Reviewed-by: Daniel Vetter Signed-off-by: David Herrmann Reviewed-by: Eric Engestrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/udl/udl_fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c index 9688bfa92ccd..611b6b9bb3cb 100644 --- a/drivers/gpu/drm/udl/udl_fb.c +++ b/drivers/gpu/drm/udl/udl_fb.c @@ -122,7 +122,7 @@ int udl_handle_damage(struct udl_framebuffer *fb, int x, int y, return 0; cmd = urb->transfer_buffer; - for (i = y; i < height ; i++) { + for (i = y; i < y + height ; i++) { const int line_offset = fb->base.pitches[0] * i; const int byte_offset = line_offset + (x * bpp); const int dev_byte_offset = (fb->base.width * bpp * i) + (x * bpp); -- cgit v1.2.3 From bdf2f59e64eb9cd9e9dd90f990a9577640470c8a Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Sat, 6 Aug 2016 00:31:48 -0400 Subject: sparc64: fix section mismatch in find_numa_latencies_for_group To fix: WARNING: vmlinux.o(.text.unlikely+0x580): Section mismatch in reference from the function find_numa_latencies_for_group() to the function .init.text:find_mlgroup() The function find_numa_latencies_for_group() references the function __init find_mlgroup(). This is often because find_numa_latencies_for_group lacks a __init annotation or the annotation of find_mlgroup is wrong. It turns out find_numa_latencies_for_group is only called from: static int __init numa_parse_mdesc(void) and hence we can tag find_numa_latencies_for_group with __init. In doing so we see that find_best_numa_node_for_mlgroup is only called from within __init and hence can also be marked with __init. Cc: "David S. Miller" Cc: Nitin Gupta Cc: Chris Hyser Cc: Santosh Shilimkar Cc: sparclinux@vger.kernel.org Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- arch/sparc/mm/init_64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 65457c9f1365..80031fc2e2d6 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -1160,7 +1160,7 @@ int __node_distance(int from, int to) return numa_latency[from][to]; } -static int find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp) +static int __init find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp) { int i; @@ -1173,8 +1173,8 @@ static int find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp) return i; } -static void find_numa_latencies_for_group(struct mdesc_handle *md, u64 grp, - int index) +static void __init find_numa_latencies_for_group(struct mdesc_handle *md, + u64 grp, int index) { u64 arc; -- cgit v1.2.3 From 1e953d846ac015fbfcf09c857e8f893924cb629c Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 31 Aug 2016 13:48:19 -0700 Subject: sparc64 mm: Fix more TSB sizing issues Commit af1b1a9b36b8 ("sparc64 mm: Fix base TSB sizing when hugetlb pages are used") addressed the difference between hugetlb and THP pages when computing TSB sizes. The following additional issues were also discovered while working with the code. In order to save memory, THP makes use of a huge zero page. This huge zero page does not count against a task's RSS, but it does consume TSB entries. This is similar to hugetlb pages. Therefore, count huge zero page entries in hugetlb_pte_count. Accounting of THP pages is done in the routine set_pmd_at(). Unfortunately, this does not catch the case where a THP page is split. To handle this case, decrement the count in pmdp_invalidate(). pmdp_invalidate is only called when splitting a THP. However, 'sanity checks' are added in case it is ever called for other purposes. A more general issue exists with HPAGE_SIZE accounting. hugetlb_pte_count tracks the number of HPAGE_SIZE (8M) pages. This value is used to size the TSB for HPAGE_SIZE pages. However, each HPAGE_SIZE page consists of two REAL_HPAGE_SIZE (4M) pages. The TSB contains an entry for each REAL_HPAGE_SIZE page. Therefore, the number of REAL_HPAGE_SIZE pages should be used to size the huge page TSB. A new compile time constant REAL_HPAGE_PER_HPAGE is used to multiply hugetlb_pte_count before sizing the TSB. Changes from V1 - Fixed build issue if hugetlb or THP not configured Signed-off-by: Mike Kravetz Signed-off-by: David S. Miller --- arch/sparc/include/asm/page_64.h | 1 + arch/sparc/mm/fault_64.c | 1 + arch/sparc/mm/tlb.c | 35 +++++++++++++++++++++++++++++++---- arch/sparc/mm/tsb.c | 18 ++++++++++++------ 4 files changed, 45 insertions(+), 10 deletions(-) diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h index 8c2a8c937540..c1263fc390db 100644 --- a/arch/sparc/include/asm/page_64.h +++ b/arch/sparc/include/asm/page_64.h @@ -25,6 +25,7 @@ #define HPAGE_MASK (~(HPAGE_SIZE - 1UL)) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA +#define REAL_HPAGE_PER_HPAGE (_AC(1,UL) << (HPAGE_SHIFT - REAL_HPAGE_SHIFT)) #endif #ifndef __ASSEMBLY__ diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index e16fdd28a931..3f291d8c57f7 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -484,6 +484,7 @@ good_area: tsb_grow(mm, MM_TSB_BASE, mm_rss); #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) mm_rss = mm->context.hugetlb_pte_count + mm->context.thp_pte_count; + mm_rss *= REAL_HPAGE_PER_HPAGE; if (unlikely(mm_rss > mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) { if (mm->context.tsb_block[MM_TSB_HUGE].tsb) diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index 3659d37b4d81..c56a195c9071 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -174,10 +174,25 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, return; if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) { - if (pmd_val(pmd) & _PAGE_PMD_HUGE) - mm->context.thp_pte_count++; - else - mm->context.thp_pte_count--; + /* + * Note that this routine only sets pmds for THP pages. + * Hugetlb pages are handled elsewhere. We need to check + * for huge zero page. Huge zero pages are like hugetlb + * pages in that there is no RSS, but there is the need + * for TSB entries. So, huge zero page counts go into + * hugetlb_pte_count. + */ + if (pmd_val(pmd) & _PAGE_PMD_HUGE) { + if (is_huge_zero_page(pmd_page(pmd))) + mm->context.hugetlb_pte_count++; + else + mm->context.thp_pte_count++; + } else { + if (is_huge_zero_page(pmd_page(orig))) + mm->context.hugetlb_pte_count--; + else + mm->context.thp_pte_count--; + } /* Do not try to allocate the TSB hash table if we * don't have one already. We have various locks held @@ -204,6 +219,9 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, } } +/* + * This routine is only called when splitting a THP + */ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { @@ -213,6 +231,15 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, set_pmd_at(vma->vm_mm, address, pmdp, entry); flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + + /* + * set_pmd_at() will not be called in a way to decrement + * thp_pte_count when splitting a THP, so do it now. + * Sanity check pmd before doing the actual decrement. + */ + if ((pmd_val(entry) & _PAGE_PMD_HUGE) && + !is_huge_zero_page(pmd_page(entry))) + (vma->vm_mm)->context.thp_pte_count--; } void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index 6725ed45580e..f2b77112e9d8 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -469,8 +469,10 @@ retry_tsb_alloc: int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { + unsigned long mm_rss = get_mm_rss(mm); #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - unsigned long total_huge_pte_count; + unsigned long saved_hugetlb_pte_count; + unsigned long saved_thp_pte_count; #endif unsigned int i; @@ -483,10 +485,12 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) * will re-increment the counters as the parent PTEs are * copied into the child address space. */ - total_huge_pte_count = mm->context.hugetlb_pte_count + - mm->context.thp_pte_count; + saved_hugetlb_pte_count = mm->context.hugetlb_pte_count; + saved_thp_pte_count = mm->context.thp_pte_count; mm->context.hugetlb_pte_count = 0; mm->context.thp_pte_count = 0; + + mm_rss -= saved_thp_pte_count * (HPAGE_SIZE / PAGE_SIZE); #endif /* copy_mm() copies over the parent's mm_struct before calling @@ -499,11 +503,13 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) /* If this is fork, inherit the parent's TSB size. We would * grow it to that size on the first page fault anyways. */ - tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm)); + tsb_grow(mm, MM_TSB_BASE, mm_rss); #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - if (unlikely(total_huge_pte_count)) - tsb_grow(mm, MM_TSB_HUGE, total_huge_pte_count); + if (unlikely(saved_hugetlb_pte_count + saved_thp_pte_count)) + tsb_grow(mm, MM_TSB_HUGE, + (saved_hugetlb_pte_count + saved_thp_pte_count) * + REAL_HPAGE_PER_HPAGE); #endif if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb)) -- cgit v1.2.3 From 9b2f753ec23710aa32c0d837d2499db92fe9115b Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Thu, 15 Sep 2016 14:54:40 -0600 Subject: sparc64: Fix cpu_possible_mask if nr_cpus is set If kernel boot parameter nr_cpus is set, it should define the number of CPUs that can ever be available in the system i.e. cpu_possible_mask. setup_nr_cpu_ids() overrides the nr_cpu_ids based on the cpu_possible_mask during kernel initialization. If cpu_possible_mask is not set based on the nr_cpus value, earlier part of the kernel would be initialized using nr_cpus value leading to a kernel crash. Set cpu_possible_mask based on nr_cpus value. Thus setup_nr_cpu_ids() becomes redundant and does not corrupt nr_cpu_ids value. Signed-off-by: Atish Patra Reviewed-by: Bob Picco Reviewed-by: Vijay Kumar Signed-off-by: David S. Miller --- arch/sparc/include/asm/smp_64.h | 1 + arch/sparc/kernel/setup_64.c | 1 + arch/sparc/kernel/smp_64.c | 14 ++++++++++++++ 3 files changed, 16 insertions(+) diff --git a/arch/sparc/include/asm/smp_64.h b/arch/sparc/include/asm/smp_64.h index 26d9e7726867..87b05751955a 100644 --- a/arch/sparc/include/asm/smp_64.h +++ b/arch/sparc/include/asm/smp_64.h @@ -43,6 +43,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask); int hard_smp_processor_id(void); #define raw_smp_processor_id() (current_thread_info()->cpu) +void smp_fill_in_cpu_possible_map(void); void smp_fill_in_sib_core_maps(void); void cpu_play_dead(void); diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 599f1207eed2..f32cfe83dfd1 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -650,6 +650,7 @@ void __init setup_arch(char **cmdline_p) paging_init(); init_sparc64_elf_hwcap(); + smp_fill_in_cpu_possible_map(); } extern int stop_a_enabled; diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 8a6151a628ce..d3035ba6cd31 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1227,6 +1227,20 @@ void __init smp_setup_processor_id(void) xcall_deliver_impl = hypervisor_xcall_deliver; } +void __init smp_fill_in_cpu_possible_map(void) +{ + int possible_cpus = num_possible_cpus(); + int i; + + if (possible_cpus > nr_cpu_ids) + possible_cpus = nr_cpu_ids; + + for (i = 0; i < possible_cpus; i++) + set_cpu_possible(i, true); + for (; i < NR_CPUS; i++) + set_cpu_possible(i, false); +} + void smp_fill_in_sib_core_maps(void) { unsigned int i; -- cgit v1.2.3 From ebb99a4c12e4daabe1940ae936e8e7e97ae68c6f Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Thu, 15 Sep 2016 14:54:41 -0600 Subject: sparc64: Fix irq stack bootmem allocation. Currently, irq stack bootmem is allocated for all possible cpus before nr_cpus value changes the list of possible cpus. As a result, there is unnecessary wastage of bootmemory. Move the irq stack bootmem allocation so that it happens after possible cpu list is modified based on nr_cpus value. Signed-off-by: Atish Patra Reviewed-by: Bob Picco Reviewed-by: Vijay Kumar Signed-off-by: David S. Miller --- arch/sparc/kernel/setup_64.c | 25 +++++++++++++++++++++++++ arch/sparc/mm/init_64.c | 16 ---------------- 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index f32cfe83dfd1..6b7331d198e9 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -50,6 +51,8 @@ #include #include #include +#include +#include #ifdef CONFIG_IP_PNP #include @@ -590,6 +593,22 @@ static void __init init_sparc64_elf_hwcap(void) pause_patch(); } +void __init alloc_irqstack_bootmem(void) +{ + unsigned int i, node; + + for_each_possible_cpu(i) { + node = cpu_to_node(i); + + softirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node), + THREAD_SIZE, + THREAD_SIZE, 0); + hardirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node), + THREAD_SIZE, + THREAD_SIZE, 0); + } +} + void __init setup_arch(char **cmdline_p) { /* Initialize PROM console and command line. */ @@ -651,6 +670,12 @@ void __init setup_arch(char **cmdline_p) paging_init(); init_sparc64_elf_hwcap(); smp_fill_in_cpu_possible_map(); + /* + * Once the OF device tree and MDESC have been setup and nr_cpus has + * been parsed, we know the list of possible cpus. Therefore we can + * allocate the IRQ stacks. + */ + alloc_irqstack_bootmem(); } extern int stop_a_enabled; diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 80031fc2e2d6..7ac6b62fb7c1 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2081,7 +2081,6 @@ void __init paging_init(void) { unsigned long end_pfn, shift, phys_base; unsigned long real_end, i; - int node; setup_page_offset(); @@ -2250,21 +2249,6 @@ void __init paging_init(void) /* Setup bootmem... */ last_valid_pfn = end_pfn = bootmem_init(phys_base); - /* Once the OF device tree and MDESC have been setup, we know - * the list of possible cpus. Therefore we can allocate the - * IRQ stacks. - */ - for_each_possible_cpu(i) { - node = cpu_to_node(i); - - softirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node), - THREAD_SIZE, - THREAD_SIZE, 0); - hardirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node), - THREAD_SIZE, - THREAD_SIZE, 0); - } - kernel_physical_mapping_init(); { -- cgit v1.2.3 From 0a966fa8914aafeb430f9e8489227e86d8468625 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Wed, 28 Sep 2016 12:51:04 -0700 Subject: MAINTAINERS: Update my e-mail I will be starting employment at Versity next week and would like to update my MAINTAINERS e-mail to reflect that change. My versity e-mail is already activated so I shouldn't get any bounces on the new one. My ability to help with Ocfs2 kernel maintenance won't change as a result of the new job. Signed-off-by: Mark Fasheh Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 01bff8ea28d8..b003d0ca6238 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8745,7 +8745,7 @@ F: drivers/oprofile/ F: include/linux/oprofile.h ORACLE CLUSTER FILESYSTEM 2 (OCFS2) -M: Mark Fasheh +M: Mark Fasheh M: Joel Becker L: ocfs2-devel@oss.oracle.com (moderated for non-subscribers) W: http://ocfs2.wiki.kernel.org -- cgit v1.2.3 From 5b398e416e880159fe55eefd93c6588fa072cd66 Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Wed, 28 Sep 2016 15:22:30 -0700 Subject: mm,ksm: fix endless looping in allocating memory when ksm enable I hit the following hung task when runing a OOM LTP test case with 4.1 kernel. Call trace: [] __switch_to+0x74/0x8c [] __schedule+0x23c/0x7bc [] schedule+0x3c/0x94 [] rwsem_down_write_failed+0x214/0x350 [] down_write+0x64/0x80 [] __ksm_exit+0x90/0x19c [] mmput+0x118/0x11c [] do_exit+0x2dc/0xa74 [] do_group_exit+0x4c/0xe4 [] get_signal+0x444/0x5e0 [] do_signal+0x1d8/0x450 [] do_notify_resume+0x70/0x78 The oom victim cannot terminate because it needs to take mmap_sem for write while the lock is held by ksmd for read which loops in the page allocator ksm_do_scan scan_get_next_rmap_item down_read get_next_rmap_item alloc_rmap_item #ksmd will loop permanently. There is no way forward because the oom victim cannot release any memory in 4.1 based kernel. Since 4.6 we have the oom reaper which would solve this problem because it would release the memory asynchronously. Nevertheless we can relax alloc_rmap_item requirements and use __GFP_NORETRY because the allocation failure is acceptable as ksm_do_scan would just retry later after the lock got dropped. Such a patch would be also easy to backport to older stable kernels which do not have oom_reaper. While we are at it add GFP_NOWARN so the admin doesn't have to be alarmed by the allocation failure. Link: http://lkml.kernel.org/r/1474165570-44398-1-git-send-email-zhongjiang@huawei.com Signed-off-by: zhong jiang Suggested-by: Hugh Dickins Suggested-by: Michal Hocko Acked-by: Michal Hocko Acked-by: Hugh Dickins Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/ksm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/ksm.c b/mm/ksm.c index 73d43bafd9fb..5048083b60f2 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -283,7 +283,8 @@ static inline struct rmap_item *alloc_rmap_item(void) { struct rmap_item *rmap_item; - rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL); + rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL | + __GFP_NORETRY | __GFP_NOWARN); if (rmap_item) ksm_rmap_items++; return rmap_item; -- cgit v1.2.3 From 2481366afd71a0c0b7cd725e6750c04cf589673b Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Wed, 28 Sep 2016 15:22:33 -0700 Subject: dma-mapping.h: preserve unmap info for CONFIG_DMA_API_DEBUG When CONFIG_DMA_API_DEBUG is enabled we need to preserve unmapping address even if "unmap" is a no-op for our architecutre because we need debug_dma_unmap_page() to correctly cleanup all of the debug bookkeeping. Failing to do so results in a false positive warnings about previously mapped areas never being unmapped. Link: http://lkml.kernel.org/r/1474387125-3713-1-git-send-email-andrew.smirnov@gmail.com Signed-off-by: Andrey Smirnov Reviewed-by: Robin Murphy Cc: Joerg Roedel Cc: Will Deacon Cc: Zhen Lei Cc: "Luis R. Rodriguez" Cc: Christian Borntraeger Cc: Geliang Tang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dma-mapping.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 66533e18276c..dc69df04abc1 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -718,7 +718,7 @@ static inline int dma_mmap_wc(struct device *dev, #define dma_mmap_writecombine dma_mmap_wc #endif -#ifdef CONFIG_NEED_DMA_MAP_STATE +#if defined(CONFIG_NEED_DMA_MAP_STATE) || defined(CONFIG_DMA_API_DEBUG) #define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME #define DEFINE_DMA_UNMAP_LEN(LEN_NAME) __u32 LEN_NAME #define dma_unmap_addr(PTR, ADDR_NAME) ((PTR)->ADDR_NAME) -- cgit v1.2.3 From e436fd61a8f62cb7a16310a42b95ab076ff72eff Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Wed, 28 Sep 2016 15:22:36 -0700 Subject: scripts/recordmcount.c: account for .softirqentry.text be7635e7287e ("arch, ftrace: for KASAN put hard/soft IRQ entries into separate sections") added .softirqentry.text section, but it was not added to recordmcount. So functions in the section are untracable. Add the section to scripts/recordmcount.c and scripts/recordmcount.pl. Fixes: be7635e7287e ("arch, ftrace: for KASAN put hard/soft IRQ entries into separate sections") Link: http://lkml.kernel.org/r/1474902626-73468-1-git-send-email-dvyukov@google.com Signed-off-by: Dmitry Vyukov Acked-by: Steve Rostedt Cc: [4.6+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/recordmcount.c | 1 + scripts/recordmcount.pl | 1 + 2 files changed, 2 insertions(+) diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index 42396a74405d..a68f03133df9 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -363,6 +363,7 @@ is_mcounted_section_name(char const *const txtname) strcmp(".sched.text", txtname) == 0 || strcmp(".spinlock.text", txtname) == 0 || strcmp(".irqentry.text", txtname) == 0 || + strcmp(".softirqentry.text", txtname) == 0 || strcmp(".kprobes.text", txtname) == 0 || strcmp(".text.unlikely", txtname) == 0; } diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 96e2486a6fc4..2d48011bc362 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -134,6 +134,7 @@ my %text_sections = ( ".sched.text" => 1, ".spinlock.text" => 1, ".irqentry.text" => 1, + ".softirqentry.text" => 1, ".kprobes.text" => 1, ".text.unlikely" => 1, ); -- cgit v1.2.3 From 231e97e2b8ec9a1556ced5d8a89cda03a480b179 Mon Sep 17 00:00:00 2001 From: Li Zhong Date: Wed, 28 Sep 2016 15:22:38 -0700 Subject: mem-hotplug: use nodes that contain memory as mask in new_node_page() 9bb627be47a5 ("mem-hotplug: don't clear the only node in new_node_page()") prevents allocating from an empty nodemask, but as David points out, it is still wrong. As node_online_map may include memoryless nodes, only allocating from these nodes is meaningless. This patch uses node_states[N_MEMORY] mask to prevent the above case. Fixes: 9bb627be47a5 ("mem-hotplug: don't clear the only node in new_node_page()") Fixes: 394e31d2ceb4 ("mem-hotplug: alloc new page from a nearest neighbor node when mem-offline") Link: http://lkml.kernel.org/r/1474447117.28370.6.camel@TP420 Signed-off-by: Li Zhong Suggested-by: David Rientjes Acked-by: Vlastimil Babka Cc: Michal Hocko Cc: John Allen Cc: Xishi Qiu Cc: Joonsoo Kim Cc: Naoya Horiguchi Cc: Tetsuo Handa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index b58906b6215c..9d29ba0f7192 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1555,8 +1555,8 @@ static struct page *new_node_page(struct page *page, unsigned long private, { gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE; int nid = page_to_nid(page); - nodemask_t nmask = node_online_map; - struct page *new_page; + nodemask_t nmask = node_states[N_MEMORY]; + struct page *new_page = NULL; /* * TODO: allocate a destination hugepage from a nearest neighbor node, @@ -1567,14 +1567,14 @@ static struct page *new_node_page(struct page *page, unsigned long private, return alloc_huge_page_node(page_hstate(compound_head(page)), next_node_in(nid, nmask)); - if (nid != next_node_in(nid, nmask)) - node_clear(nid, nmask); + node_clear(nid, nmask); if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE)) gfp_mask |= __GFP_HIGHMEM; - new_page = __alloc_pages_nodemask(gfp_mask, 0, + if (!nodes_empty(nmask)) + new_page = __alloc_pages_nodemask(gfp_mask, 0, node_zonelist(nid, gfp_mask), &nmask); if (!new_page) new_page = __alloc_pages(gfp_mask, 0, -- cgit v1.2.3 From 2a0100d7beebd2fb2c436852648f75396253e9f8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 28 Sep 2016 20:40:52 -0400 Subject: sparc64: Fix non-SMP build. Need to provide a dummy smp_fill_in_cpu_possible_map. Fixes: 9b2f753ec237 ("sparc64: Fix cpu_possible_mask if nr_cpus is set") Reported-by: kbuild test robot Signed-off-by: David S. Miller --- arch/sparc/include/asm/smp_64.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/sparc/include/asm/smp_64.h b/arch/sparc/include/asm/smp_64.h index 87b05751955a..ce2233f7e662 100644 --- a/arch/sparc/include/asm/smp_64.h +++ b/arch/sparc/include/asm/smp_64.h @@ -73,6 +73,7 @@ void __cpu_die(unsigned int cpu); #define smp_fill_in_sib_core_maps() do { } while (0) #define smp_fetch_global_regs() do { } while (0) #define smp_fetch_global_pmu() do { } while (0) +#define smp_fill_in_cpu_possible_map() do { } while (0) #endif /* !(CONFIG_SMP) */ -- cgit v1.2.3 From ba6dea4f7cedb4b1c17e36f4087675d817c2e24b Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 26 Sep 2016 16:50:55 +0100 Subject: ARM: 8616/1: dt: Respect property size when parsing CPUs Whilst MPIDR values themselves are less than 32 bits, it is still perfectly valid for a DT to have #address-cells > 1 in the CPUs node, resulting in the "reg" property having leading zero cell(s). In that situation, the big-endian nature of the data conspires with the current behaviour of only reading the first cell to cause the kernel to think all CPUs have ID 0, and become resoundingly unhappy as a consequence. Take the full property length into account when parsing CPUs so as to be correct under any circumstances. Cc: Russell King Signed-off-by: Robin Murphy Signed-off-by: Russell King --- arch/arm/kernel/devtree.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c index 40ecd5f514a2..f676febbb270 100644 --- a/arch/arm/kernel/devtree.c +++ b/arch/arm/kernel/devtree.c @@ -88,6 +88,8 @@ void __init arm_dt_init_cpu_maps(void) return; for_each_child_of_node(cpus, cpu) { + const __be32 *cell; + int prop_bytes; u32 hwid; if (of_node_cmp(cpu->type, "cpu")) @@ -99,7 +101,8 @@ void __init arm_dt_init_cpu_maps(void) * properties is considered invalid to build the * cpu_logical_map. */ - if (of_property_read_u32(cpu, "reg", &hwid)) { + cell = of_get_property(cpu, "reg", &prop_bytes); + if (!cell || prop_bytes < sizeof(*cell)) { pr_debug(" * %s missing reg property\n", cpu->full_name); of_node_put(cpu); @@ -107,10 +110,15 @@ void __init arm_dt_init_cpu_maps(void) } /* - * 8 MSBs must be set to 0 in the DT since the reg property + * Bits n:24 must be set to 0 in the DT since the reg property * defines the MPIDR[23:0]. */ - if (hwid & ~MPIDR_HWID_BITMASK) { + do { + hwid = be32_to_cpu(*cell++); + prop_bytes -= sizeof(*cell); + } while (!hwid && prop_bytes > 0); + + if (prop_bytes || (hwid & ~MPIDR_HWID_BITMASK)) { of_node_put(cpu); return; } -- cgit v1.2.3 From d248220f0465b818887baa9829e691fe662b2c5e Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Thu, 29 Sep 2016 08:32:55 +0100 Subject: ARM: 8617/1: dma: fix dma_max_pfn() Since commit 6ce0d2001692 ("ARM: dma: Use dma_pfn_offset for dma address translation"), dma_to_pfn() already returns the PFN with the physical memory start offset so we don't need to add it again. This fixes USB mass storage lock-up problem on systems that can't do DMA over the entire physical memory range (e.g.) Keystone 2 systems with 4GB RAM can only do DMA over the first 2GB. [K2E-EVM]. What happens there is that without this patch SCSI layer sets a wrong bounce buffer limit in scsi_calculate_bounce_limit() for the USB mass storage device. dma_max_pfn() evaluates to 0x8fffff and bounce_limit is set to 0x8fffff000 whereas maximum DMA'ble physical memory on Keystone 2 is 0x87fffffff. This results in non DMA'ble pages being given to the USB controller and hence the lock-up. NOTE: in the above case, USB-SCSI-device's dma_pfn_offset was showing as 0. This should have really been 0x780000 as on K2e, LOWMEM_START is 0x80000000 and HIGHMEM_START is 0x800000000. DMA zone is 2GB so dma_max_pfn should be 0x87ffff. The incorrect dma_pfn_offset for the USB storage device is because USB devices are not correctly inheriting the dma_pfn_offset from the USB host controller. This will be fixed by a separate patch. Fixes: 6ce0d2001692 ("ARM: dma: Use dma_pfn_offset for dma address translation") Cc: stable@vger.kernel.org Cc: Greg Kroah-Hartman Cc: Santosh Shilimkar Cc: Arnd Bergmann Cc: Olof Johansson Cc: Catalin Marinas Cc: Linus Walleij Reported-by: Grygorii Strashko Signed-off-by: Roger Quadros Signed-off-by: Russell King --- arch/arm/include/asm/dma-mapping.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index d009f7911ffc..bf02dbd9ccda 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -111,7 +111,7 @@ static inline dma_addr_t virt_to_dma(struct device *dev, void *addr) /* The ARM override for dma_max_pfn() */ static inline unsigned long dma_max_pfn(struct device *dev) { - return PHYS_PFN_OFFSET + dma_to_pfn(dev, *dev->dma_mask); + return dma_to_pfn(dev, *dev->dma_mask); } #define dma_max_pfn(dev) dma_max_pfn(dev) -- cgit v1.2.3 From 6ca8ac773e97e2dfa5734ae435c40e672dd19ac4 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Thu, 22 Sep 2016 11:59:47 +0100 Subject: MIPS: smp-cps: Avoid BUG() when offlining pre-r6 CPUs Commit 0d2808f338c7 ("MIPS: smp-cps: Add support for CPU hotplug of MIPSr6 processors") added a call to mips_cm_lock_other in order to lock the CPC in CPUs containing a version 3 or higher Coherence Manager, which use the general CM core other register, where previous CMs had a dedicated core other register for the CPC. A kernel BUG() is triggered, however, if mips_cm_lock_other is called with a VP other than 0 on a CPU with CM < 3, a condition introduced by 0d2808f338c7. Avoid the BUG() by always locking VP0 when locking the CPC, since the required register, cpc_stat_conf, is shared by all vps in a core. Fixes: 0d2808f338c7 ("MIPS: smp-cps: Add support for CPU hotplug...) Signed-off-by: Matt Redfearn Cc: Qais Yousef Cc: Masahiro Yamada Cc: James Hogan Cc: Paul Burton Cc: Andrew Morton Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14297/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/smp-cps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index e9d9fc6c754c..6183ad84cc73 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -513,7 +513,7 @@ static void cps_cpu_die(unsigned int cpu) * in which case the CPC will refuse to power down the core. */ do { - mips_cm_lock_other(core, vpe_id); + mips_cm_lock_other(core, 0); mips_cpc_lock_other(core); stat = read_cpc_co_stat_conf(); stat &= CPC_Cx_STAT_CONF_SEQSTATE_MSK; -- cgit v1.2.3 From db06068a4fd44a57b642b369d2a295b8448f6b65 Mon Sep 17 00:00:00 2001 From: Marcin Nowakowski Date: Thu, 22 Sep 2016 15:38:31 +0200 Subject: MIPS: fix uretprobe implementation arch_uretprobe_hijack_return_addr should replace the return address for a call with a trampoline address. Signed-off-by: Marcin Nowakowski Fixes: 40e084a506eb ('MIPS: Add uprobes support.') Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14298/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/uprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/uprobes.c b/arch/mips/kernel/uprobes.c index 1149b30c9aeb..cd8a68948066 100644 --- a/arch/mips/kernel/uprobes.c +++ b/arch/mips/kernel/uprobes.c @@ -257,7 +257,7 @@ unsigned long arch_uretprobe_hijack_return_addr( ra = regs->regs[31]; /* Replace the return address with the trampoline address */ - regs->regs[31] = ra; + regs->regs[31] = trampoline_vaddr; return ra; } -- cgit v1.2.3 From ddabfa5c2e33f1b495f3e0176de7057850915c0b Mon Sep 17 00:00:00 2001 From: Marcin Nowakowski Date: Thu, 22 Sep 2016 15:38:32 +0200 Subject: MIPS: uprobes: remove incorrect set_orig_insn Generic kernel code implements a weak version of set_orig_insn that moves cached 'insn' from arch_uprobe to the original code location when the trap is removed. MIPS variant used arch_uprobe->orig_inst which was never initialised properly, so this code only inserted a nop instead of the original instruction. With that change orig_inst can also be safely removed. Signed-off-by: Marcin Nowakowski Fixes: 40e084a506eb ('MIPS: Add uprobes support.') Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14299/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/uprobes.h | 1 - arch/mips/kernel/uprobes.c | 18 ------------------ 2 files changed, 19 deletions(-) diff --git a/arch/mips/include/asm/uprobes.h b/arch/mips/include/asm/uprobes.h index 34c325c674c4..70a4a2f173ff 100644 --- a/arch/mips/include/asm/uprobes.h +++ b/arch/mips/include/asm/uprobes.h @@ -36,7 +36,6 @@ struct arch_uprobe { unsigned long resume_epc; u32 insn[2]; u32 ixol[2]; - union mips_instruction orig_inst[MAX_UINSN_BYTES / 4]; }; struct arch_uprobe_task { diff --git a/arch/mips/kernel/uprobes.c b/arch/mips/kernel/uprobes.c index cd8a68948066..3b8accb0b4f0 100644 --- a/arch/mips/kernel/uprobes.c +++ b/arch/mips/kernel/uprobes.c @@ -280,24 +280,6 @@ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, return uprobe_write_opcode(mm, vaddr, UPROBE_SWBP_INSN); } -/** - * set_orig_insn - Restore the original instruction. - * @mm: the probed process address space. - * @auprobe: arch specific probepoint information. - * @vaddr: the virtual address to insert the opcode. - * - * For mm @mm, restore the original opcode (opcode) at @vaddr. - * Return 0 (success) or a negative errno. - * - * This overrides the weak version in kernel/events/uprobes.c. - */ -int set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, - unsigned long vaddr) -{ - return uprobe_write_opcode(mm, vaddr, - *(uprobe_opcode_t *)&auprobe->orig_inst[0].word); -} - void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, void *src, unsigned long len) { -- cgit v1.2.3 From ca86c9ef2b322ebf24772009fdea037688cbdac1 Mon Sep 17 00:00:00 2001 From: Marcin Nowakowski Date: Thu, 22 Sep 2016 15:38:33 +0200 Subject: MIPS: uprobes: fix use of uninitialised variable arch_uprobe_pre_xol needs to emulate a branch if a branch instruction has been replaced with a breakpoint, but in fact an uninitialised local variable was passed to the emulator routine instead of the original instruction Signed-off-by: Marcin Nowakowski Fixes: 40e084a506eb ('MIPS: Add uprobes support.') Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14300/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/uprobes.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/mips/kernel/uprobes.c b/arch/mips/kernel/uprobes.c index 3b8accb0b4f0..4c7c1558944a 100644 --- a/arch/mips/kernel/uprobes.c +++ b/arch/mips/kernel/uprobes.c @@ -157,7 +157,6 @@ bool is_trap_insn(uprobe_opcode_t *insn) int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs) { struct uprobe_task *utask = current->utask; - union mips_instruction insn; /* * Now find the EPC where to resume after the breakpoint has been @@ -168,10 +167,10 @@ int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs) unsigned long epc; epc = regs->cp0_epc; - __compute_return_epc_for_insn(regs, insn); + __compute_return_epc_for_insn(regs, + (union mips_instruction) aup->insn[0]); aup->resume_epc = regs->cp0_epc; } - utask->autask.saved_trap_nr = current->thread.trap_nr; current->thread.trap_nr = UPROBE_TRAP_NR; regs->cp0_epc = current->utask->xol_vaddr; -- cgit v1.2.3 From 0a900553715c39cfb6170ccc9846b194a4c13ceb Mon Sep 17 00:00:00 2001 From: "Steven J. Hill" Date: Fri, 26 Aug 2016 14:02:04 -0500 Subject: MIPS: Octeon: mark GPIO controller node not populated after IRQ init. We clear the OF_POPULATED flag for the GPIO controller node on Octeon processors. Otherwise, none of the devices hanging on the GPIO lines are probed. The 'gpio-leds' driver on OCTEON failed to probe in addition to other devices on Cavium 71xx and 78xx development boards. Fixes: 15cc2ed6dcf9 ("of/irq: Mark initialised interrupt controllers as populated") Signed-off-by: Steven J. Hill Tested-by: Aaro Koskinen Cc: David Daney Cc: Rob Herring Cc: linux-mips@linux-mips.org Cc: devicetree@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14091/ Signed-off-by: Ralf Baechle --- arch/mips/cavium-octeon/octeon-irq.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index 5a9b87b7993e..c1eb1ff7c800 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -1619,6 +1619,12 @@ static int __init octeon_irq_init_gpio( return -ENOMEM; } + /* + * Clear the OF_POPULATED flag that was set by of_irq_init() + * so that all GPIO devices will be probed. + */ + of_node_clear_flag(gpio_node, OF_POPULATED); + return 0; } /* -- cgit v1.2.3 From 3021773c7c3e75e20b693931a19362681e744ea9 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Tue, 20 Sep 2016 14:33:01 +0200 Subject: MIPS: DEC: Avoid la pseudo-instruction in delay slots When expanding the la or dla pseudo-instruction in a delay slot the GNU assembler will complain should the pseudo-instruction expand to multiple actual instructions, since only the first of them will be in the delay slot leading to the pseudo-instruction being only partially executed if the branch is taken. Use of PTR_LA in the dec int-handler.S leads to such warnings: arch/mips/dec/int-handler.S: Assembler messages: arch/mips/dec/int-handler.S:149: Warning: macro instruction expanded into multiple instructions in a branch delay slot arch/mips/dec/int-handler.S:198: Warning: macro instruction expanded into multiple instructions in a branch delay slot Avoid this by open coding the PTR_LA macros. Signed-off-by: Ralf Baechle --- arch/mips/dec/int-handler.S | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/arch/mips/dec/int-handler.S b/arch/mips/dec/int-handler.S index d7b99180c6e1..1910223a9c02 100644 --- a/arch/mips/dec/int-handler.S +++ b/arch/mips/dec/int-handler.S @@ -146,7 +146,25 @@ /* * Find irq with highest priority */ - PTR_LA t1,cpu_mask_nr_tbl + # open coded PTR_LA t1, cpu_mask_nr_tbl +#if (_MIPS_SZPTR == 32) + # open coded la t1, cpu_mask_nr_tbl + lui t1, %hi(cpu_mask_nr_tbl) + addiu t1, %lo(cpu_mask_nr_tbl) + +#endif +#if (_MIPS_SZPTR == 64) + # open coded dla t1, cpu_mask_nr_tbl + .set push + .set noat + lui t1, %highest(cpu_mask_nr_tbl) + lui AT, %hi(cpu_mask_nr_tbl) + daddiu t1, t1, %higher(cpu_mask_nr_tbl) + daddiu AT, AT, %lo(cpu_mask_nr_tbl) + dsll t1, 32 + daddu t1, t1, AT + .set pop +#endif 1: lw t2,(t1) nop and t2,t0 @@ -195,7 +213,25 @@ /* * Find irq with highest priority */ - PTR_LA t1,asic_mask_nr_tbl + # open coded PTR_LA t1,asic_mask_nr_tbl +#if (_MIPS_SZPTR == 32) + # open coded la t1, asic_mask_nr_tbl + lui t1, %hi(asic_mask_nr_tbl) + addiu t1, %lo(asic_mask_nr_tbl) + +#endif +#if (_MIPS_SZPTR == 64) + # open coded dla t1, asic_mask_nr_tbl + .set push + .set noat + lui t1, %highest(asic_mask_nr_tbl) + lui AT, %hi(asic_mask_nr_tbl) + daddiu t1, t1, %higher(asic_mask_nr_tbl) + daddiu AT, AT, %lo(asic_mask_nr_tbl) + dsll t1, 32 + daddu t1, t1, AT + .set pop +#endif 2: lw t2,(t1) nop and t2,t0 -- cgit v1.2.3 From 72c70f010dfcc9ea6cc13500602a29e33748452f Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 19 Aug 2016 18:18:26 +0100 Subject: MIPS: Stop setting I6400 FTLBP The FTLBP field in Config7 for the I6400 is intended as chicken bits for debugging rather than as a field that software actually makes use of. For best performance, FTLBP should be left at its default value of 0 with all TLB writes hitting the FTLB by default. Additionally, since set_ftlb_enable is called from decode_configs before decode_config4 which determines the size of the TLBs, this was previously always setting FTLBP=3 for a 3:1 FTLB:VTLB write ratio which makes abysmal use of the available FTLB resources. This effectively reverts b0c4e1b79d8a ("MIPS: Set up FTLB probability for I6400"). Signed-off-by: Paul Burton Fixes: b0c4e1b79d8a ("MIPS: Set up FTLB probability for I6400") Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14021/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/mipsregs.h | 2 -- arch/mips/kernel/cpu-probe.c | 9 ++------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index def9d8d13f6e..7dd2dd47909a 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -660,8 +660,6 @@ #define MIPS_CONF7_IAR (_ULCAST_(1) << 10) #define MIPS_CONF7_AR (_ULCAST_(1) << 16) -/* FTLB probability bits for R6 */ -#define MIPS_CONF7_FTLBP_SHIFT (18) /* WatchLo* register definitions */ #define MIPS_WATCHLO_IRW (_ULCAST_(0x7) << 0) diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index a88d44247cc8..ae290506873b 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -555,13 +555,8 @@ static int set_ftlb_enable(struct cpuinfo_mips *c, int enable) write_c0_config6(config & ~MIPS_CONF6_FTLBEN); break; case CPU_I6400: - /* I6400 & related cores use Config7 to configure FTLB */ - config = read_c0_config7(); - /* Clear the old probability value */ - config &= ~(3 << MIPS_CONF7_FTLBP_SHIFT); - write_c0_config7(config | (calculate_ftlb_probability(c) - << MIPS_CONF7_FTLBP_SHIFT)); - break; + /* There's no way to disable the FTLB */ + return !enable; case CPU_LOONGSON3: /* Flush ITLB, DTLB, VTLB and FTLB */ write_c0_diag(LOONGSON_DIAG_ITLB | LOONGSON_DIAG_DTLB | -- cgit v1.2.3 From ebd0e0f503d0774407a63ebb5ec1a90bb54941f5 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 19 Aug 2016 18:18:27 +0100 Subject: MIPS: Configure FTLB after probing TLB sizes from config4 On some cores (proAptiv, P5600) we make use of the sizes of the TLBs to determine the desired FTLB:VTLB write ratio. However set_ftlb_enable & thus calculate_ftlb_probability is called before decode_config4. This results in us calculating a probability based on zero sizes, and we end up setting FTLBP=3 for a 3:1 FTLB:VTLB write ratio in all cases. This will make abysmal use of the available FTLB resources in the affected cores. Fix this by configuring the FTLB probability after having decoded config4. However we do need to have enabled the FTLB before that point such that fields in config4 actually reflect that an FTLB is present. So set_ftlb_enable is now called twice, with flags indicating that it should configure the write probability only the second time. Signed-off-by: Paul Burton Fixes: cf0a8aa0226d ("MIPS: cpu-probe: Set the FTLB probability bit on supported cores") Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14022/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/cpu-probe.c | 43 ++++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index ae290506873b..5069b5b1488f 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -352,7 +352,12 @@ __setup("nohtw", htw_disable); static int mips_ftlb_disabled; static int mips_has_ftlb_configured; -static int set_ftlb_enable(struct cpuinfo_mips *c, int enable); +enum ftlb_flags { + FTLB_EN = 1 << 0, + FTLB_SET_PROB = 1 << 1, +}; + +static int set_ftlb_enable(struct cpuinfo_mips *c, enum ftlb_flags flags); static int __init ftlb_disable(char *s) { @@ -531,7 +536,7 @@ static unsigned int calculate_ftlb_probability(struct cpuinfo_mips *c) return 3; } -static int set_ftlb_enable(struct cpuinfo_mips *c, int enable) +static int set_ftlb_enable(struct cpuinfo_mips *c, enum ftlb_flags flags) { unsigned int config; @@ -542,28 +547,32 @@ static int set_ftlb_enable(struct cpuinfo_mips *c, int enable) case CPU_P6600: /* proAptiv & related cores use Config6 to enable the FTLB */ config = read_c0_config6(); - /* Clear the old probability value */ - config &= ~(3 << MIPS_CONF6_FTLBP_SHIFT); - if (enable) - /* Enable FTLB */ - write_c0_config6(config | - (calculate_ftlb_probability(c) - << MIPS_CONF6_FTLBP_SHIFT) - | MIPS_CONF6_FTLBEN); + + if (flags & FTLB_EN) + config |= MIPS_CONF6_FTLBEN; else - /* Disable FTLB */ - write_c0_config6(config & ~MIPS_CONF6_FTLBEN); + config &= ~MIPS_CONF6_FTLBEN; + + if (flags & FTLB_SET_PROB) { + config &= ~(3 << MIPS_CONF6_FTLBP_SHIFT); + config |= calculate_ftlb_probability(c) + << MIPS_CONF6_FTLBP_SHIFT; + } + + write_c0_config6(config); break; case CPU_I6400: /* There's no way to disable the FTLB */ - return !enable; + if (!(flags & FTLB_EN)) + return 1; + return 0; case CPU_LOONGSON3: /* Flush ITLB, DTLB, VTLB and FTLB */ write_c0_diag(LOONGSON_DIAG_ITLB | LOONGSON_DIAG_DTLB | LOONGSON_DIAG_VTLB | LOONGSON_DIAG_FTLB); /* Loongson-3 cores use Config6 to enable the FTLB */ config = read_c0_config6(); - if (enable) + if (flags & FTLB_EN) /* Enable FTLB */ write_c0_config6(config & ~MIPS_CONF6_FTLBDIS); else @@ -783,6 +792,7 @@ static inline unsigned int decode_config4(struct cpuinfo_mips *c) PAGE_SIZE, config4); /* Switch FTLB off */ set_ftlb_enable(c, 0); + mips_ftlb_disabled = 1; break; } c->tlbsizeftlbsets = 1 << @@ -847,7 +857,7 @@ static void decode_configs(struct cpuinfo_mips *c) c->scache.flags = MIPS_CACHE_NOT_PRESENT; /* Enable FTLB if present and not disabled */ - set_ftlb_enable(c, !mips_ftlb_disabled); + set_ftlb_enable(c, mips_ftlb_disabled ? 0 : FTLB_EN); ok = decode_config0(c); /* Read Config registers. */ BUG_ON(!ok); /* Arch spec violation! */ @@ -897,6 +907,9 @@ static void decode_configs(struct cpuinfo_mips *c) } } + /* configure the FTLB write probability */ + set_ftlb_enable(c, (mips_ftlb_disabled ? 0 : FTLB_EN) | FTLB_SET_PROB); + mips_probe_watch_registers(c); #ifndef CONFIG_MIPS_CPS -- cgit v1.2.3 From 67acd8d5c606cf42e6726767d705851dec9f6a34 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 19 Aug 2016 18:18:28 +0100 Subject: MIPS: clear execution hazard after changing FTLB enable On current P-series cores from Imagination the FTLB can be enabled or disabled via a bit in the Config6 register, and an execution hazard is created by changing the value of bit. The ftlb_disable function already cleared that hazard but that does no good for other callers. Clear the hazard in the set_ftlb_enable function that creates it, and only for the cores where it applies. This has the effect of reverting c982c6d6c48b ("MIPS: cpu-probe: Remove cp0 hazard barrier when enabling the FTLB") which was incorrect. Signed-off-by: Paul Burton Fixes: c982c6d6c48b ("MIPS: cpu-probe: Remove cp0 hazard barrier when enabling the FTLB") Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14023/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/cpu-probe.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 5069b5b1488f..dd3175442c9e 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -376,8 +376,6 @@ static int __init ftlb_disable(char *s) return 1; } - back_to_back_c0_hazard(); - config4 = read_c0_config4(); /* Check that FTLB has been disabled */ @@ -560,6 +558,7 @@ static int set_ftlb_enable(struct cpuinfo_mips *c, enum ftlb_flags flags) } write_c0_config6(config); + back_to_back_c0_hazard(); break; case CPU_I6400: /* There's no way to disable the FTLB */ -- cgit v1.2.3 From 1eefcbc89cf3a8e252e5aeb25825594699b47360 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 19 Aug 2016 18:15:40 +0100 Subject: MIPS: Fix BUILD_ROLLBACK_PROLOGUE for microMIPS When the kernel is built for microMIPS, branches targets need to be known to be microMIPS code in order to result in bit 0 of the PC being set. The branch target in the BUILD_ROLLBACK_PROLOGUE macro was simply the end of the macro, which may be pointing at padding rather than at code. This results in recent enough GNU linkers complaining like so: mips-img-linux-gnu-ld: arch/mips/built-in.o: .text+0x3e3c: Unsupported branch between ISA modes. mips-img-linux-gnu-ld: final link failed: Bad value Makefile:936: recipe for target 'vmlinux' failed make: *** [vmlinux] Error 1 Fix this by changing the branch target to be the start of the appropriate handler, skipping over any padding. Signed-off-by: Paul Burton Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14019/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/genex.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index 17326a90d53c..dc0b29612891 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -142,9 +142,8 @@ LEAF(__r4k_wait) PTR_LA k1, __r4k_wait ori k0, 0x1f /* 32 byte rollback region */ xori k0, 0x1f - bne k0, k1, 9f + bne k0, k1, \handler MTC0 k0, CP0_EPC -9: .set pop .endm -- cgit v1.2.3 From 305723ab439e14debc1d339aa04e835d488b8253 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 2 Sep 2016 16:07:10 +0100 Subject: MIPS: Malta: Fix IOCU disable switch read for MIPS64 Malta boards used with CPU emulators feature a switch to disable use of an IOCU. Software has to check this switch & ignore any present IOCU if the switch is closed. The read used to do this was unsafe for 64 bit kernels, as it simply casted the address 0xbf403000 to a pointer & dereferenced it. Whilst in a 32 bit kernel this would access kseg1, in a 64 bit kernel this attempts to access xuseg & results in an address error exception. Fix by accessing a correctly formed ckseg1 address generated using the CKSEG1ADDR macro. Whilst modifying this code, define the name of the register and the bit we care about within it, which indicates whether PCI DMA is routed to the IOCU or straight to DRAM. The code previously checked that bit 0 was also set, but the least significant 7 bits of the CONFIG_GEN0 register contain the value of the MReqInfo signal provided to the IOCU OCP bus, so singling out bit 0 makes little sense & that part of the check is dropped. Signed-off-by: Paul Burton Fixes: b6d92b4a6bdb ("MIPS: Add option to disable software I/O coherency.") Cc: Matt Redfearn Cc: Masahiro Yamada Cc: Kees Cook Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14187/ Signed-off-by: Ralf Baechle --- arch/mips/mti-malta/malta-setup.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/mips/mti-malta/malta-setup.c b/arch/mips/mti-malta/malta-setup.c index ec5b21678fad..7e7364b0501e 100644 --- a/arch/mips/mti-malta/malta-setup.c +++ b/arch/mips/mti-malta/malta-setup.c @@ -39,6 +39,9 @@ #include #endif +#define ROCIT_CONFIG_GEN0 0x1f403000 +#define ROCIT_CONFIG_GEN0_PCI_IOCU BIT(7) + extern void malta_be_init(void); extern int malta_be_handler(struct pt_regs *regs, int is_fixup); @@ -107,6 +110,8 @@ static void __init fd_activate(void) static int __init plat_enable_iocoherency(void) { int supported = 0; + u32 cfg; + if (mips_revision_sconid == MIPS_REVISION_SCON_BONITO) { if (BONITO_PCICACHECTRL & BONITO_PCICACHECTRL_CPUCOH_PRES) { BONITO_PCICACHECTRL |= BONITO_PCICACHECTRL_CPUCOH_EN; @@ -129,7 +134,8 @@ static int __init plat_enable_iocoherency(void) } else if (mips_cm_numiocu() != 0) { /* Nothing special needs to be done to enable coherency */ pr_info("CMP IOCU detected\n"); - if ((*(unsigned int *)0xbf403000 & 0x81) != 0x81) { + cfg = __raw_readl((u32 *)CKSEG1ADDR(ROCIT_CONFIG_GEN0)); + if (!(cfg & ROCIT_CONFIG_GEN0_PCI_IOCU)) { pr_crit("IOCU OPERATION DISABLED BY SWITCH - DEFAULTING TO SW IO COHERENCY\n"); return 0; } -- cgit v1.2.3 From 058effe7fdc5776b017356f690976a857eea473f Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 2 Sep 2016 15:17:31 +0100 Subject: MIPS: Fix detection of unsupported highmem with cache aliases The paging_init() function contains code which detects that highmem is in use but unsupported due to dcache aliasing. However this code was ineffective because it was being run before the caches are probed, meaning that cpu_has_dc_aliases would always evaluate to false (unless a platform overrides it to a compile-time constant) and the detection of the unsupported case is never triggered. The kernel would then go on to attempt to use highmem & either hit coherency issues or trigger the BUG_ON in flush_kernel_dcache_page(). Fix this by running paging_init() later than cpu_cache_init(), such that the cpu_has_dc_aliases macro will evaluate correctly & the unsupported highmem case will be detected successfully. This then leads to a formerly hidden issue in that mem_init_free_highmem() will attempt to free all highmem pages, even though we're avoiding use of them & don't have valid page structs for them. This leads to an invalid pointer dereference & a TLB exception. Avoid this by skipping the loop in mem_init_free_highmem() if cpu_has_dc_aliases evaluates true. Signed-off-by: Paul Burton Cc: Rabin Vincent Cc: Matt Redfearn Cc: Jerome Marchand Cc: Alexander Sverdlin Cc: Aurelien Jarno Cc: Jaedon Shin Cc: Toshi Kani Cc: James Hogan Cc: Sergey Ryazanov Cc: Jonas Gorski Cc: Kirill A. Shutemov Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14184/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/setup.c | 2 +- arch/mips/mm/init.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 3be0e6ba2797..0d57909d9026 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -764,7 +764,6 @@ static void __init arch_mem_init(char **cmdline_p) device_tree_init(); sparse_init(); plat_swiotlb_setup(); - paging_init(); dma_contiguous_reserve(PFN_PHYS(max_low_pfn)); /* Tell bootmem about cma reserved memblock section */ @@ -877,6 +876,7 @@ void __init setup_arch(char **cmdline_p) prefill_possible_map(); cpu_cache_init(); + paging_init(); } unsigned long kernelsp[NR_CPUS]; diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 2c3749d98f04..72f7478ee068 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -440,6 +440,9 @@ static inline void mem_init_free_highmem(void) #ifdef CONFIG_HIGHMEM unsigned long tmp; + if (cpu_has_dc_aliases) + return; + for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) { struct page *page = pfn_to_page(tmp); -- cgit v1.2.3 From e1bfc11c5a6f40222a698a818dc269113245820e Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 28 Sep 2016 12:34:14 -0700 Subject: x86/init: Fix cr4_init_shadow() on CR4-less machines cr4_init_shadow() will panic on 486-like machines without CR4. Fix it using __read_cr4_safe(). Reported-by: david@saggiorato.net Signed-off-by: Andy Lutomirski Reviewed-by: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: 1e02ce4cccdc ("x86: Store a per-cpu shadow copy of CR4") Link: http://lkml.kernel.org/r/43a20f81fb504013bf613913dc25574b45336a61.1475091074.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/tlbflush.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 6fa85944af83..dee8a70382ba 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -81,7 +81,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate); /* Initialize cr4 shadow for this CPU. */ static inline void cr4_init_shadow(void) { - this_cpu_write(cpu_tlbstate.cr4, __read_cr4()); + this_cpu_write(cpu_tlbstate.cr4, __read_cr4_safe()); } /* Set in this cpu's CR4. */ -- cgit v1.2.3 From 1b0ff89852d79354e8a091c81a88df21f5aa9f0a Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Thu, 29 Sep 2016 13:24:08 -0300 Subject: tg3: Avoid NULL pointer dereference in tg3_io_error_detected() While the driver is probing the adapter, an error may occur before the netdev structure is allocated and attached to pci_dev. In this case, not only netdev isn't available, but the tg3 private structure is also not available as it is just math from the NULL pointer, so dereferences must be skipped. The following trace is seen when the error is triggered: [1.402247] Unable to handle kernel paging request for data at address 0x00001a99 [1.402410] Faulting instruction address: 0xc0000000007e33f8 [1.402450] Oops: Kernel access of bad area, sig: 11 [#1] [1.402481] SMP NR_CPUS=2048 NUMA PowerNV [1.402513] Modules linked in: [1.402545] CPU: 0 PID: 651 Comm: eehd Not tainted 4.4.0-36-generic #55-Ubuntu [1.402591] task: c000001fe4e42a20 ti: c000001fe4e88000 task.ti: c000001fe4e88000 [1.402742] NIP: c0000000007e33f8 LR: c0000000007e3164 CTR: c000000000595ea0 [1.402787] REGS: c000001fe4e8b790 TRAP: 0300 Not tainted (4.4.0-36-generic) [1.402832] MSR: 9000000100009033 CR: 28000422 XER: 20000000 [1.403058] CFAR: c000000000008468 DAR: 0000000000001a99 DSISR: 42000000 SOFTE: 1 GPR00: c0000000007e3164 c000001fe4e8ba10 c0000000015c5e00 0000000000000000 GPR04: 0000000000000001 0000000000000000 0000000000000039 0000000000000299 GPR08: 0000000000000000 0000000000000001 c000001fe4e88000 0000000000000006 GPR12: 0000000000000000 c00000000fb40000 c0000000000e6558 c000003ca1bffd00 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000000 0000000000000000 0000000000000000 c000000000d52768 GPR24: c000000000d52740 0000000000000100 c000003ca1b52000 0000000000000002 GPR28: 0000000000000900 0000000000000000 c00000000152a0c0 c000003ca1b52000 [1.404226] NIP [c0000000007e33f8] tg3_io_error_detected+0x308/0x340 [1.404265] LR [c0000000007e3164] tg3_io_error_detected+0x74/0x340 This patch avoids the NULL pointer dereference by moving the access after the netdev NULL pointer check on tg3_io_error_detected(). Also, we add a check for netdev being NULL on tg3_io_resume() [suggested by Michael Chan]. Fixes: 0486a063b1ff ("tg3: prevent ifup/ifdown during PCI error recovery") Fixes: dfc8f370316b ("net/tg3: Release IRQs on permanent error") Tested-by: Guilherme G. Piccoli Signed-off-by: Milton Miller Signed-off-by: Guilherme G. Piccoli Acked-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index a2551bcd1027..ea967df4b202 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -18122,14 +18122,14 @@ static pci_ers_result_t tg3_io_error_detected(struct pci_dev *pdev, rtnl_lock(); - /* We needn't recover from permanent error */ - if (state == pci_channel_io_frozen) - tp->pcierr_recovery = true; - /* We probably don't have netdev yet */ if (!netdev || !netif_running(netdev)) goto done; + /* We needn't recover from permanent error */ + if (state == pci_channel_io_frozen) + tp->pcierr_recovery = true; + tg3_phy_stop(tp); tg3_netif_stop(tp); @@ -18226,7 +18226,7 @@ static void tg3_io_resume(struct pci_dev *pdev) rtnl_lock(); - if (!netif_running(netdev)) + if (!netdev || !netif_running(netdev)) goto done; tg3_full_lock(tp, 0); -- cgit v1.2.3 From 73dca124cdbad2d67d47d6196c08325f18447d07 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 29 Sep 2016 02:37:26 +0800 Subject: sctp: move sent_count to the memory hole in sctp_chunk Now pahole sctp_chunk, it has 2 memory holes: struct sctp_chunk { struct list_head list; atomic_t refcnt; /* XXX 4 bytes hole, try to pack */ ... long unsigned int prsctp_param; int sent_count; /* XXX 4 bytes hole, try to pack */ This patch is to move up sent_count to fill the 1st one and eliminate the 2nd one. It's not just another struct compaction, it also fixes the "netperf- Throughput_Mbps -37.2% regression" issue when overloading the CPU. Fixes: a6c2f792873a ("sctp: implement prsctp TTL policy") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index ce93c4b10d26..4f097f538fff 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -554,6 +554,9 @@ struct sctp_chunk { atomic_t refcnt; + /* How many times this chunk have been sent, for prsctp RTX policy */ + int sent_count; + /* This is our link to the per-transport transmitted list. */ struct list_head transmitted_list; @@ -610,9 +613,6 @@ struct sctp_chunk { */ unsigned long prsctp_param; - /* How many times this chunk have been sent, for prsctp RTX policy */ - int sent_count; - /* Which association does this belong to? */ struct sctp_association *asoc; -- cgit v1.2.3 From 0605483f6ace1f6b63e397c819a115ddcd13af0d Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 29 Sep 2016 02:37:27 +0800 Subject: sctp: remove prsctp_param from sctp_chunk Now sctp uses chunk->prsctp_param to save the prsctp param for all the prsctp polices, we didn't need to introduce prsctp_param to sctp_chunk. We can just use chunk->sinfo.sinfo_timetolive for RTX and BUF polices, and reuse msg->expires_at for TTL policy, as the prsctp polices and old expires policy are mutual exclusive. This patch is to remove prsctp_param from sctp_chunk, and reuse msg's expires_at for TTL and chunk's sinfo.sinfo_timetolive for RTX and BUF polices. Note that sctp can't use chunk's sinfo.sinfo_timetolive for TTL policy, as it needs a u64 variables to save the expires_at time. This one also fixes the "netperf-Throughput_Mbps -37.2% regression" issue. Fixes: a6c2f792873a ("sctp: implement prsctp TTL policy") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 7 ------- net/sctp/chunk.c | 9 +++++++-- net/sctp/outqueue.c | 4 ++-- net/sctp/sm_make_chunk.c | 15 --------------- 4 files changed, 9 insertions(+), 26 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 4f097f538fff..ced0df374e60 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -606,13 +606,6 @@ struct sctp_chunk { /* This needs to be recoverable for SCTP_SEND_FAILED events. */ struct sctp_sndrcvinfo sinfo; - /* We use this field to record param for prsctp policies, - * for TTL policy, it is the time_to_drop of this chunk, - * for RTX policy, it is the max_sent_count of this chunk, - * for PRIO policy, it is the priority of this chunk. - */ - unsigned long prsctp_param; - /* Which association does this belong to? */ struct sctp_association *asoc; diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index a55e54738b81..14990e21cf55 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -179,6 +179,11 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, msg, msg->expires_at, jiffies); } + if (asoc->prsctp_enable && + SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags)) + msg->expires_at = + jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive); + /* This is the biggest possible DATA chunk that can fit into * the packet */ @@ -349,14 +354,14 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) } if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) && - time_after(jiffies, chunk->prsctp_param)) { + time_after(jiffies, chunk->msg->expires_at)) { if (chunk->sent_count) chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++; else chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; return 1; } else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) && - chunk->sent_count > chunk->prsctp_param) { + chunk->sent_count > chunk->sinfo.sinfo_timetolive) { chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++; return 1; } diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 72e54a416af6..e084f35d40d2 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -383,7 +383,7 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc, list_for_each_entry_safe(chk, temp, queue, transmitted_list) { if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || - chk->prsctp_param <= sinfo->sinfo_timetolive) + chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive) continue; list_del_init(&chk->transmitted_list); @@ -418,7 +418,7 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, list_for_each_entry_safe(chk, temp, queue, list) { if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || - chk->prsctp_param <= sinfo->sinfo_timetolive) + chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive) continue; list_del_init(&chk->list); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 8c77b87a8565..46ffecc57214 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -706,20 +706,6 @@ nodata: return retval; } -static void sctp_set_prsctp_policy(struct sctp_chunk *chunk, - const struct sctp_sndrcvinfo *sinfo) -{ - if (!chunk->asoc->prsctp_enable) - return; - - if (SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags)) - chunk->prsctp_param = - jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive); - else if (SCTP_PR_RTX_ENABLED(sinfo->sinfo_flags) || - SCTP_PR_PRIO_ENABLED(sinfo->sinfo_flags)) - chunk->prsctp_param = sinfo->sinfo_timetolive; -} - /* Make a DATA chunk for the given association from the provided * parameters. However, do not populate the data payload. */ @@ -753,7 +739,6 @@ struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc, retval->subh.data_hdr = sctp_addto_chunk(retval, sizeof(dp), &dp); memcpy(&retval->sinfo, sinfo, sizeof(struct sctp_sndrcvinfo)); - sctp_set_prsctp_policy(retval, sinfo); nodata: return retval; -- cgit v1.2.3 From be4947bf46cb0e7a7d089e03c61bab35f1e695ce Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 29 Sep 2016 02:37:28 +0800 Subject: sctp: change to check peer prsctp_capable when using prsctp polices Now before using prsctp polices, sctp uses asoc->prsctp_enable to check if prsctp is enabled. However asoc->prsctp_enable is set only means local host support prsctp, sctp should not abandon packet if peer host doesn't enable prsctp. So this patch is to use asoc->peer.prsctp_capable to check if prsctp is enabled on both side, instead of asoc->prsctp_enable, as asoc's peer.prsctp_capable is set only when local and peer both enable prsctp. Fixes: a6c2f792873a ("sctp: implement prsctp TTL policy") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/chunk.c | 4 ++-- net/sctp/outqueue.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 14990e21cf55..0a3dbec0a8fb 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -179,7 +179,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, msg, msg->expires_at, jiffies); } - if (asoc->prsctp_enable && + if (asoc->peer.prsctp_capable && SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags)) msg->expires_at = jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive); @@ -340,7 +340,7 @@ errout: /* Check whether this message has expired. */ int sctp_chunk_abandoned(struct sctp_chunk *chunk) { - if (!chunk->asoc->prsctp_enable || + if (!chunk->asoc->peer.prsctp_capable || !SCTP_PR_POLICY(chunk->sinfo.sinfo_flags)) { struct sctp_datamsg *msg = chunk->msg; diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index e084f35d40d2..107233da5cc9 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -326,7 +326,7 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp) sctp_chunk_hold(chunk); sctp_outq_tail_data(q, chunk); - if (chunk->asoc->prsctp_enable && + if (chunk->asoc->peer.prsctp_capable && SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags)) chunk->asoc->sent_cnt_removable++; if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) @@ -442,7 +442,7 @@ void sctp_prsctp_prune(struct sctp_association *asoc, { struct sctp_transport *transport; - if (!asoc->prsctp_enable || !asoc->sent_cnt_removable) + if (!asoc->peer.prsctp_capable || !asoc->sent_cnt_removable) return; msg_len = sctp_prsctp_prune_sent(asoc, sinfo, @@ -1055,7 +1055,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) /* Mark as failed send. */ sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM); - if (asoc->prsctp_enable && + if (asoc->peer.prsctp_capable && SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags)) asoc->sent_cnt_removable--; sctp_chunk_free(chunk); @@ -1347,7 +1347,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk) tsn = ntohl(tchunk->subh.data_hdr->tsn); if (TSN_lte(tsn, ctsn)) { list_del_init(&tchunk->transmitted_list); - if (asoc->prsctp_enable && + if (asoc->peer.prsctp_capable && SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags)) asoc->sent_cnt_removable--; sctp_chunk_free(tchunk); -- cgit v1.2.3 From 1cceda7849809a8857fd9f26efe8846506c710e1 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 29 Sep 2016 02:55:44 +0800 Subject: sctp: fix the issue sctp_diag uses lock_sock in rcu_read_lock When sctp dumps all the ep->assocs, it needs to lock_sock first, but now it locks sock in rcu_read_lock, and lock_sock may sleep, which would break rcu_read_lock. This patch is to get and hold one sock when traversing the list. After that and get out of rcu_read_lock, lock and dump it. Then it will traverse the list again to get the next one until all sctp socks are dumped. For sctp_diag_dump_one, it fixes this issue by holding asoc and moving cb() out of rcu_read_lock in sctp_transport_lookup_process. Fixes: 8f840e47f190 ("sctp: add the sctp_diag.c file") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/sctp_diag.c | 58 ++++++++++++++++++++++++++++++++++++---------------- net/sctp/socket.c | 10 ++++++--- 2 files changed, 47 insertions(+), 21 deletions(-) diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c index f3508aa75815..cef0cee182d4 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/sctp_diag.c @@ -272,28 +272,17 @@ out: return err; } -static int sctp_tsp_dump(struct sctp_transport *tsp, void *p) +static int sctp_sock_dump(struct sock *sk, void *p) { - struct sctp_endpoint *ep = tsp->asoc->ep; + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_comm_param *commp = p; - struct sock *sk = ep->base.sk; struct sk_buff *skb = commp->skb; struct netlink_callback *cb = commp->cb; const struct inet_diag_req_v2 *r = commp->r; - struct sctp_association *assoc = - list_entry(ep->asocs.next, struct sctp_association, asocs); + struct sctp_association *assoc; int err = 0; - /* find the ep only once through the transports by this condition */ - if (tsp->asoc != assoc) - goto out; - - if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family) - goto out; - lock_sock(sk); - if (sk != assoc->base.sk) - goto release; list_for_each_entry(assoc, &ep->asocs, asocs) { if (cb->args[4] < cb->args[1]) goto next; @@ -312,7 +301,7 @@ static int sctp_tsp_dump(struct sctp_transport *tsp, void *p) cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh) < 0) { cb->args[3] = 1; - err = 2; + err = 1; goto release; } cb->args[3] = 1; @@ -321,7 +310,7 @@ static int sctp_tsp_dump(struct sctp_transport *tsp, void *p) sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 0, cb->nlh) < 0) { - err = 2; + err = 1; goto release; } next: @@ -333,10 +322,35 @@ next: cb->args[4] = 0; release: release_sock(sk); + sock_put(sk); return err; +} + +static int sctp_get_sock(struct sctp_transport *tsp, void *p) +{ + struct sctp_endpoint *ep = tsp->asoc->ep; + struct sctp_comm_param *commp = p; + struct sock *sk = ep->base.sk; + struct netlink_callback *cb = commp->cb; + const struct inet_diag_req_v2 *r = commp->r; + struct sctp_association *assoc = + list_entry(ep->asocs.next, struct sctp_association, asocs); + + /* find the ep only once through the transports by this condition */ + if (tsp->asoc != assoc) + goto out; + + if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family) + goto out; + + sock_hold(sk); + cb->args[5] = (long)sk; + + return 1; + out: cb->args[2]++; - return err; + return 0; } static int sctp_ep_dump(struct sctp_endpoint *ep, void *p) @@ -472,10 +486,18 @@ skip: * 2 : to record the transport pos of this time's traversal * 3 : to mark if we have dumped the ep info of the current asoc * 4 : to work as a temporary variable to traversal list + * 5 : to save the sk we get from travelsing the tsp list. */ if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE))) goto done; - sctp_for_each_transport(sctp_tsp_dump, net, cb->args[2], &commp); + +next: + cb->args[5] = 0; + sctp_for_each_transport(sctp_get_sock, net, cb->args[2], &commp); + + if (cb->args[5] && !sctp_sock_dump((struct sock *)cb->args[5], &commp)) + goto next; + done: cb->args[1] = cb->args[4]; cb->args[4] = 0; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9fc417a8b476..8ed2d99bde6d 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4469,17 +4469,21 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), const union sctp_addr *paddr, void *p) { struct sctp_transport *transport; - int err = 0; + int err = -ENOENT; rcu_read_lock(); transport = sctp_addrs_lookup_transport(net, laddr, paddr); if (!transport || !sctp_transport_hold(transport)) goto out; - err = cb(transport, p); + + sctp_association_hold(transport->asoc); sctp_transport_put(transport); -out: rcu_read_unlock(); + err = cb(transport, p); + sctp_association_put(transport->asoc); + +out: return err; } EXPORT_SYMBOL_GPL(sctp_transport_lookup_process); -- cgit v1.2.3 From 192d1dccbfc5b901b66527df9df80304693cf06e Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 29 Sep 2016 12:48:11 -0700 Subject: x86/boot: Fix another __read_cr4() case on 486 The condition for reading CR4 was wrong: there are some CPUs with CPUID but not CR4. Rather than trying to make the condition exact, use __read_cr4_safe(). Fixes: 18bc7bd523e0 ("x86/boot: Synchronize trampoline_cr4_features and mmu_cr4_features directly") Reported-by: david@saggiorato.net Signed-off-by: Andy Lutomirski Reviewed-by: Borislav Petkov Cc: Brian Gerst Link: http://lkml.kernel.org/r/8c453a61c4f44ab6ff43c29780ba04835234d2e5.1475178369.git.luto@kernel.org Signed-off-by: Thomas Gleixner --- arch/x86/kernel/setup.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 0fa60f5f5a16..98c9cd6f3b5d 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1137,9 +1137,7 @@ void __init setup_arch(char **cmdline_p) * auditing all the early-boot CR4 manipulation would be needed to * rule it out. */ - if (boot_cpu_data.cpuid_level >= 0) - /* A CPU has %cr4 if and only if it has CPUID. */ - mmu_cr4_features = __read_cr4(); + mmu_cr4_features = __read_cr4_safe(); memblock_set_current_limit(get_max_mapped()); -- cgit v1.2.3 From e4aad64597d7a2455a541f904365b48d607916db Mon Sep 17 00:00:00 2001 From: Segher Boessenkool Date: Thu, 29 Sep 2016 11:51:00 +0000 Subject: x86/vdso: Fix building on big endian host We need to call GET_LE to read hdr->e_type. Fixes: 57f90c3dfc75 ("x86/vdso: Error out if the vDSO isn't a valid DSO") Reported-by: Paul Gortmaker Signed-off-by: Segher Boessenkool Acked-by: Andy Lutomirski Cc: Stephen Rothwell Cc: linux-next@vger.kernel.org Link: http://lkml.kernel.org/r/20160929193442.GA16617@gate.crashing.org Signed-off-by: Thomas Gleixner --- arch/x86/entry/vdso/vdso2c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h index 4f741192846d..3dab75f2a673 100644 --- a/arch/x86/entry/vdso/vdso2c.h +++ b/arch/x86/entry/vdso/vdso2c.h @@ -22,7 +22,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len, ELF(Phdr) *pt = (ELF(Phdr) *)(raw_addr + GET_LE(&hdr->e_phoff)); - if (hdr->e_type != ET_DYN) + if (GET_LE(&hdr->e_type) != ET_DYN) fail("input is not a shared object\n"); /* Walk the segment table. */ -- cgit v1.2.3 From 05fb3c199bb09f5b85de56cc3ede194ac95c5e1f Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 28 Sep 2016 16:06:33 -0700 Subject: x86/boot: Initialize FPU and X86_FEATURE_ALWAYS even if we don't have CPUID Otherwise arch_task_struct_size == 0 and we die. While we're at it, set X86_FEATURE_ALWAYS, too. Reported-by: David Saggiorato Tested-by: David Saggiorato Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: aaeb5c01c5b ("x86/fpu, sched: Introduce CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT and use it on x86") Link: http://lkml.kernel.org/r/8de723afbf0811071185039f9088733188b606c9.1475103911.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 809eda03c527..bcc9ccc220c9 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -804,21 +804,20 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) identify_cpu_without_cpuid(c); /* cyrix could have cpuid enabled via c_identify()*/ - if (!have_cpuid_p()) - return; + if (have_cpuid_p()) { + cpu_detect(c); + get_cpu_vendor(c); + get_cpu_cap(c); - cpu_detect(c); - get_cpu_vendor(c); - get_cpu_cap(c); - - if (this_cpu->c_early_init) - this_cpu->c_early_init(c); + if (this_cpu->c_early_init) + this_cpu->c_early_init(c); - c->cpu_index = 0; - filter_cpuid_features(c, false); + c->cpu_index = 0; + filter_cpuid_features(c, false); - if (this_cpu->c_bsp_init) - this_cpu->c_bsp_init(c); + if (this_cpu->c_bsp_init) + this_cpu->c_bsp_init(c); + } setup_force_cpu_cap(X86_FEATURE_ALWAYS); fpu__init_system(c); -- cgit v1.2.3 From 2fa5f04f85730d0c4f49f984b7efeb4f8d5bd1fc Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Fri, 30 Sep 2016 09:01:06 +0800 Subject: x86/entry/64: Fix context tracking state warning when load_gs_index fails This warning: WARNING: CPU: 0 PID: 3331 at arch/x86/entry/common.c:45 enter_from_user_mode+0x32/0x50 CPU: 0 PID: 3331 Comm: ldt_gdt_64 Not tainted 4.8.0-rc7+ #13 Call Trace: dump_stack+0x99/0xd0 __warn+0xd1/0xf0 warn_slowpath_null+0x1d/0x20 enter_from_user_mode+0x32/0x50 error_entry+0x6d/0xc0 ? general_protection+0x12/0x30 ? native_load_gs_index+0xd/0x20 ? do_set_thread_area+0x19c/0x1f0 SyS_set_thread_area+0x24/0x30 do_int80_syscall_32+0x7c/0x220 entry_INT80_compat+0x38/0x50 ... can be reproduced by running the GS testcase of the ldt_gdt test unit in the x86 selftests. do_int80_syscall_32() will call enter_form_user_mode() to convert context tracking state from user state to kernel state. The load_gs_index() call can fail with user gsbase, gsbase will be fixed up and proceed if this happen. However, enter_from_user_mode() will be called again in the fixed up path though it is context tracking kernel state currently. This patch fixes it by just fixing up gsbase and telling lockdep that IRQs are off once load_gs_index() failed with user gsbase. Signed-off-by: Wanpeng Li Acked-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1475197266-3440-1-git-send-email-wanpeng.li@hotmail.com Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index d172c619c449..02fff3ebfb87 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1002,7 +1002,6 @@ ENTRY(error_entry) testb $3, CS+8(%rsp) jz .Lerror_kernelspace -.Lerror_entry_from_usermode_swapgs: /* * We entered from user mode or we're pretending to have entered * from user mode due to an IRET fault. @@ -1045,7 +1044,8 @@ ENTRY(error_entry) * gsbase and proceed. We'll fix up the exception and land in * .Lgs_change's error handler with kernel gsbase. */ - jmp .Lerror_entry_from_usermode_swapgs + SWAPGS + jmp .Lerror_entry_done .Lbstep_iret: /* Fix truncated RIP */ -- cgit v1.2.3 From 9a2172a8d52cf14ce44e9cadd8c9df84cf832d75 Mon Sep 17 00:00:00 2001 From: Javi Merino Date: Fri, 30 Sep 2016 13:14:28 +0100 Subject: MAINTAINERS: Switch to kernel.org email address for Javi Merino Change my email address to my kernel.org account instead of the ARM one. Signed-off-by: Javi Merino Signed-off-by: Linus Torvalds --- .mailmap | 1 + MAINTAINERS | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index de22daefd9da..1dab0a156489 100644 --- a/.mailmap +++ b/.mailmap @@ -69,6 +69,7 @@ James Bottomley James Bottomley James E Wilson James Ketrenos +Javi Merino Jean Tourrilhes Jeff Garzik diff --git a/MAINTAINERS b/MAINTAINERS index b003d0ca6238..f593300e310b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11626,7 +11626,7 @@ F: Documentation/devicetree/bindings/thermal/ THERMAL/CPU_COOLING M: Amit Daniel Kachhap M: Viresh Kumar -M: Javi Merino +M: Javi Merino L: linux-pm@vger.kernel.org S: Supported F: Documentation/thermal/cpu-cooling-api.txt -- cgit v1.2.3 From 22f2ac51b6d643666f4db093f13144f773ff3f3a Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 30 Sep 2016 15:11:29 -0700 Subject: mm: workingset: fix crash in shadow node shrinker caused by replace_page_cache_page() Antonio reports the following crash when using fuse under memory pressure: kernel BUG at /build/linux-a2WvEb/linux-4.4.0/mm/workingset.c:346! invalid opcode: 0000 [#1] SMP Modules linked in: all of them CPU: 2 PID: 63 Comm: kswapd0 Not tainted 4.4.0-36-generic #55-Ubuntu Hardware name: System manufacturer System Product Name/P8H67-M PRO, BIOS 3904 04/27/2013 task: ffff88040cae6040 ti: ffff880407488000 task.ti: ffff880407488000 RIP: shadow_lru_isolate+0x181/0x190 Call Trace: __list_lru_walk_one.isra.3+0x8f/0x130 list_lru_walk_one+0x23/0x30 scan_shadow_nodes+0x34/0x50 shrink_slab.part.40+0x1ed/0x3d0 shrink_zone+0x2ca/0x2e0 kswapd+0x51e/0x990 kthread+0xd8/0xf0 ret_from_fork+0x3f/0x70 which corresponds to the following sanity check in the shadow node tracking: BUG_ON(node->count & RADIX_TREE_COUNT_MASK); The workingset code tracks radix tree nodes that exclusively contain shadow entries of evicted pages in them, and this (somewhat obscure) line checks whether there are real pages left that would interfere with reclaim of the radix tree node under memory pressure. While discussing ways how fuse might sneak pages into the radix tree past the workingset code, Miklos pointed to replace_page_cache_page(), and indeed there is a problem there: it properly accounts for the old page being removed - __delete_from_page_cache() does that - but then does a raw raw radix_tree_insert(), not accounting for the replacement page. Eventually the page count bits in node->count underflow while leaving the node incorrectly linked to the shadow node LRU. To address this, make sure replace_page_cache_page() uses the tracked page insertion code, page_cache_tree_insert(). This fixes the page accounting and makes sure page-containing nodes are properly unlinked from the shadow node LRU again. Also, make the sanity checks a bit less obscure by using the helpers for checking the number of pages and shadows in a radix tree node. Fixes: 449dd6984d0e ("mm: keep page cache radix tree nodes in check") Link: http://lkml.kernel.org/r/20160919155822.29498-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Reported-by: Antonio SJ Musumeci Debugged-by: Miklos Szeredi Cc: [3.15+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 + mm/filemap.c | 114 +++++++++++++++++++++++++-------------------------- mm/workingset.c | 10 ++--- 3 files changed, 63 insertions(+), 63 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index b17cc4830fa6..4a529c984a3f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -257,6 +257,7 @@ static inline void workingset_node_pages_inc(struct radix_tree_node *node) static inline void workingset_node_pages_dec(struct radix_tree_node *node) { + VM_BUG_ON(!workingset_node_pages(node)); node->count--; } @@ -272,6 +273,7 @@ static inline void workingset_node_shadows_inc(struct radix_tree_node *node) static inline void workingset_node_shadows_dec(struct radix_tree_node *node) { + VM_BUG_ON(!workingset_node_shadows(node)); node->count -= 1U << RADIX_TREE_COUNT_SHIFT; } diff --git a/mm/filemap.c b/mm/filemap.c index 8a287dfc5372..2d0986a64f1f 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -110,6 +110,62 @@ * ->tasklist_lock (memory_failure, collect_procs_ao) */ +static int page_cache_tree_insert(struct address_space *mapping, + struct page *page, void **shadowp) +{ + struct radix_tree_node *node; + void **slot; + int error; + + error = __radix_tree_create(&mapping->page_tree, page->index, 0, + &node, &slot); + if (error) + return error; + if (*slot) { + void *p; + + p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock); + if (!radix_tree_exceptional_entry(p)) + return -EEXIST; + + mapping->nrexceptional--; + if (!dax_mapping(mapping)) { + if (shadowp) + *shadowp = p; + if (node) + workingset_node_shadows_dec(node); + } else { + /* DAX can replace empty locked entry with a hole */ + WARN_ON_ONCE(p != + (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | + RADIX_DAX_ENTRY_LOCK)); + /* DAX accounts exceptional entries as normal pages */ + if (node) + workingset_node_pages_dec(node); + /* Wakeup waiters for exceptional entry lock */ + dax_wake_mapping_entry_waiter(mapping, page->index, + false); + } + } + radix_tree_replace_slot(slot, page); + mapping->nrpages++; + if (node) { + workingset_node_pages_inc(node); + /* + * Don't track node that contains actual pages. + * + * Avoid acquiring the list_lru lock if already + * untracked. The list_empty() test is safe as + * node->private_list is protected by + * mapping->tree_lock. + */ + if (!list_empty(&node->private_list)) + list_lru_del(&workingset_shadow_nodes, + &node->private_list); + } + return 0; +} + static void page_cache_tree_delete(struct address_space *mapping, struct page *page, void *shadow) { @@ -561,7 +617,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) spin_lock_irqsave(&mapping->tree_lock, flags); __delete_from_page_cache(old, NULL); - error = radix_tree_insert(&mapping->page_tree, offset, new); + error = page_cache_tree_insert(mapping, new, NULL); BUG_ON(error); mapping->nrpages++; @@ -584,62 +640,6 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) } EXPORT_SYMBOL_GPL(replace_page_cache_page); -static int page_cache_tree_insert(struct address_space *mapping, - struct page *page, void **shadowp) -{ - struct radix_tree_node *node; - void **slot; - int error; - - error = __radix_tree_create(&mapping->page_tree, page->index, 0, - &node, &slot); - if (error) - return error; - if (*slot) { - void *p; - - p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock); - if (!radix_tree_exceptional_entry(p)) - return -EEXIST; - - mapping->nrexceptional--; - if (!dax_mapping(mapping)) { - if (shadowp) - *shadowp = p; - if (node) - workingset_node_shadows_dec(node); - } else { - /* DAX can replace empty locked entry with a hole */ - WARN_ON_ONCE(p != - (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | - RADIX_DAX_ENTRY_LOCK)); - /* DAX accounts exceptional entries as normal pages */ - if (node) - workingset_node_pages_dec(node); - /* Wakeup waiters for exceptional entry lock */ - dax_wake_mapping_entry_waiter(mapping, page->index, - false); - } - } - radix_tree_replace_slot(slot, page); - mapping->nrpages++; - if (node) { - workingset_node_pages_inc(node); - /* - * Don't track node that contains actual pages. - * - * Avoid acquiring the list_lru lock if already - * untracked. The list_empty() test is safe as - * node->private_list is protected by - * mapping->tree_lock. - */ - if (!list_empty(&node->private_list)) - list_lru_del(&workingset_shadow_nodes, - &node->private_list); - } - return 0; -} - static int __add_to_page_cache_locked(struct page *page, struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask, diff --git a/mm/workingset.c b/mm/workingset.c index 69551cfae97b..617475f529f4 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -418,21 +418,19 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, * no pages, so we expect to be able to remove them all and * delete and free the empty node afterwards. */ - - BUG_ON(!node->count); - BUG_ON(node->count & RADIX_TREE_COUNT_MASK); + BUG_ON(!workingset_node_shadows(node)); + BUG_ON(workingset_node_pages(node)); for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { if (node->slots[i]) { BUG_ON(!radix_tree_exceptional_entry(node->slots[i])); node->slots[i] = NULL; - BUG_ON(node->count < (1U << RADIX_TREE_COUNT_SHIFT)); - node->count -= 1U << RADIX_TREE_COUNT_SHIFT; + workingset_node_shadows_dec(node); BUG_ON(!mapping->nrexceptional); mapping->nrexceptional--; } } - BUG_ON(node->count); + BUG_ON(workingset_node_shadows(node)); inc_node_state(page_pgdat(virt_to_page(node)), WORKINGSET_NODERECLAIM); if (!__radix_tree_delete_node(&mapping->page_tree, node)) BUG(); -- cgit v1.2.3 From c33f0785bf292cf1d15f4fbe42869c63e205b21c Mon Sep 17 00:00:00 2001 From: Eric Ren Date: Fri, 30 Sep 2016 15:11:32 -0700 Subject: ocfs2: fix deadlock on mmapped page in ocfs2_write_begin_nolock() The testcase "mmaptruncate" of ocfs2-test deadlocks occasionally. In this testcase, we create a 2*CLUSTER_SIZE file and mmap() on it; there are 2 process repeatedly performing the following operations respectively: one is doing memset(mmaped_addr + 2*CLUSTER_SIZE - 1, 'a', 1), while the another is playing ftruncate(fd, 2*CLUSTER_SIZE) and then ftruncate(fd, CLUSTER_SIZE) again and again. This is the backtrace when the deadlock happens: __wait_on_bit_lock+0x50/0xa0 __lock_page+0xb7/0xc0 ocfs2_write_begin_nolock+0x163f/0x1790 [ocfs2] ocfs2_page_mkwrite+0x1c7/0x2a0 [ocfs2] do_page_mkwrite+0x66/0xc0 handle_mm_fault+0x685/0x1350 __do_page_fault+0x1d8/0x4d0 trace_do_page_fault+0x37/0xf0 do_async_page_fault+0x19/0x70 async_page_fault+0x28/0x30 In ocfs2_write_begin_nolock(), we first grab the pages and then allocate disk space for this write; ocfs2_try_to_free_truncate_log() will be called if -ENOSPC is returned; if we're lucky to get enough clusters, which is usually the case, we start over again. But in ocfs2_free_write_ctxt() the target page isn't unlocked, so we will deadlock when trying to grab the target page again. Also, -ENOMEM might be returned in ocfs2_grab_pages_for_write(). Another deadlock will happen in __do_page_mkwrite() if ocfs2_page_mkwrite() returns non-VM_FAULT_LOCKED, and along with a locked target page. These two errors fail on the same path, so fix them by unlocking the target page manually before ocfs2_free_write_ctxt(). Jan Kara helps me clear out the JBD2 part, and suggest the hint for root cause. Changes since v1: 1. Also put ENOMEM error case into consideration. Link: http://lkml.kernel.org/r/1474173902-32075-1-git-send-email-zren@suse.com Signed-off-by: Eric Ren Reviewed-by: He Gang Acked-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/aops.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 98d36548153d..bbb4b3e5b4ff 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -1842,6 +1842,16 @@ out_commit: ocfs2_commit_trans(osb, handle); out: + /* + * The mmapped page won't be unlocked in ocfs2_free_write_ctxt(), + * even in case of error here like ENOSPC and ENOMEM. So, we need + * to unlock the target page manually to prevent deadlocks when + * retrying again on ENOSPC, or when returning non-VM_FAULT_LOCKED + * to VM code. + */ + if (wc->w_target_locked) + unlock_page(mmap_page); + ocfs2_free_write_ctxt(inode, wc); if (data_ac) { -- cgit v1.2.3 From 37aa7271d9742b574763e5ce019bde9c49aa8bfe Mon Sep 17 00:00:00 2001 From: John Youn Date: Fri, 30 Sep 2016 15:11:35 -0700 Subject: include/linux/property.h: fix typo/compile error This fixes commit d76eebfa175e ("include/linux/property.h: fix build issues with gcc-4.4.4"). With that commit we get the following compile error when using the PROPERTY_ENTRY_INTEGER_ARRAY macro. include/linux/property.h:201:39: error: `u32_data' undeclared (first use in this function) PROPERTY_ENTRY_INTEGER_ARRAY(_name_, u32, _val_) ^ include/linux/property.h:193:17: note: in definition of macro `PROPERTY_ENTRY_INTEGER_ARRAY' { .pointer = { _type_##_data = _val_ } }, \ ^ This needs a '.' to reference the union member. It seems this was just overlooked here since it is done correctly in similar constructs in other parts of the original commit. This fix is in preparation of upcoming commits that will use this macro. Fixes: commit d76eebfa175e ("include/linux/property.h: fix build issues with gcc-4.4.4") Link: http://lkml.kernel.org/r/2de3b929290d88a723ed829a3e3cbd02044714df.1475114627.git.johnyoun@synopsys.com Signed-off-by: John Youn Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/property.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/property.h b/include/linux/property.h index 3a2f9ae25c86..856e50b2140c 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -190,7 +190,7 @@ struct property_entry { .length = ARRAY_SIZE(_val_) * sizeof(_type_), \ .is_array = true, \ .is_string = false, \ - { .pointer = { _type_##_data = _val_ } }, \ + { .pointer = { ._type_##_data = _val_ } }, \ } #define PROPERTY_ENTRY_U8_ARRAY(_name_, _val_) \ -- cgit v1.2.3 From 6605d156bdfbb2502ba301bc4fbd8db696ae4b6d Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 30 Sep 2016 17:25:01 +0100 Subject: MIPS: CM: Fix mips_cm_max_vp_width for non-MT kernels on MT systems When discovering the number of VPEs per core, smp_num_siblings will be incorrect for kernels built without support for the MIPS MultiThreading (MT) ASE running on systems which implement said ASE. This leads to accesses to VPEs in secondary cores being performed incorrectly since mips_cm_vp_id calculates the wrong ID to write to the local "other" registers. Fix this by examining the number of VPEs in the core as reported by the CM. This patch presumes that the number of VPEs will be the same in each core of the system. As this path only applies to systems with CM version 2.5 or lower, and this property is true of all such known systems, this is likely to be fine but is described in a comment for good measure. Signed-off-by: Paul Burton Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14338/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/mips-cm.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/mips/include/asm/mips-cm.h b/arch/mips/include/asm/mips-cm.h index 58e7874e9347..4fafeefe65c2 100644 --- a/arch/mips/include/asm/mips-cm.h +++ b/arch/mips/include/asm/mips-cm.h @@ -458,10 +458,21 @@ static inline int mips_cm_revision(void) static inline unsigned int mips_cm_max_vp_width(void) { extern int smp_num_siblings; + uint32_t cfg; if (mips_cm_revision() >= CM_REV_CM3) return read_gcr_sys_config2() & CM_GCR_SYS_CONFIG2_MAXVPW_MSK; + if (mips_cm_present()) { + /* + * We presume that all cores in the system will have the same + * number of VP(E)s, and if that ever changes then this will + * need revisiting. + */ + cfg = read_gcr_cl_config() & CM_GCR_Cx_CONFIG_PVPE_MSK; + return (cfg >> CM_GCR_Cx_CONFIG_PVPE_SHF) + 1; + } + if (IS_ENABLED(CONFIG_SMP)) return smp_num_siblings; -- cgit v1.2.3 From 117e5e9c4cfcb7628f08de074fbfefec1bb678b7 Mon Sep 17 00:00:00 2001 From: Srinivas Ramana Date: Fri, 30 Sep 2016 15:03:31 +0100 Subject: ARM: 8618/1: decompressor: reset ttbcr fields to use TTBR0 on ARMv7 If the bootloader uses the long descriptor format and jumps to kernel decompressor code, TTBCR may not be in a right state. Before enabling the MMU, it is required to clear the TTBCR.PD0 field to use TTBR0 for translation table walks. The commit dbece45894d3a ("ARM: 7501/1: decompressor: reset ttbcr for VMSA ARMv7 cores") does the reset of TTBCR.N, but doesn't consider all the bits for the size of TTBCR.N. Clear TTBCR.PD0 field and reset all the three bits of TTBCR.N to indicate the use of TTBR0 and the correct base address width. Fixes: dbece45894d3 ("ARM: 7501/1: decompressor: reset ttbcr for VMSA ARMv7 cores") Acked-by: Robin Murphy Signed-off-by: Srinivas Ramana Signed-off-by: Russell King --- arch/arm/boot/compressed/head.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index af11c2f8f3b7..fc6d541549a2 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -779,7 +779,7 @@ __armv7_mmu_cache_on: orrne r0, r0, #1 @ MMU enabled movne r1, #0xfffffffd @ domain 0 = client bic r6, r6, #1 << 31 @ 32-bit translation system - bic r6, r6, #3 << 0 @ use only ttbr0 + bic r6, r6, #(7 << 0) | (1 << 4) @ use only ttbr0 mcrne p15, 0, r3, c2, c0, 0 @ load page table pointer mcrne p15, 0, r1, c3, c0, 0 @ load domain access control mcrne p15, 0, r6, c2, c0, 2 @ load ttb control -- cgit v1.2.3 From c8d2bc9bc39ebea8437fd974fdbc21847bb897a3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 2 Oct 2016 16:24:33 -0700 Subject: Linux 4.8 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ce2ddb3fec98..80b8671d5c46 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 8 SUBLEVEL = 0 -EXTRAVERSION = -rc8 +EXTRAVERSION = NAME = Psychotic Stoned Sheep # *DOCUMENTATION* -- cgit v1.2.3