From f02be9002c480cd3ec0fcf184ad27cf531bd6ece Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 8 Nov 2022 18:34:34 +0800 Subject: block, bfq: fix null pointer dereference in bfq_bio_bfqg() Out test found a following problem in kernel 5.10, and the same problem should exist in mainline: BUG: kernel NULL pointer dereference, address: 0000000000000094 PGD 0 P4D 0 Oops: 0000 [#1] SMP CPU: 7 PID: 155 Comm: kworker/7:1 Not tainted 5.10.0-01932-g19e0ace2ca1d-dirty 4 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-b4 Workqueue: kthrotld blk_throtl_dispatch_work_fn RIP: 0010:bfq_bio_bfqg+0x52/0xc0 Code: 94 00 00 00 00 75 2e 48 8b 40 30 48 83 05 35 06 c8 0b 01 48 85 c0 74 3d 4b RSP: 0018:ffffc90001a1fba0 EFLAGS: 00010002 RAX: ffff888100d60400 RBX: ffff8881132e7000 RCX: 0000000000000000 RDX: 0000000000000017 RSI: ffff888103580a18 RDI: ffff888103580a18 RBP: ffff8881132e7000 R08: 0000000000000000 R09: ffffc90001a1fe10 R10: 0000000000000a20 R11: 0000000000034320 R12: 0000000000000000 R13: ffff888103580a18 R14: ffff888114447000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88881fdc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000094 CR3: 0000000100cdb000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: bfq_bic_update_cgroup+0x3c/0x350 ? ioc_create_icq+0x42/0x270 bfq_init_rq+0xfd/0x1060 bfq_insert_requests+0x20f/0x1cc0 ? ioc_create_icq+0x122/0x270 blk_mq_sched_insert_requests+0x86/0x1d0 blk_mq_flush_plug_list+0x193/0x2a0 blk_flush_plug_list+0x127/0x170 blk_finish_plug+0x31/0x50 blk_throtl_dispatch_work_fn+0x151/0x190 process_one_work+0x27c/0x5f0 worker_thread+0x28b/0x6b0 ? rescuer_thread+0x590/0x590 kthread+0x153/0x1b0 ? kthread_flush_work+0x170/0x170 ret_from_fork+0x1f/0x30 Modules linked in: CR2: 0000000000000094 ---[ end trace e2e59ac014314547 ]--- RIP: 0010:bfq_bio_bfqg+0x52/0xc0 Code: 94 00 00 00 00 75 2e 48 8b 40 30 48 83 05 35 06 c8 0b 01 48 85 c0 74 3d 4b RSP: 0018:ffffc90001a1fba0 EFLAGS: 00010002 RAX: ffff888100d60400 RBX: ffff8881132e7000 RCX: 0000000000000000 RDX: 0000000000000017 RSI: ffff888103580a18 RDI: ffff888103580a18 RBP: ffff8881132e7000 R08: 0000000000000000 R09: ffffc90001a1fe10 R10: 0000000000000a20 R11: 0000000000034320 R12: 0000000000000000 R13: ffff888103580a18 R14: ffff888114447000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88881fdc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000094 CR3: 0000000100cdb000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Root cause is quite complex: 1) use bfq elevator for the test device. 2) create a cgroup CG 3) config blk throtl in CG blkg_conf_prep blkg_create 4) create a thread T1 and issue async io in CG: bio_init bio_associate_blkg ... submit_bio submit_bio_noacct blk_throtl_bio -> io is throttled // io submit is done 5) switch elevator: bfq_exit_queue blkcg_deactivate_policy list_for_each_entry(blkg, &q->blkg_list, q_node) blkg->pd[] = NULL // bfq policy is removed 5) thread t1 exist, then remove the cgroup CG: blkcg_unpin_online blkcg_destroy_blkgs blkg_destroy list_del_init(&blkg->q_node) // blkg is removed from queue list 6) switch elevator back to bfq bfq_init_queue bfq_create_group_hierarchy blkcg_activate_policy list_for_each_entry_reverse(blkg, &q->blkg_list) // blkg is removed from list, hence bfq policy is still NULL 7) throttled io is dispatched to bfq: bfq_insert_requests bfq_init_rq bfq_bic_update_cgroup bfq_bio_bfqg bfqg = blkg_to_bfqg(blkg) // bfqg is NULL because bfq policy is NULL The problem is only possible in bfq because only bfq can be deactivated and activated while queue is online, while others can only be deactivated while the device is removed. Fix the problem in bfq by checking if blkg is online before calling blkg_to_bfqg(). Signed-off-by: Yu Kuai Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20221108103434.2853269-1-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe --- block/bfq-cgroup.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 144bca006463..7d624a3a3f0f 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -610,6 +610,10 @@ struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio) struct bfq_group *bfqg; while (blkg) { + if (!blkg->online) { + blkg = blkg->parent; + continue; + } bfqg = blkg_to_bfqg(blkg); if (bfqg->online) { bio_associate_blkg_from_css(bio, &blkg->blkcg->css); -- cgit v1.2.3 From f829230dd51974c1f4478900ed30bb77ba530b40 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Mon, 7 Nov 2022 23:39:44 +0300 Subject: block: sed-opal: kmalloc the cmd/resp buffers In accordance with [1] the DMA-able memory buffers must be cacheline-aligned otherwise the cache writing-back and invalidation performed during the mapping may cause the adjacent data being lost. It's specifically required for the DMA-noncoherent platforms [2]. Seeing the opal_dev.{cmd,resp} buffers are implicitly used for DMAs in the NVME and SCSI/SD drivers in framework of the nvme_sec_submit() and sd_sec_submit() methods respectively they must be cacheline-aligned to prevent the denoted problem. One of the option to guarantee that is to kmalloc the buffers [2]. Let's explicitly allocate them then instead of embedding into the opal_dev structure instance. Note this fix was inspired by the commit c94b7f9bab22 ("nvme-hwmon: kmalloc the NVME SMART log buffer"). [1] Documentation/core-api/dma-api.rst [2] Documentation/core-api/dma-api-howto.rst Fixes: 455a7b238cd6 ("block: Add Sed-opal library") Signed-off-by: Serge Semin Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20221107203944.31686-1-Sergey.Semin@baikalelectronics.ru Signed-off-by: Jens Axboe --- block/sed-opal.c | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/block/sed-opal.c b/block/sed-opal.c index 2c5327a0543a..9bdb833e5817 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -87,8 +87,8 @@ struct opal_dev { u64 lowest_lba; size_t pos; - u8 cmd[IO_BUFFER_LENGTH]; - u8 resp[IO_BUFFER_LENGTH]; + u8 *cmd; + u8 *resp; struct parsed_resp parsed; size_t prev_d_len; @@ -2175,6 +2175,8 @@ void free_opal_dev(struct opal_dev *dev) return; clean_opal_dev(dev); + kfree(dev->resp); + kfree(dev->cmd); kfree(dev); } EXPORT_SYMBOL(free_opal_dev); @@ -2187,6 +2189,18 @@ struct opal_dev *init_opal_dev(void *data, sec_send_recv *send_recv) if (!dev) return NULL; + /* + * Presumably DMA-able buffers must be cache-aligned. Kmalloc makes + * sure the allocated buffer is DMA-safe in that regard. + */ + dev->cmd = kmalloc(IO_BUFFER_LENGTH, GFP_KERNEL); + if (!dev->cmd) + goto err_free_dev; + + dev->resp = kmalloc(IO_BUFFER_LENGTH, GFP_KERNEL); + if (!dev->resp) + goto err_free_cmd; + INIT_LIST_HEAD(&dev->unlk_lst); mutex_init(&dev->dev_lock); dev->flags = 0; @@ -2194,11 +2208,21 @@ struct opal_dev *init_opal_dev(void *data, sec_send_recv *send_recv) dev->send_recv = send_recv; if (check_opal_support(dev) != 0) { pr_debug("Opal is not supported on this device\n"); - kfree(dev); - return NULL; + goto err_free_resp; } return dev; + +err_free_resp: + kfree(dev->resp); + +err_free_cmd: + kfree(dev->cmd); + +err_free_dev: + kfree(dev); + + return NULL; } EXPORT_SYMBOL(init_opal_dev); -- cgit v1.2.3 From d7ac8dca938cd60cf7bd9a89a229a173c6bcba87 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 28 Oct 2022 13:14:15 -0700 Subject: nvme: quiet user passthrough command errors The driver is spamming the kernel logs for entirely harmless errors from user space submitting unsupported commands. Just silence the errors. The application has direct access to command status, so there's no need to log these. And since every passthrough command now uses the quiet flag, move the setting to the common initializer. Signed-off-by: Keith Busch Reviewed-by: Alan Adamson Reviewed-by: Jens Axboe Reviewed-by: Kanchan Joshi Reviewed-by: Chaitanya Kulkarni Reviewed-by: Daniel Wagner Tested-by: Alan Adamson Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 3 +-- drivers/nvme/host/pci.c | 2 -- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index dc4220600585..da55ce45ac70 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -675,6 +675,7 @@ void nvme_init_request(struct request *req, struct nvme_command *cmd) if (req->mq_hctx->type == HCTX_TYPE_POLL) req->cmd_flags |= REQ_POLLED; nvme_clear_nvme_request(req); + req->rq_flags |= RQF_QUIET; memcpy(nvme_req(req)->cmd, cmd, sizeof(*cmd)); } EXPORT_SYMBOL_GPL(nvme_init_request); @@ -1037,7 +1038,6 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, goto out; } - req->rq_flags |= RQF_QUIET; ret = nvme_execute_rq(req, at_head); if (result && ret >= 0) *result = nvme_req(req)->result; @@ -1227,7 +1227,6 @@ static void nvme_keep_alive_work(struct work_struct *work) rq->timeout = ctrl->kato * HZ; rq->end_io = nvme_keep_alive_end_io; rq->end_io_data = ctrl; - rq->rq_flags |= RQF_QUIET; blk_execute_rq_nowait(rq, false); } diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 31e577b01257..02b5578773a1 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1436,7 +1436,6 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) abort_req->end_io = abort_endio; abort_req->end_io_data = NULL; - abort_req->rq_flags |= RQF_QUIET; blk_execute_rq_nowait(abort_req, false); /* @@ -2490,7 +2489,6 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode) req->end_io_data = nvmeq; init_completion(&nvmeq->delete_done); - req->rq_flags |= RQF_QUIET; blk_execute_rq_nowait(req, false); return 0; } -- cgit v1.2.3 From becc4cac309dc867571f0080fde4426a6c2222e0 Mon Sep 17 00:00:00 2001 From: Aleksandr Miloserdov Date: Wed, 26 Oct 2022 12:31:33 +0400 Subject: nvmet: fix memory leak in nvmet_subsys_attr_model_store_locked Since model_number is allocated before it needs to be freed before kmemdump_nul. Reviewed-by: Konstantin Shelekhin Reviewed-by: Dmitriy Bogdanov Signed-off-by: Aleksandr Miloserdov Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/configfs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 9443ee1d4ae3..d0b9eea15ff8 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -1215,6 +1215,7 @@ static ssize_t nvmet_subsys_attr_model_store_locked(struct nvmet_subsys *subsys, const char *page, size_t count) { int pos = 0, len; + char *val; if (subsys->subsys_discovered) { pr_err("Can't set model number. %s is already assigned\n", @@ -1237,9 +1238,11 @@ static ssize_t nvmet_subsys_attr_model_store_locked(struct nvmet_subsys *subsys, return -EINVAL; } - subsys->model_number = kmemdup_nul(page, len, GFP_KERNEL); - if (!subsys->model_number) + val = kmemdup_nul(page, len, GFP_KERNEL); + if (!val) return -ENOMEM; + kfree(subsys->model_number); + subsys->model_number = val; return count; } -- cgit v1.2.3 From e65fdf530f55c5e387db14470a59a399faa29613 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 9 Nov 2022 05:29:57 +0200 Subject: nvmet: fix a memory leak We need to also free the dhchap_ctrl_secret when releasing nvmet_host. kmemleak complaint: -- unreferenced object 0xffff99b1cbca5140 (size 64): comm "check", pid 4864, jiffies 4305092436 (age 2913.583s) hex dump (first 32 bytes): 44 48 48 43 2d 31 3a 30 30 3a 65 36 2b 41 63 44 DHHC-1:00:e6+AcD 39 76 47 4d 52 57 59 78 67 54 47 44 51 59 47 78 9vGMRWYxgTGDQYGx backtrace: [<00000000c07d369d>] kstrdup+0x2e/0x60 [<000000001372171c>] 0xffffffffc0cceec6 [<0000000010dbf50b>] 0xffffffffc0cc6783 [<000000007465e93c>] configfs_write_iter+0xb1/0x120 [<0000000039c23f62>] vfs_write+0x2be/0x3c0 [<000000002da4351c>] ksys_write+0x5f/0xe0 [<00000000d5011e32>] do_syscall_64+0x38/0x90 [<00000000503870cf>] entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: db1312dd9548 ("nvmet: implement basic In-Band Authentication") Signed-off-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/configfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index d0b9eea15ff8..6a2816f3b4e8 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -1839,6 +1839,7 @@ static void nvmet_host_release(struct config_item *item) #ifdef CONFIG_NVME_TARGET_AUTH kfree(host->dhchap_secret); + kfree(host->dhchap_ctrl_secret); #endif kfree(host); } -- cgit v1.2.3