From 6bf74cddcffac0bc5ee0fad724aac778d2e53f75 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 7 Jun 2022 15:45:53 -0400 Subject: filemap: Don't release a locked folio We must hold a reference over the call to filemap_release_folio(), otherwise the page cache will put the last reference to the folio before we unlock it, leading to splats like this: BUG: Bad page state in process u8:5 pfn:1ab1f4 page:ffffea0006ac7d00 refcount:0 mapcount:0 mapping:0000000000000000 index:0x28b1de pfn:0x1ab1f4 flags: 0x17ff80000040001(locked|reclaim|node=0|zone=2|lastcpupid=0xfff) raw: 017ff80000040001 dead000000000100 dead000000000122 0000000000000000 raw: 000000000028b1de 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: PAGE_FLAGS_CHECK_AT_FREE flag(s) set It's an error path, so it doesn't see much testing. Reported-by: Darrick J. Wong Fixes: a42634a6c07d ("readahead: Use a folio in read_pages()") Signed-off-by: Matthew Wilcox (Oracle) --- mm/readahead.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/readahead.c b/mm/readahead.c index 415c39d764ea..57a015108254 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -164,12 +164,14 @@ static void read_pages(struct readahead_control *rac) while ((folio = readahead_folio(rac)) != NULL) { unsigned long nr = folio_nr_pages(folio); + folio_get(folio); rac->ra->size -= nr; if (rac->ra->async_size >= nr) { rac->ra->async_size -= nr; filemap_remove_folio(folio); } folio_unlock(folio); + folio_put(folio); } } else { while ((folio = readahead_folio(rac)) != NULL) -- cgit v1.2.3 From dcfa24ba68991ab69a48254a18377b45180ae664 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 25 May 2022 14:23:45 -0400 Subject: filemap: Cache the value of vm_flags After we have unlocked the mmap_lock for I/O, the file is pinned, but the VMA is not. Checking this flag after that can be a use-after-free. It's not a terribly interesting use-after-free as it can only read one bit, and it's used to decide whether to read 2MB or 4MB. But it upsets the automated tools and it's generally bad practice anyway, so let's fix it. Reported-by: syzbot+5b96d55e5b54924c77ad@syzkaller.appspotmail.com Fixes: 4687fdbb805a ("mm/filemap: Support VM_HUGEPAGE for file mappings") Cc: stable@vger.kernel.org Signed-off-by: Matthew Wilcox (Oracle) --- mm/filemap.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 9daeaab36081..ac3775c1ce4c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2991,11 +2991,12 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf) struct address_space *mapping = file->f_mapping; DEFINE_READAHEAD(ractl, file, ra, mapping, vmf->pgoff); struct file *fpin = NULL; + unsigned long vm_flags = vmf->vma->vm_flags; unsigned int mmap_miss; #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* Use the readahead code, even if readahead is disabled */ - if (vmf->vma->vm_flags & VM_HUGEPAGE) { + if (vm_flags & VM_HUGEPAGE) { fpin = maybe_unlock_mmap_for_io(vmf, fpin); ractl._index &= ~((unsigned long)HPAGE_PMD_NR - 1); ra->size = HPAGE_PMD_NR; @@ -3003,7 +3004,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf) * Fetch two PMD folios, so we get the chance to actually * readahead, unless we've been told not to. */ - if (!(vmf->vma->vm_flags & VM_RAND_READ)) + if (!(vm_flags & VM_RAND_READ)) ra->size *= 2; ra->async_size = HPAGE_PMD_NR; page_cache_ra_order(&ractl, ra, HPAGE_PMD_ORDER); @@ -3012,12 +3013,12 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf) #endif /* If we don't want any read-ahead, don't bother */ - if (vmf->vma->vm_flags & VM_RAND_READ) + if (vm_flags & VM_RAND_READ) return fpin; if (!ra->ra_pages) return fpin; - if (vmf->vma->vm_flags & VM_SEQ_READ) { + if (vm_flags & VM_SEQ_READ) { fpin = maybe_unlock_mmap_for_io(vmf, fpin); page_cache_sync_ra(&ractl, ra->ra_pages); return fpin; -- cgit v1.2.3 From 69a37a8ba1b408a1c7616494aa7018e4b3844cbe Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 8 Jun 2022 15:18:34 -0400 Subject: mm/huge_memory: Fix xarray node memory leak If xas_split_alloc() fails to allocate the necessary nodes to complete the xarray entry split, it sets the xa_state to -ENOMEM, which xas_nomem() then interprets as "Please allocate more memory", not as "Please free any unnecessary memory" (which was the intended outcome). It's confusing to use xas_nomem() to free memory in this context, so call xas_destroy() instead. Reported-by: syzbot+9e27a75a8c24f3fe75c1@syzkaller.appspotmail.com Fixes: 6b24ca4a1a8d ("mm: Use multi-index entries in the page cache") Cc: stable@vger.kernel.org Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/xarray.h | 1 + lib/xarray.c | 5 +++-- mm/huge_memory.c | 3 +-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 72feab5ea8d4..c29e11b2c073 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -1508,6 +1508,7 @@ void *xas_find_marked(struct xa_state *, unsigned long max, xa_mark_t); void xas_init_marks(const struct xa_state *); bool xas_nomem(struct xa_state *, gfp_t); +void xas_destroy(struct xa_state *); void xas_pause(struct xa_state *); void xas_create_range(struct xa_state *); diff --git a/lib/xarray.c b/lib/xarray.c index 54e646e8e6ee..ea9ce1f0b386 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -264,9 +264,10 @@ static void xa_node_free(struct xa_node *node) * xas_destroy() - Free any resources allocated during the XArray operation. * @xas: XArray operation state. * - * This function is now internal-only. + * Most users will not need to call this function; it is called for you + * by xas_nomem(). */ -static void xas_destroy(struct xa_state *xas) +void xas_destroy(struct xa_state *xas) { struct xa_node *next, *node = xas->xa_alloc; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index a77c78a2b6b5..f7248002dad9 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2672,8 +2672,7 @@ out_unlock: if (mapping) i_mmap_unlock_read(mapping); out: - /* Free any memory we didn't use */ - xas_nomem(&xas, 0); + xas_destroy(&xas); count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED); return ret; } -- cgit v1.2.3 From 334f6f53abcf57782bd2fe81da1cbd893e4ef05c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 9 Jun 2022 09:13:57 -0400 Subject: mm: Add kernel-doc for folio->mlock_count Fix "./include/linux/mm_types.h:279: warning: Function parameter or member 'mlock_count' not described in 'folio'". Also neaten the html by hiding the anon struct. Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/mm_types.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index b34ff2cdbc4f..c29ab4c0cd5c 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -227,6 +227,7 @@ struct page { * struct folio - Represents a contiguous set of bytes. * @flags: Identical to the page flags. * @lru: Least Recently Used list; tracks how recently this folio was used. + * @mlock_count: Number of times this folio has been pinned by mlock(). * @mapping: The file this page belongs to, or refers to the anon_vma for * anonymous memory. * @index: Offset within the file, in units of pages. For anonymous memory, @@ -255,10 +256,14 @@ struct folio { unsigned long flags; union { struct list_head lru; + /* private: avoid cluttering the output */ struct { void *__filler; + /* public: */ unsigned int mlock_count; + /* private: */ }; + /* public: */ }; struct address_space *mapping; pgoff_t index; -- cgit v1.2.3