From d1fe111fb62a1cf0446a2919f5effbb33ad0702c Mon Sep 17 00:00:00 2001
From: luofei
Date: Tue, 22 Mar 2022 14:44:38 -0700
Subject: mm/hwpoison: avoid the impact of hwpoison_filter() return value on mce handler

When the hwpoison page meets the filter conditions, it should not be
regarded as successfully handled by memory_failure() from the mce
handler's point of view; a distinct value should be returned instead.
Otherwise the mce handler assumes the error page has been identified and
isolated, which may lead to calling set_mce_nospec() to change page
attributes, etc.

Have memory_failure() return -EOPNOTSUPP to indicate that the error event
was filtered: the mce handler should not take any action in this
situation, and the hwpoison injector should treat it as handled
correctly.

Link: https://lkml.kernel.org/r/20220223082135.2769649-1-luofei@unicloud.com
Signed-off-by: luofei
Acked-by: Borislav Petkov
Cc: Dave Hansen
Cc: H. Peter Anvin
Cc: Ingo Molnar
Cc: Miaohe Lin
Cc: Naoya Horiguchi
Cc: Thomas Gleixner
Cc: Tony Luck
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 drivers/base/memory.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers/base')

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 365cd4a7f239..abf407e45467 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -555,6 +555,8 @@ static ssize_t hard_offline_page_store(struct device *dev,
 		return -EINVAL;
 	pfn >>= PAGE_SHIFT;
 	ret = memory_failure(pfn, 0);
+	if (ret == -EOPNOTSUPP)
+		ret = 0;
 	return ret ? ret : count;
 }
--
cgit v1.2.3
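
The contract this introduces for callers of memory_failure() can be shown
with a minimal sketch; handle_poisoned_pfn() and its printout are
hypothetical stand-ins, not the actual x86 MCE handler code:

#include <linux/mm.h>
#include <linux/printk.h>

static void handle_poisoned_pfn(unsigned long pfn)
{
	int ret = memory_failure(pfn, 0);

	if (ret == -EOPNOTSUPP) {
		/*
		 * hwpoison_filter() filtered the event: the page was neither
		 * identified nor isolated, so skip follow-up actions such as
		 * set_mce_nospec() and do not report a handling failure.
		 */
		return;
	}
	if (ret)
		pr_err("memory_failure() failed for pfn %#lx: %d\n", pfn, ret);
}

This mirrors what hard_offline_page_store() does after this patch:
-EOPNOTSUPP is mapped to "nothing to do" rather than propagated as an
error.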
From 7ea0d2d79da09d1f7d71c96a9c9bc1b5229360b5 Mon Sep 17 00:00:00 2001
From: David Hildenbrand
Date: Tue, 22 Mar 2022 14:47:09 -0700
Subject: drivers/base/memory: add memory block to memory group after registration succeeded

If register_memory() fails, we freed the memory block but already added
the memory block to the group list, not good.

Let's defer adding the block to the memory group to after registering the
memory block device. We do handle it properly during unregister_memory(),
but that's not called when the registration fails.

Link: https://lkml.kernel.org/r/20220128144540.153902-1-david@redhat.com
Fixes: 028fc57a1c36 ("drivers/base/memory: introduce "memory groups" to logically group memory blocks")
Signed-off-by: David Hildenbrand
Reviewed-by: Oscar Salvador
Acked-by: Michal Hocko
Cc: Greg Kroah-Hartman
Cc: "Rafael J. Wysocki"
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 drivers/base/memory.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'drivers/base')

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index abf407e45467..6ee2181adc3f 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -665,14 +665,16 @@ static int init_memory_block(unsigned long block_id, unsigned long state,
 	mem->nr_vmemmap_pages = nr_vmemmap_pages;
 	INIT_LIST_HEAD(&mem->group_next);

+	ret = register_memory(mem);
+	if (ret)
+		return ret;
+
 	if (group) {
 		mem->group = group;
 		list_add(&mem->group_next, &group->memory_blocks);
 	}

-	ret = register_memory(mem);
-
-	return ret;
+	return 0;
 }

 static int add_memory_block(unsigned long base_section_nr)
--
cgit v1.2.3
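
The fix follows a general ordering rule: publish an object to shared
lists only after every step that can still fail has succeeded, so the
error path never has to unlink anything. A self-contained sketch of that
rule, using illustrative structs and a placeholder register_object()
rather than real kernel API:

#include <linux/list.h>

struct my_group {
	struct list_head objects;
};

struct my_object {
	struct list_head group_next;
	struct my_group *group;
};

/* Placeholder for whatever registration step may still fail. */
static int register_object(struct my_object *obj)
{
	return 0;
}

static int add_object(struct my_object *obj, struct my_group *group)
{
	int ret;

	/* Do the fallible work first ... */
	ret = register_object(obj);
	if (ret)
		return ret;	/* nothing was published, nothing to undo */

	/* ... and only then make the object visible on shared lists. */
	if (group) {
		obj->group = group;
		list_add(&obj->group_next, &group->objects);
	}
	return 0;
}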
From 2848a28b0a6052a4c8450397d2647d7d8e3f6f06 Mon Sep 17 00:00:00 2001
From: David Hildenbrand
Date: Tue, 22 Mar 2022 14:47:13 -0700
Subject: drivers/base/node: consolidate node device subsystem initialization in node_dev_init()

... and call node_dev_init() after memory_dev_init() from driver_init(),
so before any of the existing arch/subsys calls.

All online nodes should be known at that point: early during boot, arch
code determines node and zone ranges and sets the relevant nodes online;
usually this happens in setup_arch().

This is in line with memory_dev_init(), which initializes the memory
device subsystem and creates all memory block devices.

Similar to memory_dev_init(), panic() if anything goes wrong; we don't
want to continue with such basic initialization errors.

The important part is that node_dev_init() gets called after
memory_dev_init() and after cpu_dev_init(), but before any of the
relevant archs call register_cpu() to register the new cpu device under
the node device. The latter should be the case for the current users of
topology_init().

Link: https://lkml.kernel.org/r/20220203105212.30385-1-david@redhat.com
Signed-off-by: David Hildenbrand
Reviewed-by: Oscar Salvador
Tested-by: Anatoly Pugachev (sparc64)
Cc: Greg Kroah-Hartman
Cc: Michal Hocko
Cc: Oscar Salvador
Cc: Mike Rapoport
Cc: Catalin Marinas
Cc: Will Deacon
Cc: Thomas Bogendoerfer
Cc: Michael Ellerman
Cc: Benjamin Herrenschmidt
Cc: Paul Mackerras
Cc: Paul Walmsley
Cc: Palmer Dabbelt
Cc: Albert Ou
Cc: Heiko Carstens
Cc: Vasily Gorbik
Cc: Yoshinori Sato
Cc: Rich Felker
Cc: "David S. Miller"
Cc: Thomas Gleixner
Cc: Ingo Molnar
Cc: Borislav Petkov
Cc: Dave Hansen
Cc: "Rafael J. Wysocki"
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 drivers/base/init.c |  1 +
 drivers/base/node.c | 30 +++++++++++++++++-------------
 2 files changed, 18 insertions(+), 13 deletions(-)

(limited to 'drivers/base')

diff --git a/drivers/base/init.c b/drivers/base/init.c
index a9f57c22fb9e..d8d0fe687111 100644
--- a/drivers/base/init.c
+++ b/drivers/base/init.c
@@ -35,5 +35,6 @@ void __init driver_init(void)
 	auxiliary_bus_init();
 	cpu_dev_init();
 	memory_dev_init();
+	node_dev_init();
 	container_dev_init();
 }

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 87acc47e8951..a133981a12fc 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -1065,26 +1065,30 @@ static const struct attribute_group *cpu_root_attr_groups[] = {
 };

 #define NODE_CALLBACK_PRI	2	/* lower than SLAB */
-static int __init register_node_type(void)
+void __init node_dev_init(void)
 {
-	int ret;
+	static struct notifier_block node_memory_callback_nb = {
+		.notifier_call = node_memory_callback,
+		.priority = NODE_CALLBACK_PRI,
+	};
+	int ret, i;

 	BUILD_BUG_ON(ARRAY_SIZE(node_state_attr) != NR_NODE_STATES);
 	BUILD_BUG_ON(ARRAY_SIZE(node_state_attrs)-1 != NR_NODE_STATES);

 	ret = subsys_system_register(&node_subsys, cpu_root_attr_groups);
-	if (!ret) {
-		static struct notifier_block node_memory_callback_nb = {
-			.notifier_call = node_memory_callback,
-			.priority = NODE_CALLBACK_PRI,
-		};
-		register_hotmemory_notifier(&node_memory_callback_nb);
-	}
+	if (ret)
+		panic("%s() failed to register subsystem: %d\n", __func__, ret);
+
+	register_hotmemory_notifier(&node_memory_callback_nb);

 	/*
-	 * Note: we're not going to unregister the node class if we fail
-	 * to register the node state class attribute files.
+	 * Create all node devices, which will properly link the node
+	 * to applicable memory block devices and already created cpu devices.
 	 */
-	return ret;
+	for_each_online_node(i) {
+		ret = register_one_node(i);
+		if (ret)
+			panic("%s() failed to add node: %d\n", __func__, ret);
+	}
 }
-postcore_initcall(register_node_type);
--
cgit v1.2.3
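
The consumer side of this ordering is an architecture's topology_init():
by the time it runs, node_dev_init() has already created the node
devices. A generic sketch follows; the per-cpu cpu_devices variable and
the loop shape are illustrative, not any specific architecture's code:

#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(struct cpu, cpu_devices);

static int __init topology_init(void)
{
	int cpu;

	/*
	 * The node devices already exist, so register_cpu() can link each
	 * new cpu device under its node right away.
	 */
	for_each_present_cpu(cpu)
		register_cpu(&per_cpu(cpu_devices, cpu), cpu);

	return 0;
}
subsys_initcall(topology_init);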
Wysocki" Cc: Rafael Parra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/node.c b/drivers/base/node.c index a133981a12fc..5d75341413ce 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -892,8 +892,9 @@ void unregister_memory_block_under_nodes(struct memory_block *mem_blk) kobject_name(&node_devices[mem_blk->nid]->dev.kobj)); } -void link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn, - enum meminit_context context) +void register_memory_blocks_under_node(int nid, unsigned long start_pfn, + unsigned long end_pfn, + enum meminit_context context) { walk_memory_blocks_func_t func; -- cgit v1.2.3 From 395f6081bad49f9c54abafebab49ee23aa985bbd Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 22 Mar 2022 14:47:31 -0700 Subject: drivers/base/memory: determine and store zone for single-zone memory blocks test_pages_in_a_zone() is just another nasty PFN walker that can easily stumble over ZONE_DEVICE memory ranges falling into the same memory block as ordinary system RAM: the memmap of parts of these ranges might possibly be uninitialized. In fact, we observed (on an older kernel) with UBSAN: UBSAN: Undefined behaviour in ./include/linux/mm.h:1133:50 index 7 is out of range for type 'zone [5]' CPU: 121 PID: 35603 Comm: read_all Kdump: loaded Tainted: [...] Hardware name: Dell Inc. PowerEdge R7425/08V001, BIOS 1.12.2 11/15/2019 Call Trace: dump_stack+0x9a/0xf0 ubsan_epilogue+0x9/0x7a __ubsan_handle_out_of_bounds+0x13a/0x181 test_pages_in_a_zone+0x3c4/0x500 show_valid_zones+0x1fa/0x380 dev_attr_show+0x43/0xb0 sysfs_kf_seq_show+0x1c5/0x440 seq_read+0x49d/0x1190 vfs_read+0xff/0x300 ksys_read+0xb8/0x170 do_syscall_64+0xa5/0x4b0 entry_SYSCALL_64_after_hwframe+0x6a/0xdf RIP: 0033:0x7f01f4439b52 We seem to stumble over a memmap that contains a garbage zone id. While we could try inserting pfn_to_online_page() calls, it will just make memory offlining slower, because we use test_pages_in_a_zone() to make sure we're offlining pages that all belong to the same zone. Let's just get rid of this PFN walker and determine the single zone of a memory block -- if any -- for early memory blocks during boot. For memory onlining, we know the single zone already. Let's avoid any additional memmap scanning and just rely on the zone information available during boot. For memory hot(un)plug, we only really care about memory blocks that: * span a single zone (and, thereby, a single node) * are completely System RAM (IOW, no holes, no ZONE_DEVICE) If one of these conditions is not met, we reject memory offlining. Hotplugged memory blocks (starting out offline), always meet both conditions. There are three scenarios to handle: (1) Memory hot(un)plug A memory block with zone == NULL cannot be offlined, corresponding to our previous test_pages_in_a_zone() check. After successful memory onlining/offlining, we simply set the zone accordingly. * Memory onlining: set the zone we just used for onlining * Memory offlining: set zone = NULL So a hotplugged memory block starts with zone = NULL. Once memory onlining is done, we set the proper zone. (2) Boot memory with !CONFIG_NUMA We know that there is just a single pgdat, so we simply scan all zones of that pgdat for an intersection with our memory block PFN range when adding the memory block. 
From 395f6081bad49f9c54abafebab49ee23aa985bbd Mon Sep 17 00:00:00 2001
From: David Hildenbrand
Date: Tue, 22 Mar 2022 14:47:31 -0700
Subject: drivers/base/memory: determine and store zone for single-zone memory blocks

test_pages_in_a_zone() is just another nasty PFN walker that can easily
stumble over ZONE_DEVICE memory ranges falling into the same memory block
as ordinary system RAM: the memmap of parts of these ranges might
possibly be uninitialized. In fact, we observed (on an older kernel)
with UBSAN:

  UBSAN: Undefined behaviour in ./include/linux/mm.h:1133:50
  index 7 is out of range for type 'zone [5]'
  CPU: 121 PID: 35603 Comm: read_all Kdump: loaded Tainted: [...]
  Hardware name: Dell Inc. PowerEdge R7425/08V001, BIOS 1.12.2 11/15/2019
  Call Trace:
   dump_stack+0x9a/0xf0
   ubsan_epilogue+0x9/0x7a
   __ubsan_handle_out_of_bounds+0x13a/0x181
   test_pages_in_a_zone+0x3c4/0x500
   show_valid_zones+0x1fa/0x380
   dev_attr_show+0x43/0xb0
   sysfs_kf_seq_show+0x1c5/0x440
   seq_read+0x49d/0x1190
   vfs_read+0xff/0x300
   ksys_read+0xb8/0x170
   do_syscall_64+0xa5/0x4b0
   entry_SYSCALL_64_after_hwframe+0x6a/0xdf
  RIP: 0033:0x7f01f4439b52

We seem to stumble over a memmap that contains a garbage zone id. While
we could try inserting pfn_to_online_page() calls, it will just make
memory offlining slower, because we use test_pages_in_a_zone() to make
sure we're offlining pages that all belong to the same zone.

Let's just get rid of this PFN walker and determine the single zone of a
memory block -- if any -- for early memory blocks during boot. For memory
onlining, we know the single zone already. Let's avoid any additional
memmap scanning and just rely on the zone information available during
boot.

For memory hot(un)plug, we only really care about memory blocks that:

* span a single zone (and, thereby, a single node)
* are completely System RAM (IOW, no holes, no ZONE_DEVICE)

If one of these conditions is not met, we reject memory offlining.
Hotplugged memory blocks (starting out offline) always meet both
conditions.

There are three scenarios to handle:

(1) Memory hot(un)plug

A memory block with zone == NULL cannot be offlined, corresponding to
our previous test_pages_in_a_zone() check.

After successful memory onlining/offlining, we simply set the zone
accordingly.
* Memory onlining: set the zone we just used for onlining
* Memory offlining: set zone = NULL

So a hotplugged memory block starts with zone = NULL. Once memory
onlining is done, we set the proper zone.

(2) Boot memory with !CONFIG_NUMA

We know that there is just a single pgdat, so we simply scan all zones
of that pgdat for an intersection with our memory block PFN range when
adding the memory block. If more than one zone intersects (e.g., DMA and
DMA32 on x86 for the first memory block) we set zone = NULL and
consequently mimic what test_pages_in_a_zone() used to do.

(3) Boot memory with CONFIG_NUMA

At the point in time we create the memory block devices during boot, we
don't know yet which nodes *actually* span a memory block. While we could
scan all zones of all nodes for intersections, overlapping nodes
complicate the situation and scanning all nodes is possibly expensive.
But that problem has already been solved by the code that sets the node
of a memory block and creates the link in sysfs --
do_register_memory_block_under_node(). So, we hook into the code that
sets the node id for a memory block.

If we already have a different node id set for the memory block, we know
that multiple nodes *actually* have PFNs falling into our memory block:
we set zone = NULL and consequently mimic what test_pages_in_a_zone()
used to do. If there is no node id set, we do the same as (2) for the
given node.

Note that the call order in driver_init() is:

-> memory_dev_init(): create memory block devices
-> node_dev_init(): link memory block devices to the node and set the
   node id

So in summary, we detect if there is a single zone responsible for this
memory block and we consequently store the zone in that case in the
memory block, updating it during memory onlining/offlining.

Link: https://lkml.kernel.org/r/20220210184359.235565-3-david@redhat.com
Signed-off-by: David Hildenbrand
Reported-by: Rafael Parra
Reviewed-by: Oscar Salvador
Cc: "Rafael J. Wysocki"
Cc: Greg Kroah-Hartman
Cc: Michal Hocko
Cc: Rafael Parra
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 drivers/base/memory.c | 101 +++++++++++++++++++++++++++++++++++++++++++++++---
 drivers/base/node.c   |  13 +++----
 2 files changed, 101 insertions(+), 13 deletions(-)

(limited to 'drivers/base')

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 6ee2181adc3f..f75e3467cb59 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -215,6 +215,7 @@ static int memory_block_online(struct memory_block *mem)
 		adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
 					  nr_vmemmap_pages);

+	mem->zone = zone;
 	return ret;
 }

@@ -225,6 +226,9 @@ static int memory_block_offline(struct memory_block *mem)
 	unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages;
 	int ret;

+	if (!mem->zone)
+		return -EINVAL;
+
 	/*
 	 * Unaccount before offlining, such that unpopulated zone and kthreads
 	 * can properly be torn down in offline_pages().
@@ -234,7 +238,7 @@ static int memory_block_offline(struct memory_block *mem)
 					  -nr_vmemmap_pages);

 	ret = offline_pages(start_pfn + nr_vmemmap_pages,
-			    nr_pages - nr_vmemmap_pages, mem->group);
+			    nr_pages - nr_vmemmap_pages, mem->zone, mem->group);
 	if (ret) {
 		/* offline_pages() failed. Account back. */
 		if (nr_vmemmap_pages)
@@ -246,6 +250,7 @@ static int memory_block_offline(struct memory_block *mem)
 	if (nr_vmemmap_pages)
 		mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages);

+	mem->zone = NULL;
 	return ret;
 }

@@ -411,11 +416,10 @@ static ssize_t valid_zones_show(struct device *dev,
 	 */
 	if (mem->state == MEM_ONLINE) {
 		/*
-		 * The block contains more than one zone can not be offlined.
-		 * This can happen e.g. for ZONE_DMA and ZONE_DMA32
+		 * If !mem->zone, the memory block spans multiple zones and
+		 * cannot get offlined.
 		 */
-		default_zone = test_pages_in_a_zone(start_pfn,
-						    start_pfn + nr_pages);
+		default_zone = mem->zone;
 		if (!default_zone)
 			return sysfs_emit(buf, "%s\n", "none");
 		len += sysfs_emit_at(buf, len, "%s", default_zone->name);
@@ -643,6 +647,82 @@ int register_memory(struct memory_block *memory)
 	return ret;
 }

+static struct zone *early_node_zone_for_memory_block(struct memory_block *mem,
+						     int nid)
+{
+	const unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
+	const unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
+	struct zone *zone, *matching_zone = NULL;
+	pg_data_t *pgdat = NODE_DATA(nid);
+	int i;
+
+	/*
+	 * This logic only works for early memory, when the applicable zones
+	 * already span the memory block. We don't expect overlapping zones on
+	 * a single node for early memory. So if we're told that some PFNs
+	 * of a node fall into this memory block, we can assume that all node
+	 * zones that intersect with the memory block are actually applicable.
+	 * No need to look at the memmap.
+	 */
+	for (i = 0; i < MAX_NR_ZONES; i++) {
+		zone = pgdat->node_zones + i;
+		if (!populated_zone(zone))
+			continue;
+		if (!zone_intersects(zone, start_pfn, nr_pages))
+			continue;
+		if (!matching_zone) {
+			matching_zone = zone;
+			continue;
+		}
+		/* Spans multiple zones ... */
+		matching_zone = NULL;
+		break;
+	}
+	return matching_zone;
+}
+
+#ifdef CONFIG_NUMA
+/**
+ * memory_block_add_nid() - Indicate that system RAM falling into this memory
+ *			    block device (partially) belongs to the given node.
+ * @mem: The memory block device.
+ * @nid: The node id.
+ * @context: The memory initialization context.
+ *
+ * Indicate that system RAM falling into this memory block (partially) belongs
+ * to the given node. If the context indicates ("early") that we are adding the
+ * node during node device subsystem initialization, this will also properly
+ * set/adjust mem->zone based on the zone ranges of the given node.
+ */
+void memory_block_add_nid(struct memory_block *mem, int nid,
+			  enum meminit_context context)
+{
+	if (context == MEMINIT_EARLY && mem->nid != nid) {
+		/*
+		 * For early memory we have to determine the zone when setting
+		 * the node id and handle multiple nodes spanning a single
+		 * memory block by indicate via zone == NULL that we're not
+		 * dealing with a single zone. So if we're setting the node id
+		 * the first time, determine if there is a single zone. If we're
+		 * setting the node id a second time to a different node,
+		 * invalidate the single detected zone.
+		 */
+		if (mem->nid == NUMA_NO_NODE)
+			mem->zone = early_node_zone_for_memory_block(mem, nid);
+		else
+			mem->zone = NULL;
+	}
+
+	/*
+	 * If this memory block spans multiple nodes, we only indicate
+	 * the last processed node. If we span multiple nodes (not applicable
+	 * to hotplugged memory), zone == NULL will prohibit memory offlining
+	 * and consequently unplug.
+	 */
+	mem->nid = nid;
+}
+#endif
+
 static int init_memory_block(unsigned long block_id, unsigned long state,
 			     unsigned long nr_vmemmap_pages,
 			     struct memory_group *group)
@@ -665,6 +745,17 @@ static int init_memory_block(unsigned long block_id, unsigned long state,
 	mem->nr_vmemmap_pages = nr_vmemmap_pages;
 	INIT_LIST_HEAD(&mem->group_next);

+#ifndef CONFIG_NUMA
+	if (state == MEM_ONLINE)
+		/*
+		 * MEM_ONLINE at this point implies early memory. With NUMA,
+		 * we'll determine the zone when setting the node id via
+		 * memory_block_add_nid(). Memory hotplug updated the zone
+		 * manually when memory onlining/offlining succeeds.
+		 */
+		mem->zone = early_node_zone_for_memory_block(mem, NUMA_NO_NODE);
+#endif /* CONFIG_NUMA */
+
 	ret = register_memory(mem);
 	if (ret)
 		return ret;
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 5d75341413ce..ec8bb24a5a22 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -796,15 +796,12 @@ static int __ref get_nid_for_pfn(unsigned long pfn)
 }

 static void do_register_memory_block_under_node(int nid,
-						struct memory_block *mem_blk)
+						struct memory_block *mem_blk,
+						enum meminit_context context)
 {
 	int ret;

-	/*
-	 * If this memory block spans multiple nodes, we only indicate
-	 * the last processed node.
-	 */
-	mem_blk->nid = nid;
+	memory_block_add_nid(mem_blk, nid, context);

 	ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj,
 				       &mem_blk->dev.kobj,
@@ -857,7 +854,7 @@ static int register_mem_block_under_node_early(struct memory_block *mem_blk,
 		if (page_nid != nid)
 			continue;

-		do_register_memory_block_under_node(nid, mem_blk);
+		do_register_memory_block_under_node(nid, mem_blk, MEMINIT_EARLY);
 		return 0;
 	}
 	/* mem section does not span the specified node */
@@ -873,7 +870,7 @@ static int register_mem_block_under_node_hotplug(struct memory_block *mem_blk,
 {
 	int nid = *(int *)arg;

-	do_register_memory_block_under_node(nid, mem_blk);
+	do_register_memory_block_under_node(nid, mem_blk, MEMINIT_HOTPLUG);
 	return 0;
 }
--
cgit v1.2.3
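
The single-zone rule that both the !CONFIG_NUMA boot path and
memory_block_add_nid() rely on can be restated compactly. The helper
below is an illustrative condensation of early_node_zone_for_memory_block(),
not a replacement for it:

#include <linux/mmzone.h>

/* Illustrative only: return the one zone intersecting the range, or NULL. */
static struct zone *single_zone_or_null(pg_data_t *pgdat,
					unsigned long start_pfn,
					unsigned long nr_pages)
{
	struct zone *match = NULL;
	int i;

	for (i = 0; i < MAX_NR_ZONES; i++) {
		struct zone *zone = pgdat->node_zones + i;

		if (!populated_zone(zone) ||
		    !zone_intersects(zone, start_pfn, nr_pages))
			continue;
		if (match)
			return NULL;	/* second hit: spans multiple zones */
		match = zone;
	}
	return match;
}

Whenever this yields NULL -- more than one intersecting zone, or none --
valid_zones reports "none" for an online block and memory_block_offline()
refuses the request with -EINVAL, matching what the old PFN walk enforced.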
Wysocki" Cc: Greg Kroah-Hartman Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/memory.c | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/memory.c b/drivers/base/memory.c index f75e3467cb59..7222ff9b5e05 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -619,11 +619,7 @@ static const struct attribute_group *memory_memblk_attr_groups[] = { NULL, }; -/* - * register_memory - Setup a sysfs device for a memory block - */ -static -int register_memory(struct memory_block *memory) +static int __add_memory_block(struct memory_block *memory) { int ret; @@ -723,9 +719,9 @@ void memory_block_add_nid(struct memory_block *mem, int nid, } #endif -static int init_memory_block(unsigned long block_id, unsigned long state, - unsigned long nr_vmemmap_pages, - struct memory_group *group) +static int add_memory_block(unsigned long block_id, unsigned long state, + unsigned long nr_vmemmap_pages, + struct memory_group *group) { struct memory_block *mem; int ret = 0; @@ -756,7 +752,7 @@ static int init_memory_block(unsigned long block_id, unsigned long state, mem->zone = early_node_zone_for_memory_block(mem, NUMA_NO_NODE); #endif /* CONFIG_NUMA */ - ret = register_memory(mem); + ret = __add_memory_block(mem); if (ret) return ret; @@ -768,7 +764,7 @@ static int init_memory_block(unsigned long block_id, unsigned long state, return 0; } -static int add_memory_block(unsigned long base_section_nr) +static int __init add_boot_memory_block(unsigned long base_section_nr) { int section_count = 0; unsigned long nr; @@ -780,11 +776,18 @@ static int add_memory_block(unsigned long base_section_nr) if (section_count == 0) return 0; - return init_memory_block(memory_block_id(base_section_nr), - MEM_ONLINE, 0, NULL); + return add_memory_block(memory_block_id(base_section_nr), + MEM_ONLINE, 0, NULL); +} + +static int add_hotplug_memory_block(unsigned long block_id, + unsigned long nr_vmemmap_pages, + struct memory_group *group) +{ + return add_memory_block(block_id, MEM_OFFLINE, nr_vmemmap_pages, group); } -static void unregister_memory(struct memory_block *memory) +static void remove_memory_block(struct memory_block *memory) { if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys)) return; @@ -823,8 +826,7 @@ int create_memory_block_devices(unsigned long start, unsigned long size, return -EINVAL; for (block_id = start_block_id; block_id != end_block_id; block_id++) { - ret = init_memory_block(block_id, MEM_OFFLINE, vmemmap_pages, - group); + ret = add_hotplug_memory_block(block_id, vmemmap_pages, group); if (ret) break; } @@ -835,7 +837,7 @@ int create_memory_block_devices(unsigned long start, unsigned long size, mem = find_memory_block_by_id(block_id); if (WARN_ON_ONCE(!mem)) continue; - unregister_memory(mem); + remove_memory_block(mem); } } return ret; @@ -864,7 +866,7 @@ void remove_memory_block_devices(unsigned long start, unsigned long size) if (WARN_ON_ONCE(!mem)) continue; unregister_memory_block_under_nodes(mem); - unregister_memory(mem); + remove_memory_block(mem); } } @@ -924,7 +926,7 @@ void __init memory_dev_init(void) */ for (nr = 0; nr <= __highest_present_section_nr; nr += sections_per_block) { - ret = add_memory_block(nr); + ret = add_boot_memory_block(nr); if (ret) panic("%s() failed to add memory block: %d\n", __func__, ret); -- cgit v1.2.3