github.com/torvalds/linux.git
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--   mm/vmscan.c   394
1 file changed, 165 insertions(+), 229 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0f16ffe8eb67..dcb47074ae03 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -59,35 +59,20 @@
#include <trace/events/vmscan.h>
struct scan_control {
- /* Incremented by the number of inactive pages that were scanned */
- unsigned long nr_scanned;
-
- /* Number of pages freed so far during a call to shrink_zones() */
- unsigned long nr_reclaimed;
-
/* How many pages shrink_list() should reclaim */
unsigned long nr_to_reclaim;
- unsigned long hibernation_mode;
-
/* This context's GFP mask */
gfp_t gfp_mask;
- int may_writepage;
-
- /* Can mapped pages be reclaimed? */
- int may_unmap;
-
- /* Can pages be swapped as part of reclaim? */
- int may_swap;
-
+ /* Allocation order */
int order;
- /* Scan (total_size >> priority) pages at once */
- int priority;
-
- /* anon vs. file LRUs scanning "ratio" */
- int swappiness;
+ /*
+ * Nodemask of nodes allowed by the caller. If NULL, all nodes
+ * are scanned.
+ */
+ nodemask_t *nodemask;
/*
* The memory cgroup that hit its limit and as a result is the
@@ -95,11 +80,27 @@ struct scan_control {
*/
struct mem_cgroup *target_mem_cgroup;
- /*
- * Nodemask of nodes allowed by the caller. If NULL, all nodes
- * are scanned.
- */
- nodemask_t *nodemask;
+ /* Scan (total_size >> priority) pages at once */
+ int priority;
+
+ unsigned int may_writepage:1;
+
+ /* Can mapped pages be reclaimed? */
+ unsigned int may_unmap:1;
+
+ /* Can pages be swapped as part of reclaim? */
+ unsigned int may_swap:1;
+
+ unsigned int hibernation_mode:1;
+
+ /* One of the zones is ready for compaction */
+ unsigned int compaction_ready:1;
+
+ /* Incremented by the number of inactive pages that were scanned */
+ unsigned long nr_scanned;
+
+ /* Number of pages freed so far during a call to shrink_zones() */
+ unsigned long nr_reclaimed;
};
#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
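
The reshuffled struct scan_control turns the boolean knobs (may_writepage,
may_unmap, may_swap, hibernation_mode, plus the new compaction_ready) into
single-bit fields instead of full ints. A minimal sketch, with hypothetical
struct names and not kernel code, of why adjacent :1 bitfields are cheaper
than separate ints:

    /* Illustrative only -- not kernel code. */
    #include <stdio.h>

    struct knobs_as_ints {
            int may_writepage;
            int may_unmap;
            int may_swap;
    };

    struct knobs_as_bits {
            unsigned int may_writepage:1;
            unsigned int may_unmap:1;
            unsigned int may_swap:1;        /* all three share one word */
    };

    int main(void)
    {
            /* Typically prints "12 4" on common ABIs. */
            printf("%zu %zu\n", sizeof(struct knobs_as_ints),
                                sizeof(struct knobs_as_bits));
            return 0;
    }
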
@@ -136,7 +137,11 @@ struct scan_control {
* From 0 .. 100. Higher means more swappy.
*/
int vm_swappiness = 60;
-unsigned long vm_total_pages; /* The total number of pages which the VM controls */
+/*
+ * The total number of pages which are beyond the high watermark within all
+ * zones.
+ */
+unsigned long vm_total_pages;
static LIST_HEAD(shrinker_list);
static DECLARE_RWSEM(shrinker_rwsem);
@@ -169,7 +174,8 @@ static unsigned long zone_reclaimable_pages(struct zone *zone)
bool zone_reclaimable(struct zone *zone)
{
- return zone->pages_scanned < zone_reclaimable_pages(zone) * 6;
+ return zone_page_state(zone, NR_PAGES_SCANNED) <
+ zone_reclaimable_pages(zone) * 6;
}
static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru)
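
The ad-hoc zone->pages_scanned field becomes the NR_PAGES_SCANNED vmstat
counter, so the scanned count is read and updated through the regular
per-zone stat API. A hedged sketch of the access pattern this enables; the
wrapper names here are hypothetical, the calls mirror the hunks below:

    /* Sketch only: per-zone scanned count via the vmstat API. */
    static void note_scanned(struct zone *zone, unsigned long nr_scanned)
    {
            /* The __ variant is fine under zone->lru_lock, as in the hunks below. */
            __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned);
    }

    static bool zone_still_reclaimable(struct zone *zone)
    {
            return zone_page_state(zone, NR_PAGES_SCANNED) <
                   zone_reclaimable_pages(zone) * 6;
    }
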
@@ -571,9 +577,10 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
if (PageSwapCache(page)) {
swp_entry_t swap = { .val = page_private(page) };
+ mem_cgroup_swapout(page, swap);
__delete_from_swap_cache(page);
spin_unlock_irq(&mapping->tree_lock);
- swapcache_free(swap, page);
+ swapcache_free(swap);
} else {
void (*freepage)(struct page *);
void *shadow = NULL;
@@ -594,7 +601,6 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
shadow = workingset_eviction(mapping, page);
__delete_from_page_cache(page, shadow);
spin_unlock_irq(&mapping->tree_lock);
- mem_cgroup_uncharge_cache_page(page);
if (freepage != NULL)
freepage(page);
@@ -816,7 +822,6 @@ static unsigned long shrink_page_list(struct list_head *page_list,
cond_resched();
- mem_cgroup_uncharge_start();
while (!list_empty(page_list)) {
struct address_space *mapping;
struct page *page;
@@ -915,7 +920,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
/* Case 1 above */
if (current_is_kswapd() &&
PageReclaim(page) &&
- zone_is_reclaim_writeback(zone)) {
+ test_bit(ZONE_WRITEBACK, &zone->flags)) {
nr_immediate++;
goto keep_locked;
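
The zone_is_reclaim_*()/zone_set_flag()/zone_clear_flag() wrappers disappear;
zone->flags is treated as a plain unsigned long bitmap and manipulated with
the generic atomic bitops, as the remaining hunks in this file show. A small
sketch of the idiom, illustrative only:

    /* Illustrative only: mark, test and clear a zone state bit. */
    set_bit(ZONE_CONGESTED, &zone->flags);

    if (test_bit(ZONE_CONGESTED, &zone->flags)) {
            /* the real code throttles reclaim here */
    }

    clear_bit(ZONE_CONGESTED, &zone->flags);
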
@@ -997,7 +1002,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
*/
if (page_is_file_cache(page) &&
(!current_is_kswapd() ||
- !zone_is_reclaim_dirty(zone))) {
+ !test_bit(ZONE_DIRTY, &zone->flags))) {
/*
* Immediately reclaim when written back.
* Similar in principal to deactivate_page()
@@ -1127,11 +1132,12 @@ keep:
VM_BUG_ON_PAGE(PageLRU(page) || PageUnevictable(page), page);
}
+ mem_cgroup_uncharge_list(&free_pages);
free_hot_cold_page_list(&free_pages, true);
list_splice(&ret_pages, page_list);
count_vm_events(PGACTIVATE, pgactivate);
- mem_cgroup_uncharge_end();
+
*ret_nr_dirty += nr_dirty;
*ret_nr_congested += nr_congested;
*ret_nr_unqueued_dirty += nr_unqueued_dirty;
@@ -1431,6 +1437,7 @@ putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list)
if (unlikely(PageCompound(page))) {
spin_unlock_irq(&zone->lru_lock);
+ mem_cgroup_uncharge(page);
(*get_compound_page_dtor(page))(page);
spin_lock_irq(&zone->lru_lock);
} else
@@ -1503,7 +1510,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
if (global_reclaim(sc)) {
- zone->pages_scanned += nr_scanned;
+ __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned);
if (current_is_kswapd())
__count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scanned);
else
@@ -1538,6 +1545,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
spin_unlock_irq(&zone->lru_lock);
+ mem_cgroup_uncharge_list(&page_list);
free_hot_cold_page_list(&page_list, true);
/*
@@ -1555,7 +1563,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
* are encountered in the nr_immediate check below.
*/
if (nr_writeback && nr_writeback == nr_taken)
- zone_set_flag(zone, ZONE_WRITEBACK);
+ set_bit(ZONE_WRITEBACK, &zone->flags);
/*
* memcg will stall in page writeback so only consider forcibly
@@ -1567,16 +1575,16 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
* backed by a congested BDI and wait_iff_congested will stall.
*/
if (nr_dirty && nr_dirty == nr_congested)
- zone_set_flag(zone, ZONE_CONGESTED);
+ set_bit(ZONE_CONGESTED, &zone->flags);
/*
* If dirty pages are scanned that are not queued for IO, it
* implies that flushers are not keeping up. In this case, flag
- * the zone ZONE_TAIL_LRU_DIRTY and kswapd will start writing
- * pages from reclaim context.
+ * the zone ZONE_DIRTY and kswapd will start writing pages from
+ * reclaim context.
*/
if (nr_unqueued_dirty == nr_taken)
- zone_set_flag(zone, ZONE_TAIL_LRU_DIRTY);
+ set_bit(ZONE_DIRTY, &zone->flags);
/*
* If kswapd scans pages marked marked for immediate
@@ -1652,6 +1660,7 @@ static void move_active_pages_to_lru(struct lruvec *lruvec,
if (unlikely(PageCompound(page))) {
spin_unlock_irq(&zone->lru_lock);
+ mem_cgroup_uncharge(page);
(*get_compound_page_dtor(page))(page);
spin_lock_irq(&zone->lru_lock);
} else
@@ -1693,7 +1702,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold,
&nr_scanned, sc, isolate_mode, lru);
if (global_reclaim(sc))
- zone->pages_scanned += nr_scanned;
+ __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned);
reclaim_stat->recent_scanned[file] += nr_taken;
@@ -1750,7 +1759,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
* Count referenced pages from currently used mappings as rotated,
* even though only some of them are actually re-activated. This
* helps balance scan pressure between file and anonymous pages in
- * get_scan_ratio.
+ * get_scan_count.
*/
reclaim_stat->recent_rotated[file] += nr_rotated;
@@ -1759,6 +1768,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
spin_unlock_irq(&zone->lru_lock);
+ mem_cgroup_uncharge_list(&l_hold);
free_hot_cold_page_list(&l_hold, true);
}
@@ -1865,8 +1875,8 @@ enum scan_balance {
* nr[0] = anon inactive pages to scan; nr[1] = anon active pages to scan
* nr[2] = file inactive pages to scan; nr[3] = file active pages to scan
*/
-static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
- unsigned long *nr)
+static void get_scan_count(struct lruvec *lruvec, int swappiness,
+ struct scan_control *sc, unsigned long *nr)
{
struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
u64 fraction[2];
@@ -1909,7 +1919,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
* using the memory controller's swap limit feature would be
* too expensive.
*/
- if (!global_reclaim(sc) && !sc->swappiness) {
+ if (!global_reclaim(sc) && !swappiness) {
scan_balance = SCAN_FILE;
goto out;
}
@@ -1919,16 +1929,11 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
* system is close to OOM, scan both anon and file equally
* (unless the swappiness setting disagrees with swapping).
*/
- if (!sc->priority && sc->swappiness) {
+ if (!sc->priority && swappiness) {
scan_balance = SCAN_EQUAL;
goto out;
}
- anon = get_lru_size(lruvec, LRU_ACTIVE_ANON) +
- get_lru_size(lruvec, LRU_INACTIVE_ANON);
- file = get_lru_size(lruvec, LRU_ACTIVE_FILE) +
- get_lru_size(lruvec, LRU_INACTIVE_FILE);
-
/*
* Prevent the reclaimer from falling into the cache trap: as
* cache pages start out inactive, every cache fault will tip
@@ -1939,9 +1944,14 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
* anon pages. Try to detect this based on file LRU size.
*/
if (global_reclaim(sc)) {
- unsigned long free = zone_page_state(zone, NR_FREE_PAGES);
+ unsigned long zonefile;
+ unsigned long zonefree;
- if (unlikely(file + free <= high_wmark_pages(zone))) {
+ zonefree = zone_page_state(zone, NR_FREE_PAGES);
+ zonefile = zone_page_state(zone, NR_ACTIVE_FILE) +
+ zone_page_state(zone, NR_INACTIVE_FILE);
+
+ if (unlikely(zonefile + zonefree <= high_wmark_pages(zone))) {
scan_balance = SCAN_ANON;
goto out;
}
@@ -1962,7 +1972,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
* With swappiness at 100, anonymous and file have the same priority.
* This scanning priority is essentially the inverse of IO cost.
*/
- anon_prio = sc->swappiness;
+ anon_prio = swappiness;
file_prio = 200 - anon_prio;
/*
@@ -1976,6 +1986,12 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
*
* anon in [0], file in [1]
*/
+
+ anon = get_lru_size(lruvec, LRU_ACTIVE_ANON) +
+ get_lru_size(lruvec, LRU_INACTIVE_ANON);
+ file = get_lru_size(lruvec, LRU_ACTIVE_FILE) +
+ get_lru_size(lruvec, LRU_INACTIVE_FILE);
+
spin_lock_irq(&zone->lru_lock);
if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
reclaim_stat->recent_scanned[0] /= 2;
@@ -2052,7 +2068,8 @@ out:
/*
* This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
*/
-static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+static void shrink_lruvec(struct lruvec *lruvec, int swappiness,
+ struct scan_control *sc)
{
unsigned long nr[NR_LRU_LISTS];
unsigned long targets[NR_LRU_LISTS];
@@ -2063,7 +2080,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
struct blk_plug plug;
bool scan_adjusted;
- get_scan_count(lruvec, sc, nr);
+ get_scan_count(lruvec, swappiness, sc, nr);
/* Record the original scan target for proportional adjustments later */
memcpy(targets, nr, sizeof(nr));
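
Swappiness is no longer carried in scan_control; every call into
get_scan_count()/shrink_lruvec() now receives the per-memcg value explicitly.
A condensed sketch of the new calling convention, taken from the shrink_zone()
hunk further down:

    /* Per-memcg swappiness is looked up at the call site. */
    struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
    int swappiness = mem_cgroup_swappiness(memcg);

    shrink_lruvec(lruvec, swappiness, sc);
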
@@ -2241,9 +2258,10 @@ static inline bool should_continue_reclaim(struct zone *zone,
}
}
-static void shrink_zone(struct zone *zone, struct scan_control *sc)
+static bool shrink_zone(struct zone *zone, struct scan_control *sc)
{
unsigned long nr_reclaimed, nr_scanned;
+ bool reclaimable = false;
do {
struct mem_cgroup *root = sc->target_mem_cgroup;
@@ -2259,11 +2277,12 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
memcg = mem_cgroup_iter(root, NULL, &reclaim);
do {
struct lruvec *lruvec;
+ int swappiness;
lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+ swappiness = mem_cgroup_swappiness(memcg);
- sc->swappiness = mem_cgroup_swappiness(memcg);
- shrink_lruvec(lruvec, sc);
+ shrink_lruvec(lruvec, swappiness, sc);
/*
* Direct reclaim and kswapd have to scan all memory
@@ -2287,20 +2306,24 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
sc->nr_scanned - nr_scanned,
sc->nr_reclaimed - nr_reclaimed);
+ if (sc->nr_reclaimed - nr_reclaimed)
+ reclaimable = true;
+
} while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
sc->nr_scanned - nr_scanned, sc));
+
+ return reclaimable;
}
-/* Returns true if compaction should go ahead for a high-order request */
-static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
+/*
+ * Returns true if compaction should go ahead for a high-order request, or
+ * the high-order allocation would succeed without compaction.
+ */
+static inline bool compaction_ready(struct zone *zone, int order)
{
unsigned long balance_gap, watermark;
bool watermark_ok;
- /* Do not consider compaction for orders reclaim is meant to satisfy */
- if (sc->order <= PAGE_ALLOC_COSTLY_ORDER)
- return false;
-
/*
* Compaction takes time to run and there are potentially other
* callers using the pages just freed. Continue reclaiming until
@@ -2309,18 +2332,21 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
*/
balance_gap = min(low_wmark_pages(zone), DIV_ROUND_UP(
zone->managed_pages, KSWAPD_ZONE_BALANCE_GAP_RATIO));
- watermark = high_wmark_pages(zone) + balance_gap + (2UL << sc->order);
+ watermark = high_wmark_pages(zone) + balance_gap + (2UL << order);
watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0);
/*
* If compaction is deferred, reclaim up to a point where
* compaction will have a chance of success when re-enabled
*/
- if (compaction_deferred(zone, sc->order))
+ if (compaction_deferred(zone, order))
return watermark_ok;
- /* If compaction is not ready to start, keep reclaiming */
- if (!compaction_suitable(zone, sc->order))
+ /*
+ * If compaction is not ready to start and allocation is not likely
+ * to succeed without it, then keep reclaiming.
+ */
+ if (compaction_suitable(zone, order) == COMPACT_SKIPPED)
return false;
return watermark_ok;
@@ -2342,10 +2368,7 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
* If a zone is deemed to be full of pinned pages then just give it a light
* scan then give up on it.
*
- * This function returns true if a zone is being reclaimed for a costly
- * high-order allocation and compaction is ready to begin. This indicates to
- * the caller that it should consider retrying the allocation instead of
- * further reclaim.
+ * Returns true if a zone was reclaimable.
*/
static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
{
@@ -2354,13 +2377,13 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
unsigned long nr_soft_reclaimed;
unsigned long nr_soft_scanned;
unsigned long lru_pages = 0;
- bool aborted_reclaim = false;
struct reclaim_state *reclaim_state = current->reclaim_state;
gfp_t orig_mask;
struct shrink_control shrink = {
.gfp_mask = sc->gfp_mask,
};
enum zone_type requested_highidx = gfp_zone(sc->gfp_mask);
+ bool reclaimable = false;
/*
* If the number of buffer_heads in the machine exceeds the maximum
@@ -2391,22 +2414,24 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
if (sc->priority != DEF_PRIORITY &&
!zone_reclaimable(zone))
continue; /* Let kswapd poll it */
- if (IS_ENABLED(CONFIG_COMPACTION)) {
- /*
- * If we already have plenty of memory free for
- * compaction in this zone, don't free any more.
- * Even though compaction is invoked for any
- * non-zero order, only frequent costly order
- * reclamation is disruptive enough to become a
- * noticeable problem, like transparent huge
- * page allocations.
- */
- if ((zonelist_zone_idx(z) <= requested_highidx)
- && compaction_ready(zone, sc)) {
- aborted_reclaim = true;
- continue;
- }
+
+ /*
+ * If we already have plenty of memory free for
+ * compaction in this zone, don't free any more.
+ * Even though compaction is invoked for any
+ * non-zero order, only frequent costly order
+ * reclamation is disruptive enough to become a
+ * noticeable problem, like transparent huge
+ * page allocations.
+ */
+ if (IS_ENABLED(CONFIG_COMPACTION) &&
+ sc->order > PAGE_ALLOC_COSTLY_ORDER &&
+ zonelist_zone_idx(z) <= requested_highidx &&
+ compaction_ready(zone, sc->order)) {
+ sc->compaction_ready = true;
+ continue;
}
+
/*
* This steals pages from memory cgroups over softlimit
* and returns the number of reclaimed pages and
@@ -2419,10 +2444,17 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
&nr_soft_scanned);
sc->nr_reclaimed += nr_soft_reclaimed;
sc->nr_scanned += nr_soft_scanned;
+ if (nr_soft_reclaimed)
+ reclaimable = true;
/* need some check for avoid more shrink_zone() */
}
- shrink_zone(zone, sc);
+ if (shrink_zone(zone, sc))
+ reclaimable = true;
+
+ if (global_reclaim(sc) &&
+ !reclaimable && zone_reclaimable(zone))
+ reclaimable = true;
}
/*
@@ -2445,27 +2477,7 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
*/
sc->gfp_mask = orig_mask;
- return aborted_reclaim;
-}
-
-/* All zones in zonelist are unreclaimable? */
-static bool all_unreclaimable(struct zonelist *zonelist,
- struct scan_control *sc)
-{
- struct zoneref *z;
- struct zone *zone;
-
- for_each_zone_zonelist_nodemask(zone, z, zonelist,
- gfp_zone(sc->gfp_mask), sc->nodemask) {
- if (!populated_zone(zone))
- continue;
- if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
- continue;
- if (zone_reclaimable(zone))
- return false;
- }
-
- return true;
+ return reclaimable;
}
/*
@@ -2489,7 +2501,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
{
unsigned long total_scanned = 0;
unsigned long writeback_threshold;
- bool aborted_reclaim;
+ bool zones_reclaimable;
delayacct_freepages_start();
@@ -2500,11 +2512,14 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
sc->priority);
sc->nr_scanned = 0;
- aborted_reclaim = shrink_zones(zonelist, sc);
+ zones_reclaimable = shrink_zones(zonelist, sc);
total_scanned += sc->nr_scanned;
if (sc->nr_reclaimed >= sc->nr_to_reclaim)
- goto out;
+ break;
+
+ if (sc->compaction_ready)
+ break;
/*
* If we're getting trouble reclaiming, start doing
@@ -2526,28 +2541,19 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
WB_REASON_TRY_TO_FREE_PAGES);
sc->may_writepage = 1;
}
- } while (--sc->priority >= 0 && !aborted_reclaim);
+ } while (--sc->priority >= 0);
-out:
delayacct_freepages_end();
if (sc->nr_reclaimed)
return sc->nr_reclaimed;
- /*
- * As hibernation is going on, kswapd is freezed so that it can't mark
- * the zone into all_unreclaimable. Thus bypassing all_unreclaimable
- * check.
- */
- if (oom_killer_disabled)
- return 0;
-
/* Aborted reclaim to try compaction? don't OOM, then */
- if (aborted_reclaim)
+ if (sc->compaction_ready)
return 1;
- /* top priority shrink_zones still had more to do? don't OOM, then */
- if (global_reclaim(sc) && !all_unreclaimable(zonelist, sc))
+ /* Any of the zones still reclaimable? Don't OOM. */
+ if (zones_reclaimable)
return 1;
return 0;
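
With all_unreclaimable() removed, do_try_to_free_pages() decides whether to
suppress the OOM killer from state gathered during shrink_zones(). A hedged
reading of the resulting return contract, condensed from the branch above
rather than quoted from kernel text:

    /* Sketch: interpreting the return value in a caller. */
    nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
    if (nr_reclaimed) {
            /* pages were freed, or compaction is ready, or some zone is
             * still reclaimable -- in every case, do not OOM yet */
    } else {
            /* no progress and nothing left reclaimable */
    }
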
@@ -2684,15 +2690,14 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
{
unsigned long nr_reclaimed;
struct scan_control sc = {
+ .nr_to_reclaim = SWAP_CLUSTER_MAX,
.gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)),
+ .order = order,
+ .nodemask = nodemask,
+ .priority = DEF_PRIORITY,
.may_writepage = !laptop_mode,
- .nr_to_reclaim = SWAP_CLUSTER_MAX,
.may_unmap = 1,
.may_swap = 1,
- .order = order,
- .priority = DEF_PRIORITY,
- .target_mem_cgroup = NULL,
- .nodemask = nodemask,
};
/*
@@ -2722,17 +2727,14 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
unsigned long *nr_scanned)
{
struct scan_control sc = {
- .nr_scanned = 0,
.nr_to_reclaim = SWAP_CLUSTER_MAX,
+ .target_mem_cgroup = memcg,
.may_writepage = !laptop_mode,
.may_unmap = 1,
.may_swap = !noswap,
- .order = 0,
- .priority = 0,
- .swappiness = mem_cgroup_swappiness(memcg),
- .target_mem_cgroup = memcg,
};
struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+ int swappiness = mem_cgroup_swappiness(memcg);
sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -2748,7 +2750,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
* will pick up pages from other mem cgroup's as well. We hack
* the priority and make it zero.
*/
- shrink_lruvec(lruvec, &sc);
+ shrink_lruvec(lruvec, swappiness, &sc);
trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
@@ -2757,23 +2759,22 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
}
unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
+ unsigned long nr_pages,
gfp_t gfp_mask,
- bool noswap)
+ bool may_swap)
{
struct zonelist *zonelist;
unsigned long nr_reclaimed;
int nid;
struct scan_control sc = {
- .may_writepage = !laptop_mode,
- .may_unmap = 1,
- .may_swap = !noswap,
- .nr_to_reclaim = SWAP_CLUSTER_MAX,
- .order = 0,
- .priority = DEF_PRIORITY,
- .target_mem_cgroup = memcg,
- .nodemask = NULL, /* we don't care the placement */
+ .nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
+ .target_mem_cgroup = memcg,
+ .priority = DEF_PRIORITY,
+ .may_writepage = !laptop_mode,
+ .may_unmap = 1,
+ .may_swap = may_swap,
};
/*
@@ -2824,7 +2825,7 @@ static bool zone_balanced(struct zone *zone, int order,
return false;
if (IS_ENABLED(CONFIG_COMPACTION) && order &&
- !compaction_suitable(zone, order))
+ compaction_suitable(zone, order) == COMPACT_SKIPPED)
return false;
return true;
@@ -2984,7 +2985,7 @@ static bool kswapd_shrink_zone(struct zone *zone,
/* Account for the number of pages attempted to reclaim */
*nr_attempted += sc->nr_to_reclaim;
- zone_clear_flag(zone, ZONE_WRITEBACK);
+ clear_bit(ZONE_WRITEBACK, &zone->flags);
/*
* If a zone reaches its high watermark, consider it to be no longer
@@ -2994,8 +2995,8 @@ static bool kswapd_shrink_zone(struct zone *zone,
*/
if (zone_reclaimable(zone) &&
zone_balanced(zone, testorder, 0, classzone_idx)) {
- zone_clear_flag(zone, ZONE_CONGESTED);
- zone_clear_flag(zone, ZONE_TAIL_LRU_DIRTY);
+ clear_bit(ZONE_CONGESTED, &zone->flags);
+ clear_bit(ZONE_DIRTY, &zone->flags);
}
return sc->nr_scanned >= sc->nr_to_reclaim;
@@ -3031,12 +3032,11 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
unsigned long nr_soft_scanned;
struct scan_control sc = {
.gfp_mask = GFP_KERNEL,
+ .order = order,
.priority = DEF_PRIORITY,
+ .may_writepage = !laptop_mode,
.may_unmap = 1,
.may_swap = 1,
- .may_writepage = !laptop_mode,
- .order = order,
- .target_mem_cgroup = NULL,
};
count_vm_event(PAGEOUTRUN);
@@ -3087,8 +3087,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
* If balanced, clear the dirty and congested
* flags
*/
- zone_clear_flag(zone, ZONE_CONGESTED);
- zone_clear_flag(zone, ZONE_TAIL_LRU_DIRTY);
+ clear_bit(ZONE_CONGESTED, &zone->flags);
+ clear_bit(ZONE_DIRTY, &zone->flags);
}
}
@@ -3417,14 +3417,13 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
{
struct reclaim_state reclaim_state;
struct scan_control sc = {
+ .nr_to_reclaim = nr_to_reclaim,
.gfp_mask = GFP_HIGHUSER_MOVABLE,
- .may_swap = 1,
- .may_unmap = 1,
+ .priority = DEF_PRIORITY,
.may_writepage = 1,
- .nr_to_reclaim = nr_to_reclaim,
+ .may_unmap = 1,
+ .may_swap = 1,
.hibernation_mode = 1,
- .order = 0,
- .priority = DEF_PRIORITY,
};
struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
struct task_struct *p = current;
@@ -3604,13 +3603,13 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
struct task_struct *p = current;
struct reclaim_state reclaim_state;
struct scan_control sc = {
- .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
- .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
- .may_swap = 1,
.nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
.gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)),
.order = order,
.priority = ZONE_RECLAIM_PRIORITY,
+ .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
+ .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
+ .may_swap = 1,
};
struct shrink_control shrink = {
.gfp_mask = sc.gfp_mask,
@@ -3716,11 +3715,11 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
if (node_state(node_id, N_CPU) && node_id != numa_node_id())
return ZONE_RECLAIM_NOSCAN;
- if (zone_test_and_set_flag(zone, ZONE_RECLAIM_LOCKED))
+ if (test_and_set_bit(ZONE_RECLAIM_LOCKED, &zone->flags))
return ZONE_RECLAIM_NOSCAN;
ret = __zone_reclaim(zone, gfp_mask, order);
- zone_clear_flag(zone, ZONE_RECLAIM_LOCKED);
+ clear_bit(ZONE_RECLAIM_LOCKED, &zone->flags);
if (!ret)
count_vm_event(PGSCAN_ZONE_RECLAIM_FAILED);
@@ -3799,66 +3798,3 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages)
}
}
#endif /* CONFIG_SHMEM */
-
-static void warn_scan_unevictable_pages(void)
-{
- printk_once(KERN_WARNING
- "%s: The scan_unevictable_pages sysctl/node-interface has been "
- "disabled for lack of a legitimate use case. If you have "
- "one, please send an email to linux-mm@kvack.org.\n",
- current->comm);
-}
-
-/*
- * scan_unevictable_pages [vm] sysctl handler. On demand re-scan of
- * all nodes' unevictable lists for evictable pages
- */
-unsigned long scan_unevictable_pages;
-
-int scan_unevictable_handler(struct ctl_table *table, int write,
- void __user *buffer,
- size_t *length, loff_t *ppos)
-{
- warn_scan_unevictable_pages();
- proc_doulongvec_minmax(table, write, buffer, length, ppos);
- scan_unevictable_pages = 0;
- return 0;
-}
-
-#ifdef CONFIG_NUMA
-/*
- * per node 'scan_unevictable_pages' attribute. On demand re-scan of
- * a specified node's per zone unevictable lists for evictable pages.
- */
-
-static ssize_t read_scan_unevictable_node(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- warn_scan_unevictable_pages();
- return sprintf(buf, "0\n"); /* always zero; should fit... */
-}
-
-static ssize_t write_scan_unevictable_node(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- warn_scan_unevictable_pages();
- return 1;
-}
-
-
-static DEVICE_ATTR(scan_unevictable_pages, S_IRUGO | S_IWUSR,
- read_scan_unevictable_node,
- write_scan_unevictable_node);
-
-int scan_unevictable_register_node(struct node *node)
-{
- return device_create_file(&node->dev, &dev_attr_scan_unevictable_pages);
-}
-
-void scan_unevictable_unregister_node(struct node *node)
-{
- device_remove_file(&node->dev, &dev_attr_scan_unevictable_pages);
-}
-#endif