summaryrefslogtreecommitdiff
path: root/mm/vmscan.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--mm/vmscan.c840
1 files changed, 475 insertions, 365 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f54a05b7a61d..c52b23552659 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -26,7 +26,6 @@
#include <linux/buffer_head.h> /* for try_to_release_page(),
buffer_heads_over_limit */
#include <linux/mm_inline.h>
-#include <linux/pagevec.h>
#include <linux/backing-dev.h>
#include <linux/rmap.h>
#include <linux/topology.h>
@@ -103,8 +102,11 @@ struct scan_control {
*/
reclaim_mode_t reclaim_mode;
- /* Which cgroup do we reclaim from */
- struct mem_cgroup *mem_cgroup;
+ /*
+ * The memory cgroup that hit its limit and as a result is the
+ * primary target of this reclaim invocation.
+ */
+ struct mem_cgroup *target_mem_cgroup;
/*
* Nodemask of nodes allowed by the caller. If NULL, all nodes
@@ -113,6 +115,11 @@ struct scan_control {
nodemask_t *nodemask;
};
+struct mem_cgroup_zone {
+ struct mem_cgroup *mem_cgroup;
+ struct zone *zone;
+};
+
#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
#ifdef ARCH_HAS_PREFETCH
@@ -153,28 +160,45 @@ static LIST_HEAD(shrinker_list);
static DECLARE_RWSEM(shrinker_rwsem);
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
-#define scanning_global_lru(sc) (!(sc)->mem_cgroup)
+static bool global_reclaim(struct scan_control *sc)
+{
+ return !sc->target_mem_cgroup;
+}
+
+static bool scanning_global_lru(struct mem_cgroup_zone *mz)
+{
+ return !mz->mem_cgroup;
+}
#else
-#define scanning_global_lru(sc) (1)
+static bool global_reclaim(struct scan_control *sc)
+{
+ return true;
+}
+
+static bool scanning_global_lru(struct mem_cgroup_zone *mz)
+{
+ return true;
+}
#endif
-static struct zone_reclaim_stat *get_reclaim_stat(struct zone *zone,
- struct scan_control *sc)
+static struct zone_reclaim_stat *get_reclaim_stat(struct mem_cgroup_zone *mz)
{
- if (!scanning_global_lru(sc))
- return mem_cgroup_get_reclaim_stat(sc->mem_cgroup, zone);
+ if (!scanning_global_lru(mz))
+ return mem_cgroup_get_reclaim_stat(mz->mem_cgroup, mz->zone);
- return &zone->reclaim_stat;
+ return &mz->zone->reclaim_stat;
}
-static unsigned long zone_nr_lru_pages(struct zone *zone,
- struct scan_control *sc, enum lru_list lru)
+static unsigned long zone_nr_lru_pages(struct mem_cgroup_zone *mz,
+ enum lru_list lru)
{
- if (!scanning_global_lru(sc))
- return mem_cgroup_zone_nr_lru_pages(sc->mem_cgroup,
- zone_to_nid(zone), zone_idx(zone), BIT(lru));
+ if (!scanning_global_lru(mz))
+ return mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup,
+ zone_to_nid(mz->zone),
+ zone_idx(mz->zone),
+ BIT(lru));
- return zone_page_state(zone, NR_LRU_BASE + lru);
+ return zone_page_state(mz->zone, NR_LRU_BASE + lru);
}
@@ -636,7 +660,7 @@ redo:
* When racing with an mlock or AS_UNEVICTABLE clearing
* (page is unlocked) make sure that if the other thread
* does not observe our setting of PG_lru and fails
- * isolation/check_move_unevictable_page,
+ * isolation/check_move_unevictable_pages,
* we see PG_mlocked/AS_UNEVICTABLE cleared below and move
* the page back to the evictable list.
*
@@ -677,12 +701,13 @@ enum page_references {
};
static enum page_references page_check_references(struct page *page,
+ struct mem_cgroup_zone *mz,
struct scan_control *sc)
{
int referenced_ptes, referenced_page;
unsigned long vm_flags;
- referenced_ptes = page_referenced(page, 1, sc->mem_cgroup, &vm_flags);
+ referenced_ptes = page_referenced(page, 1, mz->mem_cgroup, &vm_flags);
referenced_page = TestClearPageReferenced(page);
/* Lumpy reclaim - ignore references */
@@ -715,7 +740,13 @@ static enum page_references page_check_references(struct page *page,
*/
SetPageReferenced(page);
- if (referenced_page)
+ if (referenced_page || referenced_ptes > 1)
+ return PAGEREF_ACTIVATE;
+
+ /*
+ * Activate file-backed executable pages after first usage.
+ */
+ if (vm_flags & VM_EXEC)
return PAGEREF_ACTIVATE;
return PAGEREF_KEEP;
@@ -728,29 +759,11 @@ static enum page_references page_check_references(struct page *page,
return PAGEREF_RECLAIM;
}
-static noinline_for_stack void free_page_list(struct list_head *free_pages)
-{
- struct pagevec freed_pvec;
- struct page *page, *tmp;
-
- pagevec_init(&freed_pvec, 1);
-
- list_for_each_entry_safe(page, tmp, free_pages, lru) {
- list_del(&page->lru);
- if (!pagevec_add(&freed_pvec, page)) {
- __pagevec_free(&freed_pvec);
- pagevec_reinit(&freed_pvec);
- }
- }
-
- pagevec_free(&freed_pvec);
-}
-
/*
* shrink_page_list() returns the number of reclaimed pages
*/
static unsigned long shrink_page_list(struct list_head *page_list,
- struct zone *zone,
+ struct mem_cgroup_zone *mz,
struct scan_control *sc,
int priority,
unsigned long *ret_nr_dirty,
@@ -781,7 +794,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
goto keep;
VM_BUG_ON(PageActive(page));
- VM_BUG_ON(page_zone(page) != zone);
+ VM_BUG_ON(page_zone(page) != mz->zone);
sc->nr_scanned++;
@@ -815,7 +828,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
}
}
- references = page_check_references(page, sc);
+ references = page_check_references(page, mz, sc);
switch (references) {
case PAGEREF_ACTIVATE:
goto activate_locked;
@@ -1006,10 +1019,10 @@ keep_lumpy:
* back off and wait for congestion to clear because further reclaim
* will encounter the same problem
*/
- if (nr_dirty && nr_dirty == nr_congested && scanning_global_lru(sc))
- zone_set_flag(zone, ZONE_CONGESTED);
+ if (nr_dirty && nr_dirty == nr_congested && global_reclaim(sc))
+ zone_set_flag(mz->zone, ZONE_CONGESTED);
- free_page_list(&free_pages);
+ free_hot_cold_page_list(&free_pages, 1);
list_splice(&ret_pages, page_list);
count_vm_events(PGACTIVATE, pgactivate);
@@ -1061,8 +1074,39 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file)
ret = -EBUSY;
- if ((mode & ISOLATE_CLEAN) && (PageDirty(page) || PageWriteback(page)))
- return ret;
+ /*
+ * To minimise LRU disruption, the caller can indicate that it only
+ * wants to isolate pages it will be able to operate on without
+ * blocking - clean pages for the most part.
+ *
+ * ISOLATE_CLEAN means that only clean pages should be isolated. This
+ * is used by reclaim when it is cannot write to backing storage
+ *
+ * ISOLATE_ASYNC_MIGRATE is used to indicate that it only wants to pages
+ * that it is possible to migrate without blocking
+ */
+ if (mode & (ISOLATE_CLEAN|ISOLATE_ASYNC_MIGRATE)) {
+ /* All the caller can do on PageWriteback is block */
+ if (PageWriteback(page))
+ return ret;
+
+ if (PageDirty(page)) {
+ struct address_space *mapping;
+
+ /* ISOLATE_CLEAN means only clean pages */
+ if (mode & ISOLATE_CLEAN)
+ return ret;
+
+ /*
+ * Only pages without mappings or that have a
+ * ->migratepage callback are possible to migrate
+ * without blocking
+ */
+ mapping = page_mapping(page);
+ if (mapping && !mapping->a_ops->migratepage)
+ return ret;
+ }
+ }
if ((mode & ISOLATE_UNMAPPED) && page_mapped(page))
return ret;
@@ -1091,25 +1135,36 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file)
* Appropriate locks must be held before calling this function.
*
* @nr_to_scan: The number of pages to look through on the list.
- * @src: The LRU list to pull pages off.
+ * @mz: The mem_cgroup_zone to pull pages from.
* @dst: The temp list to put pages on to.
- * @scanned: The number of pages that were scanned.
+ * @nr_scanned: The number of pages that were scanned.
* @order: The caller's attempted allocation order
* @mode: One of the LRU isolation modes
+ * @active: True [1] if isolating active pages
* @file: True [1] if isolating file [!anon] pages
*
* returns how many pages were moved onto *@dst.
*/
static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
- struct list_head *src, struct list_head *dst,
- unsigned long *scanned, int order, isolate_mode_t mode,
- int file)
+ struct mem_cgroup_zone *mz, struct list_head *dst,
+ unsigned long *nr_scanned, int order, isolate_mode_t mode,
+ int active, int file)
{
+ struct lruvec *lruvec;
+ struct list_head *src;
unsigned long nr_taken = 0;
unsigned long nr_lumpy_taken = 0;
unsigned long nr_lumpy_dirty = 0;
unsigned long nr_lumpy_failed = 0;
unsigned long scan;
+ int lru = LRU_BASE;
+
+ lruvec = mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup);
+ if (active)
+ lru += LRU_ACTIVE;
+ if (file)
+ lru += LRU_FILE;
+ src = &lruvec->lists[lru];
for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
struct page *page;
@@ -1125,15 +1180,14 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
switch (__isolate_lru_page(page, mode, file)) {
case 0:
+ mem_cgroup_lru_del(page);
list_move(&page->lru, dst);
- mem_cgroup_del_lru(page);
nr_taken += hpage_nr_pages(page);
break;
case -EBUSY:
/* else it is being freed elsewhere */
list_move(&page->lru, src);
- mem_cgroup_rotate_lru_list(page, page_lru(page));
continue;
default:
@@ -1178,18 +1232,22 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
* anon page which don't already have a swap slot is
* pointless.
*/
- if (nr_swap_pages <= 0 && PageAnon(cursor_page) &&
+ if (nr_swap_pages <= 0 && PageSwapBacked(cursor_page) &&
!PageSwapCache(cursor_page))
break;
if (__isolate_lru_page(cursor_page, mode, file) == 0) {
+ unsigned int isolated_pages;
+
+ mem_cgroup_lru_del(cursor_page);
list_move(&cursor_page->lru, dst);
- mem_cgroup_del_lru(cursor_page);
- nr_taken += hpage_nr_pages(page);
- nr_lumpy_taken++;
+ isolated_pages = hpage_nr_pages(cursor_page);
+ nr_taken += isolated_pages;
+ nr_lumpy_taken += isolated_pages;
if (PageDirty(cursor_page))
- nr_lumpy_dirty++;
+ nr_lumpy_dirty += isolated_pages;
scan++;
+ pfn += isolated_pages - 1;
} else {
/*
* Check if the page is freed already.
@@ -1215,57 +1273,16 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
nr_lumpy_failed++;
}
- *scanned = scan;
+ *nr_scanned = scan;
trace_mm_vmscan_lru_isolate(order,
nr_to_scan, scan,
nr_taken,
nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed,
- mode);
+ mode, file);
return nr_taken;
}
-static unsigned long isolate_pages_global(unsigned long nr,
- struct list_head *dst,
- unsigned long *scanned, int order,
- isolate_mode_t mode,
- struct zone *z, int active, int file)
-{
- int lru = LRU_BASE;
- if (active)
- lru += LRU_ACTIVE;
- if (file)
- lru += LRU_FILE;
- return isolate_lru_pages(nr, &z->lru[lru].list, dst, scanned, order,
- mode, file);
-}
-
-/*
- * clear_active_flags() is a helper for shrink_active_list(), clearing
- * any active bits from the pages in the list.
- */
-static unsigned long clear_active_flags(struct list_head *page_list,
- unsigned int *count)
-{
- int nr_active = 0;
- int lru;
- struct page *page;
-
- list_for_each_entry(page, page_list, lru) {
- int numpages = hpage_nr_pages(page);
- lru = page_lru_base_type(page);
- if (PageActive(page)) {
- lru += LRU_ACTIVE;
- ClearPageActive(page);
- nr_active += numpages;
- }
- if (count)
- count[lru] += numpages;
- }
-
- return nr_active;
-}
-
/**
* isolate_lru_page - tries to isolate a page from its LRU list
* @page: page to isolate from its LRU list
@@ -1325,7 +1342,7 @@ static int too_many_isolated(struct zone *zone, int file,
if (current_is_kswapd())
return 0;
- if (!scanning_global_lru(sc))
+ if (!global_reclaim(sc))
return 0;
if (file) {
@@ -1339,27 +1356,21 @@ static int too_many_isolated(struct zone *zone, int file,
return isolated > inactive;
}
-/*
- * TODO: Try merging with migrations version of putback_lru_pages
- */
static noinline_for_stack void
-putback_lru_pages(struct zone *zone, struct scan_control *sc,
- unsigned long nr_anon, unsigned long nr_file,
- struct list_head *page_list)
+putback_inactive_pages(struct mem_cgroup_zone *mz,
+ struct list_head *page_list)
{
- struct page *page;
- struct pagevec pvec;
- struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
-
- pagevec_init(&pvec, 1);
+ struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
+ struct zone *zone = mz->zone;
+ LIST_HEAD(pages_to_free);
/*
* Put back any unfreeable pages.
*/
- spin_lock(&zone->lru_lock);
while (!list_empty(page_list)) {
+ struct page *page = lru_to_page(page_list);
int lru;
- page = lru_to_page(page_list);
+
VM_BUG_ON(PageLRU(page));
list_del(&page->lru);
if (unlikely(!page_evictable(page, NULL))) {
@@ -1376,30 +1387,53 @@ putback_lru_pages(struct zone *zone, struct scan_control *sc,
int numpages = hpage_nr_pages(page);
reclaim_stat->recent_rotated[file] += numpages;
}
- if (!pagevec_add(&pvec, page)) {
- spin_unlock_irq(&zone->lru_lock);
- __pagevec_release(&pvec);
- spin_lock_irq(&zone->lru_lock);
+ if (put_page_testzero(page)) {
+ __ClearPageLRU(page);
+ __ClearPageActive(page);
+ del_page_from_lru_list(zone, page, lru);
+
+ if (unlikely(PageCompound(page))) {
+ spin_unlock_irq(&zone->lru_lock);
+ (*get_compound_page_dtor(page))(page);
+ spin_lock_irq(&zone->lru_lock);
+ } else
+ list_add(&page->lru, &pages_to_free);
}
}
- __mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
- __mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);
- spin_unlock_irq(&zone->lru_lock);
- pagevec_release(&pvec);
+ /*
+ * To save our caller's stack, now use input list for pages to free.
+ */
+ list_splice(&pages_to_free, page_list);
}
-static noinline_for_stack void update_isolated_counts(struct zone *zone,
- struct scan_control *sc,
- unsigned long *nr_anon,
- unsigned long *nr_file,
- struct list_head *isolated_list)
+static noinline_for_stack void
+update_isolated_counts(struct mem_cgroup_zone *mz,
+ struct list_head *page_list,
+ unsigned long *nr_anon,
+ unsigned long *nr_file)
{
- unsigned long nr_active;
+ struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
+ struct zone *zone = mz->zone;
unsigned int count[NR_LRU_LISTS] = { 0, };
- struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
+ unsigned long nr_active = 0;
+ struct page *page;
+ int lru;
+
+ /*
+ * Count pages and clear active flags
+ */
+ list_for_each_entry(page, page_list, lru) {
+ int numpages = hpage_nr_pages(page);
+ lru = page_lru_base_type(page);
+ if (PageActive(page)) {
+ lru += LRU_ACTIVE;
+ ClearPageActive(page);
+ nr_active += numpages;
+ }
+ count[lru] += numpages;
+ }
- nr_active = clear_active_flags(isolated_list, count);
__count_vm_events(PGDEACTIVATE, nr_active);
__mod_zone_page_state(zone, NR_ACTIVE_FILE,
@@ -1413,8 +1447,6 @@ static noinline_for_stack void update_isolated_counts(struct zone *zone,
*nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON];
*nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE];
- __mod_zone_page_state(zone, NR_ISOLATED_ANON, *nr_anon);
- __mod_zone_page_state(zone, NR_ISOLATED_FILE, *nr_file);
reclaim_stat->recent_scanned[0] += *nr_anon;
reclaim_stat->recent_scanned[1] += *nr_file;
@@ -1466,8 +1498,8 @@ static inline bool should_reclaim_stall(unsigned long nr_taken,
* of reclaimed pages
*/
static noinline_for_stack unsigned long
-shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
- struct scan_control *sc, int priority, int file)
+shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
+ struct scan_control *sc, int priority, int file)
{
LIST_HEAD(page_list);
unsigned long nr_scanned;
@@ -1478,6 +1510,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
unsigned long nr_dirty = 0;
unsigned long nr_writeback = 0;
isolate_mode_t reclaim_mode = ISOLATE_INACTIVE;
+ struct zone *zone = mz->zone;
while (unlikely(too_many_isolated(zone, file, sc))) {
congestion_wait(BLK_RW_ASYNC, HZ/10);
@@ -1500,9 +1533,10 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
spin_lock_irq(&zone->lru_lock);
- if (scanning_global_lru(sc)) {
- nr_taken = isolate_pages_global(nr_to_scan, &page_list,
- &nr_scanned, sc->order, reclaim_mode, zone, 0, file);
+ nr_taken = isolate_lru_pages(nr_to_scan, mz, &page_list,
+ &nr_scanned, sc->order,
+ reclaim_mode, 0, file);
+ if (global_reclaim(sc)) {
zone->pages_scanned += nr_scanned;
if (current_is_kswapd())
__count_zone_vm_events(PGSCAN_KSWAPD, zone,
@@ -1510,14 +1544,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
else
__count_zone_vm_events(PGSCAN_DIRECT, zone,
nr_scanned);
- } else {
- nr_taken = mem_cgroup_isolate_pages(nr_to_scan, &page_list,
- &nr_scanned, sc->order, reclaim_mode, zone,
- sc->mem_cgroup, 0, file);
- /*
- * mem_cgroup_isolate_pages() keeps track of
- * scanned pages on its own.
- */
}
if (nr_taken == 0) {
@@ -1525,26 +1551,37 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
return 0;
}
- update_isolated_counts(zone, sc, &nr_anon, &nr_file, &page_list);
+ update_isolated_counts(mz, &page_list, &nr_anon, &nr_file);
+
+ __mod_zone_page_state(zone, NR_ISOLATED_ANON, nr_anon);
+ __mod_zone_page_state(zone, NR_ISOLATED_FILE, nr_file);
spin_unlock_irq(&zone->lru_lock);
- nr_reclaimed = shrink_page_list(&page_list, zone, sc, priority,
+ nr_reclaimed = shrink_page_list(&page_list, mz, sc, priority,
&nr_dirty, &nr_writeback);
/* Check if we should syncronously wait for writeback */
if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) {
set_reclaim_mode(priority, sc, true);
- nr_reclaimed += shrink_page_list(&page_list, zone, sc,
+ nr_reclaimed += shrink_page_list(&page_list, mz, sc,
priority, &nr_dirty, &nr_writeback);
}
- local_irq_disable();
+ spin_lock_irq(&zone->lru_lock);
+
if (current_is_kswapd())
__count_vm_events(KSWAPD_STEAL, nr_reclaimed);
__count_zone_vm_events(PGSTEAL, zone, nr_reclaimed);
- putback_lru_pages(zone, sc, nr_anon, nr_file, &page_list);
+ putback_inactive_pages(mz, &page_list);
+
+ __mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
+ __mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);
+
+ spin_unlock_irq(&zone->lru_lock);
+
+ free_hot_cold_page_list(&page_list, 1);
/*
* If reclaim is isolating dirty pages under writeback, it implies
@@ -1600,30 +1637,47 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
static void move_active_pages_to_lru(struct zone *zone,
struct list_head *list,
+ struct list_head *pages_to_free,
enum lru_list lru)
{
unsigned long pgmoved = 0;
- struct pagevec pvec;
struct page *page;
- pagevec_init(&pvec, 1);
+ if (buffer_heads_over_limit) {
+ spin_unlock_irq(&zone->lru_lock);
+ list_for_each_entry(page, list, lru) {
+ if (page_has_private(page) && trylock_page(page)) {
+ if (page_has_private(page))
+ try_to_release_page(page, 0);
+ unlock_page(page);
+ }
+ }
+ spin_lock_irq(&zone->lru_lock);
+ }
while (!list_empty(list)) {
+ struct lruvec *lruvec;
+
page = lru_to_page(list);
VM_BUG_ON(PageLRU(page));
SetPageLRU(page);
- list_move(&page->lru, &zone->lru[lru].list);
- mem_cgroup_add_lru_list(page, lru);
+ lruvec = mem_cgroup_lru_add_list(zone, page, lru);
+ list_move(&page->lru, &lruvec->lists[lru]);
pgmoved += hpage_nr_pages(page);
- if (!pagevec_add(&pvec, page) || list_empty(list)) {
- spin_unlock_irq(&zone->lru_lock);
- if (buffer_heads_over_limit)
- pagevec_strip(&pvec);
- __pagevec_release(&pvec);
- spin_lock_irq(&zone->lru_lock);
+ if (put_page_testzero(page)) {
+ __ClearPageLRU(page);
+ __ClearPageActive(page);
+ del_page_from_lru_list(zone, page, lru);
+
+ if (unlikely(PageCompound(page))) {
+ spin_unlock_irq(&zone->lru_lock);
+ (*get_compound_page_dtor(page))(page);
+ spin_lock_irq(&zone->lru_lock);
+ } else
+ list_add(&page->lru, pages_to_free);
}
}
__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
@@ -1631,19 +1685,22 @@ static void move_active_pages_to_lru(struct zone *zone,
__count_vm_events(PGDEACTIVATE, pgmoved);
}
-static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
- struct scan_control *sc, int priority, int file)
+static void shrink_active_list(unsigned long nr_to_scan,
+ struct mem_cgroup_zone *mz,
+ struct scan_control *sc,
+ int priority, int file)
{
unsigned long nr_taken;
- unsigned long pgscanned;
+ unsigned long nr_scanned;
unsigned long vm_flags;
LIST_HEAD(l_hold); /* The pages which were snipped off */
LIST_HEAD(l_active);
LIST_HEAD(l_inactive);
struct page *page;
- struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
+ struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
unsigned long nr_rotated = 0;
isolate_mode_t reclaim_mode = ISOLATE_ACTIVE;
+ struct zone *zone = mz->zone;
lru_add_drain();
@@ -1653,26 +1710,16 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
reclaim_mode |= ISOLATE_CLEAN;
spin_lock_irq(&zone->lru_lock);
- if (scanning_global_lru(sc)) {
- nr_taken = isolate_pages_global(nr_pages, &l_hold,
- &pgscanned, sc->order,
- reclaim_mode, zone,
- 1, file);
- zone->pages_scanned += pgscanned;
- } else {
- nr_taken = mem_cgroup_isolate_pages(nr_pages, &l_hold,
- &pgscanned, sc->order,
- reclaim_mode, zone,
- sc->mem_cgroup, 1, file);
- /*
- * mem_cgroup_isolate_pages() keeps track of
- * scanned pages on its own.
- */
- }
+
+ nr_taken = isolate_lru_pages(nr_to_scan, mz, &l_hold,
+ &nr_scanned, sc->order,
+ reclaim_mode, 1, file);
+ if (global_reclaim(sc))
+ zone->pages_scanned += nr_scanned;
reclaim_stat->recent_scanned[file] += nr_taken;
- __count_zone_vm_events(PGREFILL, zone, pgscanned);
+ __count_zone_vm_events(PGREFILL, zone, nr_scanned);
if (file)
__mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken);
else
@@ -1690,7 +1737,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
continue;
}
- if (page_referenced(page, 0, sc->mem_cgroup, &vm_flags)) {
+ if (page_referenced(page, 0, mz->mem_cgroup, &vm_flags)) {
nr_rotated += hpage_nr_pages(page);
/*
* Identify referenced, file-backed active pages and
@@ -1723,12 +1770,14 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
*/
reclaim_stat->recent_rotated[file] += nr_rotated;
- move_active_pages_to_lru(zone, &l_active,
+ move_active_pages_to_lru(zone, &l_active, &l_hold,
LRU_ACTIVE + file * LRU_FILE);
- move_active_pages_to_lru(zone, &l_inactive,
+ move_active_pages_to_lru(zone, &l_inactive, &l_hold,
LRU_BASE + file * LRU_FILE);
__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
spin_unlock_irq(&zone->lru_lock);
+
+ free_hot_cold_page_list(&l_hold, 1);
}
#ifdef CONFIG_SWAP
@@ -1753,10 +1802,8 @@ static int inactive_anon_is_low_global(struct zone *zone)
* Returns true if the zone does not have enough inactive anon pages,
* meaning some active anon pages need to be deactivated.
*/
-static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
+static int inactive_anon_is_low(struct mem_cgroup_zone *mz)
{
- int low;
-
/*
* If we don't have swap space, anonymous page deactivation
* is pointless.
@@ -1764,15 +1811,14 @@ static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
if (!total_swap_pages)
return 0;
- if (scanning_global_lru(sc))
- low = inactive_anon_is_low_global(zone);
- else
- low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup, zone);
- return low;
+ if (!scanning_global_lru(mz))
+ return mem_cgroup_inactive_anon_is_low(mz->mem_cgroup,
+ mz->zone);
+
+ return inactive_anon_is_low_global(mz->zone);
}
#else
-static inline int inactive_anon_is_low(struct zone *zone,
- struct scan_control *sc)
+static inline int inactive_anon_is_low(struct mem_cgroup_zone *mz)
{
return 0;
}
@@ -1790,8 +1836,7 @@ static int inactive_file_is_low_global(struct zone *zone)
/**
* inactive_file_is_low - check if file pages need to be deactivated
- * @zone: zone to check
- * @sc: scan control of this context
+ * @mz: memory cgroup and zone to check
*
* When the system is doing streaming IO, memory pressure here
* ensures that active file pages get deactivated, until more
@@ -1803,45 +1848,44 @@ static int inactive_file_is_low_global(struct zone *zone)
* This uses a different ratio than the anonymous pages, because
* the page cache uses a use-once replacement algorithm.
*/
-static int inactive_file_is_low(struct zone *zone, struct scan_control *sc)
+static int inactive_file_is_low(struct mem_cgroup_zone *mz)
{
- int low;
+ if (!scanning_global_lru(mz))
+ return mem_cgroup_inactive_file_is_low(mz->mem_cgroup,
+ mz->zone);
- if (scanning_global_lru(sc))
- low = inactive_file_is_low_global(zone);
- else
- low = mem_cgroup_inactive_file_is_low(sc->mem_cgroup, zone);
- return low;
+ return inactive_file_is_low_global(mz->zone);
}
-static int inactive_list_is_low(struct zone *zone, struct scan_control *sc,
- int file)
+static int inactive_list_is_low(struct mem_cgroup_zone *mz, int file)
{
if (file)
- return inactive_file_is_low(zone, sc);
+ return inactive_file_is_low(mz);
else
- return inactive_anon_is_low(zone, sc);
+ return inactive_anon_is_low(mz);
}
static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
- struct zone *zone, struct scan_control *sc, int priority)
+ struct mem_cgroup_zone *mz,
+ struct scan_control *sc, int priority)
{
int file = is_file_lru(lru);
if (is_active_lru(lru)) {
- if (inactive_list_is_low(zone, sc, file))
- shrink_active_list(nr_to_scan, zone, sc, priority, file);
+ if (inactive_list_is_low(mz, file))
+ shrink_active_list(nr_to_scan, mz, sc, priority, file);
return 0;
}
- return shrink_inactive_list(nr_to_scan, zone, sc, priority, file);
+ return shrink_inactive_list(nr_to_scan, mz, sc, priority, file);
}
-static int vmscan_swappiness(struct scan_control *sc)
+static int vmscan_swappiness(struct mem_cgroup_zone *mz,
+ struct scan_control *sc)
{
- if (scanning_global_lru(sc))
+ if (global_reclaim(sc))
return vm_swappiness;
- return mem_cgroup_swappiness(sc->mem_cgroup);
+ return mem_cgroup_swappiness(mz->mem_cgroup);
}
/*
@@ -1852,15 +1896,15 @@ static int vmscan_swappiness(struct scan_control *sc)
*
* nr[0] = anon pages to scan; nr[1] = file pages to scan
*/
-static void get_scan_count(struct zone *zone, struct scan_control *sc,
- unsigned long *nr, int priority)
+static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
+ unsigned long *nr, int priority)
{
unsigned long anon, file, free;
unsigned long anon_prio, file_prio;
unsigned long ap, fp;
- struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
+ struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
u64 fraction[2], denominator;
- enum lru_list l;
+ enum lru_list lru;
int noswap = 0;
bool force_scan = false;
@@ -1874,9 +1918,9 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
* latencies, so it's better to scan a minimum amount there as
* well.
*/
- if (scanning_global_lru(sc) && current_is_kswapd())
+ if (current_is_kswapd() && mz->zone->all_unreclaimable)
force_scan = true;
- if (!scanning_global_lru(sc))
+ if (!global_reclaim(sc))
force_scan = true;
/* If we have no swap space, do not bother scanning anon pages. */
@@ -1888,16 +1932,16 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
goto out;
}
- anon = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) +
- zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
- file = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) +
- zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
+ anon = zone_nr_lru_pages(mz, LRU_ACTIVE_ANON) +
+ zone_nr_lru_pages(mz, LRU_INACTIVE_ANON);
+ file = zone_nr_lru_pages(mz, LRU_ACTIVE_FILE) +
+ zone_nr_lru_pages(mz, LRU_INACTIVE_FILE);
- if (scanning_global_lru(sc)) {
- free = zone_page_state(zone, NR_FREE_PAGES);
+ if (global_reclaim(sc)) {
+ free = zone_page_state(mz->zone, NR_FREE_PAGES);
/* If we have very few page cache pages,
force-scan anon pages. */
- if (unlikely(file + free <= high_wmark_pages(zone))) {
+ if (unlikely(file + free <= high_wmark_pages(mz->zone))) {
fraction[0] = 1;
fraction[1] = 0;
denominator = 1;
@@ -1909,8 +1953,8 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
* With swappiness at 100, anonymous and file have the same priority.
* This scanning priority is essentially the inverse of IO cost.
*/
- anon_prio = vmscan_swappiness(sc);
- file_prio = 200 - vmscan_swappiness(sc);
+ anon_prio = vmscan_swappiness(mz, sc);
+ file_prio = 200 - vmscan_swappiness(mz, sc);
/*
* OK, so we have swap space and a fair amount of page cache
@@ -1923,7 +1967,7 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
*
* anon in [0], file in [1]
*/
- spin_lock_irq(&zone->lru_lock);
+ spin_lock_irq(&mz->zone->lru_lock);
if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
reclaim_stat->recent_scanned[0] /= 2;
reclaim_stat->recent_rotated[0] /= 2;
@@ -1944,24 +1988,24 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
fp = (file_prio + 1) * (reclaim_stat->recent_scanned[1] + 1);
fp /= reclaim_stat->recent_rotated[1] + 1;
- spin_unlock_irq(&zone->lru_lock);
+ spin_unlock_irq(&mz->zone->lru_lock);
fraction[0] = ap;
fraction[1] = fp;
denominator = ap + fp + 1;
out:
- for_each_evictable_lru(l) {
- int file = is_file_lru(l);
+ for_each_evictable_lru(lru) {
+ int file = is_file_lru(lru);
unsigned long scan;
- scan = zone_nr_lru_pages(zone, sc, l);
+ scan = zone_nr_lru_pages(mz, lru);
if (priority || noswap) {
scan >>= priority;
if (!scan && force_scan)
scan = SWAP_CLUSTER_MAX;
scan = div64_u64(scan * fraction[file], denominator);
}
- nr[l] = scan;
+ nr[lru] = scan;
}
}
@@ -1972,7 +2016,7 @@ out:
* back to the allocator and call try_to_compact_zone(), we ensure that
* there are enough free pages for it to be likely successful
*/
-static inline bool should_continue_reclaim(struct zone *zone,
+static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz,
unsigned long nr_reclaimed,
unsigned long nr_scanned,
struct scan_control *sc)
@@ -2012,14 +2056,15 @@ static inline bool should_continue_reclaim(struct zone *zone,
* inactive lists are large enough, continue reclaiming
*/
pages_for_compaction = (2UL << sc->order);
- inactive_lru_pages = zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON) +
- zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
+ inactive_lru_pages = zone_nr_lru_pages(mz, LRU_INACTIVE_FILE);
+ if (nr_swap_pages > 0)
+ inactive_lru_pages += zone_nr_lru_pages(mz, LRU_INACTIVE_ANON);
if (sc->nr_reclaimed < pages_for_compaction &&
inactive_lru_pages > pages_for_compaction)
return true;
/* If compaction would go ahead or the allocation would succeed, stop */
- switch (compaction_suitable(zone, sc->order)) {
+ switch (compaction_suitable(mz->zone, sc->order)) {
case COMPACT_PARTIAL:
case COMPACT_CONTINUE:
return false;
@@ -2031,12 +2076,12 @@ static inline bool should_continue_reclaim(struct zone *zone,
/*
* This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
*/
-static void shrink_zone(int priority, struct zone *zone,
- struct scan_control *sc)
+static void shrink_mem_cgroup_zone(int priority, struct mem_cgroup_zone *mz,
+ struct scan_control *sc)
{
unsigned long nr[NR_LRU_LISTS];
unsigned long nr_to_scan;
- enum lru_list l;
+ enum lru_list lru;
unsigned long nr_reclaimed, nr_scanned;
unsigned long nr_to_reclaim = sc->nr_to_reclaim;
struct blk_plug plug;
@@ -2044,19 +2089,19 @@ static void shrink_zone(int priority, struct zone *zone,
restart:
nr_reclaimed = 0;
nr_scanned = sc->nr_scanned;
- get_scan_count(zone, sc, nr, priority);
+ get_scan_count(mz, sc, nr, priority);
blk_start_plug(&plug);
while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
nr[LRU_INACTIVE_FILE]) {
- for_each_evictable_lru(l) {
- if (nr[l]) {
+ for_each_evictable_lru(lru) {
+ if (nr[lru]) {
nr_to_scan = min_t(unsigned long,
- nr[l], SWAP_CLUSTER_MAX);
- nr[l] -= nr_to_scan;
+ nr[lru], SWAP_CLUSTER_MAX);
+ nr[lru] -= nr_to_scan;
- nr_reclaimed += shrink_list(l, nr_to_scan,
- zone, sc, priority);
+ nr_reclaimed += shrink_list(lru, nr_to_scan,
+ mz, sc, priority);
}
}
/*
@@ -2077,17 +2122,89 @@ restart:
* Even if we did not try to evict anon pages at all, we want to
* rebalance the anon lru active/inactive ratio.
*/
- if (inactive_anon_is_low(zone, sc))
- shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
+ if (inactive_anon_is_low(mz))
+ shrink_active_list(SWAP_CLUSTER_MAX, mz, sc, priority, 0);
/* reclaim/compaction might need reclaim to continue */
- if (should_continue_reclaim(zone, nr_reclaimed,
+ if (should_continue_reclaim(mz, nr_reclaimed,
sc->nr_scanned - nr_scanned, sc))
goto restart;
throttle_vm_writeout(sc->gfp_mask);
}
+static void shrink_zone(int priority, struct zone *zone,
+ struct scan_control *sc)
+{
+ struct mem_cgroup *root = sc->target_mem_cgroup;
+ struct mem_cgroup_reclaim_cookie reclaim = {
+ .zone = zone,
+ .priority = priority,
+ };
+ struct mem_cgroup *memcg;
+
+ memcg = mem_cgroup_iter(root, NULL, &reclaim);
+ do {
+ struct mem_cgroup_zone mz = {
+ .mem_cgroup = memcg,
+ .zone = zone,
+ };
+
+ shrink_mem_cgroup_zone(priority, &mz, sc);
+ /*
+ * Limit reclaim has historically picked one memcg and
+ * scanned it with decreasing priority levels until
+ * nr_to_reclaim had been reclaimed. This priority
+ * cycle is thus over after a single memcg.
+ *
+ * Direct reclaim and kswapd, on the other hand, have
+ * to scan all memory cgroups to fulfill the overall
+ * scan target for the zone.
+ */
+ if (!global_reclaim(sc)) {
+ mem_cgroup_iter_break(root, memcg);
+ break;
+ }
+ memcg = mem_cgroup_iter(root, memcg, &reclaim);
+ } while (memcg);
+}
+
+/* Returns true if compaction should go ahead for a high-order request */
+static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
+{
+ unsigned long balance_gap, watermark;
+ bool watermark_ok;
+
+ /* Do not consider compaction for orders reclaim is meant to satisfy */
+ if (sc->order <= PAGE_ALLOC_COSTLY_ORDER)
+ return false;
+
+ /*
+ * Compaction takes time to run and there are potentially other
+ * callers using the pages just freed. Continue reclaiming until
+ * there is a buffer of free pages available to give compaction
+ * a reasonable chance of completing and allocating the page
+ */
+ balance_gap = min(low_wmark_pages(zone),
+ (zone->present_pages + KSWAPD_ZONE_BALANCE_GAP_RATIO-1) /
+ KSWAPD_ZONE_BALANCE_GAP_RATIO);
+ watermark = high_wmark_pages(zone) + balance_gap + (2UL << sc->order);
+ watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0);
+
+ /*
+ * If compaction is deferred, reclaim up to a point where
+ * compaction will have a chance of success when re-enabled
+ */
+ if (compaction_deferred(zone))
+ return watermark_ok;
+
+ /* If compaction is not ready to start, keep reclaiming */
+ if (!compaction_suitable(zone, sc->order))
+ return false;
+
+ return watermark_ok;
+}
+
/*
* This is the direct reclaim path, for page-allocating processes. We only
* try to reclaim pages from zones which will satisfy the caller's allocation
@@ -2105,8 +2222,9 @@ restart:
* scan then give up on it.
*
* This function returns true if a zone is being reclaimed for a costly
- * high-order allocation and compaction is either ready to begin or deferred.
- * This indicates to the caller that it should retry the allocation or fail.
+ * high-order allocation and compaction is ready to begin. This indicates to
+ * the caller that it should consider retrying the allocation instead of
+ * further reclaim.
*/
static bool shrink_zones(int priority, struct zonelist *zonelist,
struct scan_control *sc)
@@ -2115,7 +2233,7 @@ static bool shrink_zones(int priority, struct zonelist *zonelist,
struct zone *zone;
unsigned long nr_soft_reclaimed;
unsigned long nr_soft_scanned;
- bool should_abort_reclaim = false;
+ bool aborted_reclaim = false;
for_each_zone_zonelist_nodemask(zone, z, zonelist,
gfp_zone(sc->gfp_mask), sc->nodemask) {
@@ -2125,7 +2243,7 @@ static bool shrink_zones(int priority, struct zonelist *zonelist,
* Take care memory controller reclaiming has small influence
* to global LRU.
*/
- if (scanning_global_lru(sc)) {
+ if (global_reclaim(sc)) {
if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
continue;
if (zone->all_unreclaimable && priority != DEF_PRIORITY)
@@ -2140,10 +2258,8 @@ static bool shrink_zones(int priority, struct zonelist *zonelist,
* noticable problem, like transparent huge page
* allocations.
*/
- if (sc->order > PAGE_ALLOC_COSTLY_ORDER &&
- (compaction_suitable(zone, sc->order) ||
- compaction_deferred(zone))) {
- should_abort_reclaim = true;
+ if (compaction_ready(zone, sc)) {
+ aborted_reclaim = true;
continue;
}
}
@@ -2165,7 +2281,7 @@ static bool shrink_zones(int priority, struct zonelist *zonelist,
shrink_zone(priority, zone, sc);
}
- return should_abort_reclaim;
+ return aborted_reclaim;
}
static bool zone_reclaimable(struct zone *zone)
@@ -2219,25 +2335,25 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
struct zoneref *z;
struct zone *zone;
unsigned long writeback_threshold;
+ bool aborted_reclaim;
get_mems_allowed();
delayacct_freepages_start();
- if (scanning_global_lru(sc))
+ if (global_reclaim(sc))
count_vm_event(ALLOCSTALL);
for (priority = DEF_PRIORITY; priority >= 0; priority--) {
sc->nr_scanned = 0;
if (!priority)
- disable_swap_token(sc->mem_cgroup);
- if (shrink_zones(priority, zonelist, sc))
- break;
+ disable_swap_token(sc->target_mem_cgroup);
+ aborted_reclaim = shrink_zones(priority, zonelist, sc);
/*
* Don't shrink slabs when reclaiming memory from
* over limit cgroups
*/
- if (scanning_global_lru(sc)) {
+ if (global_reclaim(sc)) {
unsigned long lru_pages = 0;
for_each_zone_zonelist(zone, z, zonelist,
gfp_zone(sc->gfp_mask)) {
@@ -2298,8 +2414,12 @@ out:
if (oom_killer_disabled)
return 0;
+ /* Aborted reclaim to try compaction? don't OOM, then */
+ if (aborted_reclaim)
+ return 1;
+
/* top priority shrink_zones still had more to do? don't OOM, then */
- if (scanning_global_lru(sc) && !all_unreclaimable(zonelist, sc))
+ if (global_reclaim(sc) && !all_unreclaimable(zonelist, sc))
return 1;
return 0;
@@ -2316,7 +2436,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
.may_unmap = 1,
.may_swap = 1,
.order = order,
- .mem_cgroup = NULL,
+ .target_mem_cgroup = NULL,
.nodemask = nodemask,
};
struct shrink_control shrink = {
@@ -2336,7 +2456,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
-unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
+unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
gfp_t gfp_mask, bool noswap,
struct zone *zone,
unsigned long *nr_scanned)
@@ -2348,7 +2468,11 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
.may_unmap = 1,
.may_swap = !noswap,
.order = 0,
- .mem_cgroup = mem,
+ .target_mem_cgroup = memcg,
+ };
+ struct mem_cgroup_zone mz = {
+ .mem_cgroup = memcg,
+ .zone = zone,
};
sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
@@ -2365,7 +2489,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
* will pick up pages from other mem cgroup's as well. We hack
* the priority and make it zero.
*/
- shrink_zone(0, zone, &sc);
+ shrink_mem_cgroup_zone(0, &mz, &sc);
trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
@@ -2373,7 +2497,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
return sc.nr_reclaimed;
}
-unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
+unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
gfp_t gfp_mask,
bool noswap)
{
@@ -2386,7 +2510,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
.may_swap = !noswap,
.nr_to_reclaim = SWAP_CLUSTER_MAX,
.order = 0,
- .mem_cgroup = mem_cont,
+ .target_mem_cgroup = memcg,
.nodemask = NULL, /* we don't care the placement */
.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
@@ -2400,7 +2524,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
* take care of from where we get pages. So the node where we start the
* scan does not need to be the current node.
*/
- nid = mem_cgroup_select_victim_node(mem_cont);
+ nid = mem_cgroup_select_victim_node(memcg);
zonelist = NODE_DATA(nid)->node_zonelists;
@@ -2416,6 +2540,29 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
}
#endif
+static void age_active_anon(struct zone *zone, struct scan_control *sc,
+ int priority)
+{
+ struct mem_cgroup *memcg;
+
+ if (!total_swap_pages)
+ return;
+
+ memcg = mem_cgroup_iter(NULL, NULL, NULL);
+ do {
+ struct mem_cgroup_zone mz = {
+ .mem_cgroup = memcg,
+ .zone = zone,
+ };
+
+ if (inactive_anon_is_low(&mz))
+ shrink_active_list(SWAP_CLUSTER_MAX, &mz,
+ sc, priority, 0);
+
+ memcg = mem_cgroup_iter(NULL, memcg, NULL);
+ } while (memcg);
+}
+
/*
* pgdat_balanced is used when checking if a node is balanced for high-order
* allocations. Only zones that meet watermarks and are in a zone allowed
@@ -2536,7 +2683,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
*/
.nr_to_reclaim = ULONG_MAX,
.order = order,
- .mem_cgroup = NULL,
+ .target_mem_cgroup = NULL,
};
struct shrink_control shrink = {
.gfp_mask = sc.gfp_mask,
@@ -2575,9 +2722,7 @@ loop_again:
* Do some background aging of the anon list, to give
* pages a chance to be referenced before reclaiming.
*/
- if (inactive_anon_is_low(zone, &sc))
- shrink_active_list(SWAP_CLUSTER_MAX, zone,
- &sc, priority, 0);
+ age_active_anon(zone, &sc, priority);
if (!zone_watermark_ok_safe(zone, order,
high_wmark_pages(zone), 0, 0)) {
@@ -3353,104 +3498,69 @@ int page_evictable(struct page *page, struct vm_area_struct *vma)
return 1;
}
+#ifdef CONFIG_SHMEM
/**
- * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list
- * @page: page to check evictability and move to appropriate lru list
- * @zone: zone page is in
- *
- * Checks a page for evictability and moves the page to the appropriate
- * zone lru list.
+ * check_move_unevictable_pages - check pages for evictability and move to appropriate zone lru list
+ * @pages: array of pages to check
+ * @nr_pages: number of pages to check
*
- * Restrictions: zone->lru_lock must be held, page must be on LRU and must
- * have PageUnevictable set.
- */
-static void check_move_unevictable_page(struct page *page, struct zone *zone)
-{
- VM_BUG_ON(PageActive(page));
-
-retry:
- ClearPageUnevictable(page);
- if (page_evictable(page, NULL)) {
- enum lru_list l = page_lru_base_type(page);
-
- __dec_zone_state(zone, NR_UNEVICTABLE);
- list_move(&page->lru, &zone->lru[l].list);
- mem_cgroup_move_lists(page, LRU_UNEVICTABLE, l);
- __inc_zone_state(zone, NR_INACTIVE_ANON + l);
- __count_vm_event(UNEVICTABLE_PGRESCUED);
- } else {
- /*
- * rotate unevictable list
- */
- SetPageUnevictable(page);
- list_move(&page->lru, &zone->lru[LRU_UNEVICTABLE].list);
- mem_cgroup_rotate_lru_list(page, LRU_UNEVICTABLE);
- if (page_evictable(page, NULL))
- goto retry;
- }
-}
-
-/**
- * scan_mapping_unevictable_pages - scan an address space for evictable pages
- * @mapping: struct address_space to scan for evictable pages
+ * Checks pages for evictability and moves them to the appropriate lru list.
*
- * Scan all pages in mapping. Check unevictable pages for
- * evictability and move them to the appropriate zone lru list.
+ * This function is only used for SysV IPC SHM_UNLOCK.
*/
-void scan_mapping_unevictable_pages(struct address_space *mapping)
+void check_move_unevictable_pages(struct page **pages, int nr_pages)
{
- pgoff_t next = 0;
- pgoff_t end = (i_size_read(mapping->host) + PAGE_CACHE_SIZE - 1) >>
- PAGE_CACHE_SHIFT;
- struct zone *zone;
- struct pagevec pvec;
-
- if (mapping->nrpages == 0)
- return;
-
- pagevec_init(&pvec, 0);
- while (next < end &&
- pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
- int i;
- int pg_scanned = 0;
-
- zone = NULL;
+ struct lruvec *lruvec;
+ struct zone *zone = NULL;
+ int pgscanned = 0;
+ int pgrescued = 0;
+ int i;
- for (i = 0; i < pagevec_count(&pvec); i++) {
- struct page *page = pvec.pages[i];
- pgoff_t page_index = page->index;
- struct zone *pagezone = page_zone(page);
+ for (i = 0; i < nr_pages; i++) {
+ struct page *page = pages[i];
+ struct zone *pagezone;
- pg_scanned++;
- if (page_index > next)
- next = page_index;
- next++;
+ pgscanned++;
+ pagezone = page_zone(page);
+ if (pagezone != zone) {
+ if (zone)
+ spin_unlock_irq(&zone->lru_lock);
+ zone = pagezone;
+ spin_lock_irq(&zone->lru_lock);
+ }
- if (pagezone != zone) {
- if (zone)
- spin_unlock_irq(&zone->lru_lock);
- zone = pagezone;
- spin_lock_irq(&zone->lru_lock);
- }
+ if (!PageLRU(page) || !PageUnevictable(page))
+ continue;
- if (PageLRU(page) && PageUnevictable(page))
- check_move_unevictable_page(page, zone);
+ if (page_evictable(page, NULL)) {
+ enum lru_list lru = page_lru_base_type(page);
+
+ VM_BUG_ON(PageActive(page));
+ ClearPageUnevictable(page);
+ __dec_zone_state(zone, NR_UNEVICTABLE);
+ lruvec = mem_cgroup_lru_move_lists(zone, page,
+ LRU_UNEVICTABLE, lru);
+ list_move(&page->lru, &lruvec->lists[lru]);
+ __inc_zone_state(zone, NR_INACTIVE_ANON + lru);
+ pgrescued++;
}
- if (zone)
- spin_unlock_irq(&zone->lru_lock);
- pagevec_release(&pvec);
-
- count_vm_events(UNEVICTABLE_PGSCANNED, pg_scanned);
}
+ if (zone) {
+ __count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued);
+ __count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned);
+ spin_unlock_irq(&zone->lru_lock);
+ }
}
+#endif /* CONFIG_SHMEM */
static void warn_scan_unevictable_pages(void)
{
printk_once(KERN_WARNING
- "The scan_unevictable_pages sysctl/node-interface has been "
+ "%s: The scan_unevictable_pages sysctl/node-interface has been "
"disabled for lack of a legitimate use case. If you have "
- "one, please send an email to linux-mm@kvack.org.\n");
+ "one, please send an email to linux-mm@kvack.org.\n",
+ current->comm);
}
/*
@@ -3475,16 +3585,16 @@ int scan_unevictable_handler(struct ctl_table *table, int write,
* a specified node's per zone unevictable lists for evictable pages.
*/
-static ssize_t read_scan_unevictable_node(struct sys_device *dev,
- struct sysdev_attribute *attr,
+static ssize_t read_scan_unevictable_node(struct device *dev,
+ struct device_attribute *attr,
char *buf)
{
warn_scan_unevictable_pages();
return sprintf(buf, "0\n"); /* always zero; should fit... */
}
-static ssize_t write_scan_unevictable_node(struct sys_device *dev,
- struct sysdev_attribute *attr,
+static ssize_t write_scan_unevictable_node(struct device *dev,
+ struct device_attribute *attr,
const char *buf, size_t count)
{
warn_scan_unevictable_pages();
@@ -3492,17 +3602,17 @@ static ssize_t write_scan_unevictable_node(struct sys_device *dev,
}
-static SYSDEV_ATTR(scan_unevictable_pages, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(scan_unevictable_pages, S_IRUGO | S_IWUSR,
read_scan_unevictable_node,
write_scan_unevictable_node);
int scan_unevictable_register_node(struct node *node)
{
- return sysdev_create_file(&node->sysdev, &attr_scan_unevictable_pages);
+ return device_create_file(&node->dev, &dev_attr_scan_unevictable_pages);
}
void scan_unevictable_unregister_node(struct node *node)
{
- sysdev_remove_file(&node->sysdev, &attr_scan_unevictable_pages);
+ device_remove_file(&node->dev, &dev_attr_scan_unevictable_pages);
}
#endif