summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- include/linux/mmzone.h 6
-rw-r--r-- mm/page_alloc.c 138
-rw-r--r-- mm/vmstat.c 1
3 files changed, 135 insertions, 10 deletions
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index b86cfa3313cf..d3bafe4ff32b 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -39,6 +39,8 @@ enum {
MIGRATE_UNMOVABLE,
MIGRATE_MOVABLE,
MIGRATE_RECLAIMABLE,
+ MIGRATE_PCPTYPES, /* the number of types on the pcp lists */
+ MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES,
#ifdef CONFIG_CMA
/*
* MIGRATE_CMA migration type is designed to mimic the way
@@ -61,8 +63,6 @@ enum {
MIGRATE_TYPES
};
-#define MIGRATE_PCPTYPES (MIGRATE_RECLAIMABLE+1)
-
#ifdef CONFIG_CMA
# define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA)
#else
@@ -334,6 +334,8 @@ struct zone {
/* zone watermarks, access with *_wmark_pages(zone) macros */
unsigned long watermark[NR_WMARK];
+ unsigned long nr_reserved_highatomic;
+
/*
* We don't know if the memory that we're going to allocate will be freeable
* or/and it will be released eventually, so to avoid totally wasting several
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 588812614377..55e9c56dfe54 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1615,6 +1615,101 @@ int find_suitable_fallback(struct free_area *area, unsigned int order,
return -1;
}
+/*
+ * Reserve a pageblock for exclusive use of high-order atomic allocations if
+ * there are no empty page blocks that contain a page with a suitable order
+ */
+static void reserve_highatomic_pageblock(struct page *page, struct zone *zone,
+ unsigned int alloc_order)
+{
+ int mt;
+ unsigned long max_managed, flags;
+
+ /*
+ * Limit the reserved pages to roughly 1% of a zone plus one pageblock.
+ * Check is race-prone but harmless.
+ */
+ max_managed = (zone->managed_pages / 100) + pageblock_nr_pages;
+ if (zone->nr_reserved_highatomic >= max_managed)
+ return;
+
+ spin_lock_irqsave(&zone->lock, flags);
+
+ /* Recheck the nr_reserved_highatomic limit under the lock */
+ if (zone->nr_reserved_highatomic >= max_managed)
+ goto out_unlock;
+
+ /* Yoink! */
+ mt = get_pageblock_migratetype(page);
+ if (mt != MIGRATE_HIGHATOMIC &&
+ !is_migrate_isolate(mt) && !is_migrate_cma(mt)) {
+ zone->nr_reserved_highatomic += pageblock_nr_pages;
+ set_pageblock_migratetype(page, MIGRATE_HIGHATOMIC);
+ move_freepages_block(zone, page, MIGRATE_HIGHATOMIC);
+ }
+
+out_unlock:
+ spin_unlock_irqrestore(&zone->lock, flags);
+}
+
+/*
+ * Used when an allocation is about to fail under memory pressure. This
+ * potentially hurts the reliability of high-order allocations when under
+ * intense memory pressure but failed atomic allocations should be easier
+ * to recover from than an OOM.
+ */
+static void unreserve_highatomic_pageblock(const struct alloc_context *ac)
+{
+ struct zonelist *zonelist = ac->zonelist;
+ unsigned long flags;
+ struct zoneref *z;
+ struct zone *zone;
+ struct page *page;
+ int order;
+
+ for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->high_zoneidx,
+ ac->nodemask) {
+ /* Preserve at least one pageblock */
+ if (zone->nr_reserved_highatomic <= pageblock_nr_pages)
+ continue;
+
+ spin_lock_irqsave(&zone->lock, flags);
+ for (order = 0; order < MAX_ORDER; order++) {
+ struct free_area *area = &(zone->free_area[order]);
+
+ if (list_empty(&area->free_list[MIGRATE_HIGHATOMIC]))
+ continue;
+
+ page = list_entry(area->free_list[MIGRATE_HIGHATOMIC].next,
+ struct page, lru);
+
+ /*
+ * It should never happen but changes to locking could
+ * inadvertently allow a per-cpu drain to add pages
+ * to MIGRATE_HIGHATOMIC while unreserving so be safe
+ * and watch for underflows.
+ */
+ zone->nr_reserved_highatomic -= min(pageblock_nr_pages,
+ zone->nr_reserved_highatomic);
+
+ /*
+ * Convert to ac->migratetype and avoid the normal
+ * pageblock stealing heuristics. Minimally, the caller
+ * is doing the work and needs the pages. More
+ * importantly, if the block was always converted to
+ * MIGRATE_UNMOVABLE or another type then the number
+ * of pageblocks that cannot be completely freed
+ * may increase.
+ */
+ set_pageblock_migratetype(page, ac->migratetype);
+ move_freepages_block(zone, page, ac->migratetype);
+ spin_unlock_irqrestore(&zone->lock, flags);
+ return;
+ }
+ spin_unlock_irqrestore(&zone->lock, flags);
+ }
+}
+
/* Remove an element from the buddy allocator from the fallback list */
static inline struct page *
__rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
@@ -1670,7 +1765,7 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
* Call me with the zone->lock already held.
*/
static struct page *__rmqueue(struct zone *zone, unsigned int order,
- int migratetype)
+ int migratetype, gfp_t gfp_flags)
{
struct page *page;
@@ -1700,7 +1795,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
spin_lock(&zone->lock);
for (i = 0; i < count; ++i) {
- struct page *page = __rmqueue(zone, order, migratetype);
+ struct page *page = __rmqueue(zone, order, migratetype, 0);
if (unlikely(page == NULL))
break;
@@ -2072,7 +2167,7 @@ int split_free_page(struct page *page)
static inline
struct page *buffered_rmqueue(struct zone *preferred_zone,
struct zone *zone, unsigned int order,
- gfp_t gfp_flags, int migratetype)
+ gfp_t gfp_flags, int alloc_flags, int migratetype)
{
unsigned long flags;
struct page *page;
@@ -2115,7 +2210,15 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
WARN_ON_ONCE(order > 1);
}
spin_lock_irqsave(&zone->lock, flags);
- page = __rmqueue(zone, order, migratetype);
+
+ page = NULL;
+ if (alloc_flags & ALLOC_HARDER) {
+ page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
+ if (page)
+ trace_mm_page_alloc_zone_locked(page, order, migratetype);
+ }
+ if (!page)
+ page = __rmqueue(zone, order, migratetype, gfp_flags);
spin_unlock(&zone->lock);
if (!page)
goto failed;
@@ -2226,15 +2329,24 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order,
unsigned long mark, int classzone_idx, int alloc_flags,
long free_pages)
{
- /* free_pages may go negative - that's OK */
long min = mark;
int o;
long free_cma = 0;
+ /* free_pages may go negative - that's OK */
free_pages -= (1 << order) - 1;
+
if (alloc_flags & ALLOC_HIGH)
min -= min / 2;
- if (alloc_flags & ALLOC_HARDER)
+
+ /*
+ * If the caller does not have rights to ALLOC_HARDER then subtract
+ * the high-atomic reserves. This will over-estimate the size of the
+ * atomic reserve but it avoids a search.
+ */
+ if (likely(!(alloc_flags & ALLOC_HARDER)))
+ free_pages -= z->nr_reserved_highatomic;
+ else
min -= min / 4;
#ifdef CONFIG_CMA
@@ -2419,10 +2531,18 @@ zonelist_scan:
try_this_zone:
page = buffered_rmqueue(ac->preferred_zone, zone, order,
- gfp_mask, ac->migratetype);
+ gfp_mask, alloc_flags, ac->migratetype);
if (page) {
if (prep_new_page(page, order, gfp_mask, alloc_flags))
goto try_this_zone;
+
+ /*
+ * If this is a high-order atomic allocation then check
+ * if the pageblock should be reserved for the future
+ */
+ if (unlikely(order && (alloc_flags & ALLOC_HARDER)))
+ reserve_highatomic_pageblock(page, zone, order);
+
return page;
}
}
@@ -2695,9 +2815,11 @@ retry:
/*
* If an allocation failed after direct reclaim, it could be because
- * pages are pinned on the per-cpu lists. Drain them and try again
+ * pages are pinned on the per-cpu lists or in high alloc reserves.
+ * Shrink them and try again
*/
if (!page && !drained) {
+ unreserve_highatomic_pageblock(ac);
drain_all_pages(NULL);
drained = true;
goto retry;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 5b289dcdcccf..879a2be23325 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -923,6 +923,7 @@ static char * const migratetype_names[MIGRATE_TYPES] = {
"Unmovable",
"Reclaimable",
"Movable",
+ "HighAtomic",
#ifdef CONFIG_CMA
"CMA",
#endif