path: root/mm
diff options
authorPaul Jackson <>2006-12-13 00:34:25 -0800
committerLinus Torvalds <>2006-12-13 09:05:49 -0800
commit02a0e53d8227aff5e62e0433f82c12c1c2805fd6 (patch)
treefe32435308e5f1afe8bd12357bd8c5ff3b4133c7 /mm
parent55935a34a428a1497e3b37982e2782c09c6f914d (diff)
[PATCH] cpuset: rework cpuset_zone_allowed api
Elaborate the API for calling cpuset_zone_allowed(), so that users have to explicitly choose between the two variants: cpuset_zone_allowed_hardwall() and cpuset_zone_allowed_softwall(). Until now, whether or not you got the hardwall flavor depended solely on whether or not you or'd in the __GFP_HARDWALL gfp flag to the gfp_mask argument. If you didn't specify __GFP_HARDWALL, you implicitly got the softwall version. Unfortunately, this meant that users would end up with the softwall version without thinking about it. Since only the softwall version might sleep, this led to bugs with possible sleeping in interrupt context on more than one occasion. The hardwall version requires that the current task's mems_allowed allows the node of the specified zone (or that you're in interrupt or that __GFP_THISNODE is set or that you're on a one cpuset system). The softwall version, depending on the gfp_mask, might allow a node if it was allowed in the nearest enclosing cpuset marked mem_exclusive (which requires taking the cpuset lock 'callback_mutex' to evaluate). This patch removes the cpuset_zone_allowed() call, and forces the caller to explicitly choose between the hardwall and the softwall case. If the caller wants the gfp_mask to determine this choice, they should (1) be sure they can sleep or that __GFP_HARDWALL is set, and (2) invoke the cpuset_zone_allowed_softwall() routine. This adds another 100 or 200 bytes to the kernel text space, due to the few lines of nearly duplicate code at the top of both cpuset_zone_allowed_* routines. It should save a few instructions executed for the calls that turned into calls of cpuset_zone_allowed_hardwall, thanks to not having to set (before the call) then check (within the call) the __GFP_HARDWALL flag. 
For the most critical call, from get_page_from_freelist(), the same instructions are executed as before -- the old cpuset_zone_allowed() routine it used to call is the same code as the cpuset_zone_allowed_softwall() routine that it calls now. Not a perfect win, but seems worth it, to reduce the chance of hitting a "sleeping with irqs off" complaint again. Signed-off-by: Paul Jackson <> Signed-off-by: Andrew Morton <> Signed-off-by: Linus Torvalds <>
Diffstat (limited to 'mm')
5 files changed, 8 insertions, 8 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0ccc7f230252..089092d152ab 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -73,7 +73,7 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
for (z = zonelist->zones; *z; z++) {
nid = zone_to_nid(*z);
- if (cpuset_zone_allowed(*z, GFP_HIGHUSER) &&
+ if (cpuset_zone_allowed_softwall(*z, GFP_HIGHUSER) &&
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 223d9ccb7d64..64cf3c214634 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -177,7 +177,7 @@ static inline int constrained_alloc(struct zonelist *zonelist, gfp_t gfp_mask)
nodemask_t nodes = node_online_map;
for (z = zonelist->zones; *z; z++)
- if (cpuset_zone_allowed(*z, gfp_mask))
+ if (cpuset_zone_allowed_softwall(*z, gfp_mask))
node_clear(zone_to_nid(*z), nodes);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e6b17b2989e0..8c1a116875bc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1162,7 +1162,7 @@ zonelist_scan:
zone->zone_pgdat != zonelist->zones[0]->zone_pgdat))
if ((alloc_flags & ALLOC_CPUSET) &&
- !cpuset_zone_allowed(zone, gfp_mask))
+ !cpuset_zone_allowed_softwall(zone, gfp_mask))
goto try_next_zone;
if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
diff --git a/mm/slab.c b/mm/slab.c
index 9d3550086c93..b856786a3a30 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3262,7 +3262,7 @@ retry:
for (z = zonelist->zones; *z && !obj; z++) {
nid = zone_to_nid(*z);
- if (cpuset_zone_allowed(*z, flags | __GFP_HARDWALL) &&
+ if (cpuset_zone_allowed_hardwall(*z, flags) &&
cache->nodelists[nid] &&
obj = ____cache_alloc_node(cache,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 093f5fe6dd77..e9813b06c7a3 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -984,7 +984,7 @@ static unsigned long shrink_zones(int priority, struct zone **zones,
if (!populated_zone(zone))
- if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
+ if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
note_zone_scanning_priority(zone, priority);
@@ -1034,7 +1034,7 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
for (i = 0; zones[i] != NULL; i++) {
struct zone *zone = zones[i];
- if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
+ if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
lru_pages += zone->nr_active + zone->nr_inactive;
@@ -1089,7 +1089,7 @@ out:
for (i = 0; zones[i] != 0; i++) {
struct zone *zone = zones[i];
- if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
+ if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
zone->prev_priority = priority;
@@ -1354,7 +1354,7 @@ void wakeup_kswapd(struct zone *zone, int order)
if (pgdat->kswapd_max_order < order)
pgdat->kswapd_max_order = order;
- if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
+ if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
if (!waitqueue_active(&pgdat->kswapd_wait))