summaryrefslogtreecommitdiff
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
authorDavid Brown <davidb@codeaurora.org>2011-03-16 22:13:16 -0700
committerDavid Brown <davidb@codeaurora.org>2011-03-16 22:13:16 -0700
commit92c260f755c42337c550d8ac1f8ccd1b32bffb20 (patch)
tree6d04fefc1adeecabfb2b00c201e0db78fa2b5529 /mm/memcontrol.c
parent8e76a80960bf06c245160a484d5a363ca6b520bb (diff)
parent05e34754518b6a90d5c392790c032575fab12d66 (diff)
Merge remote branch 'rmk/for-linus' into for-linus
* rmk/for-linus: (1557 commits) ARM: 6806/1: irq: introduce entry and exit functions for chained handlers ARM: 6781/1: Thumb-2: Work around buggy Thumb-2 short branch relocations in gas ARM: 6747/1: P2V: Thumb2 support ARM: 6798/1: aout-core: zero thread debug registers in a.out core dump ARM: 6796/1: Footbridge: Fix I/O mappings for NOMMU mode ARM: 6784/1: errata: no automatic Store Buffer drain on Cortex-A9 ARM: 6772/1: errata: possible fault MMU translations following an ASID switch ARM: 6776/1: mach-ux500: activate fix for errata 753970 ARM: 6794/1: SPEAr: Append UL to device address macros. ARM: 6793/1: SPEAr: Remove unused *_SIZE macros from spear*.h files ARM: 6792/1: SPEAr: Replace SIZE macro's with SZ_4K macros ARM: 6791/1: SPEAr3xx: Declare device structures after shirq code ARM: 6790/1: SPEAr: Clock Framework: Rename usbd clock and align apb_clk entry ARM: 6789/1: SPEAr3xx: Rename sdio to sdhci ARM: 6788/1: SPEAr: Include mach/hardware.h instead of mach/spear.h ARM: 6787/1: SPEAr: Reorder #includes in .h & .c files. ARM: 6681/1: SPEAr: add debugfs support to clk API ARM: 6703/1: SPEAr: update clk API support ARM: 6679/1: SPEAr: make clk API functions more generic ARM: 6737/1: SPEAr: formalized timer support ... Conflicts: arch/arm/mach-msm/board-msm7x27.c arch/arm/mach-msm/board-msm7x30.c arch/arm/mach-msm/board-qsd8x50.c arch/arm/mach-msm/board-sapphire.c arch/arm/mach-msm/include/mach/memory.h
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--mm/memcontrol.c98
1 files changed, 73 insertions, 25 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index db76ef726293..da53a252b259 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -612,8 +612,10 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
/* pagein of a big page is an event. So, ignore page size */
if (nr_pages > 0)
__this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGIN_COUNT]);
- else
+ else {
__this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGOUT_COUNT]);
+ nr_pages = -nr_pages; /* for event */
+ }
__this_cpu_add(mem->stat->count[MEM_CGROUP_EVENTS], nr_pages);
@@ -1111,6 +1113,23 @@ static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem)
return false;
}
+/**
+ * mem_cgroup_check_margin - check if the memory cgroup allows charging
+ * @mem: memory cgroup to check
+ * @bytes: the number of bytes the caller intends to charge
+ *
+ * Returns a boolean value on whether @mem can be charged @bytes or
+ * whether this would exceed the limit.
+ */
+static bool mem_cgroup_check_margin(struct mem_cgroup *mem, unsigned long bytes)
+{
+ if (!res_counter_check_margin(&mem->res, bytes))
+ return false;
+ if (do_swap_account && !res_counter_check_margin(&mem->memsw, bytes))
+ return false;
+ return true;
+}
+
static unsigned int get_swappiness(struct mem_cgroup *memcg)
{
struct cgroup *cgrp = memcg->css.cgroup;
@@ -1832,27 +1851,39 @@ static int __mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask,
if (likely(!ret))
return CHARGE_OK;
+ res_counter_uncharge(&mem->res, csize);
mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw);
flags |= MEM_CGROUP_RECLAIM_NOSWAP;
} else
mem_over_limit = mem_cgroup_from_res_counter(fail_res, res);
-
- if (csize > PAGE_SIZE) /* change csize and retry */
+ /*
+ * csize can be either a huge page (HPAGE_SIZE), a batch of
+ * regular pages (CHARGE_SIZE), or a single regular page
+ * (PAGE_SIZE).
+ *
+ * Never reclaim on behalf of optional batching, retry with a
+ * single page instead.
+ */
+ if (csize == CHARGE_SIZE)
return CHARGE_RETRY;
if (!(gfp_mask & __GFP_WAIT))
return CHARGE_WOULDBLOCK;
ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
- gfp_mask, flags);
+ gfp_mask, flags);
+ if (mem_cgroup_check_margin(mem_over_limit, csize))
+ return CHARGE_RETRY;
/*
- * try_to_free_mem_cgroup_pages() might not give us a full
- * picture of reclaim. Some pages are reclaimed and might be
- * moved to swap cache or just unmapped from the cgroup.
- * Check the limit again to see if the reclaim reduced the
- * current usage of the cgroup before giving up
+ * Even though the limit is exceeded at this point, reclaim
+ * may have been able to free some pages. Retry the charge
+ * before killing the task.
+ *
+ * Only for regular pages, though: huge pages are rather
+ * unlikely to succeed so close to the limit, and we fall back
+ * to regular pages anyway in case of failure.
*/
- if (ret || mem_cgroup_check_under_limit(mem_over_limit))
+ if (csize == PAGE_SIZE && ret)
return CHARGE_RETRY;
/*
@@ -2144,6 +2175,8 @@ void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail)
struct page_cgroup *tail_pc = lookup_page_cgroup(tail);
unsigned long flags;
+ if (mem_cgroup_disabled())
+ return;
/*
* We have no races with charge/uncharge but will have races with
* page state accounting.
@@ -2233,7 +2266,12 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
{
int ret = -EINVAL;
unsigned long flags;
-
+ /*
+ * The page is isolated from LRU. So, collapse function
+ * will not handle this page. But page splitting can happen.
+ * Do this check under compound_page_lock(). The caller should
+ * hold it.
+ */
if ((charge_size > PAGE_SIZE) && !PageTransHuge(pc->page))
return -EBUSY;
@@ -2265,7 +2303,7 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
struct cgroup *cg = child->css.cgroup;
struct cgroup *pcg = cg->parent;
struct mem_cgroup *parent;
- int charge = PAGE_SIZE;
+ int page_size = PAGE_SIZE;
unsigned long flags;
int ret;
@@ -2278,23 +2316,26 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
goto out;
if (isolate_lru_page(page))
goto put;
- /* The page is isolated from LRU and we have no race with splitting */
- charge = PAGE_SIZE << compound_order(page);
+
+ if (PageTransHuge(page))
+ page_size = HPAGE_SIZE;
parent = mem_cgroup_from_cont(pcg);
- ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false, charge);
+ ret = __mem_cgroup_try_charge(NULL, gfp_mask,
+ &parent, false, page_size);
if (ret || !parent)
goto put_back;
- if (charge > PAGE_SIZE)
+ if (page_size > PAGE_SIZE)
flags = compound_lock_irqsave(page);
- ret = mem_cgroup_move_account(pc, child, parent, true, charge);
+ ret = mem_cgroup_move_account(pc, child, parent, true, page_size);
if (ret)
- mem_cgroup_cancel_charge(parent, charge);
-put_back:
- if (charge > PAGE_SIZE)
+ mem_cgroup_cancel_charge(parent, page_size);
+
+ if (page_size > PAGE_SIZE)
compound_unlock_irqrestore(page, flags);
+put_back:
putback_lru_page(page);
put:
put_page(page);
@@ -2312,13 +2353,19 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask, enum charge_type ctype)
{
struct mem_cgroup *mem = NULL;
+ int page_size = PAGE_SIZE;
struct page_cgroup *pc;
+ bool oom = true;
int ret;
- int page_size = PAGE_SIZE;
if (PageTransHuge(page)) {
page_size <<= compound_order(page);
VM_BUG_ON(!PageTransHuge(page));
+ /*
+ * Never OOM-kill a process for a huge page. The
+ * fault handler will fall back to regular pages.
+ */
+ oom = false;
}
pc = lookup_page_cgroup(page);
@@ -2327,7 +2374,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
return 0;
prefetchw(pc);
- ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true, page_size);
+ ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, oom, page_size);
if (ret || !mem)
return ret;
@@ -5013,9 +5060,9 @@ struct cgroup_subsys mem_cgroup_subsys = {
static int __init enable_swap_account(char *s)
{
/* consider enabled if no parameter or 1 is given */
- if (!s || !strcmp(s, "1"))
+ if (!(*s) || !strcmp(s, "=1"))
really_do_swap_account = 1;
- else if (!strcmp(s, "0"))
+ else if (!strcmp(s, "=0"))
really_do_swap_account = 0;
return 1;
}
@@ -5023,7 +5070,8 @@ __setup("swapaccount", enable_swap_account);
static int __init disable_swap_account(char *s)
{
- enable_swap_account("0");
+ printk_once("noswapaccount is deprecated and will be removed in 2.6.40. Use swapaccount=0 instead\n");
+ enable_swap_account("=0");
return 1;
}
__setup("noswapaccount", disable_swap_account);