From 925b7673cce39116ce61e7a06683a4a0dad1e72a Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 12 Jan 2012 17:18:15 -0800 Subject: mm: make per-memcg LRU lists exclusive Now that all code that operated on global per-zone LRU lists is converted to operate on per-memory cgroup LRU lists instead, there is no reason to keep the double-LRU scheme around any longer. The pc->lru member is removed and page->lru is linked directly to the per-memory cgroup LRU lists, which removes two pointers from a descriptor that exists for every page frame in the system. Signed-off-by: Johannes Weiner Signed-off-by: Hugh Dickins Signed-off-by: Ying Han Reviewed-by: KAMEZAWA Hiroyuki Reviewed-by: Michal Hocko Reviewed-by: Kirill A. Shutemov Cc: Daisuke Nishimura Cc: Balbir Singh Cc: Greg Thelen Cc: Michel Lespinasse Cc: Rik van Riel Cc: Minchan Kim Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_cgroup.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/page_cgroup.h') diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index 961ecc7d30bc..5bae7535c202 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -31,7 +31,6 @@ enum { struct page_cgroup { unsigned long flags; struct mem_cgroup *mem_cgroup; - struct list_head lru; /* per cgroup LRU list */ }; void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat); -- cgit v1.2.3 From 6b208e3f6e35aa76d254c395bdcd984b17c6b626 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 12 Jan 2012 17:18:18 -0800 Subject: mm: memcg: remove unused node/section info from pc->flags To find the page corresponding to a certain page_cgroup, the pc->flags encoded the node or section ID with the base array to compare the pc pointer to. Now that the per-memory cgroup LRU lists link page descriptors directly, there is no longer any code that knows the struct page_cgroup of a PFN but not the struct page. [hughd@google.com: remove unused node/section info from pc->flags fix] Signed-off-by: Johannes Weiner Reviewed-by: KAMEZAWA Hiroyuki Reviewed-by: Michal Hocko Reviewed-by: Kirill A. Shutemov Cc: KAMEZAWA Hiroyuki Cc: Michal Hocko Cc: "Kirill A. Shutemov" Cc: Daisuke Nishimura Cc: Balbir Singh Cc: Ying Han Cc: Greg Thelen Cc: Michel Lespinasse Cc: Rik van Riel Cc: Minchan Kim Cc: Christoph Hellwig Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_cgroup.h | 33 ------------------------- mm/page_cgroup.c | 59 ++++++--------------------------------------- 2 files changed, 7 insertions(+), 85 deletions(-) (limited to 'include/linux/page_cgroup.h') diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index 5bae7535c202..aaa60da8783c 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -121,39 +121,6 @@ static inline void move_unlock_page_cgroup(struct page_cgroup *pc, local_irq_restore(*flags); } -#ifdef CONFIG_SPARSEMEM -#define PCG_ARRAYID_WIDTH SECTIONS_SHIFT -#else -#define PCG_ARRAYID_WIDTH NODES_SHIFT -#endif - -#if (PCG_ARRAYID_WIDTH > BITS_PER_LONG - NR_PCG_FLAGS) -#error Not enough space left in pc->flags to store page_cgroup array IDs -#endif - -/* pc->flags: ARRAY-ID | FLAGS */ - -#define PCG_ARRAYID_MASK ((1UL << PCG_ARRAYID_WIDTH) - 1) - -#define PCG_ARRAYID_OFFSET (BITS_PER_LONG - PCG_ARRAYID_WIDTH) -/* - * Zero the shift count for non-existent fields, to prevent compiler - * warnings and ensure references are optimized away. - */ -#define PCG_ARRAYID_SHIFT (PCG_ARRAYID_OFFSET * (PCG_ARRAYID_WIDTH != 0)) - -static inline void set_page_cgroup_array_id(struct page_cgroup *pc, - unsigned long id) -{ - pc->flags &= ~(PCG_ARRAYID_MASK << PCG_ARRAYID_SHIFT); - pc->flags |= (id & PCG_ARRAYID_MASK) << PCG_ARRAYID_SHIFT; -} - -static inline unsigned long page_cgroup_array_id(struct page_cgroup *pc) -{ - return (pc->flags >> PCG_ARRAYID_SHIFT) & PCG_ARRAYID_MASK; -} - #else /* CONFIG_CGROUP_MEM_RES_CTLR */ struct page_cgroup; diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index f59405a8d752..f0559e049e00 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -11,12 +11,6 @@ #include #include -static void __meminit init_page_cgroup(struct page_cgroup *pc, unsigned long id) -{ - pc->flags = 0; - set_page_cgroup_array_id(pc, id); - pc->mem_cgroup = NULL; -} static unsigned long total_usage; #if !defined(CONFIG_SPARSEMEM) @@ -41,28 +35,13 @@ struct page_cgroup *lookup_page_cgroup(struct page *page) return base + offset; } -struct page *lookup_cgroup_page(struct page_cgroup *pc) -{ - unsigned long pfn; - struct page *page; - pg_data_t *pgdat; - - pgdat = NODE_DATA(page_cgroup_array_id(pc)); - pfn = pc - pgdat->node_page_cgroup + pgdat->node_start_pfn; - page = pfn_to_page(pfn); - VM_BUG_ON(pc != lookup_page_cgroup(page)); - return page; -} - static int __init alloc_node_page_cgroup(int nid) { - struct page_cgroup *base, *pc; + struct page_cgroup *base; unsigned long table_size; - unsigned long start_pfn, nr_pages, index; + unsigned long nr_pages; - start_pfn = NODE_DATA(nid)->node_start_pfn; nr_pages = NODE_DATA(nid)->node_spanned_pages; - if (!nr_pages) return 0; @@ -72,10 +51,6 @@ static int __init alloc_node_page_cgroup(int nid) table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); if (!base) return -ENOMEM; - for (index = 0; index < nr_pages; index++) { - pc = base + index; - init_page_cgroup(pc, nid); - } NODE_DATA(nid)->node_page_cgroup = base; total_usage += table_size; return 0; @@ -116,23 +91,10 @@ struct page_cgroup *lookup_page_cgroup(struct page *page) return section->page_cgroup + pfn; } -struct page *lookup_cgroup_page(struct page_cgroup *pc) -{ - struct mem_section *section; - struct page *page; - unsigned long nr; - - nr = page_cgroup_array_id(pc); - section = __nr_to_section(nr); - page = pfn_to_page(pc - section->page_cgroup); - VM_BUG_ON(pc != lookup_page_cgroup(page)); - return page; -} - static void *__meminit alloc_page_cgroup(size_t size, int nid) { + gfp_t flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN; void *addr = NULL; - gfp_t flags = GFP_KERNEL | __GFP_NOWARN; addr = alloc_pages_exact_nid(nid, size, flags); if (addr) { @@ -141,9 +103,9 @@ static void *__meminit alloc_page_cgroup(size_t size, int nid) } if (node_state(nid, N_HIGH_MEMORY)) - addr = vmalloc_node(size, nid); + addr = vzalloc_node(size, nid); else - addr = vmalloc(size); + addr = vzalloc(size); return addr; } @@ -166,14 +128,11 @@ static void free_page_cgroup(void *addr) static int __meminit init_section_page_cgroup(unsigned long pfn, int nid) { - struct page_cgroup *base, *pc; struct mem_section *section; + struct page_cgroup *base; unsigned long table_size; - unsigned long nr; - int index; - nr = pfn_to_section_nr(pfn); - section = __nr_to_section(nr); + section = __pfn_to_section(pfn); if (section->page_cgroup) return 0; @@ -193,10 +152,6 @@ static int __meminit init_section_page_cgroup(unsigned long pfn, int nid) return -ENOMEM; } - for (index = 0; index < PAGES_PER_SECTION; index++) { - pc = base + index; - init_page_cgroup(pc, nr); - } /* * The passed "pfn" may not be aligned to SECTION. For the calculation * we need to apply a mask. -- cgit v1.2.3 From 9fb4b7cc0724f178d4b24a2a566ea1e7cb120b82 Mon Sep 17 00:00:00 2001 From: Bob Liu Date: Thu, 12 Jan 2012 17:18:48 -0800 Subject: page_cgroup: add helper function to get swap_cgroup There are multiple places which need to get the swap_cgroup address, so add a helper function: static struct swap_cgroup *swap_cgroup_getsc(swp_entry_t ent, struct swap_cgroup_ctrl **ctrl); to simplify the code. Signed-off-by: Bob Liu Acked-by: Michal Hocko Acked-by: KAMEZAWA Hiroyuki Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_cgroup.h | 4 ++-- mm/memcontrol.c | 4 ++-- mm/page_cgroup.c | 56 ++++++++++++++++----------------------------- 3 files changed, 24 insertions(+), 40 deletions(-) (limited to 'include/linux/page_cgroup.h') diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index aaa60da8783c..1153095ee457 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -149,7 +149,7 @@ static inline void __init page_cgroup_init_flatmem(void) extern unsigned short swap_cgroup_cmpxchg(swp_entry_t ent, unsigned short old, unsigned short new); extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id); -extern unsigned short lookup_swap_cgroup(swp_entry_t ent); +extern unsigned short lookup_swap_cgroup_id(swp_entry_t ent); extern int swap_cgroup_swapon(int type, unsigned long max_pages); extern void swap_cgroup_swapoff(int type); #else @@ -161,7 +161,7 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id) } static inline -unsigned short lookup_swap_cgroup(swp_entry_t ent) +unsigned short lookup_swap_cgroup_id(swp_entry_t ent) { return 0; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 71a9774e6ead..4c53e971749e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2474,7 +2474,7 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) memcg = NULL; } else if (PageSwapCache(page)) { ent.val = page_private(page); - id = lookup_swap_cgroup(ent); + id = lookup_swap_cgroup_id(ent); rcu_read_lock(); memcg = mem_cgroup_lookup(id); if (memcg && !css_tryget(&memcg->css)) @@ -5264,7 +5264,7 @@ static int is_target_pte_for_mc(struct vm_area_struct *vma, } /* There is a swap entry and a page doesn't exist or isn't charged */ if (ent.val && !ret && - css_id(&mc.from->css) == lookup_swap_cgroup(ent)) { + css_id(&mc.from->css) == lookup_swap_cgroup_id(ent)) { ret = MC_TARGET_SWAP; if (target) target->ent = ent; diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index e910524e5a08..b99d19edf89b 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -334,7 +334,6 @@ struct swap_cgroup { unsigned short id; }; #define SC_PER_PAGE (PAGE_SIZE/sizeof(struct swap_cgroup)) -#define SC_POS_MASK (SC_PER_PAGE - 1) /* * SwapCgroup implements "lookup" and "exchange" operations. @@ -376,6 +375,21 @@ not_enough_page: return -ENOMEM; } +static struct swap_cgroup *lookup_swap_cgroup(swp_entry_t ent, + struct swap_cgroup_ctrl **ctrlp) +{ + pgoff_t offset = swp_offset(ent); + struct swap_cgroup_ctrl *ctrl; + struct page *mappage; + + ctrl = &swap_cgroup_ctrl[swp_type(ent)]; + if (ctrlp) + *ctrlp = ctrl; + + mappage = ctrl->map[offset / SC_PER_PAGE]; + return page_address(mappage) + offset % SC_PER_PAGE; +} + /** * swap_cgroup_cmpxchg - cmpxchg mem_cgroup's id for this swp_entry. * @end: swap entry to be cmpxchged @@ -388,21 +402,13 @@ not_enough_page: unsigned short swap_cgroup_cmpxchg(swp_entry_t ent, unsigned short old, unsigned short new) { - int type = swp_type(ent); - unsigned long offset = swp_offset(ent); - unsigned long idx = offset / SC_PER_PAGE; - unsigned long pos = offset & SC_POS_MASK; struct swap_cgroup_ctrl *ctrl; - struct page *mappage; struct swap_cgroup *sc; unsigned long flags; unsigned short retval; - ctrl = &swap_cgroup_ctrl[type]; + sc = lookup_swap_cgroup(ent, &ctrl); - mappage = ctrl->map[idx]; - sc = page_address(mappage); - sc += pos; spin_lock_irqsave(&ctrl->lock, flags); retval = sc->id; if (retval == old) @@ -423,21 +429,13 @@ unsigned short swap_cgroup_cmpxchg(swp_entry_t ent, */ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id) { - int type = swp_type(ent); - unsigned long offset = swp_offset(ent); - unsigned long idx = offset / SC_PER_PAGE; - unsigned long pos = offset & SC_POS_MASK; struct swap_cgroup_ctrl *ctrl; - struct page *mappage; struct swap_cgroup *sc; unsigned short old; unsigned long flags; - ctrl = &swap_cgroup_ctrl[type]; + sc = lookup_swap_cgroup(ent, &ctrl); - mappage = ctrl->map[idx]; - sc = page_address(mappage); - sc += pos; spin_lock_irqsave(&ctrl->lock, flags); old = sc->id; sc->id = id; @@ -447,28 +445,14 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id) } /** - * lookup_swap_cgroup - lookup mem_cgroup tied to swap entry + * lookup_swap_cgroup_id - lookup mem_cgroup id tied to swap entry * @ent: swap entry to be looked up. * * Returns CSS ID of mem_cgroup at success. 0 at failure. (0 is invalid ID) */ -unsigned short lookup_swap_cgroup(swp_entry_t ent) +unsigned short lookup_swap_cgroup_id(swp_entry_t ent) { - int type = swp_type(ent); - unsigned long offset = swp_offset(ent); - unsigned long idx = offset / SC_PER_PAGE; - unsigned long pos = offset & SC_POS_MASK; - struct swap_cgroup_ctrl *ctrl; - struct page *mappage; - struct swap_cgroup *sc; - unsigned short ret; - - ctrl = &swap_cgroup_ctrl[type]; - mappage = ctrl->map[idx]; - sc = page_address(mappage); - sc += pos; - ret = sc->id; - return ret; + return lookup_swap_cgroup(ent, NULL)->id; } int swap_cgroup_swapon(int type, unsigned long max_pages) -- cgit v1.2.3 From 38c5d72f3ebe5ddd57d2f08dc035070fc6c9a287 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Thu, 12 Jan 2012 17:19:01 -0800 Subject: memcg: simplify LRU handling by new rule Now, at LRU handling, memory cgroup needs to do complicated works to see valid pc->mem_cgroup, which may be overwritten. This patch is for relaxing the protocol. This patch guarantees - when pc->mem_cgroup is overwritten, page must not be on LRU. By this, LRU routine can believe pc->mem_cgroup and don't need to check bits on pc->flags. This new rule may adds small overheads to swapin. But in most case, lru handling gets faster. After this patch, PCG_ACCT_LRU bit is obsolete and removed. [akpm@linux-foundation.org: remove unneeded VM_BUG_ON(), restore hannes's christmas tree] [akpm@linux-foundation.org: clean up code comment] [hughd@google.com: fix NULL mem_cgroup_try_charge] Signed-off-by: KAMEZAWA Hiroyuki Cc: Miklos Szeredi Acked-by: Michal Hocko Acked-by: Johannes Weiner Cc: Ying Han Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_cgroup.h | 8 --- mm/memcontrol.c | 123 +++++++++++++++++++------------------------- 2 files changed, 54 insertions(+), 77 deletions(-) (limited to 'include/linux/page_cgroup.h') diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index 1153095ee457..a2d11771c84b 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -10,8 +10,6 @@ enum { /* flags for mem_cgroup and file and I/O status */ PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */ PCG_FILE_MAPPED, /* page is accounted as "mapped" */ - /* No lock in page_cgroup */ - PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */ __NR_PCG_FLAGS, }; @@ -75,12 +73,6 @@ TESTPCGFLAG(Used, USED) CLEARPCGFLAG(Used, USED) SETPCGFLAG(Used, USED) -SETPCGFLAG(AcctLRU, ACCT_LRU) -CLEARPCGFLAG(AcctLRU, ACCT_LRU) -TESTPCGFLAG(AcctLRU, ACCT_LRU) -TESTCLEARPCGFLAG(AcctLRU, ACCT_LRU) - - SETPCGFLAG(FileMapped, FILE_MAPPED) CLEARPCGFLAG(FileMapped, FILE_MAPPED) TESTPCGFLAG(FileMapped, FILE_MAPPED) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c74102d6eb5a..ff051ee8fb4b 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1040,30 +1040,7 @@ struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page, return &zone->lruvec; pc = lookup_page_cgroup(page); - VM_BUG_ON(PageCgroupAcctLRU(pc)); - /* - * putback: charge: - * SetPageLRU SetPageCgroupUsed - * smp_mb smp_mb - * PageCgroupUsed && add to memcg LRU PageLRU && add to memcg LRU - * - * Ensure that one of the two sides adds the page to the memcg - * LRU during a race. - */ - smp_mb(); - /* - * If the page is uncharged, it may be freed soon, but it - * could also be swap cache (readahead, swapoff) that needs to - * be reclaimable in the future. root_mem_cgroup will babysit - * it for the time being. - */ - if (PageCgroupUsed(pc)) { - /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */ - smp_rmb(); - memcg = pc->mem_cgroup; - SetPageCgroupAcctLRU(pc); - } else - memcg = root_mem_cgroup; + memcg = pc->mem_cgroup; mz = page_cgroup_zoneinfo(memcg, page); /* compound_order() is stabilized through lru_lock */ MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page); @@ -1090,18 +1067,8 @@ void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru) return; pc = lookup_page_cgroup(page); - /* - * root_mem_cgroup babysits uncharged LRU pages, but - * PageCgroupUsed is cleared when the page is about to get - * freed. PageCgroupAcctLRU remembers whether the - * LRU-accounting happened against pc->mem_cgroup or - * root_mem_cgroup. - */ - if (TestClearPageCgroupAcctLRU(pc)) { - VM_BUG_ON(!pc->mem_cgroup); - memcg = pc->mem_cgroup; - } else - memcg = root_mem_cgroup; + memcg = pc->mem_cgroup; + VM_BUG_ON(!memcg); mz = page_cgroup_zoneinfo(memcg, page); /* huge page split is done under lru_lock. so, we have no races. */ MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page); @@ -2217,8 +2184,25 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, } /* - * Unlike exported interface, "oom" parameter is added. if oom==true, - * oom-killer can be invoked. + * __mem_cgroup_try_charge() does + * 1. detect memcg to be charged against from passed *mm and *ptr, + * 2. update res_counter + * 3. call memory reclaim if necessary. + * + * In some special case, if the task is fatal, fatal_signal_pending() or + * has TIF_MEMDIE, this function returns -EINTR while writing root_mem_cgroup + * to *ptr. There are two reasons for this. 1: fatal threads should quit as soon + * as possible without any hazards. 2: all pages should have a valid + * pc->mem_cgroup. If mm is NULL and the caller doesn't pass a valid memcg + * pointer, that is treated as a charge to root_mem_cgroup. + * + * So __mem_cgroup_try_charge() will return + * 0 ... on success, filling *ptr with a valid memcg pointer. + * -ENOMEM ... charge failure because of resource limits. + * -EINTR ... if thread is fatal. *ptr is filled with root_mem_cgroup. + * + * Unlike the exported interface, an "oom" parameter is added. if oom==true, + * the oom-killer can be invoked. */ static int __mem_cgroup_try_charge(struct mm_struct *mm, gfp_t gfp_mask, @@ -2247,7 +2231,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, * set, if so charge the init_mm (happens for pagecache usage). */ if (!*ptr && !mm) - goto bypass; + *ptr = root_mem_cgroup; again: if (*ptr) { /* css should be a valid one */ memcg = *ptr; @@ -2273,7 +2257,9 @@ again: * task-struct. So, mm->owner can be NULL. */ memcg = mem_cgroup_from_task(p); - if (!memcg || mem_cgroup_is_root(memcg)) { + if (!memcg) + memcg = root_mem_cgroup; + if (mem_cgroup_is_root(memcg)) { rcu_read_unlock(); goto done; } @@ -2348,8 +2334,8 @@ nomem: *ptr = NULL; return -ENOMEM; bypass: - *ptr = NULL; - return 0; + *ptr = root_mem_cgroup; + return -EINTR; } /* @@ -2457,6 +2443,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), nr_pages); unlock_page_cgroup(pc); + WARN_ON_ONCE(PageLRU(page)); /* * "charge_statistics" updated event counter. Then, check it. * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree. @@ -2468,7 +2455,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) |\ - (1 << PCG_ACCT_LRU) | (1 << PCG_MIGRATION)) + (1 << PCG_MIGRATION)) /* * Because tail pages are not marked as "used", set it. We're under * zone->lru_lock, 'splitting on pmd' and compound_lock. @@ -2478,7 +2465,9 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, void mem_cgroup_split_huge_fixup(struct page *head) { struct page_cgroup *head_pc = lookup_page_cgroup(head); + struct mem_cgroup_per_zone *mz; struct page_cgroup *pc; + enum lru_list lru; int i; if (mem_cgroup_disabled()) @@ -2487,23 +2476,15 @@ void mem_cgroup_split_huge_fixup(struct page *head) pc = head_pc + i; pc->mem_cgroup = head_pc->mem_cgroup; smp_wmb();/* see __commit_charge() */ - /* - * LRU flags cannot be copied because we need to add tail - * page to LRU by generic call and our hooks will be called. - */ pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT; } - - if (PageCgroupAcctLRU(head_pc)) { - enum lru_list lru; - struct mem_cgroup_per_zone *mz; - /* - * We hold lru_lock, then, reduce counter directly. - */ - lru = page_lru(head); - mz = page_cgroup_zoneinfo(head_pc->mem_cgroup, head); - MEM_CGROUP_ZSTAT(mz, lru) -= HPAGE_PMD_NR - 1; - } + /* + * Tail pages will be added to LRU. + * We hold lru_lock,then,reduce counter directly. + */ + lru = page_lru(head); + mz = page_cgroup_zoneinfo(head_pc->mem_cgroup, head); + MEM_CGROUP_ZSTAT(mz, lru) -= HPAGE_PMD_NR - 1; } #endif @@ -2620,7 +2601,7 @@ static int mem_cgroup_move_parent(struct page *page, parent = mem_cgroup_from_cont(pcg); ret = __mem_cgroup_try_charge(NULL, gfp_mask, nr_pages, &parent, false); - if (ret || !parent) + if (ret) goto put_back; if (nr_pages > 1) @@ -2667,9 +2648,8 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, pc = lookup_page_cgroup(page); ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &memcg, oom); - if (ret || !memcg) + if (ret == -ENOMEM) return ret; - __mem_cgroup_commit_charge(memcg, page, nr_pages, pc, ctype); return 0; } @@ -2736,10 +2716,9 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, if (!page_is_file_cache(page)) type = MEM_CGROUP_CHARGE_TYPE_SHMEM; - if (!PageSwapCache(page)) { + if (!PageSwapCache(page)) ret = mem_cgroup_charge_common(page, mm, gfp_mask, type); - WARN_ON_ONCE(PageLRU(page)); - } else { /* page is swapcache/shmem */ + else { /* page is swapcache/shmem */ ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &memcg); if (!ret) __mem_cgroup_commit_charge_swapin(page, memcg, type); @@ -2781,11 +2760,16 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm, *memcgp = memcg; ret = __mem_cgroup_try_charge(NULL, mask, 1, memcgp, true); css_put(&memcg->css); + if (ret == -EINTR) + ret = 0; return ret; charge_cur_mm: if (unlikely(!mm)) mm = &init_mm; - return __mem_cgroup_try_charge(mm, mask, 1, memcgp, true); + ret = __mem_cgroup_try_charge(mm, mask, 1, memcgp, true); + if (ret == -EINTR) + ret = 0; + return ret; } static void @@ -3245,7 +3229,7 @@ int mem_cgroup_prepare_migration(struct page *page, *memcgp = memcg; ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, memcgp, false); css_put(&memcg->css);/* drop extra refcnt */ - if (ret || *memcgp == NULL) { + if (ret) { if (PageAnon(page)) { lock_page_cgroup(pc); ClearPageCgroupMigration(pc); @@ -3255,6 +3239,7 @@ int mem_cgroup_prepare_migration(struct page *page, */ mem_cgroup_uncharge_page(page); } + /* we'll need to revisit this error code (we have -EINTR) */ return -ENOMEM; } /* @@ -3674,7 +3659,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg, pc = lookup_page_cgroup(page); ret = mem_cgroup_move_parent(page, pc, memcg, GFP_KERNEL); - if (ret == -ENOMEM) + if (ret == -ENOMEM || ret == -EINTR) break; if (ret == -EBUSY || ret == -EINVAL) { @@ -5065,9 +5050,9 @@ one_by_one: } ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, 1, &memcg, false); - if (ret || !memcg) + if (ret) /* mem_cgroup_clear_mc() will do uncharge later */ - return -ENOMEM; + return ret; mc.precharge++; } return ret; -- cgit v1.2.3