From 6168d0da2b479ce25a4647de194045de1bdd1f1d Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Tue, 15 Dec 2020 12:34:29 -0800 Subject: mm/lru: replace pgdat lru_lock with lruvec lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch moves per node lru_lock into lruvec, thus bring a lru_lock for each of memcg per node. So on a large machine, each of memcg don't have to suffer from per node pgdat->lru_lock competition. They could go fast with their self lru_lock. After move memcg charge before lru inserting, page isolation could serialize page's memcg, then per memcg lruvec lock is stable and could replace per node lru lock. In isolate_migratepages_block(), compact_unlock_should_abort and lock_page_lruvec_irqsave are open coded to work with compact_control. Also add a debug func in locking which may give some clues if there are sth out of hands. Daniel Jordan's testing show 62% improvement on modified readtwice case on his 2P * 10 core * 2 HT broadwell box. https://lore.kernel.org/lkml/20200915165807.kpp7uhiw7l3loofu@ca-dmjordan1.us.oracle.com/ Hugh Dickins helped on the patch polish, thanks! [alex.shi@linux.alibaba.com: fix comment typo] Link: https://lkml.kernel.org/r/5b085715-292a-4b43-50b3-d73dc90d1de5@linux.alibaba.com [alex.shi@linux.alibaba.com: use page_memcg()] Link: https://lkml.kernel.org/r/5a4c2b72-7ee8-2478-fc0e-85eb83aafec4@linux.alibaba.com Link: https://lkml.kernel.org/r/1604566549-62481-18-git-send-email-alex.shi@linux.alibaba.com Signed-off-by: Alex Shi Acked-by: Hugh Dickins Acked-by: Johannes Weiner Cc: Rong Chen Cc: Michal Hocko Cc: Vladimir Davydov Cc: Yang Shi Cc: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Daniel Jordan Cc: Alexander Duyck Cc: Andrea Arcangeli Cc: Andrey Ryabinin Cc: "Huang, Ying" Cc: Jann Horn Cc: Joonsoo Kim Cc: Kirill A. Shutemov Cc: Kirill A. Shutemov Cc: Mel Gorman Cc: Michal Hocko Cc: Mika Penttilä Cc: Minchan Kim Cc: Shakeel Butt Cc: Tejun Heo Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Wei Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mlock.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'mm/mlock.c') diff --git a/mm/mlock.c b/mm/mlock.c index 7b0e6334be6f..ab164a675c25 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -262,12 +262,12 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) int nr = pagevec_count(pvec); int delta_munlocked = -nr; struct pagevec pvec_putback; + struct lruvec *lruvec = NULL; int pgrescued = 0; pagevec_init(&pvec_putback); /* Phase 1: page isolation */ - spin_lock_irq(&zone->zone_pgdat->lru_lock); for (i = 0; i < nr; i++) { struct page *page = pvec->pages[i]; @@ -277,10 +277,16 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) * so we can spare the get_page() here. */ if (TestClearPageLRU(page)) { - struct lruvec *lruvec; + struct lruvec *new_lruvec; + + new_lruvec = mem_cgroup_page_lruvec(page, + page_pgdat(page)); + if (new_lruvec != lruvec) { + if (lruvec) + unlock_page_lruvec_irq(lruvec); + lruvec = lock_page_lruvec_irq(page); + } - lruvec = mem_cgroup_page_lruvec(page, - page_pgdat(page)); del_page_from_lru_list(page, lruvec, page_lru(page)); continue; @@ -299,8 +305,12 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) pagevec_add(&pvec_putback, pvec->pages[i]); pvec->pages[i] = NULL; } - __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked); - spin_unlock_irq(&zone->zone_pgdat->lru_lock); + if (lruvec) { + __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked); + unlock_page_lruvec_irq(lruvec); + } else if (delta_munlocked) { + mod_zone_page_state(zone, NR_MLOCK, delta_munlocked); + } /* Now we can release pins of pages that we are not munlocking */ pagevec_release(&pvec_putback); -- cgit v1.2.3