From 34bbd704051c9d053d69e90569a3a2365f4c7b50 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 28 Feb 2007 20:13:49 -0800 Subject: [PATCH] adapt page_lock_anon_vma() to PREEMPT_RCU page_lock_anon_vma() uses spin_lock() to block RCU. This doesn't work with PREEMPT_RCU, we have to do rcu_read_lock() explicitely. Otherwise, it is theoretically possible that slab returns anon_vma's memory to the system before we do spin_unlock(&anon_vma->lock). [ Hugh points out that this only matters for PREEMPT_RCU, which isn't merged yet, and may never be. Regardless, this patch is conceptually the right thing to do, even if it doesn't matter at this point. - Linus ] Signed-off-by: Oleg Nesterov Cc: Paul McKenney Cc: Nick Piggin Cc: Christoph Lameter Acked-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/rmap.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'mm/rmap.c') diff --git a/mm/rmap.c b/mm/rmap.c index 669acb22b572..22ed3f71a674 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -183,7 +183,7 @@ void __init anon_vma_init(void) */ static struct anon_vma *page_lock_anon_vma(struct page *page) { - struct anon_vma *anon_vma = NULL; + struct anon_vma *anon_vma; unsigned long anon_mapping; rcu_read_lock(); @@ -195,9 +195,16 @@ static struct anon_vma *page_lock_anon_vma(struct page *page) anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON); spin_lock(&anon_vma->lock); + return anon_vma; out: rcu_read_unlock(); - return anon_vma; + return NULL; +} + +static void page_unlock_anon_vma(struct anon_vma *anon_vma) +{ + spin_unlock(&anon_vma->lock); + rcu_read_unlock(); } /* @@ -333,7 +340,8 @@ static int page_referenced_anon(struct page *page) if (!mapcount) break; } - spin_unlock(&anon_vma->lock); + + page_unlock_anon_vma(anon_vma); return referenced; } @@ -802,7 +810,8 @@ static int try_to_unmap_anon(struct page *page, int migration) if (ret == SWAP_FAIL || !page_mapped(page)) break; } - spin_unlock(&anon_vma->lock); + + page_unlock_anon_vma(anon_vma); return ret; } -- cgit v1.2.3 From 6e1beb3c22496f6e1f1feba8ae74da16f131684c Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 4 Apr 2007 14:37:10 +0200 Subject: [S390] page_mkclean data corruption. The git commit c2fda5fed81eea077363b285b66eafce20dfd45a which added the page_test_and_clear_dirty call to page_mkclean and the git commit 7658cc289288b8ae7dd2c2224549a048431222b3 which fixes the "nasty and subtle race in shared mmap'ed page writeback" problem in clear_page_dirty_for_io cause data corruption on s390. The effect of the two changes is that for every call to clear_page_dirty_for_io a page_test_and_clear_dirty is done. If the per page dirty bit is set set_page_dirty is called. Strangly clear_page_dirty_for_io is called for not-uptodate pages, e.g. over this call-chain: [<000000000007c0f2>] clear_page_dirty_for_io+0x12a/0x130 [<000000000007c494>] generic_writepages+0x258/0x3e0 [<000000000007c692>] do_writepages+0x76/0x7c [<00000000000c7a26>] __writeback_single_inode+0xba/0x3e4 [<00000000000c831a>] sync_sb_inodes+0x23e/0x398 [<00000000000c8802>] writeback_inodes+0x12e/0x140 [<000000000007b9ee>] wb_kupdate+0xd2/0x178 [<000000000007cca2>] pdflush+0x162/0x23c The bad news now is that page_test_and_clear_dirty might claim that a not-uptodate page is dirty since SetPageUptodate which resets the per page dirty bit has not yet been called. The page writeback that follows clobbers the data on disk. The simplest solution to this problem is to move the call to page_test_and_clear_dirty under the "if (page_mapped(page))". If a file backed page is mapped it is uptodate. Signed-off-by: Martin Schwidefsky --- mm/rmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mm/rmap.c') diff --git a/mm/rmap.c b/mm/rmap.c index 22ed3f71a674..b82146e6dfc9 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -498,9 +498,9 @@ int page_mkclean(struct page *page) struct address_space *mapping = page_mapping(page); if (mapping) ret = page_mkclean_file(mapping, page); + if (page_test_and_clear_dirty(page)) + ret = 1; } - if (page_test_and_clear_dirty(page)) - ret = 1; return ret; } -- cgit v1.2.3 From 6c210482ae4a9a5bb9377ad250feaacec3faa3cd Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 27 Apr 2007 16:01:57 +0200 Subject: [S390] split page_test_and_clear_dirty. The page_test_and_clear_dirty primitive really consists of two operations, page_test_dirty and the page_clear_dirty. The combination of the two is not an atomic operation, so it makes more sense to have two separate operations instead of one. In addition to the improved readability of the s390 version of SetPageUptodate, it now avoids the page_test_dirty operation which is an insert-storage-key-extended (iske) instruction which is an expensive operation. Signed-off-by: Martin Schwidefsky --- include/asm-generic/pgtable.h | 11 +++++++++-- include/asm-s390/pgtable.h | 15 ++++++++------- include/linux/page-flags.h | 2 +- mm/rmap.c | 8 ++++++-- 4 files changed, 24 insertions(+), 12 deletions(-) (limited to 'mm/rmap.c') diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 6d7e279b1490..dc8f99ee305f 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -139,8 +139,15 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres #define pte_same(A,B) (pte_val(A) == pte_val(B)) #endif -#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY -#define page_test_and_clear_dirty(page) (0) +#ifndef __HAVE_ARCH_PAGE_TEST_DIRTY +#define page_test_dirty(page) (0) +#endif + +#ifndef __HAVE_ARCH_PAGE_CLEAR_DIRTY +#define page_clear_dirty(page) do { } while (0) +#endif + +#ifndef __HAVE_ARCH_PAGE_TEST_DIRTY #define pte_maybe_dirty(pte) pte_dirty(pte) #else #define pte_maybe_dirty(pte) (1) diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h index 13c16546eff5..8fe8d42e64c3 100644 --- a/include/asm-s390/pgtable.h +++ b/include/asm-s390/pgtable.h @@ -753,14 +753,14 @@ ptep_establish(struct vm_area_struct *vma, * should therefore only be called if it is not mapped in any * address space. */ -static inline int page_test_and_clear_dirty(struct page *page) +static inline int page_test_dirty(struct page *page) { - unsigned long physpage = page_to_phys(page); - int skey = page_get_storage_key(physpage); + return (page_get_storage_key(page_to_phys(page)) & _PAGE_CHANGED) != 0; +} - if (skey & _PAGE_CHANGED) - page_set_storage_key(physpage, skey & ~_PAGE_CHANGED); - return skey & _PAGE_CHANGED; +static inline void page_clear_dirty(struct page *page) +{ + page_set_storage_key(page_to_phys(page), PAGE_DEFAULT_KEY); } /* @@ -953,7 +953,8 @@ extern void memmap_init(unsigned long, int, unsigned long, unsigned long); #define __HAVE_ARCH_PTEP_CLEAR_FLUSH #define __HAVE_ARCH_PTEP_SET_WRPROTECT #define __HAVE_ARCH_PTE_SAME -#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY +#define __HAVE_ARCH_PAGE_TEST_DIRTY +#define __HAVE_ARCH_PAGE_CLEAR_DIRTY #define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG #include diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 9cd0d0eaf523..96326594e55d 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -133,7 +133,7 @@ static inline void SetPageUptodate(struct page *page) { if (!test_and_set_bit(PG_uptodate, &page->flags)) - page_test_and_clear_dirty(page); + page_clear_dirty(page); } #else #define SetPageUptodate(page) set_bit(PG_uptodate, &(page)->flags) diff --git a/mm/rmap.c b/mm/rmap.c index b82146e6dfc9..59da5b734c80 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -498,8 +498,10 @@ int page_mkclean(struct page *page) struct address_space *mapping = page_mapping(page); if (mapping) ret = page_mkclean_file(mapping, page); - if (page_test_and_clear_dirty(page)) + if (page_test_dirty(page)) { + page_clear_dirty(page); ret = 1; + } } return ret; @@ -605,8 +607,10 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma) * Leaving it set also helps swapoff to reinstate ptes * faster for those pages still in swapcache. */ - if (page_test_and_clear_dirty(page)) + if (page_test_dirty(page)) { + page_clear_dirty(page); set_page_dirty(page); + } __dec_zone_page_state(page, PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED); } -- cgit v1.2.3