author Paul Mackerras <> 2006-06-15 10:45:18 +1000
committer Paul Mackerras <> 2006-06-15 10:45:18 +1000
powerpc: Use 64k pages without needing cache-inhibited large pages
Some POWER5+ machines can do 64k hardware pages for normal memory but not for cache-inhibited pages. This patch lets us use 64k hardware pages for most user processes on such machines (assuming the kernel has been configured with CONFIG_PPC_64K_PAGES=y). User processes start out using 64k pages and get switched to 4k pages if they use any non-cacheable mappings.

With this, we use 64k pages for the vmalloc region and 4k pages for the imalloc region. If anything creates a non-cacheable mapping in the vmalloc region, the vmalloc region will get switched to 4k pages. I don't know of any driver other than the DRM that would do this, though, and these machines don't have AGP.

When a region gets switched from 64k pages to 4k pages, we do not have to clear out all the 64k HPTEs from the hash table immediately. We use the _PAGE_COMBO bit in the Linux PTE to indicate whether the page was hashed in as a 64k page or a set of 4k pages. If hash_page is trying to insert a 4k page for a Linux PTE and it sees that it has already been inserted as a 64k page, it first invalidates the 64k HPTE before inserting the 4k HPTE. The hash invalidation routines also use the _PAGE_COMBO bit, to determine whether to look for a 64k HPTE or a set of 4k HPTEs to remove. With those two changes, we can tolerate a mix of 4k and 64k HPTEs in the hash table, and they will all get removed when the address space is torn down.

Signed-off-by: Paul Mackerras <>
@@ -369,6 +369,7 @@ _GLOBAL(__hash_page_4K)
rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */
or r30,r30,r31
+ oris r30,r30,_PAGE_COMBO@h
/* Write the linux PTE atomically (setting busy) */
stdcx. r30,0,r6
bne- 1b
andi. r0,r31,_PAGE_HASHPTE
li r26,0 /* Default hidx */
beq htab_insert_pte
+ /*
+ * Check if the pte was already inserted into the hash table
+ * as a 64k HW page, and invalidate the 64k HPTE if so.
+ */
+ andis. r0,r31,_PAGE_COMBO@h
+ beq htab_inval_old_hpte
ld r6,STK_PARM(r6)(r1)
ori r26,r6,0x8000 /* Load the hidx mask */
ld r26,0(r26)
@@ -498,6 +507,19 @@ _GLOBAL(htab_call_hpte_remove)
/* Try all again */
b htab_insert_pte
+ /*
+ * Call out to C code to invalidate a 64k HW HPTE that is
+ * useless now that the segment has been switched to 4k pages.
+ */
+ mr r3,r29 /* virtual addr */
+ mr r4,r31 /* PTE.pte */
+ li r5,0 /* PTE.hidx */
+ li r6,MMU_PAGE_64K /* psize */
+ ld r7,STK_PARM(r8)(r1) /* local */
+ bl .flush_hash_page
+ b htab_insert_pte
li r3,0
b htab_bail
@@ -638,6 +660,12 @@ _GLOBAL(__hash_page_64K)
* is changing this PTE anyway and might hash it.
bne- ht64_bail_ok
+ /* Check if PTE has the cache-inhibit bit set */
+ andi. r0,r31,_PAGE_NO_CACHE
+ /* If so, bail out and refault as a 4k page */
+ bne- ht64_bail_ok
/* Prepare new PTE value (turn access RW into DIRTY, then