diff options
Diffstat (limited to 'mm/mempolicy.c')
-rw-r--r-- | mm/mempolicy.c | 48 |
1 files changed, 44 insertions, 4 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 464406e8da91..4ae967bcf954 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1179,8 +1179,8 @@ static struct page *new_page(struct page *page, unsigned long start) } else if (PageTransHuge(page)) { struct page *thp; - thp = alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma, - address, numa_node_id()); + thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address, + HPAGE_PMD_ORDER); if (!thp) return NULL; prep_transhuge_page(thp); @@ -1405,6 +1405,7 @@ static long kernel_mbind(unsigned long start, unsigned long len, int err; unsigned short mode_flags; + start = untagged_addr(start); mode_flags = mode & MPOL_MODE_FLAGS; mode &= ~MPOL_MODE_FLAGS; if (mode >= MPOL_MAX) @@ -1558,6 +1559,8 @@ static int kernel_get_mempolicy(int __user *policy, int uninitialized_var(pval); nodemask_t nodes; + addr = untagged_addr(addr); + if (nmask != NULL && maxnode < nr_node_ids) return -EINVAL; @@ -1729,7 +1732,7 @@ struct mempolicy *__get_vma_policy(struct vm_area_struct *vma, * freeing by another task. It is the caller's responsibility to free the * extra reference for shared policies. */ -struct mempolicy *get_vma_policy(struct vm_area_struct *vma, +static struct mempolicy *get_vma_policy(struct vm_area_struct *vma, unsigned long addr) { struct mempolicy *pol = __get_vma_policy(vma, addr); @@ -2078,6 +2081,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, * @vma: Pointer to VMA or NULL if not available. * @addr: Virtual Address of the allocation. Must be inside the VMA. * @node: Which node to prefer for allocation (modulo policy). + * @hugepage: for hugepages try only the preferred node if possible * * This function allocates a page from the kernel page pool and applies * a NUMA policy associated with the VMA or the current process. @@ -2088,7 +2092,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, */ struct page * alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, - unsigned long addr, int node) + unsigned long addr, int node, bool hugepage) { struct mempolicy *pol; struct page *page; @@ -2106,6 +2110,42 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, goto out; } + if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) { + int hpage_node = node; + + /* + * For hugepage allocation and non-interleave policy which + * allows the current node (or other explicitly preferred + * node) we only try to allocate from the current/preferred + * node and don't fall back to other nodes, as the cost of + * remote accesses would likely offset THP benefits. + * + * If the policy is interleave, or does not allow the current + * node in its nodemask, we allocate the standard way. + */ + if (pol->mode == MPOL_PREFERRED && !(pol->flags & MPOL_F_LOCAL)) + hpage_node = pol->v.preferred_node; + + nmask = policy_nodemask(gfp, pol); + if (!nmask || node_isset(hpage_node, *nmask)) { + mpol_cond_put(pol); + page = __alloc_pages_node(hpage_node, + gfp | __GFP_THISNODE, order); + + /* + * If hugepage allocations are configured to always + * synchronous compact or the vma has been madvised + * to prefer hugepage backing, retry allowing remote + * memory as well. + */ + if (!page && (gfp & __GFP_DIRECT_RECLAIM)) + page = __alloc_pages_node(hpage_node, + gfp | __GFP_NORETRY, order); + + goto out; + } + } + nmask = policy_nodemask(gfp, pol); preferred_nid = policy_node(gfp, pol, node); page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask); |