powerpc/powernv/npu: Do a PID GPU TLB flush when invalidating a large address range

commit d0cf9b561ca97d5245bb9e0c4774b7fadd897d67 upstream. The NPU has a limited number of address translation shootdown (ATSD) registers and the GPU has limited bandwidth to process ATSDs. This can result in contention of ATSD registers leading to soft lockups on some threads, particularly when invalidating a large address range in pnv_npu2_mn_invalidate_range(). At some threshold it becomes more efficient to flush the entire GPU TLB for the given MM context (PID) than individually flushing each address in the range. This patch will result in ranges greater than 2MB being converted from 32+ ATSDs into a single ATSD which will flush the TLB for the given PID on each GPU. Fixes: 1ab66d1fbada ("powerpc/powernv: Introduce address translation services for Nvlink2") Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Alistair Popple <alistair@popple.id.au> Acked-by: Balbir Singh <bsingharora@gmail.com> Tested-by: Balbir Singh <bsingharora@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
author: Alistair Popple <alistair@popple.id.au> 2018-04-17 19:11:28 +1000
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2018-05-01 12:58:22 -0700
commit: a32a944a60c854ab44cf873dfc1954828f50b046 (patch)
tree: cfaae4fa6077cda36a33d2cd8e6f7f652e3bc278 /arch
parent: f2acc8dc0644194efba8474d3a4dc13752430894 (diff)
1 files changed, 19 insertions, 4 deletions
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 2cb6cbea4b3b..4043109f4051 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -34,6 +34,13 @@
 #define npu_to_phb(x) container_of(x, struct pnv_phb, npu)
 
 /*
+ * When an address shootdown range exceeds this threshold we invalidate the
+ * entire TLB on the GPU for the given PID rather than each specific address in
+ * the range.
+ */
+#define ATSD_THRESHOLD (2*1024*1024)
+
+/*
  * Other types of TCE cache invalidation are not functional in the
  * hardware.
  */
@@ -621,11 +628,19 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
 	struct npu_context *npu_context = mn_to_npu_context(mn);
 	unsigned long address;
 
-	for (address = start; address < end; address += PAGE_SIZE)
-		mmio_invalidate(npu_context, 1, address, false);
+	if (end - start > ATSD_THRESHOLD) {
+		/*
+		 * Just invalidate the entire PID if the address range is too
+		 * large.
+		 */
+		mmio_invalidate(npu_context, 0, 0, true);
+	} else {
+		for (address = start; address < end; address += PAGE_SIZE)
+			mmio_invalidate(npu_context, 1, address, false);
 
-	/* Do the flush only on the final addess == end */
-	mmio_invalidate(npu_context, 1, address, true);
+		/* Do the flush only on the final addess == end */
+		mmio_invalidate(npu_context, 1, address, true);
+	}
 }
 
 static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
author	Alistair Popple <alistair@popple.id.au>	2018-04-17 19:11:28 +1000
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2018-05-01 12:58:22 -0700
commit	a32a944a60c854ab44cf873dfc1954828f50b046 (patch)
tree	cfaae4fa6077cda36a33d2cd8e6f7f652e3bc278 /arch
parent	f2acc8dc0644194efba8474d3a4dc13752430894 (diff)