From 0e39037b3165567660b0e03f67534da5269a0465 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 23 Nov 2018 13:23:25 +0000
Subject: drm/i915: Cache the error string

Currently, we convert the error state into a string every time we read
from sysfs (and sysfs reads in page size (4KiB) chunks). We do try to
window the string and only capture the portion that is being read, but
that means that we must always convert up to the window to find the
start. For a very large error state bordering on EXEC_OBJECT_CAPTURE
abuse, this is noticeable as it degrades to O(N^2)!

As we do not have a convenient hook for sysfs open(), and we would like
to keep the lazy conversion into a string, do the conversion of the
whole string on the first read and keep the string until the error state
is freed.

v2: Don't double advance simple_read_from_buffer
v3: Due to extreme pain of lack of vrealloc, use a scatterlist
v4: Keep the forward iterator loosely cached
v5: Stylistic improvements to reduce patch size

Reported-by: Jason Ekstrand <jason@jlekstrand.net>
Testcase: igt/gem_exec_capture/many*
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181123132325.26541-1-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 335 +++++++++++++++++++++-------------
 1 file changed, 206 insertions(+), 129 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_gpu_error.c')

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 8123bf0e4807..a6885a59568b 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -27,11 +27,14 @@
  *
  */
 
-#include <linux/utsname.h>
+#include <linux/ascii85.h>
+#include <linux/nmi.h>
+#include <linux/scatterlist.h>
 #include <linux/stop_machine.h>
+#include <linux/utsname.h>
 #include <linux/zlib.h>
+
 #include <drm/drm_print.h>
-#include <linux/ascii85.h>
 
 #include "i915_gpu_error.h"
 #include "i915_drv.h"
@@ -77,112 +80,110 @@ static const char *purgeable_flag(int purgeable)
 	return purgeable ? " purgeable" : "";
 }
 
-static bool __i915_error_ok(struct drm_i915_error_state_buf *e)
+static void __sg_set_buf(struct scatterlist *sg,
+			 void *addr, unsigned int len, loff_t it)
 {
-
-	if (!e->err && WARN(e->bytes > (e->size - 1), "overflow")) {
-		e->err = -ENOSPC;
-		return false;
-	}
-
-	if (e->bytes == e->size - 1 || e->err)
-		return false;
-
-	return true;
+	sg->page_link = (unsigned long)virt_to_page(addr);
+	sg->offset = offset_in_page(addr);
+	sg->length = len;
+	sg->dma_address = it;
 }
 
-static bool __i915_error_seek(struct drm_i915_error_state_buf *e,
-			      unsigned len)
+static bool __i915_error_grow(struct drm_i915_error_state_buf *e, size_t len)
 {
-	if (e->pos + len <= e->start) {
-		e->pos += len;
+	if (!len)
 		return false;
-	}
 
-	/* First vsnprintf needs to fit in its entirety for memmove */
-	if (len >= e->size) {
-		e->err = -EIO;
-		return false;
-	}
+	if (e->bytes + len + 1 <= e->size)
+		return true;
 
-	return true;
-}
+	if (e->bytes) {
+		__sg_set_buf(e->cur++, e->buf, e->bytes, e->iter);
+		e->iter += e->bytes;
+		e->buf = NULL;
+		e->bytes = 0;
+	}
 
-static void __i915_error_advance(struct drm_i915_error_state_buf *e,
-				 unsigned len)
-{
-	/* If this is first printf in this window, adjust it so that
-	 * start position matches start of the buffer
-	 */
+	if (e->cur == e->end) {
+		struct scatterlist *sgl;
 
-	if (e->pos < e->start) {
-		const size_t off = e->start - e->pos;
+		sgl = (typeof(sgl))__get_free_page(GFP_KERNEL);
+		if (!sgl) {
+			e->err = -ENOMEM;
+			return false;
+		}
 
-		/* Should not happen but be paranoid */
-		if (off > len || e->bytes) {
-			e->err = -EIO;
-			return;
+		if (e->cur) {
+			e->cur->offset = 0;
+			e->cur->length = 0;
+			e->cur->page_link =
+				(unsigned long)sgl | SG_CHAIN;
+		} else {
+			e->sgl = sgl;
 		}
 
-		memmove(e->buf, e->buf + off, len - off);
-		e->bytes = len - off;
-		e->pos = e->start;
-		return;
+		e->cur = sgl;
+		e->end = sgl + SG_MAX_SINGLE_ALLOC - 1;
 	}
 
-	e->bytes += len;
-	e->pos += len;
+	e->size = ALIGN(len + 1, SZ_64K);
+	e->buf = kmalloc(e->size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
+	if (!e->buf) {
+		e->size = PAGE_ALIGN(len + 1);
+		e->buf = kmalloc(e->size, GFP_KERNEL);
+	}
+	if (!e->buf) {
+		e->err = -ENOMEM;
+		return false;
+	}
+
+	return true;
 }
 
 __printf(2, 0)
 static void i915_error_vprintf(struct drm_i915_error_state_buf *e,
-			       const char *f, va_list args)
+			       const char *fmt, va_list args)
 {
-	unsigned len;
+	va_list ap;
+	int len;
 
-	if (!__i915_error_ok(e))
+	if (e->err)
 		return;
 
-	/* Seek the first printf which is hits start position */
-	if (e->pos < e->start) {
-		va_list tmp;
-
-		va_copy(tmp, args);
-		len = vsnprintf(NULL, 0, f, tmp);
-		va_end(tmp);
-
-		if (!__i915_error_seek(e, len))
-			return;
+	va_copy(ap, args);
+	len = vsnprintf(NULL, 0, fmt, ap);
+	va_end(ap);
+	if (len <= 0) {
+		e->err = len;
+		return;
 	}
 
-	len = vsnprintf(e->buf + e->bytes, e->size - e->bytes, f, args);
-	if (len >= e->size - e->bytes)
-		len = e->size - e->bytes - 1;
+	if (!__i915_error_grow(e, len))
+		return;
 
-	__i915_error_advance(e, len);
+	GEM_BUG_ON(e->bytes >= e->size);
+	len = vscnprintf(e->buf + e->bytes, e->size - e->bytes, fmt, args);
+	if (len < 0) {
+		e->err = len;
+		return;
+	}
+	e->bytes += len;
 }
 
-static void i915_error_puts(struct drm_i915_error_state_buf *e,
-			    const char *str)
+static void i915_error_puts(struct drm_i915_error_state_buf *e, const char *str)
 {
 	unsigned len;
 
-	if (!__i915_error_ok(e))
+	if (e->err || !str)
 		return;
 
 	len = strlen(str);
+	if (!__i915_error_grow(e, len))
+		return;
 
-	/* Seek the first printf which is hits start position */
-	if (e->pos < e->start) {
-		if (!__i915_error_seek(e, len))
-			return;
-	}
-
-	if (len >= e->size - e->bytes)
-		len = e->size - e->bytes - 1;
+	GEM_BUG_ON(e->bytes + len > e->size);
 	memcpy(e->buf + e->bytes, str, len);
-
-	__i915_error_advance(e, len);
+	e->bytes += len;
 }
 
 #define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__)
@@ -268,6 +269,8 @@ static int compress_page(struct compress *c,
 
 		if (zlib_deflate(zstream, Z_NO_FLUSH) != Z_OK)
 			return -EIO;
+
+		touch_nmi_watchdog();
 	} while (zstream->avail_in);
 
 	/* Fallback to uncompressed if we increase size? */
@@ -635,21 +638,29 @@ static void err_print_uc(struct drm_i915_error_state_buf *m,
 	print_error_obj(m, NULL, "GuC log buffer", error_uc->guc_log);
 }
 
-int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
-			    const struct i915_gpu_state *error)
+static void err_free_sgl(struct scatterlist *sgl)
 {
-	struct drm_i915_private *dev_priv = m->i915;
-	struct drm_i915_error_object *obj;
-	struct timespec64 ts;
-	int i, j;
+	while (sgl) {
+		struct scatterlist *sg;
 
-	if (!error) {
-		err_printf(m, "No error state collected\n");
-		return 0;
+		for (sg = sgl; !sg_is_chain(sg); sg++) {
+			kfree(sg_virt(sg));
+			if (sg_is_last(sg))
+				break;
+		}
+
+		sg = sg_is_last(sg) ? NULL : sg_chain_ptr(sg);
+		free_page((unsigned long)sgl);
+		sgl = sg;
 	}
+}
 
-	if (IS_ERR(error))
-		return PTR_ERR(error);
+static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
+			       struct i915_gpu_state *error)
+{
+	struct drm_i915_error_object *obj;
+	struct timespec64 ts;
+	int i, j;
 
 	if (*error->error_msg)
 		err_printf(m, "%s\n", error->error_msg);
@@ -683,12 +694,12 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 	err_printf(m, "Reset count: %u\n", error->reset_count);
 	err_printf(m, "Suspend count: %u\n", error->suspend_count);
 	err_printf(m, "Platform: %s\n", intel_platform_name(error->device_info.platform));
-	err_print_pciid(m, error->i915);
+	err_print_pciid(m, m->i915);
 
 	err_printf(m, "IOMMU enabled?: %d\n", error->iommu);
 
-	if (HAS_CSR(dev_priv)) {
-		struct intel_csr *csr = &dev_priv->csr;
+	if (HAS_CSR(m->i915)) {
+		struct intel_csr *csr = &m->i915->csr;
 
 		err_printf(m, "DMC loaded: %s\n",
 			   yesno(csr->dmc_payload != NULL));
@@ -708,22 +719,23 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 	err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake);
 	err_printf(m, "DERRMR: 0x%08x\n", error->derrmr);
 	err_printf(m, "CCID: 0x%08x\n", error->ccid);
-	err_printf(m, "Missed interrupts: 0x%08lx\n", dev_priv->gpu_error.missed_irq_rings);
+	err_printf(m, "Missed interrupts: 0x%08lx\n",
+		   m->i915->gpu_error.missed_irq_rings);
 
 	for (i = 0; i < error->nfence; i++)
 		err_printf(m, "  fence[%d] = %08llx\n", i, error->fence[i]);
 
-	if (INTEL_GEN(dev_priv) >= 6) {
+	if (INTEL_GEN(m->i915) >= 6) {
 		err_printf(m, "ERROR: 0x%08x\n", error->error);
 
-		if (INTEL_GEN(dev_priv) >= 8)
+		if (INTEL_GEN(m->i915) >= 8)
 			err_printf(m, "FAULT_TLB_DATA: 0x%08x 0x%08x\n",
 				   error->fault_data1, error->fault_data0);
 
 		err_printf(m, "DONE_REG: 0x%08x\n", error->done_reg);
 	}
 
-	if (IS_GEN7(dev_priv))
+	if (IS_GEN7(m->i915))
 		err_printf(m, "ERR_INT: 0x%08x\n", error->err_int);
 
 	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
@@ -745,7 +757,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 
 			len += scnprintf(buf + len, sizeof(buf), "%s%s",
 					 first ? "" : ", ",
-					 dev_priv->engine[j]->name);
+					 m->i915->engine[j]->name);
 			first = 0;
 		}
 		scnprintf(buf + len, sizeof(buf), ")");
@@ -763,7 +775,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 
 		obj = ee->batchbuffer;
 		if (obj) {
-			err_puts(m, dev_priv->engine[i]->name);
+			err_puts(m, m->i915->engine[i]->name);
 			if (ee->context.pid)
 				err_printf(m, " (submitted by %s [%d], ctx %d [%d], score %d%s)",
 					   ee->context.comm,
@@ -775,16 +787,16 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 			err_printf(m, " --- gtt_offset = 0x%08x %08x\n",
 				   upper_32_bits(obj->gtt_offset),
 				   lower_32_bits(obj->gtt_offset));
-			print_error_obj(m, dev_priv->engine[i], NULL, obj);
+			print_error_obj(m, m->i915->engine[i], NULL, obj);
 		}
 
 		for (j = 0; j < ee->user_bo_count; j++)
-			print_error_obj(m, dev_priv->engine[i],
+			print_error_obj(m, m->i915->engine[i],
 					"user", ee->user_bo[j]);
 
 		if (ee->num_requests) {
 			err_printf(m, "%s --- %d requests\n",
-				   dev_priv->engine[i]->name,
+				   m->i915->engine[i]->name,
 				   ee->num_requests);
 			for (j = 0; j < ee->num_requests; j++)
 				error_print_request(m, " ",
@@ -794,10 +806,10 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 
 		if (IS_ERR(ee->waiters)) {
 			err_printf(m, "%s --- ? waiters [unable to acquire spinlock]\n",
-				   dev_priv->engine[i]->name);
+				   m->i915->engine[i]->name);
 		} else if (ee->num_waiters) {
 			err_printf(m, "%s --- %d waiters\n",
-				   dev_priv->engine[i]->name,
+				   m->i915->engine[i]->name,
 				   ee->num_waiters);
 			for (j = 0; j < ee->num_waiters; j++) {
 				err_printf(m, " seqno 0x%08x for %s [%d]\n",
@@ -807,22 +819,22 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 			}
 		}
 
-		print_error_obj(m, dev_priv->engine[i],
+		print_error_obj(m, m->i915->engine[i],
 				"ringbuffer", ee->ringbuffer);
 
-		print_error_obj(m, dev_priv->engine[i],
+		print_error_obj(m, m->i915->engine[i],
 				"HW Status", ee->hws_page);
 
-		print_error_obj(m, dev_priv->engine[i],
+		print_error_obj(m, m->i915->engine[i],
 				"HW context", ee->ctx);
 
-		print_error_obj(m, dev_priv->engine[i],
+		print_error_obj(m, m->i915->engine[i],
 				"WA context", ee->wa_ctx);
 
-		print_error_obj(m, dev_priv->engine[i],
+		print_error_obj(m, m->i915->engine[i],
 				"WA batchbuffer", ee->wa_batchbuffer);
 
-		print_error_obj(m, dev_priv->engine[i],
+		print_error_obj(m, m->i915->engine[i],
 				"NULL context", ee->default_state);
 	}
 
@@ -835,43 +847,107 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 	err_print_capabilities(m, &error->device_info, &error->driver_caps);
 	err_print_params(m, &error->params);
 	err_print_uc(m, &error->uc);
+}
+
+static int err_print_to_sgl(struct i915_gpu_state *error)
+{
+	struct drm_i915_error_state_buf m;
+
+	if (IS_ERR(error))
+		return PTR_ERR(error);
+
+	if (READ_ONCE(error->sgl))
+		return 0;
+
+	memset(&m, 0, sizeof(m));
+	m.i915 = error->i915;
+
+	__err_print_to_sgl(&m, error);
+
+	if (m.buf) {
+		__sg_set_buf(m.cur++, m.buf, m.bytes, m.iter);
+		m.bytes = 0;
+		m.buf = NULL;
+	}
+	if (m.cur) {
+		GEM_BUG_ON(m.end < m.cur);
+		sg_mark_end(m.cur - 1);
+	}
+	GEM_BUG_ON(m.sgl && !m.cur);
+
+	if (m.err) {
+		err_free_sgl(m.sgl);
+		return m.err;
+	}
 
-	if (m->bytes == 0 && m->err)
-		return m->err;
+	if (cmpxchg(&error->sgl, NULL, m.sgl))
+		err_free_sgl(m.sgl);
 
 	return 0;
 }
 
-int i915_error_state_buf_init(struct drm_i915_error_state_buf *ebuf,
-			      struct drm_i915_private *i915,
-			      size_t count, loff_t pos)
+ssize_t i915_gpu_state_copy_to_buffer(struct i915_gpu_state *error,
+				      char *buf, loff_t off, size_t rem)
 {
-	memset(ebuf, 0, sizeof(*ebuf));
-	ebuf->i915 = i915;
+	struct scatterlist *sg;
+	size_t count;
+	loff_t pos;
+	int err;
 
-	/* We need to have enough room to store any i915_error_state printf
-	 * so that we can move it to start position.
-	 */
-	ebuf->size = count + 1 > PAGE_SIZE ? count + 1 : PAGE_SIZE;
-	ebuf->buf = kmalloc(ebuf->size,
-				GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
+	if (!error || !rem)
+		return 0;
 
-	if (ebuf->buf == NULL) {
-		ebuf->size = PAGE_SIZE;
-		ebuf->buf = kmalloc(ebuf->size, GFP_KERNEL);
-	}
+	err = err_print_to_sgl(error);
+	if (err)
+		return err;
 
-	if (ebuf->buf == NULL) {
-		ebuf->size = 128;
-		ebuf->buf = kmalloc(ebuf->size, GFP_KERNEL);
-	}
+	sg = READ_ONCE(error->fit);
+	if (!sg || off < sg->dma_address)
+		sg = error->sgl;
+	if (!sg)
+		return 0;
 
-	if (ebuf->buf == NULL)
-		return -ENOMEM;
+	pos = sg->dma_address;
+	count = 0;
+	do {
+		size_t len, start;
+
+		if (sg_is_chain(sg)) {
+			sg = sg_chain_ptr(sg);
+			GEM_BUG_ON(sg_is_chain(sg));
+		}
+
+		len = sg->length;
+		if (pos + len <= off) {
+			pos += len;
+			continue;
+		}
 
-	ebuf->start = pos;
+		start = sg->offset;
+		if (pos < off) {
+			GEM_BUG_ON(off - pos > len);
+			len -= off - pos;
+			start += off - pos;
+			pos = off;
+		}
 
-	return 0;
+		len = min(len, rem);
+		GEM_BUG_ON(!len || len > sg->length);
+
+		memcpy(buf, page_address(sg_page(sg)) + start, len);
+
+		count += len;
+		pos += len;
+
+		buf += len;
+		rem -= len;
+		if (!rem) {
+			WRITE_ONCE(error->fit, sg);
+			break;
+		}
+	} while (!sg_is_last(sg++));
+
+	return count;
 }
 
 static void i915_error_object_free(struct drm_i915_error_object *obj)
@@ -944,6 +1020,7 @@ void __i915_gpu_state_free(struct kref *error_ref)
 	cleanup_params(error);
 	cleanup_uc_state(error);
 
+	err_free_sgl(error->sgl);
 	kfree(error);
 }
 
-- 
cgit v1.2.3


From 5179749925933575a67f9d8f16d0cc204f98a29f Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 4 Dec 2018 14:15:16 +0000
Subject: drm/i915: Allocate a common scratch page

Currently we allocate a scratch page for each engine, but since we only
ever write into it for post-sync operations, it is not exposed to
userspace nor do we care for coherency. As we then do not care about its
contents, we can use one page for all, reducing our allocations and
avoid complications by not assuming per-engine isolation.

For later use, it simplifies engine initialisation (by removing the
allocation that required struct_mutex!) and means that we can always rely
on there being a scratch page.

v2: Check that we allocated a large enough scratch for I830 w/a

Fixes: 06e562e7f515 ("drm/i915/ringbuffer: Delay after EMIT_INVALIDATE for gen4/gen5") # v4.18.20
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108850
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181204141522.13640-1-chris@chris-wilson.co.uk
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: <stable@vger.kernel.org> # v4.18.20+
---
 drivers/gpu/drm/i915/i915_drv.h         |  7 +++++
 drivers/gpu/drm/i915/i915_gem.c         | 50 ++++++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gpu_error.c   |  2 +-
 drivers/gpu/drm/i915/intel_engine_cs.c  | 42 ---------------------------
 drivers/gpu/drm/i915/intel_lrc.c        | 17 ++++-------
 drivers/gpu/drm/i915/intel_ringbuffer.c | 37 ++++++++----------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  5 ----
 7 files changed, 74 insertions(+), 86 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_gpu_error.c')

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 23a3dc6f3907..c5f01964f0fb 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1983,6 +1983,8 @@ struct drm_i915_private {
 		struct delayed_work idle_work;
 
 		ktime_t last_init_time;
+
+		struct i915_vma *scratch;
 	} gt;
 
 	/* perform PHY state sanity checks? */
@@ -3713,4 +3715,9 @@ static inline int intel_hws_csb_write_index(struct drm_i915_private *i915)
 		return I915_HWS_CSB_WRITE_INDEX;
 }
 
+static inline u32 i915_scratch_offset(const struct drm_i915_private *i915)
+{
+	return i915_ggtt_offset(i915->gt.scratch);
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 35ecfea4e903..d36a9755ad91 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5498,6 +5498,44 @@ err_active:
 	goto out_ctx;
 }
 
+static int
+i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size)
+{
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	int ret;
+
+	obj = i915_gem_object_create_stolen(i915, size);
+	if (!obj)
+		obj = i915_gem_object_create_internal(i915, size);
+	if (IS_ERR(obj)) {
+		DRM_ERROR("Failed to allocate scratch page\n");
+		return PTR_ERR(obj);
+	}
+
+	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto err_unref;
+	}
+
+	ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+	if (ret)
+		goto err_unref;
+
+	i915->gt.scratch = vma;
+	return 0;
+
+err_unref:
+	i915_gem_object_put(obj);
+	return ret;
+}
+
+static void i915_gem_fini_scratch(struct drm_i915_private *i915)
+{
+	i915_vma_unpin_and_release(&i915->gt.scratch, 0);
+}
+
 int i915_gem_init(struct drm_i915_private *dev_priv)
 {
 	int ret;
@@ -5544,12 +5582,19 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 		goto err_unlock;
 	}
 
-	ret = i915_gem_contexts_init(dev_priv);
+	ret = i915_gem_init_scratch(dev_priv,
+				    IS_GEN2(dev_priv) ? SZ_256K : PAGE_SIZE);
 	if (ret) {
 		GEM_BUG_ON(ret == -EIO);
 		goto err_ggtt;
 	}
 
+	ret = i915_gem_contexts_init(dev_priv);
+	if (ret) {
+		GEM_BUG_ON(ret == -EIO);
+		goto err_scratch;
+	}
+
 	ret = intel_engines_init(dev_priv);
 	if (ret) {
 		GEM_BUG_ON(ret == -EIO);
@@ -5622,6 +5667,8 @@ err_pm:
 err_context:
 	if (ret != -EIO)
 		i915_gem_contexts_fini(dev_priv);
+err_scratch:
+	i915_gem_fini_scratch(dev_priv);
 err_ggtt:
 err_unlock:
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
@@ -5673,6 +5720,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv)
 	intel_uc_fini(dev_priv);
 	i915_gem_cleanup_engines(dev_priv);
 	i915_gem_contexts_fini(dev_priv);
+	i915_gem_fini_scratch(dev_priv);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	intel_wa_list_free(&dev_priv->gt_wa_list);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index a6885a59568b..07465123c166 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1571,7 +1571,7 @@ static void gem_record_rings(struct i915_gpu_state *error)
 			if (HAS_BROKEN_CS_TLB(i915))
 				ee->wa_batchbuffer =
 					i915_error_object_create(i915,
-								 engine->scratch);
+								 i915->gt.scratch);
 			request_record_user_bo(request, ee);
 
 			ee->ctx =
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 6b427bc52f78..af2873403009 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -493,46 +493,6 @@ void intel_engine_setup_common(struct intel_engine_cs *engine)
 	intel_engine_init_cmd_parser(engine);
 }
 
-int intel_engine_create_scratch(struct intel_engine_cs *engine,
-				unsigned int size)
-{
-	struct drm_i915_gem_object *obj;
-	struct i915_vma *vma;
-	int ret;
-
-	WARN_ON(engine->scratch);
-
-	obj = i915_gem_object_create_stolen(engine->i915, size);
-	if (!obj)
-		obj = i915_gem_object_create_internal(engine->i915, size);
-	if (IS_ERR(obj)) {
-		DRM_ERROR("Failed to allocate scratch page\n");
-		return PTR_ERR(obj);
-	}
-
-	vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL);
-	if (IS_ERR(vma)) {
-		ret = PTR_ERR(vma);
-		goto err_unref;
-	}
-
-	ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
-	if (ret)
-		goto err_unref;
-
-	engine->scratch = vma;
-	return 0;
-
-err_unref:
-	i915_gem_object_put(obj);
-	return ret;
-}
-
-void intel_engine_cleanup_scratch(struct intel_engine_cs *engine)
-{
-	i915_vma_unpin_and_release(&engine->scratch, 0);
-}
-
 static void cleanup_status_page(struct intel_engine_cs *engine)
 {
 	if (HWS_NEEDS_PHYSICAL(engine->i915)) {
@@ -707,8 +667,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *i915 = engine->i915;
 
-	intel_engine_cleanup_scratch(engine);
-
 	cleanup_status_page(engine);
 
 	intel_engine_fini_breadcrumbs(engine);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 87227fd9ae5f..d7fa301b5ec7 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1288,9 +1288,10 @@ static int execlists_request_alloc(struct i915_request *request)
 static u32 *
 gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
 {
+	/* NB no one else is allowed to scribble over scratch + 256! */
 	*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
 	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
-	*batch++ = i915_ggtt_offset(engine->scratch) + 256;
+	*batch++ = i915_scratch_offset(engine->i915) + 256;
 	*batch++ = 0;
 
 	*batch++ = MI_LOAD_REGISTER_IMM(1);
@@ -1304,7 +1305,7 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
 
 	*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
 	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
-	*batch++ = i915_ggtt_offset(engine->scratch) + 256;
+	*batch++ = i915_scratch_offset(engine->i915) + 256;
 	*batch++ = 0;
 
 	return batch;
@@ -1341,7 +1342,7 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
 				       PIPE_CONTROL_GLOBAL_GTT_IVB |
 				       PIPE_CONTROL_CS_STALL |
 				       PIPE_CONTROL_QW_WRITE,
-				       i915_ggtt_offset(engine->scratch) +
+				       i915_scratch_offset(engine->i915) +
 				       2 * CACHELINE_BYTES);
 
 	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
@@ -1973,7 +1974,7 @@ static int gen8_emit_flush_render(struct i915_request *request,
 {
 	struct intel_engine_cs *engine = request->engine;
 	u32 scratch_addr =
-		i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES;
+		i915_scratch_offset(engine->i915) + 2 * CACHELINE_BYTES;
 	bool vf_flush_wa = false, dc_flush_wa = false;
 	u32 *cs, flags = 0;
 	int len;
@@ -2292,10 +2293,6 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
 	if (ret)
 		return ret;
 
-	ret = intel_engine_create_scratch(engine, PAGE_SIZE);
-	if (ret)
-		goto err_cleanup_common;
-
 	ret = intel_init_workaround_bb(engine);
 	if (ret) {
 		/*
@@ -2311,10 +2308,6 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
 	intel_engine_init_workarounds(engine);
 
 	return 0;
-
-err_cleanup_common:
-	intel_engine_cleanup_common(engine);
-	return ret;
 }
 
 int logical_xcs_ring_init(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 7f88df5bff09..c5eb26a7ee79 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -150,8 +150,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
 	 */
 	if (mode & EMIT_INVALIDATE) {
 		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
-		*cs++ = i915_ggtt_offset(rq->engine->scratch) |
-			PIPE_CONTROL_GLOBAL_GTT;
+		*cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
 		*cs++ = 0;
 		*cs++ = 0;
 
@@ -159,8 +158,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
 			*cs++ = MI_FLUSH;
 
 		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
-		*cs++ = i915_ggtt_offset(rq->engine->scratch) |
-			PIPE_CONTROL_GLOBAL_GTT;
+		*cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
 		*cs++ = 0;
 		*cs++ = 0;
 	}
@@ -212,8 +210,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
 static int
 intel_emit_post_sync_nonzero_flush(struct i915_request *rq)
 {
-	u32 scratch_addr =
-		i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES;
+	u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
 	u32 *cs;
 
 	cs = intel_ring_begin(rq, 6);
@@ -246,8 +243,7 @@ intel_emit_post_sync_nonzero_flush(struct i915_request *rq)
 static int
 gen6_render_ring_flush(struct i915_request *rq, u32 mode)
 {
-	u32 scratch_addr =
-		i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES;
+	u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
 	u32 *cs, flags = 0;
 	int ret;
 
@@ -316,8 +312,7 @@ gen7_render_ring_cs_stall_wa(struct i915_request *rq)
 static int
 gen7_render_ring_flush(struct i915_request *rq, u32 mode)
 {
-	u32 scratch_addr =
-		i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES;
+	u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
 	u32 *cs, flags = 0;
 
 	/*
@@ -994,7 +989,7 @@ i965_emit_bb_start(struct i915_request *rq,
 }
 
 /* Just userspace ABI convention to limit the wa batch bo to a resonable size */
-#define I830_BATCH_LIMIT (256*1024)
+#define I830_BATCH_LIMIT SZ_256K
 #define I830_TLB_ENTRIES (2)
 #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
 static int
@@ -1002,7 +997,9 @@ i830_emit_bb_start(struct i915_request *rq,
 		   u64 offset, u32 len,
 		   unsigned int dispatch_flags)
 {
-	u32 *cs, cs_offset = i915_ggtt_offset(rq->engine->scratch);
+	u32 *cs, cs_offset = i915_scratch_offset(rq->i915);
+
+	GEM_BUG_ON(rq->i915->gt.scratch->size < I830_WA_SIZE);
 
 	cs = intel_ring_begin(rq, 6);
 	if (IS_ERR(cs))
@@ -1459,7 +1456,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
 {
 	struct i915_timeline *timeline;
 	struct intel_ring *ring;
-	unsigned int size;
 	int err;
 
 	intel_engine_setup_common(engine);
@@ -1484,21 +1480,12 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
 	GEM_BUG_ON(engine->buffer);
 	engine->buffer = ring;
 
-	size = PAGE_SIZE;
-	if (HAS_BROKEN_CS_TLB(engine->i915))
-		size = I830_WA_SIZE;
-	err = intel_engine_create_scratch(engine, size);
-	if (err)
-		goto err_unpin;
-
 	err = intel_engine_init_common(engine);
 	if (err)
-		goto err_scratch;
+		goto err_unpin;
 
 	return 0;
 
-err_scratch:
-	intel_engine_cleanup_scratch(engine);
 err_unpin:
 	intel_ring_unpin(ring);
 err_ring:
@@ -1572,7 +1559,7 @@ static int flush_pd_dir(struct i915_request *rq)
 	/* Stall until the page table load is complete */
 	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine));
-	*cs++ = i915_ggtt_offset(engine->scratch);
+	*cs++ = i915_scratch_offset(rq->i915);
 	*cs++ = MI_NOOP;
 
 	intel_ring_advance(rq, cs);
@@ -1681,7 +1668,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
 			/* Insert a delay before the next switch! */
 			*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
 			*cs++ = i915_mmio_reg_offset(last_reg);
-			*cs++ = i915_ggtt_offset(engine->scratch);
+			*cs++ = i915_scratch_offset(rq->i915);
 			*cs++ = MI_NOOP;
 		}
 		*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 927bb21a2b0b..72edaa7ff411 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -439,7 +439,6 @@ struct intel_engine_cs {
 	struct i915_wa_list ctx_wa_list;
 	struct i915_wa_list wa_list;
 	struct i915_wa_list whitelist;
-	struct i915_vma *scratch;
 
 	u32             irq_keep_mask; /* always keep these interrupts */
 	u32		irq_enable_mask; /* bitmask to enable ring interrupt */
@@ -896,10 +895,6 @@ void intel_engine_setup_common(struct intel_engine_cs *engine);
 int intel_engine_init_common(struct intel_engine_cs *engine);
 void intel_engine_cleanup_common(struct intel_engine_cs *engine);
 
-int intel_engine_create_scratch(struct intel_engine_cs *engine,
-				unsigned int size);
-void intel_engine_cleanup_scratch(struct intel_engine_cs *engine);
-
 int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
 int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
 int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
-- 
cgit v1.2.3