From 74f6e183913b5dc90a004cafa84159ddb61cd0f0 Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Wed, 26 Sep 2018 11:47:07 +0100
Subject: drm/i915: Convert to BITS_PER_TYPE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In commit 9144d75e22ca ("include/linux/bitops.h: introduce
BITS_PER_TYPE"), we made BITS_PER_TYPE available to all and now we can
use the macro to replace some open-coded computation of
sizeof(T) * BITS_PER_BYTE.

Suggested-by: Ville Syrjälä
Signed-off-by: Chris Wilson
Cc: Ville Syrjälä
Cc: Jani Nikula
Cc: Joonas Lahtinen
Cc: Tvrtko Ursulin
Reviewed-by: Jani Nikula
Reviewed-by: Ville Syrjälä
Link: https://patchwork.freedesktop.org/patch/msgid/20180926104707.17410-1-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_gem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index db9688d14912..717f4321e987 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5959,7 +5959,7 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
 	 * the bits.
 	 */
 	BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
-		     sizeof(atomic_t) * BITS_PER_BYTE);
+		     BITS_PER_TYPE(atomic_t));

 	if (old) {
 		WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
-- cgit v1.2.3
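BITS_PER_TYPE() is nothing more than the old open-coded expression given a name, so conversions like the one above are purely mechanical. A minimal self-contained illustration (the two macro definitions are lifted from the bitops change the message cites; the demo around them is ours):

#include <stdio.h>

#define BITS_PER_BYTE 8
#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE)

int main(void)
{
	/* Before: open-coded width computation. */
	printf("open-coded: %zu bits\n", sizeof(int) * BITS_PER_BYTE);

	/* After: the same value, with the intent spelled out. */
	printf("macro:      %zu bits\n", BITS_PER_TYPE(int));

	return 0;
}

Both lines print the same number, which is why the BUILD_BUG_ON above changes only in readability, not in meaning.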
From 6edafc4eb3e4ae26b1b5dbc0cabfc82d96d6b9bb Mon Sep 17 00:00:00 2001
From: José Roberto de Souza
Date: Tue, 18 Sep 2018 13:47:10 -0700
Subject: drm/i915: Unset reset pch handshake when PCH is not present in one place
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Right now RESET_PCH_HANDSHAKE_ENABLE is enabled all the time inside
intel_power_domains_init_hw(), and if the PCH is NOP it is unset in
i915_gem_init_hw(). So make intel_pch_reset_handshake() handle both
cases and call it for the missing gens in intel_power_domains_init_hw().
Ivybridge has a different register and bits but the same objective, so
move that code too.

v2 (Rodrigo):
- handle the IVYBRIDGE case inside intel_pch_reset_handshake()

v4 (Rodrigo and Ville):
- move the enable/disable decision to the callers

Cc: Rodrigo Vivi
Signed-off-by: José Roberto de Souza
Reviewed-by: Rodrigo Vivi
Signed-off-by: Rodrigo Vivi
Link: https://patchwork.freedesktop.org/patch/msgid/20180918204714.27306-2-jose.souza@intel.com
---
 drivers/gpu/drm/i915/i915_gem.c         | 12 ------------
 drivers/gpu/drm/i915/intel_runtime_pm.c | 28 ++++++++++++++++++++--------
 2 files changed, 20 insertions(+), 20 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 717f4321e987..627b1c8a7ea3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5301,18 +5301,6 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv)
 		I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ?
 			   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);

-	if (HAS_PCH_NOP(dev_priv)) {
-		if (IS_IVYBRIDGE(dev_priv)) {
-			u32 temp = I915_READ(GEN7_MSG_CTL);
-			temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
-			I915_WRITE(GEN7_MSG_CTL, temp);
-		} else if (INTEL_GEN(dev_priv) >= 7) {
-			u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
-			temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
-			I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
-		}
-	}
-
 	intel_gt_workarounds_apply(dev_priv);

 	i915_gem_init_swizzling(dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index d051b0d440c4..3cf8533e0834 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -3243,14 +3243,25 @@ static void icl_mbus_init(struct drm_i915_private *dev_priv)
 static void intel_pch_reset_handshake(struct drm_i915_private *dev_priv,
 				      bool enable)
 {
-	u32 val = I915_READ(HSW_NDE_RSTWRN_OPT);
+	i915_reg_t reg;
+	u32 reset_bits, val;
+
+	if (IS_IVYBRIDGE(dev_priv)) {
+		reg = GEN7_MSG_CTL;
+		reset_bits = WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK;
+	} else {
+		reg = HSW_NDE_RSTWRN_OPT;
+		reset_bits = RESET_PCH_HANDSHAKE_ENABLE;
+	}
+
+	val = I915_READ(reg);

 	if (enable)
-		val |= RESET_PCH_HANDSHAKE_ENABLE;
+		val |= reset_bits;
 	else
-		val &= ~RESET_PCH_HANDSHAKE_ENABLE;
+		val &= ~reset_bits;

-	I915_WRITE(HSW_NDE_RSTWRN_OPT, val);
+	I915_WRITE(reg, val);
 }

 static void skl_display_core_init(struct drm_i915_private *dev_priv,
@@ -3262,7 +3273,7 @@ static void skl_display_core_init(struct drm_i915_private *dev_priv,
 	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);

 	/* enable PCH reset handshake */
-	intel_pch_reset_handshake(dev_priv, true);
+	intel_pch_reset_handshake(dev_priv, !HAS_PCH_NOP(dev_priv));

 	/* enable PG1 and Misc I/O */
 	mutex_lock(&power_domains->lock);
@@ -3448,7 +3459,7 @@ static void cnl_display_core_init(struct drm_i915_private *dev_priv, bool resume
 	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);

 	/* 1. Enable PCH Reset Handshake */
-	intel_pch_reset_handshake(dev_priv, true);
+	intel_pch_reset_handshake(dev_priv, !HAS_PCH_NOP(dev_priv));

 	/* 2. Enable Comp */
 	val = I915_READ(CHICKEN_MISC_2);
@@ -3531,7 +3542,7 @@ static void icl_display_core_init(struct drm_i915_private *dev_priv,
 	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);

 	/* 1. Enable PCH reset handshake. */
-	intel_pch_reset_handshake(dev_priv, true);
+	intel_pch_reset_handshake(dev_priv, !HAS_PCH_NOP(dev_priv));

 	for (port = PORT_A; port <= PORT_B; port++) {
 		/* 2. Enable DDI combo PHY comp. */
@@ -3763,7 +3774,8 @@ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume)
 		mutex_lock(&power_domains->lock);
 		vlv_cmnlane_wa(dev_priv);
 		mutex_unlock(&power_domains->lock);
-	}
+	} else if (IS_IVYBRIDGE(dev_priv) || INTEL_GEN(dev_priv) >= 7)
+		intel_pch_reset_handshake(dev_priv, !HAS_PCH_NOP(dev_priv));

 	/*
 	 * Keep all power wells enabled for any dependent HW access during
-- cgit v1.2.3

From f8e57863f81f962a1837d6a17825752de5bc23f7 Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin
Date: Wed, 26 Sep 2018 09:03:53 +0100
Subject: drm/i915: Trim partial view sg lists

Partial views are small but there can be many of them, and since the sg
list space for them is allocated pessimistically, we can save some slab
by trimming the unused tail entries.
Signed-off-by: Tvrtko Ursulin
Cc: Chris Wilson
Cc: Joonas Lahtinen
Reviewed-by: Chris Wilson
Link: https://patchwork.freedesktop.org/patch/msgid/20180926080353.20867-1-tvrtko.ursulin@linux.intel.com
---
 drivers/gpu/drm/i915/i915_drv.h     | 2 ++
 drivers/gpu/drm/i915/i915_gem.c     | 2 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c | 2 ++
 3 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9e185f0a822a..ab52657cc3d0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2325,6 +2325,8 @@ static inline struct scatterlist *__sg_next(struct scatterlist *sg)
 	     (((__iter).curr += PAGE_SIZE) >= (__iter).max) ?		\
 	     (__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0 : 0)

+bool i915_sg_trim(struct sg_table *orig_st);
+
 static inline unsigned int i915_sg_page_sizes(struct scatterlist *sg)
 {
 	unsigned int page_sizes;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 627b1c8a7ea3..28e943ee8b5e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2491,7 +2491,7 @@ unlock:
 	mutex_unlock(&obj->mm.lock);
 }

-static bool i915_sg_trim(struct sg_table *orig_st)
+bool i915_sg_trim(struct sg_table *orig_st)
 {
 	struct sg_table new_st;
 	struct scatterlist *sg, *new_sg;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index f6c7ab413081..1ec721c20581 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3835,6 +3835,8 @@ intel_partial_pages(const struct i915_ggtt_view *view,
 		count -= len >> PAGE_SHIFT;
 		if (count == 0) {
 			sg_mark_end(sg);
+			i915_sg_trim(st); /* Drop any unused tail entries. */
+
 			return st;
 		}
-- cgit v1.2.3
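The patch only exports and calls i915_sg_trim(), so for context, here is a sketch of what such a trim helper can look like in terms of the generic scatterlist API. It illustrates the idea (copy the used entries into a right-sized table and free the oversized one) and is not guaranteed to match the i915 function line for line:

#include <linux/scatterlist.h>
#include <linux/slab.h>

static bool sg_trim_sketch(struct sg_table *orig_st)
{
	struct sg_table new_st;
	struct scatterlist *sg, *new_sg;
	unsigned int i;

	/* Nothing to reclaim if every allocated entry is in use. */
	if (orig_st->nents == orig_st->orig_nents)
		return false;

	/* On allocation failure, simply keep the oversized table. */
	if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN))
		return false;

	new_sg = new_st.sgl;
	for_each_sg(orig_st->sgl, sg, orig_st->nents, i) {
		sg_set_page(new_sg, sg_page(sg), sg->length, 0);
		new_sg = sg_next(new_sg);
	}

	sg_free_table(orig_st);
	*orig_st = new_st;
	return true;
}

The trim pays off for partial views precisely because intel_partial_pages() has to size the table pessimistically, before it knows how many entries coalescing will leave it with.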
From e9eaf82d97a2b05460ff5ef6a3e07446f7d049fe Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Mon, 1 Oct 2018 15:47:55 +0100
Subject: drm/i915: Priority boost for waiting clients

Latency is in the eye of the beholder. In the case where a client stops
and waits for the gpu, give that request chain a small priority boost
(not so that it overtakes higher priority clients, to preserve the
external ordering) so that ideally the wait completes earlier.

v2: Tvrtko recommends keeping the boost-from-user-stall as small as
possible and allowing new client flows to be preferred for
interactivity over stalls.

Testcase: igt/gem_sync/switch-default
Signed-off-by: Chris Wilson
Cc: Tvrtko Ursulin
Cc: Joonas Lahtinen
Cc: Dmitry Rogozhkin
Reviewed-by: Tvrtko Ursulin
Link: https://patchwork.freedesktop.org/patch/msgid/20181001144755.7978-3-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_gem.c       |  5 ++++-
 drivers/gpu/drm/i915/i915_request.c   |  2 ++
 drivers/gpu/drm/i915/i915_request.h   |  5 +++--
 drivers/gpu/drm/i915/i915_scheduler.c | 34 ++++++++++++++++++++++++++++------
 drivers/gpu/drm/i915/i915_scheduler.h |  7 +++++--
 5 files changed, 42 insertions(+), 11 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 28e943ee8b5e..7d45e71100bc 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1748,6 +1748,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	 */
 	err = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_PRIORITY |
 				   (write_domain ? I915_WAIT_ALL : 0),
 				   MAX_SCHEDULE_TIMEOUT,
 				   to_rps_client(file));
@@ -3751,7 +3752,9 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	start = ktime_get();

 	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_PRIORITY |
+				   I915_WAIT_ALL,
 				   to_wait_timeout(args->timeout_ns),
 				   to_rps_client(file));
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index d73ad490a261..abd4dacbab8e 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1237,6 +1237,8 @@ long i915_request_wait(struct i915_request *rq,
 		add_wait_queue(errq, &reset);

 	intel_wait_init(&wait);
+	if (flags & I915_WAIT_PRIORITY)
+		i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);

 restart:
 	do {
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 5f7361e0fca6..90e9d170a0cd 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -277,8 +277,9 @@ long i915_request_wait(struct i915_request *rq,
 	__attribute__((nonnull(1)));
 #define I915_WAIT_INTERRUPTIBLE	BIT(0)
 #define I915_WAIT_LOCKED	BIT(1) /* struct_mutex held, handle GPU reset */
-#define I915_WAIT_ALL		BIT(2) /* used by i915_gem_object_wait() */
-#define I915_WAIT_FOR_IDLE_BOOST BIT(3)
+#define I915_WAIT_PRIORITY	BIT(2) /* small priority bump for the request */
+#define I915_WAIT_ALL		BIT(3) /* used by i915_gem_object_wait() */
+#define I915_WAIT_FOR_IDLE_BOOST BIT(4)

 static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
 					    u32 seqno);
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index de9a2ba7c3bc..340faea6c08a 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -239,7 +239,8 @@ sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
 	return engine;
 }

-void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
+static void __i915_schedule(struct i915_request *rq,
+			    const struct i915_sched_attr *attr)
 {
 	struct list_head *uninitialized_var(pl);
 	struct intel_engine_cs *engine, *last;
@@ -248,6 +249,8 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 	const int prio = attr->priority;
 	LIST_HEAD(dfs);

+	/* Needed in order to use the temporary link inside i915_dependency */
+	lockdep_assert_held(&schedule_lock);
 	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);

 	if (i915_request_completed(rq))
@@ -256,9 +259,6 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 	if (prio <= READ_ONCE(rq->sched.attr.priority))
 		return;

-	/* Needed in order to use the temporary link inside i915_dependency */
-	spin_lock(&schedule_lock);
-
 	stack.signaler = &rq->sched;
 	list_add(&stack.dfs_link, &dfs);

@@ -312,7 +312,7 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 		rq->sched.attr = *attr;

 		if (stack.dfs_link.next == stack.dfs_link.prev)
-			goto out_unlock;
+			return;

 		__list_del_entry(&stack.dfs_link);
 	}
@@ -371,7 +371,29 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 	}

 	spin_unlock_irq(&engine->timeline.lock);
+}

-out_unlock:
+void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
+{
+	spin_lock(&schedule_lock);
+	__i915_schedule(rq, attr);
 	spin_unlock(&schedule_lock);
 }
+
+void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
+{
+	struct i915_sched_attr attr;
+
+	GEM_BUG_ON(bump & ~I915_PRIORITY_MASK);
+
+	if (READ_ONCE(rq->sched.attr.priority) == I915_PRIORITY_INVALID)
+		return;
+
+	spin_lock_bh(&schedule_lock);
+
+	attr = rq->sched.attr;
+	attr.priority |= bump;
+	__i915_schedule(rq, &attr);
+
+	spin_unlock_bh(&schedule_lock);
+}
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 68d84a45ad7f..dbe9cb7ecd82 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -24,13 +24,14 @@ enum {
 	I915_PRIORITY_INVALID = INT_MIN
 };

-#define I915_USER_PRIORITY_SHIFT 1
+#define I915_USER_PRIORITY_SHIFT 2
 #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)

 #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
 #define I915_PRIORITY_MASK (I915_PRIORITY_COUNT - 1)

-#define I915_PRIORITY_NEWCLIENT	((u8)BIT(0))
+#define I915_PRIORITY_WAIT	((u8)BIT(0))
+#define I915_PRIORITY_NEWCLIENT	((u8)BIT(1))

 struct i915_sched_attr {
 	/**
@@ -99,6 +100,8 @@ void i915_sched_node_fini(struct drm_i915_private *i915,
 void i915_schedule(struct i915_request *request,
 		   const struct i915_sched_attr *attr);

+void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump);
+
 struct list_head *
 i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio);
-- cgit v1.2.3
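The i915_scheduler.h hunk above widens the internal priority field: user priority now starts at bit 2, with the NEWCLIENT and WAIT boosts packed below it. A small userspace sketch of that encoding (the four defines mirror the hunk; the comparisons are only illustrative):

#include <stdio.h>

#define BIT(n) (1 << (n))

#define I915_USER_PRIORITY_SHIFT 2
#define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)

#define I915_PRIORITY_WAIT	BIT(0)
#define I915_PRIORITY_NEWCLIENT	BIT(1)

int main(void)
{
	int plain   = I915_USER_PRIORITY(0);
	int boosted = I915_USER_PRIORITY(0) | I915_PRIORITY_WAIT;
	int higher  = I915_USER_PRIORITY(1);

	/* The wait boost breaks ties between equal user priorities... */
	printf("boosted > plain:  %d\n", boosted > plain);	/* 1 */

	/* ...but can never overtake a higher user priority, preserving
	 * the external ordering the commit message calls out. */
	printf("boosted > higher: %d\n", boosted > higher);	/* 0 */

	return 0;
}

This is also why I915_USER_PRIORITY_SHIFT grows from 1 to 2: a second low bit is needed so that WAIT and NEWCLIENT can coexist beneath the user-visible range.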
From a5e856a5348f6cd50889d125c40bbeec7328e466 Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Fri, 12 Oct 2018 15:02:28 +0100
Subject: drm/i915: Large page offsets for pread/pwrite

Handle integer overflow when computing the sub-page length for shmem
backed pread/pwrite: previously the per-page length was computed in a
plain int, which can overflow once the remaining transfer size exceeds
INT_MAX.

Reported-by: Tvrtko Ursulin
Signed-off-by: Chris Wilson
Cc: Tvrtko Ursulin
Cc: stable@vger.kernel.org
Reviewed-by: Tvrtko Ursulin
Link: https://patchwork.freedesktop.org/patch/msgid/20181012140228.29783-1-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_gem.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7d45e71100bc..93d09282710d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1127,11 +1127,7 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
 	offset = offset_in_page(args->offset);
 	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
 		struct page *page = i915_gem_object_get_page(obj, idx);
-		int length;
-
-		length = remain;
-		if (offset + length > PAGE_SIZE)
-			length = PAGE_SIZE - offset;
+		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);

 		ret = shmem_pread(page, offset, length, user_data,
 				  page_to_phys(page) & obj_do_bit17_swizzling,
@@ -1575,11 +1571,7 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
 	offset = offset_in_page(args->offset);
 	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
 		struct page *page = i915_gem_object_get_page(obj, idx);
-		int length;
-
-		length = remain;
-		if (offset + length > PAGE_SIZE)
-			length = PAGE_SIZE - offset;
+		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);

 		ret = shmem_pwrite(page, offset, length, user_data,
 				   page_to_phys(page) & obj_do_bit17_swizzling,
-- cgit v1.2.3

From e6db7f4d7c5005258b862a5ed1732756fccb6bfa Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Mon, 5 Nov 2018 17:06:40 +0000
Subject: drm/i915: Break long iterations for get/put shmemfs pages

As we may have to iterate a few thousand elements to acquire and
release the shmemfs backing storage for a GPU object, we need to break
up the long loop with cond_resched() to retain a modicum of low latency
for other processes.

Testcase: igt/benchmarks/gem_syslatency
Signed-off-by: Chris Wilson
Cc: Kuo-Hsin Yang
Cc: Matthew Auld
Cc: Joonas Lahtinen
Reviewed-by: Joonas Lahtinen
Link: https://patchwork.freedesktop.org/patch/msgid/20181105170640.26905-1-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_gem.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 93d09282710d..347b3836c809 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2404,6 +2404,7 @@
 			mark_page_accessed(page);

 		put_page(page);
+		cond_resched();
 	}
 	obj->mm.dirty = false;
@@ -2574,6 +2575,7 @@ rebuild_st:
 		gfp_t gfp = noreclaim;

 		do {
+			cond_resched();
 			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
 			if (likely(!IS_ERR(page)))
 				break;
@@ -2584,7 +2586,6 @@
 			}

 			i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++);
-			cond_resched();

 			/*
 			 * We've tried hard to allocate the memory by reaping
-- cgit v1.2.3

From 64e3d12f769d60eaee6d2e53a9b7f0b3814f32ed Mon Sep 17 00:00:00 2001
From: Kuo-Hsin Yang
Date: Tue, 6 Nov 2018 13:23:24 +0000
Subject: mm, drm/i915: mark pinned shmemfs pages as unevictable

The i915 driver uses shmemfs to allocate backing storage for gem
objects. These shmemfs pages can be pinned (increased ref count) by
shmem_read_mapping_page_gfp(). When a lot of pages are pinned, vmscan
wastes a lot of time scanning these pinned pages. In some extreme
cases, all pages in the inactive anon lru are pinned; since only the
inactive anon lru is scanned due to inactive_ratio, the system cannot
swap and invokes the oom-killer. Mark these pinned pages as
unevictable to speed up vmscan.

Export pagevec API check_move_unevictable_pages().

This patch was inspired by Chris Wilson's change [1].

[1]: https://patchwork.kernel.org/patch/9768741/

Cc: Chris Wilson
Cc: Joonas Lahtinen
Cc: Peter Zijlstra
Cc: Andrew Morton
Cc: Dave Hansen
Signed-off-by: Kuo-Hsin Yang
Acked-by: Michal Hocko # mm part
Reviewed-by: Chris Wilson
Acked-by: Dave Hansen
Acked-by: Andrew Morton
Link: https://patchwork.freedesktop.org/patch/msgid/20181106132324.17390-1-chris@chris-wilson.co.uk
Signed-off-by: Chris Wilson
---
 Documentation/vm/unevictable-lru.rst |  6 +++++-
 drivers/gpu/drm/i915/i915_gem.c      | 33 +++++++++++++++++++++++++++++----
 include/linux/swap.h                 |  4 +++-
 mm/shmem.c                           |  2 +-
 mm/vmscan.c                          | 22 +++++++++++-----------
 5 files changed, 49 insertions(+), 18 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')
The + amount of unevictable memory marked by i915 driver is roughly the bounded + object size in debugfs/dri/0/i915_gem_objects. + Detecting Unevictable Pages --------------------------- diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 347b3836c809..5b80b0c14aed 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2382,11 +2382,23 @@ void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj) invalidate_mapping_pages(mapping, 0, (loff_t)-1); } +/* + * Move pages to appropriate lru and release the pagevec, decrementing the + * ref count of those pages. + */ +static void check_release_pagevec(struct pagevec *pvec) +{ + check_move_unevictable_pages(pvec); + __pagevec_release(pvec); + cond_resched(); +} + static void i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj, struct sg_table *pages) { struct sgt_iter sgt_iter; + struct pagevec pvec; struct page *page; __i915_gem_object_release_shmem(obj, pages, true); @@ -2396,6 +2408,9 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj, if (i915_gem_object_needs_bit17_swizzle(obj)) i915_gem_object_save_bit_17_swizzle(obj, pages); + mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping); + + pagevec_init(&pvec); for_each_sgt_page(page, sgt_iter, pages) { if (obj->mm.dirty) set_page_dirty(page); @@ -2403,9 +2418,11 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj, if (obj->mm.madv == I915_MADV_WILLNEED) mark_page_accessed(page); - put_page(page); - cond_resched(); + if (!pagevec_add(&pvec, page)) + check_release_pagevec(&pvec); } + if (pagevec_count(&pvec)) + check_release_pagevec(&pvec); obj->mm.dirty = false; sg_free_table(pages); @@ -2526,6 +2543,7 @@ static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) unsigned long last_pfn = 0; /* suppress gcc warning */ unsigned int max_segment = i915_sg_segment_size(); unsigned int sg_page_sizes; + struct pagevec pvec; gfp_t noreclaim; int ret; @@ -2561,6 +2579,7 @@ rebuild_st: * Fail silently without starting the shrinker */ mapping = obj->base.filp->f_mapping; + mapping_set_unevictable(mapping); noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM); noreclaim |= __GFP_NORETRY | __GFP_NOWARN; @@ -2675,8 +2694,14 @@ rebuild_st: err_sg: sg_mark_end(sg); err_pages: - for_each_sgt_page(page, sgt_iter, st) - put_page(page); + mapping_clear_unevictable(mapping); + pagevec_init(&pvec); + for_each_sgt_page(page, sgt_iter, st) { + if (!pagevec_add(&pvec, page)) + check_release_pagevec(&pvec); + } + if (pagevec_count(&pvec)) + check_release_pagevec(&pvec); sg_free_table(st); kfree(st); diff --git a/include/linux/swap.h b/include/linux/swap.h index 8e2c11e692ba..6c95df96c9aa 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -18,6 +18,8 @@ struct notifier_block; struct bio; +struct pagevec; + #define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */ #define SWAP_FLAG_PRIO_MASK 0x7fff #define SWAP_FLAG_PRIO_SHIFT 0 @@ -373,7 +375,7 @@ static inline int node_reclaim(struct pglist_data *pgdat, gfp_t mask, #endif extern int page_evictable(struct page *page); -extern void check_move_unevictable_pages(struct page **, int nr_pages); +extern void check_move_unevictable_pages(struct pagevec *pvec); extern int kswapd_run(int nid); extern void kswapd_stop(int nid); diff --git a/mm/shmem.c b/mm/shmem.c index 446942677cd4..0c3b005a59eb 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -781,7 +781,7 @@ void shmem_unlock_mapping(struct address_space *mapping) break; 
 		index = indices[pvec.nr - 1] + 1;
 		pagevec_remove_exceptionals(&pvec);
-		check_move_unevictable_pages(pvec.pages, pvec.nr);
+		check_move_unevictable_pages(&pvec);
 		pagevec_release(&pvec);
 		cond_resched();
 	}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index c7ce2c161225..0dbc493026a2 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -46,6 +46,7 @@
 #include
 #include
 #include
+#include <linux/pagevec.h>
 #include
 #include
 #include
@@ -4162,17 +4163,16 @@ int page_evictable(struct page *page)
 	return ret;
 }

-#ifdef CONFIG_SHMEM
 /**
- * check_move_unevictable_pages - check pages for evictability and move to appropriate zone lru list
- * @pages: array of pages to check
- * @nr_pages: number of pages to check
+ * check_move_unevictable_pages - check pages for evictability and move to
+ * appropriate zone lru list
+ * @pvec: pagevec with lru pages to check
 *
- * Checks pages for evictability and moves them to the appropriate lru list.
- *
- * This function is only used for SysV IPC SHM_UNLOCK.
+ * Checks pages for evictability, if an evictable page is in the unevictable
+ * lru list, moves it to the appropriate evictable lru list. This function
+ * should be only used for lru pages.
 */
-void check_move_unevictable_pages(struct page **pages, int nr_pages)
+void check_move_unevictable_pages(struct pagevec *pvec)
 {
 	struct lruvec *lruvec;
 	struct pglist_data *pgdat = NULL;
@@ -4180,8 +4180,8 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages)
 	int pgrescued = 0;
 	int i;

-	for (i = 0; i < nr_pages; i++) {
-		struct page *page = pages[i];
+	for (i = 0; i < pvec->nr; i++) {
+		struct page *page = pvec->pages[i];
 		struct pglist_data *pagepgdat = page_pgdat(page);

 		pgscanned++;
@@ -4213,4 +4213,4 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages)
 		spin_unlock_irq(&pgdat->lru_lock);
 	}
 }
-#endif /* CONFIG_SHMEM */
+EXPORT_SYMBOL_GPL(check_move_unevictable_pages);
-- cgit v1.2.3

From 8811d616dfaa8c6e1905a20ce0543ec401275997 Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Fri, 9 Nov 2018 09:03:11 +0000
Subject: drm/i915: Initialise the obj->rcu head

Make the rcu_head known to the system, in particular for debugobjects.
And having declared it for debugobjects, we need to tidy up afterwards.

v2: mark the obj->rcu as being destroyed when we reuse its location for
the freed list.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108691
Signed-off-by: Chris Wilson
Reviewed-by: Mika Kuoppala
Link: https://patchwork.freedesktop.org/patch/msgid/20181109090311.15321-1-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_gem.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5b80b0c14aed..5f69b9aadae8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4739,6 +4739,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 	INIT_LIST_HEAD(&obj->lut_list);
 	INIT_LIST_HEAD(&obj->batch_pool_link);

+	init_rcu_head(&obj->rcu);
+
 	obj->ops = ops;

 	reservation_object_init(&obj->__builtin_resv);
@@ -5005,6 +5007,13 @@ static void __i915_gem_free_object_rcu(struct rcu_head *head)
 		container_of(head, typeof(*obj), rcu);
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);

+	/*
+	 * We reuse obj->rcu for the freed list, so we had better not treat
+	 * it like a rcu_head from this point forwards. And we expect all
+	 * objects to be freed via this path.
+	 */
+	destroy_rcu_head(&obj->rcu);
+
 	/*
 	 * Since we require blocking on struct_mutex to unbind the freed
 	 * object from the GPU before releasing resources back to the
-- cgit v1.2.3
From a1db9c54eb29afd9842a08b2cbc2bc07a8a602b9 Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Thu, 8 Nov 2018 09:21:01 +0000
Subject: drm/i915: Track rcu_head for our idle worker

While our little rcu worker might now be replaceable by the dedicated
rcu_work, in the meantime we should mark up the rcu_head for correct
debugobjects tracking.

Signed-off-by: Chris Wilson
Reviewed-by: Mika Kuoppala
Link: https://patchwork.freedesktop.org/patch/msgid/20181108092101.27598-2-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_gem.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'drivers/gpu/drm/i915/i915_gem.c')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5f69b9aadae8..7d9457915704 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3557,6 +3557,8 @@ static void __sleep_rcu(struct rcu_head *rcu)
 	struct sleep_rcu_work *s = container_of(rcu, typeof(*s), rcu);
 	struct drm_i915_private *i915 = s->i915;

+	destroy_rcu_head(&s->rcu);
+
 	if (same_epoch(i915, s->epoch)) {
 		INIT_WORK(&s->work, __sleep_work);
 		queue_work(i915->wq, &s->work);
@@ -3673,6 +3675,7 @@ out_rearm:
 	if (same_epoch(dev_priv, epoch)) {
 		struct sleep_rcu_work *s = kmalloc(sizeof(*s), GFP_KERNEL);
 		if (s) {
+			init_rcu_head(&s->rcu);
 			s->i915 = dev_priv;
 			s->epoch = epoch;
 			call_rcu(&s->rcu, __sleep_rcu);
-- cgit v1.2.3
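The last two patches apply the same debugobjects discipline to two different rcu_head users. The generic shape of that pattern, with illustrative names rather than the i915 ones:

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct tracked {
	struct rcu_head rcu;
	int payload;
};

static void tracked_free_rcu(struct rcu_head *head)
{
	struct tracked *t = container_of(head, struct tracked, rcu);

	/*
	 * Retire the head before its storage is reused or freed, so
	 * CONFIG_DEBUG_OBJECTS_RCU_HEAD sees a balanced life cycle.
	 */
	destroy_rcu_head(&t->rcu);
	kfree(t);
}

static struct tracked *tracked_create(void)
{
	struct tracked *t = kmalloc(sizeof(*t), GFP_KERNEL);

	if (t)
		init_rcu_head(&t->rcu); /* announce the head to debugobjects */
	return t;
}

static void tracked_destroy(struct tracked *t)
{
	call_rcu(&t->rcu, tracked_free_rcu);
}

The init/destroy pair changes nothing functionally; it exists so that debugobjects can verify an rcu_head is never freed, reused (as the obj->rcu patch does for its freed list), or requeued while RCU still owns it.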