diff options
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 782 |
1 files changed, 464 insertions, 318 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 615f0a855222..b9e8e0d6e97b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -46,15 +46,13 @@ #include <linux/dma-buf.h> static void i915_gem_flush_free_objects(struct drm_i915_private *i915); -static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); -static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) { - if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) + if (obj->cache_dirty) return false; - if (!i915_gem_object_is_coherent(obj)) + if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) return true; return obj->pin_display; @@ -145,9 +143,9 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, struct i915_ggtt *ggtt = &dev_priv->ggtt; struct drm_i915_gem_get_aperture *args = data; struct i915_vma *vma; - size_t pinned; + u64 pinned; - pinned = 0; + pinned = ggtt->base.reserved; mutex_lock(&dev->struct_mutex); list_for_each_entry(vma, &ggtt->base.active_list, vm_link) if (i915_vma_is_pinned(vma)) @@ -235,6 +233,14 @@ err_phys: return st; } +static void __start_cpu_write(struct drm_i915_gem_object *obj) +{ + obj->base.read_domains = I915_GEM_DOMAIN_CPU; + obj->base.write_domain = I915_GEM_DOMAIN_CPU; + if (cpu_write_needs_clflush(obj)) + obj->cache_dirty = true; +} + static void __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, struct sg_table *pages, @@ -247,11 +253,10 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, if (needs_clflush && (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 && - !i915_gem_object_is_coherent(obj)) + !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) drm_clflush_sg(pages); - obj->base.read_domains = I915_GEM_DOMAIN_CPU; - obj->base.write_domain = I915_GEM_DOMAIN_CPU; + __start_cpu_write(obj); } static void @@ -383,7 +388,7 @@ i915_gem_object_wait_fence(struct dma_fence *fence, */ if (rps) { if (INTEL_GEN(rq->i915) >= 6) - gen6_rps_boost(rq->i915, rps, rq->emitted_jiffies); + gen6_rps_boost(rq, rps); else rps = NULL; } @@ -394,22 +399,6 @@ out: if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq)) i915_gem_request_retire_upto(rq); - if (rps && i915_gem_request_global_seqno(rq) == intel_engine_last_submit(rq->engine)) { - /* The GPU is now idle and this client has stalled. - * Since no other client has submitted a request in the - * meantime, assume that this client is the only one - * supplying work to the GPU but is unable to keep that - * work supplied because it is waiting. Since the GPU is - * then never kept fully busy, RPS autoclocking will - * keep the clocks relatively low, causing further delays. - * Compensate by giving the synchronous client credit for - * a waitboost next time. - */ - spin_lock(&rq->i915->rps.client_lock); - list_del_init(&rps->link); - spin_unlock(&rq->i915->rps.client_lock); - } - return timeout; } @@ -572,46 +561,6 @@ static struct intel_rps_client *to_rps_client(struct drm_file *file) return &fpriv->rps; } -int -i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, - int align) -{ - int ret; - - if (align > obj->base.size) - return -EINVAL; - - if (obj->ops == &i915_gem_phys_ops) - return 0; - - if (obj->mm.madv != I915_MADV_WILLNEED) - return -EFAULT; - - if (obj->base.filp == NULL) - return -EINVAL; - - ret = i915_gem_object_unbind(obj); - if (ret) - return ret; - - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); - if (obj->mm.pages) - return -EBUSY; - - GEM_BUG_ON(obj->ops != &i915_gem_object_ops); - obj->ops = &i915_gem_phys_ops; - - ret = i915_gem_object_pin_pages(obj); - if (ret) - goto err_xfer; - - return 0; - -err_xfer: - obj->ops = &i915_gem_object_ops; - return ret; -} - static int i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, struct drm_i915_gem_pwrite *args, @@ -686,6 +635,12 @@ i915_gem_dumb_create(struct drm_file *file, args->size, &args->handle); } +static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj) +{ + return !(obj->cache_level == I915_CACHE_NONE || + obj->cache_level == I915_CACHE_WT); +} + /** * Creates a new mm object and returns a handle to it. * @dev: drm device pointer @@ -705,6 +660,66 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, args->size, &args->handle); } +static inline enum fb_op_origin +fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain) +{ + return (domain == I915_GEM_DOMAIN_GTT ? + obj->frontbuffer_ggtt_origin : ORIGIN_CPU); +} + +static void +flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) +{ + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + + if (!(obj->base.write_domain & flush_domains)) + return; + + /* No actual flushing is required for the GTT write domain. Writes + * to it "immediately" go to main memory as far as we know, so there's + * no chipset flush. It also doesn't land in render cache. + * + * However, we do have to enforce the order so that all writes through + * the GTT land before any writes to the device, such as updates to + * the GATT itself. + * + * We also have to wait a bit for the writes to land from the GTT. + * An uncached read (i.e. mmio) seems to be ideal for the round-trip + * timing. This issue has only been observed when switching quickly + * between GTT writes and CPU reads from inside the kernel on recent hw, + * and it appears to only affect discrete GTT blocks (i.e. on LLC + * system agents we cannot reproduce this behaviour). + */ + wmb(); + + switch (obj->base.write_domain) { + case I915_GEM_DOMAIN_GTT: + if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) { + if (intel_runtime_pm_get_if_in_use(dev_priv)) { + spin_lock_irq(&dev_priv->uncore.lock); + POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base)); + spin_unlock_irq(&dev_priv->uncore.lock); + intel_runtime_pm_put(dev_priv); + } + } + + intel_fb_obj_flush(obj, + fb_write_origin(obj, I915_GEM_DOMAIN_GTT)); + break; + + case I915_GEM_DOMAIN_CPU: + i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); + break; + + case I915_GEM_DOMAIN_RENDER: + if (gpu_write_needs_clflush(obj)) + obj->cache_dirty = true; + break; + } + + obj->base.write_domain = 0; +} + static inline int __copy_to_user_swizzled(char __user *cpu_vaddr, const char *gpu_vaddr, int gpu_offset, @@ -785,7 +800,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, if (ret) return ret; - if (i915_gem_object_is_coherent(obj) || + if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ || !static_cpu_has(X86_FEATURE_CLFLUSH)) { ret = i915_gem_object_set_to_cpu_domain(obj, false); if (ret) @@ -794,14 +809,15 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, goto out; } - i915_gem_object_flush_gtt_write_domain(obj); + flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); /* If we're not in the cpu read domain, set ourself into the gtt * read domain and manually flush cachelines (if required). This * optimizes for the case when the gpu will dirty the data * anyway again before the next pread happens. */ - if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) + if (!obj->cache_dirty && + !(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) *needs_clflush = CLFLUSH_BEFORE; out: @@ -837,7 +853,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, if (ret) return ret; - if (i915_gem_object_is_coherent(obj) || + if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE || !static_cpu_has(X86_FEATURE_CLFLUSH)) { ret = i915_gem_object_set_to_cpu_domain(obj, true); if (ret) @@ -846,21 +862,23 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, goto out; } - i915_gem_object_flush_gtt_write_domain(obj); + flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); /* If we're not in the cpu write domain, set ourself into the * gtt write domain and manually flush cachelines (as required). * This optimizes for the case when the gpu will use the data * right away and we therefore have to clflush anyway. */ - if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) + if (!obj->cache_dirty) { *needs_clflush |= CLFLUSH_AFTER; - /* Same trick applies to invalidate partially written cachelines read - * before writing. - */ - if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) - *needs_clflush |= CLFLUSH_BEFORE; + /* + * Same trick applies to invalidate partially written + * cachelines read before writing. + */ + if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) + *needs_clflush |= CLFLUSH_BEFORE; + } out: intel_fb_obj_invalidate(obj, ORIGIN_CPU); @@ -1501,13 +1519,6 @@ err: return ret; } -static inline enum fb_op_origin -write_origin(struct drm_i915_gem_object *obj, unsigned domain) -{ - return (domain == I915_GEM_DOMAIN_GTT ? - obj->frontbuffer_ggtt_origin : ORIGIN_CPU); -} - static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915; @@ -1591,10 +1602,12 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, if (err) goto out_unpin; - if (read_domains & I915_GEM_DOMAIN_GTT) - err = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); + if (read_domains & I915_GEM_DOMAIN_WC) + err = i915_gem_object_set_to_wc_domain(obj, write_domain); + else if (read_domains & I915_GEM_DOMAIN_GTT) + err = i915_gem_object_set_to_gtt_domain(obj, write_domain); else - err = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); + err = i915_gem_object_set_to_cpu_domain(obj, write_domain); /* And bump the LRU for this access */ i915_gem_object_bump_inactive_ggtt(obj); @@ -1602,7 +1615,8 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, mutex_unlock(&dev->struct_mutex); if (write_domain != 0) - intel_fb_obj_invalidate(obj, write_origin(obj, write_domain)); + intel_fb_obj_invalidate(obj, + fb_write_origin(obj, write_domain)); out_unpin: i915_gem_object_unpin_pages(obj); @@ -1737,6 +1751,9 @@ static unsigned int tile_row_pages(struct drm_i915_gem_object *obj) * into userspace. (This view is aligned and sized appropriately for * fenced access.) * + * 2 - Recognise WC as a separate cache domain so that we can flush the + * delayed writes via GTT before performing direct access via WC. + * * Restrictions: * * * snoopable objects cannot be accessed via the GTT. It can cause machine @@ -1764,7 +1781,7 @@ static unsigned int tile_row_pages(struct drm_i915_gem_object *obj) */ int i915_gem_mmap_gtt_version(void) { - return 1; + return 2; } static inline struct i915_ggtt_view @@ -2228,7 +2245,7 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, if (obj->mm.mapping) { void *ptr; - ptr = ptr_mask_bits(obj->mm.mapping); + ptr = page_mask_bits(obj->mm.mapping); if (is_vmalloc_addr(ptr)) vunmap(ptr); else @@ -2315,8 +2332,7 @@ rebuild_st: * Fail silently without starting the shrinker */ mapping = obj->base.filp->f_mapping; - noreclaim = mapping_gfp_constraint(mapping, - ~(__GFP_IO | __GFP_RECLAIM)); + noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM); noreclaim |= __GFP_NORETRY | __GFP_NOWARN; sg = st->sgl; @@ -2364,8 +2380,9 @@ rebuild_st: * again with !__GFP_NORETRY. However, we still * want to fail this allocation rather than * trigger the out-of-memory killer and for - * this we want the future __GFP_MAYFAIL. + * this we want __GFP_RETRY_MAYFAIL. */ + gfp |= __GFP_RETRY_MAYFAIL; } } while (1); @@ -2524,7 +2541,7 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, if (n_pages > ARRAY_SIZE(stack_pages)) { /* Too big for stack -- allocate temporary array instead */ - pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY); + pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_TEMPORARY); if (!pages) return NULL; } @@ -2546,7 +2563,7 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, addr = vmap(pages, n_pages, 0, pgprot); if (pages != stack_pages) - drm_free_large(pages); + kvfree(pages); return addr; } @@ -2580,7 +2597,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, } GEM_BUG_ON(!obj->mm.pages); - ptr = ptr_unpack_bits(obj->mm.mapping, has_type); + ptr = page_unpack_bits(obj->mm.mapping, &has_type); if (ptr && has_type != type) { if (pinned) { ret = -EBUSY; @@ -2602,7 +2619,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, goto err_unpin; } - obj->mm.mapping = ptr_pack_bits(ptr, type); + obj->mm.mapping = page_pack_bits(ptr, type); } out_unlock: @@ -2685,34 +2702,38 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, return 0; } -static bool ban_context(const struct i915_gem_context *ctx) +static bool ban_context(const struct i915_gem_context *ctx, + unsigned int score) { return (i915_gem_context_is_bannable(ctx) && - ctx->ban_score >= CONTEXT_SCORE_BAN_THRESHOLD); + score >= CONTEXT_SCORE_BAN_THRESHOLD); } static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx) { - ctx->guilty_count++; - ctx->ban_score += CONTEXT_SCORE_GUILTY; - if (ban_context(ctx)) - i915_gem_context_set_banned(ctx); + unsigned int score; + bool banned; - DRM_DEBUG_DRIVER("context %s marked guilty (score %d) banned? %s\n", - ctx->name, ctx->ban_score, - yesno(i915_gem_context_is_banned(ctx))); + atomic_inc(&ctx->guilty_count); - if (!i915_gem_context_is_banned(ctx) || IS_ERR_OR_NULL(ctx->file_priv)) + score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score); + banned = ban_context(ctx, score); + DRM_DEBUG_DRIVER("context %s marked guilty (score %d) banned? %s\n", + ctx->name, score, yesno(banned)); + if (!banned) return; - ctx->file_priv->context_bans++; - DRM_DEBUG_DRIVER("client %s has had %d context banned\n", - ctx->name, ctx->file_priv->context_bans); + i915_gem_context_set_banned(ctx); + if (!IS_ERR_OR_NULL(ctx->file_priv)) { + atomic_inc(&ctx->file_priv->context_bans); + DRM_DEBUG_DRIVER("client %s has had %d context banned\n", + ctx->name, atomic_read(&ctx->file_priv->context_bans)); + } } static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx) { - ctx->active_count++; + atomic_inc(&ctx->active_count); } struct drm_i915_gem_request * @@ -2761,46 +2782,62 @@ static bool engine_stalled(struct intel_engine_cs *engine) return true; } +/* + * Ensure irq handler finishes, and not run again. + * Also return the active request so that we only search for it once. + */ +struct drm_i915_gem_request * +i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) +{ + struct drm_i915_gem_request *request = NULL; + + /* Prevent the signaler thread from updating the request + * state (by calling dma_fence_signal) as we are processing + * the reset. The write from the GPU of the seqno is + * asynchronous and the signaler thread may see a different + * value to us and declare the request complete, even though + * the reset routine have picked that request as the active + * (incomplete) request. This conflict is not handled + * gracefully! + */ + kthread_park(engine->breadcrumbs.signaler); + + /* Prevent request submission to the hardware until we have + * completed the reset in i915_gem_reset_finish(). If a request + * is completed by one engine, it may then queue a request + * to a second via its engine->irq_tasklet *just* as we are + * calling engine->init_hw() and also writing the ELSP. + * Turning off the engine->irq_tasklet until the reset is over + * prevents the race. + */ + tasklet_kill(&engine->irq_tasklet); + tasklet_disable(&engine->irq_tasklet); + + if (engine->irq_seqno_barrier) + engine->irq_seqno_barrier(engine); + + request = i915_gem_find_active_request(engine); + if (request && request->fence.error == -EIO) + request = ERR_PTR(-EIO); /* Previous reset failed! */ + + return request; +} + int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; + struct drm_i915_gem_request *request; enum intel_engine_id id; int err = 0; - /* Ensure irq handler finishes, and not run again. */ for_each_engine(engine, dev_priv, id) { - struct drm_i915_gem_request *request; - - /* Prevent the signaler thread from updating the request - * state (by calling dma_fence_signal) as we are processing - * the reset. The write from the GPU of the seqno is - * asynchronous and the signaler thread may see a different - * value to us and declare the request complete, even though - * the reset routine have picked that request as the active - * (incomplete) request. This conflict is not handled - * gracefully! - */ - kthread_park(engine->breadcrumbs.signaler); - - /* Prevent request submission to the hardware until we have - * completed the reset in i915_gem_reset_finish(). If a request - * is completed by one engine, it may then queue a request - * to a second via its engine->irq_tasklet *just* as we are - * calling engine->init_hw() and also writing the ELSP. - * Turning off the engine->irq_tasklet until the reset is over - * prevents the race. - */ - tasklet_kill(&engine->irq_tasklet); - tasklet_disable(&engine->irq_tasklet); - - if (engine->irq_seqno_barrier) - engine->irq_seqno_barrier(engine); - - if (engine_stalled(engine)) { - request = i915_gem_find_active_request(engine); - if (request && request->fence.error == -EIO) - err = -EIO; /* Previous reset failed! */ + request = i915_gem_reset_prepare_engine(engine); + if (IS_ERR(request)) { + err = PTR_ERR(request); + continue; } + + engine->hangcheck.active_request = request; } i915_gem_revoke_fences(dev_priv); @@ -2850,12 +2887,11 @@ static void engine_skip_context(struct drm_i915_gem_request *request) spin_unlock_irqrestore(&engine->timeline->lock, flags); } -/* Returns true if the request was guilty of hang */ -static bool i915_gem_reset_request(struct drm_i915_gem_request *request) +/* Returns the request if it was guilty of the hang */ +static struct drm_i915_gem_request * +i915_gem_reset_request(struct intel_engine_cs *engine, + struct drm_i915_gem_request *request) { - /* Read once and return the resolution */ - const bool guilty = engine_stalled(request->engine); - /* The guilty request will get skipped on a hung engine. * * Users of client default contexts do not rely on logical @@ -2877,29 +2913,47 @@ static bool i915_gem_reset_request(struct drm_i915_gem_request *request) * subsequent hangs. */ - if (guilty) { + if (engine_stalled(engine)) { i915_gem_context_mark_guilty(request->ctx); skip_request(request); + + /* If this context is now banned, skip all pending requests. */ + if (i915_gem_context_is_banned(request->ctx)) + engine_skip_context(request); } else { - i915_gem_context_mark_innocent(request->ctx); - dma_fence_set_error(&request->fence, -EAGAIN); + /* + * Since this is not the hung engine, it may have advanced + * since the hang declaration. Double check by refinding + * the active request at the time of the reset. + */ + request = i915_gem_find_active_request(engine); + if (request) { + i915_gem_context_mark_innocent(request->ctx); + dma_fence_set_error(&request->fence, -EAGAIN); + + /* Rewind the engine to replay the incomplete rq */ + spin_lock_irq(&engine->timeline->lock); + request = list_prev_entry(request, link); + if (&request->link == &engine->timeline->requests) + request = NULL; + spin_unlock_irq(&engine->timeline->lock); + } } - return guilty; + return request; } -static void i915_gem_reset_engine(struct intel_engine_cs *engine) +void i915_gem_reset_engine(struct intel_engine_cs *engine, + struct drm_i915_gem_request *request) { - struct drm_i915_gem_request *request; + engine->irq_posted = 0; - request = i915_gem_find_active_request(engine); - if (request && i915_gem_reset_request(request)) { + if (request) + request = i915_gem_reset_request(engine, request); + + if (request) { DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n", engine->name, request->global_seqno); - - /* If this context is now banned, skip all pending requests. */ - if (i915_gem_context_is_banned(request->ctx)) - engine_skip_context(request); } /* Setup the CS to resume from the breadcrumb of the hung request */ @@ -2918,7 +2972,7 @@ void i915_gem_reset(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv, id) { struct i915_gem_context *ctx; - i915_gem_reset_engine(engine); + i915_gem_reset_engine(engine, engine->hangcheck.active_request); ctx = fetch_and_zero(&engine->last_retired_context); if (ctx) engine->context_unpin(engine, ctx); @@ -2934,6 +2988,12 @@ void i915_gem_reset(struct drm_i915_private *dev_priv) } } +void i915_gem_reset_finish_engine(struct intel_engine_cs *engine) +{ + tasklet_enable(&engine->irq_tasklet); + kthread_unpark(engine->breadcrumbs.signaler); +} + void i915_gem_reset_finish(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; @@ -2942,13 +3002,14 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv) lockdep_assert_held(&dev_priv->drm.struct_mutex); for_each_engine(engine, dev_priv, id) { - tasklet_enable(&engine->irq_tasklet); - kthread_unpark(engine->breadcrumbs.signaler); + engine->hangcheck.active_request = NULL; + i915_gem_reset_finish_engine(engine); } } static void nop_submit_request(struct drm_i915_gem_request *request) { + GEM_BUG_ON(!i915_terminally_wedged(&request->i915->gpu_error)); dma_fence_set_error(&request->fence, -EIO); i915_gem_request_submit(request); intel_engine_init_global_seqno(request->engine, request->global_seqno); @@ -2970,16 +3031,10 @@ static void engine_set_wedged(struct intel_engine_cs *engine) /* Mark all executing requests as skipped */ spin_lock_irqsave(&engine->timeline->lock, flags); list_for_each_entry(request, &engine->timeline->requests, link) - dma_fence_set_error(&request->fence, -EIO); + if (!i915_gem_request_completed(request)) + dma_fence_set_error(&request->fence, -EIO); spin_unlock_irqrestore(&engine->timeline->lock, flags); - /* Mark all pending requests as complete so that any concurrent - * (lockless) lookup doesn't try and wait upon the request as we - * reset it. - */ - intel_engine_init_global_seqno(engine, - intel_engine_last_submit(engine)); - /* * Clear the execlists queue up before freeing the requests, as those * are the ones that keep the context and ringbuffer backing objects @@ -2987,18 +3042,34 @@ static void engine_set_wedged(struct intel_engine_cs *engine) */ if (i915.enable_execlists) { + struct execlist_port *port = engine->execlist_port; unsigned long flags; + unsigned int n; spin_lock_irqsave(&engine->timeline->lock, flags); - i915_gem_request_put(engine->execlist_port[0].request); - i915_gem_request_put(engine->execlist_port[1].request); + for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) + i915_gem_request_put(port_request(&port[n])); memset(engine->execlist_port, 0, sizeof(engine->execlist_port)); engine->execlist_queue = RB_ROOT; engine->execlist_first = NULL; spin_unlock_irqrestore(&engine->timeline->lock, flags); + + /* The port is checked prior to scheduling a tasklet, but + * just in case we have suspended the tasklet to do the + * wedging make sure that when it wakes, it decides there + * is no work to do by clearing the irq_posted bit. + */ + clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); } + + /* Mark all pending requests as complete so that any concurrent + * (lockless) lookup doesn't try and wait upon the request as we + * reset it. + */ + intel_engine_init_global_seqno(engine, + intel_engine_last_submit(engine)); } static int __i915_gem_set_wedged_BKL(void *data) @@ -3010,25 +3081,15 @@ static int __i915_gem_set_wedged_BKL(void *data) for_each_engine(engine, i915, id) engine_set_wedged(engine); + set_bit(I915_WEDGED, &i915->gpu_error.flags); + wake_up_all(&i915->gpu_error.reset_queue); + return 0; } void i915_gem_set_wedged(struct drm_i915_private *dev_priv) { - lockdep_assert_held(&dev_priv->drm.struct_mutex); - set_bit(I915_WEDGED, &dev_priv->gpu_error.flags); - - /* Retire completed requests first so the list of inflight/incomplete - * requests is accurate and we don't try and mark successful requests - * as in error during __i915_gem_set_wedged_BKL(). - */ - i915_gem_retire_requests(dev_priv); - stop_machine(__i915_gem_set_wedged_BKL, dev_priv, NULL); - - i915_gem_context_lost(dev_priv); - - mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0); } bool i915_gem_unset_wedged(struct drm_i915_private *i915) @@ -3083,6 +3144,7 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) * context and do not require stop_machine(). */ intel_engines_reset_default_submission(i915); + i915_gem_contexts_lost(i915); smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ clear_bit(I915_WEDGED, &i915->gpu_error.flags); @@ -3121,8 +3183,6 @@ i915_gem_idle_work_handler(struct work_struct *work) struct drm_i915_private *dev_priv = container_of(work, typeof(*dev_priv), gt.idle_work.work); struct drm_device *dev = &dev_priv->drm; - struct intel_engine_cs *engine; - enum intel_engine_id id; bool rearm_hangcheck; if (!READ_ONCE(dev_priv->gt.awake)) @@ -3160,10 +3220,8 @@ i915_gem_idle_work_handler(struct work_struct *work) if (wait_for(intel_engines_are_idle(dev_priv), 10)) DRM_ERROR("Timeout waiting for engines to idle\n"); - for_each_engine(engine, dev_priv, id) { - intel_engine_disarm_breadcrumbs(engine); - i915_gem_batch_pool_fini(&engine->batch_pool); - } + intel_engines_mark_idle(dev_priv); + i915_gem_timelines_mark_idle(dev_priv); GEM_BUG_ON(!dev_priv->gt.awake); dev_priv->gt.awake = false; @@ -3184,21 +3242,33 @@ out_rearm: void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) { + struct drm_i915_private *i915 = to_i915(gem->dev); struct drm_i915_gem_object *obj = to_intel_bo(gem); struct drm_i915_file_private *fpriv = file->driver_priv; - struct i915_vma *vma, *vn; + struct i915_lut_handle *lut, *ln; - mutex_lock(&obj->base.dev->struct_mutex); - list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link) - if (vma->vm->file == fpriv) + mutex_lock(&i915->drm.struct_mutex); + + list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) { + struct i915_gem_context *ctx = lut->ctx; + struct i915_vma *vma; + + if (ctx->file_priv != fpriv) + continue; + + vma = radix_tree_delete(&ctx->handles_vma, lut->handle); + + if (!i915_vma_is_ggtt(vma)) i915_vma_close(vma); - if (i915_gem_object_is_active(obj) && - !i915_gem_object_has_active_reference(obj)) { - i915_gem_object_set_active_reference(obj); - i915_gem_object_get(obj); + list_del(&lut->obj_link); + list_del(&lut->ctx_link); + + kmem_cache_free(i915->luts, lut); + __i915_gem_object_release_unless_active(obj); } - mutex_unlock(&obj->base.dev->struct_mutex); + + mutex_unlock(&i915->drm.struct_mutex); } static unsigned long to_wait_timeout(s64 timeout_ns) @@ -3224,7 +3294,7 @@ static unsigned long to_wait_timeout(s64 timeout_ns) * -ERESTARTSYS: signal interrupted the wait * -ENONENT: object doesn't exist * Also possible, but rare: - * -EAGAIN: GPU wedged + * -EAGAIN: incomplete, restart syscall * -ENOMEM: damn * -ENODEV: Internal IRQ fail * -E?: The add request failed @@ -3272,6 +3342,10 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) */ if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns)) args->timeout_ns = 0; + + /* Asked to wait beyond the jiffie/scheduler precision? */ + if (ret == -ETIME && args->timeout_ns) + ret = -EAGAIN; } i915_gem_object_put(obj); @@ -3344,73 +3418,89 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) return ret; } -/** Flushes the GTT write domain for the object if it's dirty. */ -static void -i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) +static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - - if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) - return; - - /* No actual flushing is required for the GTT write domain. Writes - * to it "immediately" go to main memory as far as we know, so there's - * no chipset flush. It also doesn't land in render cache. - * - * However, we do have to enforce the order so that all writes through - * the GTT land before any writes to the device, such as updates to - * the GATT itself. - * - * We also have to wait a bit for the writes to land from the GTT. - * An uncached read (i.e. mmio) seems to be ideal for the round-trip - * timing. This issue has only been observed when switching quickly - * between GTT writes and CPU reads from inside the kernel on recent hw, - * and it appears to only affect discrete GTT blocks (i.e. on LLC - * system agents we cannot reproduce this behaviour). + /* + * We manually flush the CPU domain so that we can override and + * force the flush for the display, and perform it asyncrhonously. */ - wmb(); - if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) { - if (intel_runtime_pm_get_if_in_use(dev_priv)) { - spin_lock_irq(&dev_priv->uncore.lock); - POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base)); - spin_unlock_irq(&dev_priv->uncore.lock); - intel_runtime_pm_put(dev_priv); - } - } - - intel_fb_obj_flush(obj, write_origin(obj, I915_GEM_DOMAIN_GTT)); - + flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); + if (obj->cache_dirty) + i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE); obj->base.write_domain = 0; } -/** Flushes the CPU write domain for the object if it's dirty. */ -static void -i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) +void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) { - if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) + if (!READ_ONCE(obj->pin_display)) return; - i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); - obj->base.write_domain = 0; + mutex_lock(&obj->base.dev->struct_mutex); + __i915_gem_object_flush_for_display(obj); + mutex_unlock(&obj->base.dev->struct_mutex); } -static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) +/** + * Moves a single object to the WC read, and possibly write domain. + * @obj: object to act on + * @write: ask for write access or read only + * + * This function returns when the move is complete, including waiting on + * flushes to occur. + */ +int +i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) { - if (obj->base.write_domain != I915_GEM_DOMAIN_CPU && !obj->cache_dirty) - return; + int ret; - i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE); - obj->base.write_domain = 0; -} + lockdep_assert_held(&obj->base.dev->struct_mutex); -void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) -{ - if (!READ_ONCE(obj->pin_display)) - return; + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED | + (write ? I915_WAIT_ALL : 0), + MAX_SCHEDULE_TIMEOUT, + NULL); + if (ret) + return ret; - mutex_lock(&obj->base.dev->struct_mutex); - __i915_gem_object_flush_for_display(obj); - mutex_unlock(&obj->base.dev->struct_mutex); + if (obj->base.write_domain == I915_GEM_DOMAIN_WC) + return 0; + + /* Flush and acquire obj->pages so that we are coherent through + * direct access in memory with previous cached writes through + * shmemfs and that our cache domain tracking remains valid. + * For example, if the obj->filp was moved to swap without us + * being notified and releasing the pages, we would mistakenly + * continue to assume that the obj remained out of the CPU cached + * domain. + */ + ret = i915_gem_object_pin_pages(obj); + if (ret) + return ret; + + flush_write_domain(obj, ~I915_GEM_DOMAIN_WC); + + /* Serialise direct access to this object with the barriers for + * coherent writes from the GPU, by effectively invalidating the + * WC domain upon first access. + */ + if ((obj->base.read_domains & I915_GEM_DOMAIN_WC) == 0) + mb(); + + /* It should now be out of any other write domains, and we can update + * the domain values for our changes. + */ + GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_WC) != 0); + obj->base.read_domains |= I915_GEM_DOMAIN_WC; + if (write) { + obj->base.read_domains = I915_GEM_DOMAIN_WC; + obj->base.write_domain = I915_GEM_DOMAIN_WC; + obj->mm.dirty = true; + } + + i915_gem_object_unpin_pages(obj); + return 0; } /** @@ -3452,7 +3542,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) if (ret) return ret; - i915_gem_object_flush_cpu_write_domain(obj); + flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT); /* Serialise direct access to this object with the barriers for * coherent writes from the GPU, by effectively invalidating the @@ -3595,13 +3685,10 @@ restart: } } - if (obj->base.write_domain == I915_GEM_DOMAIN_CPU && - i915_gem_object_is_coherent(obj)) - obj->cache_dirty = true; - list_for_each_entry(vma, &obj->vma_list, obj_link) vma->node.color = cache_level; - obj->cache_level = cache_level; + i915_gem_object_set_cache_coherency(obj, cache_level); + obj->cache_dirty = true; /* Always invalidate stale cachelines */ return 0; } @@ -3823,10 +3910,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) if (ret) return ret; - if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) - return 0; - - i915_gem_object_flush_gtt_write_domain(obj); + flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); /* Flush the CPU cache if it's still invalid. */ if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { @@ -3837,15 +3921,13 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) /* It should now be out of any other write domains, and we can update * the domain values for our changes. */ - GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); + GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU); /* If we're writing through the CPU, then the GPU read domains will * need to be invalidated at next use. */ - if (write) { - obj->base.read_domains = I915_GEM_DOMAIN_CPU; - obj->base.write_domain = I915_GEM_DOMAIN_CPU; - } + if (write) + __start_cpu_write(obj); return 0; } @@ -4020,7 +4102,7 @@ __busy_set_if_active(const struct dma_fence *fence, if (i915_gem_request_completed(rq)) return 0; - return flag(rq->engine->exec_id); + return flag(rq->engine->uabi_id); } static __always_inline unsigned int @@ -4177,8 +4259,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, INIT_LIST_HEAD(&obj->global_link); INIT_LIST_HEAD(&obj->userfault_link); - INIT_LIST_HEAD(&obj->obj_exec_link); INIT_LIST_HEAD(&obj->vma_list); + INIT_LIST_HEAD(&obj->lut_list); INIT_LIST_HEAD(&obj->batch_pool_link); obj->ops = ops; @@ -4211,6 +4293,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) { struct drm_i915_gem_object *obj; struct address_space *mapping; + unsigned int cache_level; gfp_t mask; int ret; @@ -4219,7 +4302,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) * catch if we ever need to fix it. In the meantime, if you do spot * such a local variable, please consider fixing! */ - if (WARN_ON(size >> PAGE_SHIFT > INT_MAX)) + if (size >> PAGE_SHIFT > INT_MAX) return ERR_PTR(-E2BIG); if (overflows_type(size, obj->base.size)) @@ -4249,7 +4332,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) obj->base.write_domain = I915_GEM_DOMAIN_CPU; obj->base.read_domains = I915_GEM_DOMAIN_CPU; - if (HAS_LLC(dev_priv)) { + if (HAS_LLC(dev_priv)) /* On some devices, we can have the GPU use the LLC (the CPU * cache) for about a 10% performance improvement * compared to uncached. Graphics requests other than @@ -4262,9 +4345,11 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) * However, we maintain the display planes as UC, and so * need to rebind when first used as such. */ - obj->cache_level = I915_CACHE_LLC; - } else - obj->cache_level = I915_CACHE_NONE; + cache_level = I915_CACHE_LLC; + else + cache_level = I915_CACHE_NONE; + + i915_gem_object_set_cache_coherency(obj, cache_level); trace_i915_gem_object_create(obj); @@ -4314,7 +4399,6 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, GEM_BUG_ON(i915_gem_object_is_active(obj)); list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link) { - GEM_BUG_ON(!i915_vma_is_ggtt(vma)); GEM_BUG_ON(i915_vma_is_active(vma)); vma->flags &= ~I915_VMA_PIN_MASK; i915_vma_close(vma); @@ -4327,6 +4411,8 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, intel_runtime_pm_put(i915); mutex_unlock(&i915->drm.struct_mutex); + cond_resched(); + llist_for_each_entry_safe(obj, on, freed, freed) { GEM_BUG_ON(obj->bind_count); GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits)); @@ -4374,8 +4460,11 @@ static void __i915_gem_free_work(struct work_struct *work) * unbound now. */ - while ((freed = llist_del_all(&i915->mm.free_list))) + while ((freed = llist_del_all(&i915->mm.free_list))) { __i915_gem_free_objects(i915, freed); + if (need_resched()) + break; + } } static void __i915_gem_free_object_rcu(struct rcu_head *head) @@ -4415,8 +4504,8 @@ void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj) { lockdep_assert_held(&obj->base.dev->struct_mutex); - GEM_BUG_ON(i915_gem_object_has_active_reference(obj)); - if (i915_gem_object_is_active(obj)) + if (!i915_gem_object_has_active_reference(obj) && + i915_gem_object_is_active(obj)) i915_gem_object_set_active_reference(obj); else i915_gem_object_put(obj); @@ -4440,10 +4529,9 @@ void i915_gem_sanitize(struct drm_i915_private *i915) * try to take over. The only way to remove the earlier state * is by resetting. However, resetting on earlier gen is tricky as * it may impact the display and we are uncertain about the stability - * of the reset, so we only reset recent machines with logical - * context support (that must be reset to remove any stray contexts). + * of the reset, so this could be applied to even earlier gen. */ - if (HAS_HW_CONTEXTS(i915)) { + if (INTEL_GEN(i915) >= 5) { int reset = intel_gpu_reset(i915, ALL_ENGINES); WARN_ON(reset && reset != -ENODEV); } @@ -4478,7 +4566,7 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) goto err_unlock; assert_kernel_context_is_current(dev_priv); - i915_gem_context_lost(dev_priv); + i915_gem_contexts_lost(dev_priv); mutex_unlock(&dev->struct_mutex); intel_guc_suspend(dev_priv); @@ -4492,8 +4580,6 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) while (flush_delayed_work(&dev_priv->gt.idle_work)) ; - i915_gem_drain_freed_objects(dev_priv); - /* Assert that we sucessfully flushed all the work and * reset the GPU back to its idle, low power state. */ @@ -4686,11 +4772,9 @@ bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value) if (value >= 0) return value; -#ifdef CONFIG_INTEL_IOMMU /* Enable semaphores on SNB when IO remapping is off */ - if (INTEL_INFO(dev_priv)->gen == 6 && intel_iommu_gfx_mapped) + if (IS_GEN6(dev_priv) && intel_vtd_active()) return false; -#endif return true; } @@ -4701,7 +4785,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->drm.struct_mutex); - i915_gem_clflush_init(dev_priv); + dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1); if (!i915.enable_execlists) { dev_priv->gt.resume = intel_legacy_submission_resume; @@ -4719,13 +4803,15 @@ int i915_gem_init(struct drm_i915_private *dev_priv) */ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - i915_gem_init_userptr(dev_priv); + ret = i915_gem_init_userptr(dev_priv); + if (ret) + goto out_unlock; ret = i915_gem_init_ggtt(dev_priv); if (ret) goto out_unlock; - ret = i915_gem_context_init(dev_priv); + ret = i915_gem_contexts_init(dev_priv); if (ret) goto out_unlock; @@ -4811,12 +4897,16 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) if (!dev_priv->vmas) goto err_objects; + dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0); + if (!dev_priv->luts) + goto err_vmas; + dev_priv->requests = KMEM_CACHE(drm_i915_gem_request, SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | SLAB_TYPESAFE_BY_RCU); if (!dev_priv->requests) - goto err_vmas; + goto err_luts; dev_priv->dependencies = KMEM_CACHE(i915_dependency, SLAB_HWCACHE_ALIGN | @@ -4824,14 +4914,17 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) if (!dev_priv->dependencies) goto err_requests; + dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN); + if (!dev_priv->priorities) + goto err_dependencies; + mutex_lock(&dev_priv->drm.struct_mutex); INIT_LIST_HEAD(&dev_priv->gt.timelines); err = i915_gem_timeline_init__global(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); if (err) - goto err_dependencies; + goto err_priorities; - INIT_LIST_HEAD(&dev_priv->context_list); INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work); init_llist_head(&dev_priv->mm.free_list); INIT_LIST_HEAD(&dev_priv->mm.unbound_list); @@ -4845,20 +4938,20 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) init_waitqueue_head(&dev_priv->gpu_error.wait_queue); init_waitqueue_head(&dev_priv->gpu_error.reset_queue); - init_waitqueue_head(&dev_priv->pending_flip_queue); - - dev_priv->mm.interruptible = true; - atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0); spin_lock_init(&dev_priv->fb_tracking.lock); return 0; +err_priorities: + kmem_cache_destroy(dev_priv->priorities); err_dependencies: kmem_cache_destroy(dev_priv->dependencies); err_requests: kmem_cache_destroy(dev_priv->requests); +err_luts: + kmem_cache_destroy(dev_priv->luts); err_vmas: kmem_cache_destroy(dev_priv->vmas); err_objects: @@ -4878,8 +4971,10 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv) WARN_ON(!list_empty(&dev_priv->gt.timelines)); mutex_unlock(&dev_priv->drm.struct_mutex); + kmem_cache_destroy(dev_priv->priorities); kmem_cache_destroy(dev_priv->dependencies); kmem_cache_destroy(dev_priv->requests); + kmem_cache_destroy(dev_priv->luts); kmem_cache_destroy(dev_priv->vmas); kmem_cache_destroy(dev_priv->objects); @@ -4889,9 +4984,10 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv) int i915_gem_freeze(struct drm_i915_private *dev_priv) { - mutex_lock(&dev_priv->drm.struct_mutex); + /* Discard all purgeable objects, let userspace recover those as + * required after resuming. + */ i915_gem_shrink_all(dev_priv); - mutex_unlock(&dev_priv->drm.struct_mutex); return 0; } @@ -4916,17 +5012,16 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv) * we update that state just before writing out the image. * * To try and reduce the hibernation image, we manually shrink - * the objects as well. + * the objects as well, see i915_gem_freeze() */ - mutex_lock(&dev_priv->drm.struct_mutex); i915_gem_shrink(dev_priv, -1UL, I915_SHRINK_UNBOUND); + i915_gem_drain_freed_objects(dev_priv); + mutex_lock(&dev_priv->drm.struct_mutex); for (p = phases; *p; p++) { - list_for_each_entry(obj, *p, global_link) { - obj->base.read_domains = I915_GEM_DOMAIN_CPU; - obj->base.write_domain = I915_GEM_DOMAIN_CPU; - } + list_for_each_entry(obj, *p, global_link) + __start_cpu_write(obj); } mutex_unlock(&dev_priv->drm.struct_mutex); @@ -4946,15 +5041,9 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file) list_for_each_entry(request, &file_priv->mm.request_list, client_link) request->file_priv = NULL; spin_unlock(&file_priv->mm.lock); - - if (!list_empty(&file_priv->rps.link)) { - spin_lock(&to_i915(dev)->rps.client_lock); - list_del(&file_priv->rps.link); - spin_unlock(&to_i915(dev)->rps.client_lock); - } } -int i915_gem_open(struct drm_device *dev, struct drm_file *file) +int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file) { struct drm_i915_file_private *file_priv; int ret; @@ -4966,16 +5055,15 @@ int i915_gem_open(struct drm_device *dev, struct drm_file *file) return -ENOMEM; file->driver_priv = file_priv; - file_priv->dev_priv = to_i915(dev); + file_priv->dev_priv = i915; file_priv->file = file; - INIT_LIST_HEAD(&file_priv->rps.link); spin_lock_init(&file_priv->mm.lock); INIT_LIST_HEAD(&file_priv->mm.request_list); file_priv->bsd_engine = -1; - ret = i915_gem_context_open(dev, file); + ret = i915_gem_context_open(i915, file); if (ret) kfree(file_priv); @@ -5219,6 +5307,64 @@ i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, return sg_dma_address(sg) + (offset << PAGE_SHIFT); } +int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) +{ + struct sg_table *pages; + int err; + + if (align > obj->base.size) + return -EINVAL; + + if (obj->ops == &i915_gem_phys_ops) + return 0; + + if (obj->ops != &i915_gem_object_ops) + return -EINVAL; + + err = i915_gem_object_unbind(obj); + if (err) + return err; + + mutex_lock(&obj->mm.lock); + + if (obj->mm.madv != I915_MADV_WILLNEED) { + err = -EFAULT; + goto err_unlock; + } + + if (obj->mm.quirked) { + err = -EFAULT; + goto err_unlock; + } + + if (obj->mm.mapping) { + err = -EBUSY; + goto err_unlock; + } + + pages = obj->mm.pages; + obj->ops = &i915_gem_phys_ops; + + err = ____i915_gem_object_get_pages(obj); + if (err) + goto err_xfer; + + /* Perma-pin (until release) the physical set of pages */ + __i915_gem_object_pin_pages(obj); + + if (!IS_ERR_OR_NULL(pages)) + i915_gem_object_ops.put_pages(obj, pages); + mutex_unlock(&obj->mm.lock); + return 0; + +err_xfer: + obj->ops = &i915_gem_object_ops; + obj->mm.pages = pages; +err_unlock: + mutex_unlock(&obj->mm.lock); + return err; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/scatterlist.c" #include "selftests/mock_gem_device.c" |