29 files changed, 1520 insertions, 877 deletions
diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl
index 30b3651d642b..9ddf8c6cb887 100644
--- a/Documentation/DocBook/drm.tmpl
+++ b/Documentation/DocBook/drm.tmpl
@@ -3982,7 +3982,6 @@ int num_ioctls;</synopsis>
         <title>Interrupt Handling</title>
 !Pdrivers/gpu/drm/i915/i915_irq.c interrupt handling
 !Fdrivers/gpu/drm/i915/i915_irq.c intel_irq_init intel_irq_init_hw intel_hpd_init
-!Fdrivers/gpu/drm/i915/i915_irq.c intel_irq_fini
 !Fdrivers/gpu/drm/i915/i915_irq.c intel_runtime_pm_disable_interrupts
 !Fdrivers/gpu/drm/i915/i915_irq.c intel_runtime_pm_enable_interrupts
       </sect2>
@@ -4199,6 +4198,23 @@ int num_ioctls;</synopsis>
 !Idrivers/gpu/drm/i915/i915_gem_gtt.c
       </sect2>
       <sect2>
+        <title>GTT Fences and Swizzling</title>
+!Idrivers/gpu/drm/i915/i915_gem_fence.c
+        <sect3>
+          <title>Global GTT Fence Handling</title>
+!Pdrivers/gpu/drm/i915/i915_gem_fence.c fence register handling
+        </sect3>
+        <sect3>
+          <title>Hardware Tiling and Swizzling Details</title>
+!Pdrivers/gpu/drm/i915/i915_gem_fence.c tiling swizzling details
+        </sect3>
+      </sect2>
+      <sect2>
+        <title>Object Tiling IOCTLs</title>
+!Idrivers/gpu/drm/i915/i915_gem_tiling.c
+!Pdrivers/gpu/drm/i915/i915_gem_tiling.c buffer object tiling
+      </sect2>
+      <sect2>
         <title>Buffer Object Eviction</title>
 	<para>
 	  This section documents the interface functions for evicting buffer
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index e52e01251644..41fb8a9c5bef 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -6,12 +6,13 @@
 
 # core driver code
 i915-y := i915_drv.o \
+	  i915_irq.o \
 	  i915_params.o \
           i915_suspend.o \
 	  i915_sysfs.o \
+	  intel_csr.o \
 	  intel_pm.o \
-	  intel_runtime_pm.o \
-	  intel_csr.o
+	  intel_runtime_pm.o
 
 i915-$(CONFIG_COMPAT)   += i915_ioc32.o
 i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o
@@ -20,21 +21,20 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o
 i915-y += i915_cmd_parser.o \
 	  i915_gem_batch_pool.o \
 	  i915_gem_context.o \
-	  i915_gem_render_state.o \
 	  i915_gem_debug.o \
 	  i915_gem_dmabuf.o \
 	  i915_gem_evict.o \
 	  i915_gem_execbuffer.o \
+	  i915_gem_fence.o \
 	  i915_gem_gtt.o \
 	  i915_gem.o \
+	  i915_gem_render_state.o \
 	  i915_gem_shrinker.o \
 	  i915_gem_stolen.o \
 	  i915_gem_tiling.o \
 	  i915_gem_userptr.o \
 	  i915_gpu_error.o \
-	  i915_irq.o \
 	  i915_trace_points.o \
-	  intel_hotplug.o \
 	  intel_lrc.o \
 	  intel_mocs.o \
 	  intel_ringbuffer.o \
@@ -48,11 +48,14 @@ i915-y += intel_renderstate_gen6.o \
 
 # modesetting core code
 i915-y += intel_audio.o \
+	  intel_atomic.o \
+	  intel_atomic_plane.o \
 	  intel_bios.o \
 	  intel_display.o \
 	  intel_fbc.o \
 	  intel_fifo_underrun.o \
 	  intel_frontbuffer.o \
+	  intel_hotplug.o \
 	  intel_modes.o \
 	  intel_overlay.o \
 	  intel_psr.o \
@@ -68,15 +71,13 @@ i915-y += dvo_ch7017.o \
 	  dvo_ns2501.o \
 	  dvo_sil164.o \
 	  dvo_tfp410.o \
-	  intel_atomic.o \
-	  intel_atomic_plane.o \
 	  intel_crt.o \
 	  intel_ddi.o \
-	  intel_dp.o \
 	  intel_dp_mst.o \
+	  intel_dp.o \
 	  intel_dsi.o \
-	  intel_dsi_pll.o \
 	  intel_dsi_panel_vbt.o \
+	  intel_dsi_pll.o \
 	  intel_dvo.o \
 	  intel_hdmi.o \
 	  intel_i2c.o \
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 430571b977db..237ff6884a22 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -151,8 +151,8 @@ static const struct drm_i915_cmd_descriptor render_cmds[] = {
 	CMD(  MI_ARB_ON_OFF,                    SMI,    F,  1,      R  ),
 	CMD(  MI_PREDICATE,                     SMI,    F,  1,      S  ),
 	CMD(  MI_TOPOLOGY_FILTER,               SMI,    F,  1,      S  ),
-	CMD(  MI_DISPLAY_FLIP,                  SMI,   !F,  0xFF,   R  ),
 	CMD(  MI_SET_APPID,                     SMI,    F,  1,      S  ),
+	CMD(  MI_DISPLAY_FLIP,                  SMI,   !F,  0xFF,   R  ),
 	CMD(  MI_SET_CONTEXT,                   SMI,   !F,  0xFF,   R  ),
 	CMD(  MI_URB_CLEAR,                     SMI,   !F,  0xFF,   S  ),
 	CMD(  MI_STORE_DWORD_IMM,               SMI,   !F,  0x3F,   B,
@@ -564,7 +564,7 @@ static bool validate_cmds_sorted(struct intel_engine_cs *ring,
 
 		for (j = 0; j < table->count; j++) {
 			const struct drm_i915_cmd_descriptor *desc =
-				&table->table[i];
+				&table->table[j];
 			u32 curr = desc->cmd.value & desc->cmd.mask;
 
 			if (curr < previous) {
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 51580bdd587f..23a69307e12e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4030,24 +4030,14 @@ static ssize_t i915_displayport_test_active_write(struct file *file,
 {
 	char *input_buffer;
 	int status = 0;
-	struct seq_file *m;
 	struct drm_device *dev;
 	struct drm_connector *connector;
 	struct list_head *connector_list;
 	struct intel_dp *intel_dp;
 	int val = 0;
 
-	m = file->private_data;
-	if (!m) {
-		status = -ENODEV;
-		return status;
-	}
-	dev = m->private;
+	dev = ((struct seq_file *)file->private_data)->private;
 
-	if (!dev) {
-		status = -ENODEV;
-		return status;
-	}
 	connector_list = &dev->mode_config.connector_list;
 
 	if (len == 0)
@@ -4071,9 +4061,7 @@ static ssize_t i915_displayport_test_active_write(struct file *file,
 		    DRM_MODE_CONNECTOR_DisplayPort)
 			continue;
 
-		if (connector->connector_type ==
-		    DRM_MODE_CONNECTOR_DisplayPort &&
-		    connector->status == connector_status_connected &&
+		if (connector->status == connector_status_connected &&
 		    connector->encoder != NULL) {
 			intel_dp = enc_to_intel_dp(connector->encoder);
 			status = kstrtoint(input_buffer, 10, &val);
@@ -4105,9 +4093,6 @@ static int i915_displayport_test_active_show(struct seq_file *m, void *data)
 	struct list_head *connector_list = &dev->mode_config.connector_list;
 	struct intel_dp *intel_dp;
 
-	if (!dev)
-		return -ENODEV;
-
 	list_for_each_entry(connector, connector_list, head) {
 
 		if (connector->connector_type !=
@@ -4152,9 +4137,6 @@ static int i915_displayport_test_data_show(struct seq_file *m, void *data)
 	struct list_head *connector_list = &dev->mode_config.connector_list;
 	struct intel_dp *intel_dp;
 
-	if (!dev)
-		return -ENODEV;
-
 	list_for_each_entry(connector, connector_list, head) {
 
 		if (connector->connector_type !=
@@ -4194,9 +4176,6 @@ static int i915_displayport_test_type_show(struct seq_file *m, void *data)
 	struct list_head *connector_list = &dev->mode_config.connector_list;
 	struct intel_dp *intel_dp;
 
-	if (!dev)
-		return -ENODEV;
-
 	list_for_each_entry(connector, connector_list, head) {
 
 		if (connector->connector_type !=
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index b1f9e5561cf2..ab37d1121be8 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1274,13 +1274,3 @@ const struct drm_ioctl_desc i915_ioctls[] = {
 };
 
 int i915_max_ioctl = ARRAY_SIZE(i915_ioctls);
-
-/*
- * This is really ugly: Because old userspace abused the linux agp interface to
- * manage the gtt, we need to claim that all intel devices are agp.  For
- * otherwise the drm core refuses to initialize the agp support code.
- */
-int i915_driver_device_is_agp(struct drm_device *dev)
-{
-	return 1;
-}
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 0d6775a3e88c..1d887459e37f 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -935,8 +935,6 @@ static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (PCI_FUNC(pdev->devfn))
 		return -ENODEV;
 
-	driver.driver_features &= ~(DRIVER_USE_AGP);
-
 	return drm_get_pci_dev(pdev, ent, &driver);
 }
 
@@ -1491,7 +1489,15 @@ static int intel_runtime_suspend(struct device *device)
 	 * FIXME: We really should find a document that references the arguments
 	 * used below!
 	 */
-	if (IS_HASWELL(dev)) {
+	if (IS_BROADWELL(dev)) {
+		/*
+		 * On Broadwell, if we use PCI_D1 the PCH DDI ports will stop
+		 * being detected, and the call we do at intel_runtime_resume()
+		 * won't be able to restore them. Since PCI_D3hot matches the
+		 * actual specification and appears to be working, use it.
+		 */
+		intel_opregion_notify_adapter(dev, PCI_D3hot);
+	} else {
 		/*
 		 * current versions of firmware which depend on this opregion
 		 * notification have repurposed the D1 definition to mean
@@ -1500,16 +1506,6 @@ static int intel_runtime_suspend(struct device *device)
 		 * the suspend path.
 		 */
 		intel_opregion_notify_adapter(dev, PCI_D1);
-	} else {
-		/*
-		 * On Broadwell, if we use PCI_D1 the PCH DDI ports will stop
-		 * being detected, and the call we do at intel_runtime_resume()
-		 * won't be able to restore them. Since PCI_D3hot matches the
-		 * actual specification and appears to be working, use it. Let's
-		 * assume the other non-Haswell platforms will stay the same as
-		 * Broadwell.
-		 */
-		intel_opregion_notify_adapter(dev, PCI_D3hot);
 	}
 
 	assert_forcewakes_inactive(dev_priv);
@@ -1649,7 +1645,6 @@ static struct drm_driver driver = {
 	 * deal with them for Intel hardware.
 	 */
 	.driver_features =
-	    DRIVER_USE_AGP |
 	    DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | DRIVER_PRIME |
 	    DRIVER_RENDER,
 	.load = i915_driver_load,
@@ -1664,7 +1659,6 @@ static struct drm_driver driver = {
 	.suspend = i915_suspend_legacy,
 	.resume = i915_resume_legacy,
 
-	.device_is_agp = i915_driver_device_is_agp,
 #if defined(CONFIG_DEBUG_FS)
 	.debugfs_init = i915_debugfs_init,
 	.debugfs_cleanup = i915_debugfs_cleanup,
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 23ce125e0298..4e9f7b16a729 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -56,7 +56,7 @@
 
 #define DRIVER_NAME		"i915"
 #define DRIVER_DESC		"Intel Graphics"
-#define DRIVER_DATE		"20150717"
+#define DRIVER_DATE		"20150731"
 
 #undef WARN_ON
 /* Many gcc seem to no see through this and fall over :( */
@@ -206,11 +206,11 @@ enum intel_display_power_domain {
 
 enum hpd_pin {
 	HPD_NONE = 0,
-	HPD_PORT_A = HPD_NONE, /* PORT_A is internal */
 	HPD_TV = HPD_NONE,     /* TV is known to be unreliable */
 	HPD_CRT,
 	HPD_SDVO_B,
 	HPD_SDVO_C,
+	HPD_PORT_A,
 	HPD_PORT_B,
 	HPD_PORT_C,
 	HPD_PORT_D,
@@ -742,7 +742,7 @@ enum csr_state {
 
 struct intel_csr {
 	const char *fw_path;
-	__be32 *dmc_payload;
+	uint32_t *dmc_payload;
 	uint32_t dmc_fw_size;
 	uint32_t mmio_count;
 	uint32_t mmioaddr[8];
@@ -894,6 +894,7 @@ enum fb_op_origin {
 	ORIGIN_CPU,
 	ORIGIN_CS,
 	ORIGIN_FLIP,
+	ORIGIN_DIRTYFB,
 };
 
 struct i915_fbc {
@@ -2610,6 +2611,8 @@ struct i915_params {
 	bool reset;
 	bool disable_display;
 	bool disable_vtd_wa;
+	bool enable_guc_submission;
+	int guc_log_level;
 	int use_mmio_flip;
 	int mmio_debug;
 	bool verbose_state_checks;
@@ -2626,7 +2629,6 @@ extern void i915_driver_preclose(struct drm_device *dev,
 				 struct drm_file *file);
 extern void i915_driver_postclose(struct drm_device *dev,
 				  struct drm_file *file);
-extern int i915_driver_device_is_agp(struct drm_device * dev);
 #ifdef CONFIG_COMPAT
 extern long i915_compat_ioctl(struct file *filp, unsigned int cmd,
 			      unsigned long arg);
@@ -2646,7 +2648,7 @@ void intel_hpd_irq_handler(struct drm_device *dev, u32 pin_mask, u32 long_mask);
 void intel_hpd_init(struct drm_i915_private *dev_priv);
 void intel_hpd_init_work(struct drm_i915_private *dev_priv);
 void intel_hpd_cancel_work(struct drm_i915_private *dev_priv);
-enum port intel_hpd_pin_to_port(enum hpd_pin pin);
+bool intel_hpd_pin_to_port(enum hpd_pin pin, enum port *port);
 
 /* i915_irq.c */
 void i915_queue_hangcheck(struct drm_device *dev);
@@ -2758,6 +2760,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 			 const struct drm_i915_gem_object_ops *ops);
 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 						  size_t size);
+struct drm_i915_gem_object *i915_gem_object_create_from_data(
+		struct drm_device *dev, const void *data, size_t size);
 void i915_init_vm(struct drm_i915_private *dev_priv,
 		  struct i915_address_space *vm);
 void i915_gem_free_object(struct drm_gem_object *obj);
@@ -2864,11 +2868,6 @@ static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req,
 
 int __must_check i915_gem_get_seqno(struct drm_device *dev, u32 *seqno);
 int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno);
-int __must_check i915_gem_object_get_fence(struct drm_i915_gem_object *obj);
-int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
-
-bool i915_gem_object_pin_fence(struct drm_i915_gem_object *obj);
-void i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj);
 
 struct drm_i915_gem_request *
 i915_gem_find_active_request(struct intel_engine_cs *ring);
@@ -2966,8 +2965,6 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
 struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
 				struct drm_gem_object *gem_obj, int flags);
 
-void i915_gem_restore_fences(struct drm_device *dev);
-
 unsigned long
 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
 			      const struct i915_ggtt_view *view);
@@ -3062,6 +3059,19 @@ i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj)
 	i915_gem_object_ggtt_unpin_view(obj, &i915_ggtt_view_normal);
 }
 
+/* i915_gem_fence.c */
+int __must_check i915_gem_object_get_fence(struct drm_i915_gem_object *obj);
+int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
+
+bool i915_gem_object_pin_fence(struct drm_i915_gem_object *obj);
+void i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj);
+
+void i915_gem_restore_fences(struct drm_device *dev);
+
+void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
+void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj);
+void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj);
+
 /* i915_gem_context.c */
 int __must_check i915_gem_context_init(struct drm_device *dev);
 void i915_gem_context_fini(struct drm_device *dev);
@@ -3154,10 +3164,6 @@ static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_objec
 		obj->tiling_mode != I915_TILING_NONE;
 }
 
-void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
-void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj);
-void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj);
-
 /* i915_gem_debug.c */
 #if WATCH_LISTS
 int i915_verify_lists(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d9f2701b4593..84f91bcc12f7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -46,11 +46,6 @@ static void
 i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
 static void
 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);
-static void i915_gem_write_fence(struct drm_device *dev, int reg,
-				 struct drm_i915_gem_object *obj);
-static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
-					 struct drm_i915_fence_reg *fence,
-					 bool enable);
 
 static bool cpu_cache_is_coherent(struct drm_device *dev,
 				  enum i915_cache_level level)
@@ -66,18 +61,6 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 	return obj->pin_display;
 }
 
-static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
-{
-	if (obj->tiling_mode)
-		i915_gem_release_mmap(obj);
-
-	/* As we do not have an associated fence register, we will force
-	 * a tiling change if we ever need to acquire one.
-	 */
-	obj->fence_dirty = false;
-	obj->fence_reg = I915_FENCE_REG_NONE;
-}
-
 /* some bookkeeping */
 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
 				  size_t size)
@@ -2402,6 +2385,13 @@ i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring)
 	if (obj->active)
 		return;
 
+	/* Bump our place on the bound list to keep it roughly in LRU order
+	 * so that we don't steal from recently used but inactive objects
+	 * (unless we are forced to ofc!)
+	 */
+	list_move_tail(&obj->global_list,
+		       &to_i915(obj->base.dev)->mm.bound_list);
+
 	list_for_each_entry(vma, &obj->vma_list, vma_link) {
 		if (!list_empty(&vma->mm_list))
 			list_move_tail(&vma->mm_list, &vma->vm->inactive_list);
@@ -2793,27 +2783,6 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
 	}
 }
 
-void i915_gem_restore_fences(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	int i;
-
-	for (i = 0; i < dev_priv->num_fence_regs; i++) {
-		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
-
-		/*
-		 * Commit delayed tiling changes if we have an object still
-		 * attached to the fence, otherwise just clear the fence.
-		 */
-		if (reg->obj) {
-			i915_gem_object_update_fence(reg->obj, reg,
-						     reg->obj->tiling_mode);
-		} else {
-			i915_gem_write_fence(dev, i, NULL);
-		}
-	}
-}
-
 void i915_gem_reset(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3340,343 +3309,6 @@ int i915_gpu_idle(struct drm_device *dev)
 	return 0;
 }
 
-static void i965_write_fence_reg(struct drm_device *dev, int reg,
-				 struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	int fence_reg;
-	int fence_pitch_shift;
-
-	if (INTEL_INFO(dev)->gen >= 6) {
-		fence_reg = FENCE_REG_SANDYBRIDGE_0;
-		fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT;
-	} else {
-		fence_reg = FENCE_REG_965_0;
-		fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
-	}
-
-	fence_reg += reg * 8;
-
-	/* To w/a incoherency with non-atomic 64-bit register updates,
-	 * we split the 64-bit update into two 32-bit writes. In order
-	 * for a partial fence not to be evaluated between writes, we
-	 * precede the update with write to turn off the fence register,
-	 * and only enable the fence as the last step.
-	 *
-	 * For extra levels of paranoia, we make sure each step lands
-	 * before applying the next step.
-	 */
-	I915_WRITE(fence_reg, 0);
-	POSTING_READ(fence_reg);
-
-	if (obj) {
-		u32 size = i915_gem_obj_ggtt_size(obj);
-		uint64_t val;
-
-		/* Adjust fence size to match tiled area */
-		if (obj->tiling_mode != I915_TILING_NONE) {
-			uint32_t row_size = obj->stride *
-				(obj->tiling_mode == I915_TILING_Y ? 32 : 8);
-			size = (size / row_size) * row_size;
-		}
-
-		val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) &
-				 0xfffff000) << 32;
-		val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000;
-		val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift;
-		if (obj->tiling_mode == I915_TILING_Y)
-			val |= 1 << I965_FENCE_TILING_Y_SHIFT;
-		val |= I965_FENCE_REG_VALID;
-
-		I915_WRITE(fence_reg + 4, val >> 32);
-		POSTING_READ(fence_reg + 4);
-
-		I915_WRITE(fence_reg + 0, val);
-		POSTING_READ(fence_reg);
-	} else {
-		I915_WRITE(fence_reg + 4, 0);
-		POSTING_READ(fence_reg + 4);
-	}
-}
-
-static void i915_write_fence_reg(struct drm_device *dev, int reg,
-				 struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 val;
-
-	if (obj) {
-		u32 size = i915_gem_obj_ggtt_size(obj);
-		int pitch_val;
-		int tile_width;
-
-		WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) ||
-		     (size & -size) != size ||
-		     (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
-		     "object 0x%08lx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
-		     i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size);
-
-		if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
-			tile_width = 128;
-		else
-			tile_width = 512;
-
-		/* Note: pitch better be a power of two tile widths */
-		pitch_val = obj->stride / tile_width;
-		pitch_val = ffs(pitch_val) - 1;
-
-		val = i915_gem_obj_ggtt_offset(obj);
-		if (obj->tiling_mode == I915_TILING_Y)
-			val |= 1 << I830_FENCE_TILING_Y_SHIFT;
-		val |= I915_FENCE_SIZE_BITS(size);
-		val |= pitch_val << I830_FENCE_PITCH_SHIFT;
-		val |= I830_FENCE_REG_VALID;
-	} else
-		val = 0;
-
-	if (reg < 8)
-		reg = FENCE_REG_830_0 + reg * 4;
-	else
-		reg = FENCE_REG_945_8 + (reg - 8) * 4;
-
-	I915_WRITE(reg, val);
-	POSTING_READ(reg);
-}
-
-static void i830_write_fence_reg(struct drm_device *dev, int reg,
-				struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	uint32_t val;
-
-	if (obj) {
-		u32 size = i915_gem_obj_ggtt_size(obj);
-		uint32_t pitch_val;
-
-		WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) ||
-		     (size & -size) != size ||
-		     (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
-		     "object 0x%08lx not 512K or pot-size 0x%08x aligned\n",
-		     i915_gem_obj_ggtt_offset(obj), size);
-
-		pitch_val = obj->stride / 128;
-		pitch_val = ffs(pitch_val) - 1;
-
-		val = i915_gem_obj_ggtt_offset(obj);
-		if (obj->tiling_mode == I915_TILING_Y)
-			val |= 1 << I830_FENCE_TILING_Y_SHIFT;
-		val |= I830_FENCE_SIZE_BITS(size);
-		val |= pitch_val << I830_FENCE_PITCH_SHIFT;
-		val |= I830_FENCE_REG_VALID;
-	} else
-		val = 0;
-
-	I915_WRITE(FENCE_REG_830_0 + reg * 4, val);
-	POSTING_READ(FENCE_REG_830_0 + reg * 4);
-}
-
-inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj)
-{
-	return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT;
-}
-
-static void i915_gem_write_fence(struct drm_device *dev, int reg,
-				 struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
-	/* Ensure that all CPU reads are completed before installing a fence
-	 * and all writes before removing the fence.
-	 */
-	if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
-		mb();
-
-	WARN(obj && (!obj->stride || !obj->tiling_mode),
-	     "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
-	     obj->stride, obj->tiling_mode);
-
-	if (IS_GEN2(dev))
-		i830_write_fence_reg(dev, reg, obj);
-	else if (IS_GEN3(dev))
-		i915_write_fence_reg(dev, reg, obj);
-	else if (INTEL_INFO(dev)->gen >= 4)
-		i965_write_fence_reg(dev, reg, obj);
-
-	/* And similarly be paranoid that no direct access to this region
-	 * is reordered to before the fence is installed.
-	 */
-	if (i915_gem_object_needs_mb(obj))
-		mb();
-}
-
-static inline int fence_number(struct drm_i915_private *dev_priv,
-			       struct drm_i915_fence_reg *fence)
-{
-	return fence - dev_priv->fence_regs;
-}
-
-static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
-					 struct drm_i915_fence_reg *fence,
-					 bool enable)
-{
-	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
-	int reg = fence_number(dev_priv, fence);
-
-	i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
-
-	if (enable) {
-		obj->fence_reg = reg;
-		fence->obj = obj;
-		list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
-	} else {
-		obj->fence_reg = I915_FENCE_REG_NONE;
-		fence->obj = NULL;
-		list_del_init(&fence->lru_list);
-	}
-	obj->fence_dirty = false;
-}
-
-static int
-i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
-{
-	if (obj->last_fenced_req) {
-		int ret = i915_wait_request(obj->last_fenced_req);
-		if (ret)
-			return ret;
-
-		i915_gem_request_assign(&obj->last_fenced_req, NULL);
-	}
-
-	return 0;
-}
-
-int
-i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
-	struct drm_i915_fence_reg *fence;
-	int ret;
-
-	ret = i915_gem_object_wait_fence(obj);
-	if (ret)
-		return ret;
-
-	if (obj->fence_reg == I915_FENCE_REG_NONE)
-		return 0;
-
-	fence = &dev_priv->fence_regs[obj->fence_reg];
-
-	if (WARN_ON(fence->pin_count))
-		return -EBUSY;
-
-	i915_gem_object_fence_lost(obj);
-	i915_gem_object_update_fence(obj, fence, false);
-
-	return 0;
-}
-
-static struct drm_i915_fence_reg *
-i915_find_fence_reg(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_fence_reg *reg, *avail;
-	int i;
-
-	/* First try to find a free reg */
-	avail = NULL;
-	for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
-		reg = &dev_priv->fence_regs[i];
-		if (!reg->obj)
-			return reg;
-
-		if (!reg->pin_count)
-			avail = reg;
-	}
-
-	if (avail == NULL)
-		goto deadlock;
-
-	/* None available, try to steal one or wait for a user to finish */
-	list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
-		if (reg->pin_count)
-			continue;
-
-		return reg;
-	}
-
-deadlock:
-	/* Wait for completion of pending flips which consume fences */
-	if (intel_has_pending_fb_unpin(dev))
-		return ERR_PTR(-EAGAIN);
-
-	return ERR_PTR(-EDEADLK);
-}
-
-/**
- * i915_gem_object_get_fence - set up fencing for an object
- * @obj: object to map through a fence reg
- *
- * When mapping objects through the GTT, userspace wants to be able to write
- * to them without having to worry about swizzling if the object is tiled.
- * This function walks the fence regs looking for a free one for @obj,
- * stealing one if it can't find any.
- *
- * It then sets up the reg based on the object's properties: address, pitch
- * and tiling format.
- *
- * For an untiled surface, this removes any existing fence.
- */
-int
-i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
-{
-	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	bool enable = obj->tiling_mode != I915_TILING_NONE;
-	struct drm_i915_fence_reg *reg;
-	int ret;
-
-	/* Have we updated the tiling parameters upon the object and so
-	 * will need to serialise the write to the associated fence register?
-	 */
-	if (obj->fence_dirty) {
-		ret = i915_gem_object_wait_fence(obj);
-		if (ret)
-			return ret;
-	}
-
-	/* Just update our place in the LRU if our fence is getting reused. */
-	if (obj->fence_reg != I915_FENCE_REG_NONE) {
-		reg = &dev_priv->fence_regs[obj->fence_reg];
-		if (!obj->fence_dirty) {
-			list_move_tail(&reg->lru_list,
-				       &dev_priv->mm.fence_list);
-			return 0;
-		}
-	} else if (enable) {
-		if (WARN_ON(!obj->map_and_fenceable))
-			return -EINVAL;
-
-		reg = i915_find_fence_reg(dev);
-		if (IS_ERR(reg))
-			return PTR_ERR(reg);
-
-		if (reg->obj) {
-			struct drm_i915_gem_object *old = reg->obj;
-
-			ret = i915_gem_object_wait_fence(old);
-			if (ret)
-				return ret;
-
-			i915_gem_object_fence_lost(old);
-		}
-	} else
-		return 0;
-
-	i915_gem_object_update_fence(obj, reg, enable);
-
-	return 0;
-}
-
 static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
 				     unsigned long cache_level)
 {
@@ -4476,32 +4108,6 @@ i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
 	--vma->pin_count;
 }
 
-bool
-i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
-{
-	if (obj->fence_reg != I915_FENCE_REG_NONE) {
-		struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
-		struct i915_vma *ggtt_vma = i915_gem_obj_to_ggtt(obj);
-
-		WARN_ON(!ggtt_vma ||
-			dev_priv->fence_regs[obj->fence_reg].pin_count >
-			ggtt_vma->pin_count);
-		dev_priv->fence_regs[obj->fence_reg].pin_count++;
-		return true;
-	} else
-		return false;
-}
-
-void
-i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj)
-{
-	if (obj->fence_reg != I915_FENCE_REG_NONE) {
-		struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
-		WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0);
-		dev_priv->fence_regs[obj->fence_reg].pin_count--;
-	}
-}
-
 int
 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 		    struct drm_file *file)
@@ -5477,3 +5083,43 @@ bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
 
 	return false;
 }
+
+/* Allocate a new GEM object and fill it with the supplied data */
+struct drm_i915_gem_object *
+i915_gem_object_create_from_data(struct drm_device *dev,
+			         const void *data, size_t size)
+{
+	struct drm_i915_gem_object *obj;
+	struct sg_table *sg;
+	size_t bytes;
+	int ret;
+
+	obj = i915_gem_alloc_object(dev, round_up(size, PAGE_SIZE));
+	if (IS_ERR_OR_NULL(obj))
+		return obj;
+
+	ret = i915_gem_object_set_to_cpu_domain(obj, true);
+	if (ret)
+		goto fail;
+
+	ret = i915_gem_object_get_pages(obj);
+	if (ret)
+		goto fail;
+
+	i915_gem_object_pin_pages(obj);
+	sg = obj->pages;
+	bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
+	i915_gem_object_unpin_pages(obj);
+
+	if (WARN_ON(bytes != size)) {
+		DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size);
+		ret = -EFAULT;
+		goto fail;
+	}
+
+	return obj;
+
+fail:
+	drm_gem_object_unreference(&obj->base);
+	return ERR_PTR(ret);
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
new file mode 100644
index 000000000000..af1f8c461060
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_fence.c
@@ -0,0 +1,787 @@
+/*
+ * Copyright © 2008-2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <drm/drmP.h>
+#include <drm/i915_drm.h>
+#include "i915_drv.h"
+
+/**
+ * DOC: fence register handling
+ *
+ * Important to avoid confusions: "fences" in the i915 driver are not execution
+ * fences used to track command completion but hardware detiler objects which
+ * wrap a given range of the global GTT. Each platform has only a fairly limited
+ * set of these objects.
+ *
+ * Fences are used to detile GTT memory mappings. They're also connected to the
+ * hardware frontbuffer render tracking and hence interract with frontbuffer
+ * conmpression. Furthermore on older platforms fences are required for tiled
+ * objects used by the display engine. They can also be used by the render
+ * engine - they're required for blitter commands and are optional for render
+ * commands. But on gen4+ both display (with the exception of fbc) and rendering
+ * have their own tiling state bits and don't need fences.
+ *
+ * Also note that fences only support X and Y tiling and hence can't be used for
+ * the fancier new tiling formats like W, Ys and Yf.
+ *
+ * Finally note that because fences are such a restricted resource they're
+ * dynamically associated with objects. Furthermore fence state is committed to
+ * the hardware lazily to avoid unecessary stalls on gen2/3. Therefore code must
+ * explictly call i915_gem_object_get_fence() to synchronize fencing status
+ * for cpu access. Also note that some code wants an unfenced view, for those
+ * cases the fence can be removed forcefully with i915_gem_object_put_fence().
+ *
+ * Internally these functions will synchronize with userspace access by removing
+ * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed.
+ */
+
+static void i965_write_fence_reg(struct drm_device *dev, int reg,
+				 struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int fence_reg;
+	int fence_pitch_shift;
+
+	if (INTEL_INFO(dev)->gen >= 6) {
+		fence_reg = FENCE_REG_SANDYBRIDGE_0;
+		fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT;
+	} else {
+		fence_reg = FENCE_REG_965_0;
+		fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
+	}
+
+	fence_reg += reg * 8;
+
+	/* To w/a incoherency with non-atomic 64-bit register updates,
+	 * we split the 64-bit update into two 32-bit writes. In order
+	 * for a partial fence not to be evaluated between writes, we
+	 * precede the update with write to turn off the fence register,
+	 * and only enable the fence as the last step.
+	 *
+	 * For extra levels of paranoia, we make sure each step lands
+	 * before applying the next step.
+	 */
+	I915_WRITE(fence_reg, 0);
+	POSTING_READ(fence_reg);
+
+	if (obj) {
+		u32 size = i915_gem_obj_ggtt_size(obj);
+		uint64_t val;
+
+		/* Adjust fence size to match tiled area */
+		if (obj->tiling_mode != I915_TILING_NONE) {
+			uint32_t row_size = obj->stride *
+				(obj->tiling_mode == I915_TILING_Y ? 32 : 8);
+			size = (size / row_size) * row_size;
+		}
+
+		val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) &
+				 0xfffff000) << 32;
+		val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000;
+		val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift;
+		if (obj->tiling_mode == I915_TILING_Y)
+			val |= 1 << I965_FENCE_TILING_Y_SHIFT;
+		val |= I965_FENCE_REG_VALID;
+
+		I915_WRITE(fence_reg + 4, val >> 32);
+		POSTING_READ(fence_reg + 4);
+
+		I915_WRITE(fence_reg + 0, val);
+		POSTING_READ(fence_reg);
+	} else {
+		I915_WRITE(fence_reg + 4, 0);
+		POSTING_READ(fence_reg + 4);
+	}
+}
+
+static void i915_write_fence_reg(struct drm_device *dev, int reg,
+				 struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32 val;
+
+	if (obj) {
+		u32 size = i915_gem_obj_ggtt_size(obj);
+		int pitch_val;
+		int tile_width;
+
+		WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) ||
+		     (size & -size) != size ||
+		     (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
+		     "object 0x%08lx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
+		     i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size);
+
+		if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
+			tile_width = 128;
+		else
+			tile_width = 512;
+
+		/* Note: pitch better be a power of two tile widths */
+		pitch_val = obj->stride / tile_width;
+		pitch_val = ffs(pitch_val) - 1;
+
+		val = i915_gem_obj_ggtt_offset(obj);
+		if (obj->tiling_mode == I915_TILING_Y)
+			val |= 1 << I830_FENCE_TILING_Y_SHIFT;
+		val |= I915_FENCE_SIZE_BITS(size);
+		val |= pitch_val << I830_FENCE_PITCH_SHIFT;
+		val |= I830_FENCE_REG_VALID;
+	} else
+		val = 0;
+
+	if (reg < 8)
+		reg = FENCE_REG_830_0 + reg * 4;
+	else
+		reg = FENCE_REG_945_8 + (reg - 8) * 4;
+
+	I915_WRITE(reg, val);
+	POSTING_READ(reg);
+}
+
+static void i830_write_fence_reg(struct drm_device *dev, int reg,
+				struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	uint32_t val;
+
+	if (obj) {
+		u32 size = i915_gem_obj_ggtt_size(obj);
+		uint32_t pitch_val;
+
+		WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) ||
+		     (size & -size) != size ||
+		     (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
+		     "object 0x%08lx not 512K or pot-size 0x%08x aligned\n",
+		     i915_gem_obj_ggtt_offset(obj), size);
+
+		pitch_val = obj->stride / 128;
+		pitch_val = ffs(pitch_val) - 1;
+
+		val = i915_gem_obj_ggtt_offset(obj);
+		if (obj->tiling_mode == I915_TILING_Y)
+			val |= 1 << I830_FENCE_TILING_Y_SHIFT;
+		val |= I830_FENCE_SIZE_BITS(size);
+		val |= pitch_val << I830_FENCE_PITCH_SHIFT;
+		val |= I830_FENCE_REG_VALID;
+	} else
+		val = 0;
+
+	I915_WRITE(FENCE_REG_830_0 + reg * 4, val);
+	POSTING_READ(FENCE_REG_830_0 + reg * 4);
+}
+
+inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj)
+{
+	return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT;
+}
+
+static void i915_gem_write_fence(struct drm_device *dev, int reg,
+				 struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	/* Ensure that all CPU reads are completed before installing a fence
+	 * and all writes before removing the fence.
+	 */
+	if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
+		mb();
+
+	WARN(obj && (!obj->stride || !obj->tiling_mode),
+	     "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
+	     obj->stride, obj->tiling_mode);
+
+	if (IS_GEN2(dev))
+		i830_write_fence_reg(dev, reg, obj);
+	else if (IS_GEN3(dev))
+		i915_write_fence_reg(dev, reg, obj);
+	else if (INTEL_INFO(dev)->gen >= 4)
+		i965_write_fence_reg(dev, reg, obj);
+
+	/* And similarly be paranoid that no direct access to this region
+	 * is reordered to before the fence is installed.
+	 */
+	if (i915_gem_object_needs_mb(obj))
+		mb();
+}
+
+static inline int fence_number(struct drm_i915_private *dev_priv,
+			       struct drm_i915_fence_reg *fence)
+{
+	return fence - dev_priv->fence_regs;
+}
+
+static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
+					 struct drm_i915_fence_reg *fence,
+					 bool enable)
+{
+	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+	int reg = fence_number(dev_priv, fence);
+
+	i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
+
+	if (enable) {
+		obj->fence_reg = reg;
+		fence->obj = obj;
+		list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
+	} else {
+		obj->fence_reg = I915_FENCE_REG_NONE;
+		fence->obj = NULL;
+		list_del_init(&fence->lru_list);
+	}
+	obj->fence_dirty = false;
+}
+
+static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
+{
+	if (obj->tiling_mode)
+		i915_gem_release_mmap(obj);
+
+	/* As we do not have an associated fence register, we will force
+	 * a tiling change if we ever need to acquire one.
+	 */
+	obj->fence_dirty = false;
+	obj->fence_reg = I915_FENCE_REG_NONE;
+}
+
+static int
+i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
+{
+	if (obj->last_fenced_req) {
+		int ret = i915_wait_request(obj->last_fenced_req);
+		if (ret)
+			return ret;
+
+		i915_gem_request_assign(&obj->last_fenced_req, NULL);
+	}
+
+	return 0;
+}
+
+/**
+ * i915_gem_object_put_fence - force-remove fence for an object
+ * @obj: object to map through a fence reg
+ *
+ * This function force-removes any fence from the given object, which is useful
+ * if the kernel wants to do untiled GTT access.
+ *
+ * Returns:
+ *
+ * 0 on success, negative error code on failure.
+ */
+int
+i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+	struct drm_i915_fence_reg *fence;
+	int ret;
+
+	ret = i915_gem_object_wait_fence(obj);
+	if (ret)
+		return ret;
+
+	if (obj->fence_reg == I915_FENCE_REG_NONE)
+		return 0;
+
+	fence = &dev_priv->fence_regs[obj->fence_reg];
+
+	if (WARN_ON(fence->pin_count))
+		return -EBUSY;
+
+	i915_gem_object_fence_lost(obj);
+	i915_gem_object_update_fence(obj, fence, false);
+
+	return 0;
+}
+
+static struct drm_i915_fence_reg *
+i915_find_fence_reg(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_fence_reg *reg, *avail;
+	int i;
+
+	/* First try to find a free reg */
+	avail = NULL;
+	for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
+		reg = &dev_priv->fence_regs[i];
+		if (!reg->obj)
+			return reg;
+
+		if (!reg->pin_count)
+			avail = reg;
+	}
+
+	if (avail == NULL)
+		goto deadlock;
+
+	/* None available, try to steal one or wait for a user to finish */
+	list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
+		if (reg->pin_count)
+			continue;
+
+		return reg;
+	}
+
+deadlock:
+	/* Wait for completion of pending flips which consume fences */
+	if (intel_has_pending_fb_unpin(dev))
+		return ERR_PTR(-EAGAIN);
+
+	return ERR_PTR(-EDEADLK);
+}
+
+/**
+ * i915_gem_object_get_fence - set up fencing for an object
+ * @obj: object to map through a fence reg
+ *
+ * When mapping objects through the GTT, userspace wants to be able to write
+ * to them without having to worry about swizzling if the object is tiled.
+ * This function walks the fence regs looking for a free one for @obj,
+ * stealing one if it can't find any.
+ *
+ * It then sets up the reg based on the object's properties: address, pitch
+ * and tiling format.
+ *
+ * For an untiled surface, this removes any existing fence.
+ *
+ * Returns:
+ *
+ * 0 on success, negative error code on failure.
+ */
+int
+i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
+{
+	struct drm_device *dev = obj->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	bool enable = obj->tiling_mode != I915_TILING_NONE;
+	struct drm_i915_fence_reg *reg;
+	int ret;
+
+	/* Have we updated the tiling parameters upon the object and so
+	 * will need to serialise the write to the associated fence register?
+	 */
+	if (obj->fence_dirty) {
+		ret = i915_gem_object_wait_fence(obj);
+		if (ret)
+			return ret;
+	}
+
+	/* Just update our place in the LRU if our fence is getting reused. */
+	if (obj->fence_reg != I915_FENCE_REG_NONE) {
+		reg = &dev_priv->fence_regs[obj->fence_reg];
+		if (!obj->fence_dirty) {
+			list_move_tail(&reg->lru_list,
+				       &dev_priv->mm.fence_list);
+			return 0;
+		}
+	} else if (enable) {
+		if (WARN_ON(!obj->map_and_fenceable))
+			return -EINVAL;
+
+		reg = i915_find_fence_reg(dev);
+		if (IS_ERR(reg))
+			return PTR_ERR(reg);
+
+		if (reg->obj) {
+			struct drm_i915_gem_object *old = reg->obj;
+
+			ret = i915_gem_object_wait_fence(old);
+			if (ret)
+				return ret;
+
+			i915_gem_object_fence_lost(old);
+		}
+	} else
+		return 0;
+
+	i915_gem_object_update_fence(obj, reg, enable);
+
+	return 0;
+}
+
+/**
+ * i915_gem_object_pin_fence - pin fencing state
+ * @obj: object to pin fencing for
+ *
+ * This pins the fencing state (whether tiled or untiled) to make sure the
+ * object is ready to be used as a scanout target. Fencing status must be
+ * synchronize first by calling i915_gem_object_get_fence():
+ *
+ * The resulting fence pin reference must be released again with
+ * i915_gem_object_unpin_fence().
+ *
+ * Returns:
+ *
+ * True if the object has a fence, false otherwise.
+ */
+bool
+i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
+{
+	if (obj->fence_reg != I915_FENCE_REG_NONE) {
+		struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+		struct i915_vma *ggtt_vma = i915_gem_obj_to_ggtt(obj);
+
+		WARN_ON(!ggtt_vma ||
+			dev_priv->fence_regs[obj->fence_reg].pin_count >
+			ggtt_vma->pin_count);
+		dev_priv->fence_regs[obj->fence_reg].pin_count++;
+		return true;
+	} else
+		return false;
+}
+
+/**
+ * i915_gem_object_unpin_fence - unpin fencing state
+ * @obj: object to unpin fencing for
+ *
+ * This releases the fence pin reference acquired through
+ * i915_gem_object_pin_fence. It will handle both objects with and without an
+ * attached fence correctly, callers do not need to distinguish this.
+ */
+void
+i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj)
+{
+	if (obj->fence_reg != I915_FENCE_REG_NONE) {
+		struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+		WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0);
+		dev_priv->fence_regs[obj->fence_reg].pin_count--;
+	}
+}
+
+/**
+ * i915_gem_restore_fences - restore fence state
+ * @dev: DRM device
+ *
+ * Restore the hw fence state to match the software tracking again, to be called
+ * after a gpu reset and on resume.
+ */
+void i915_gem_restore_fences(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int i;
+
+	for (i = 0; i < dev_priv->num_fence_regs; i++) {
+		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
+
+		/*
+		 * Commit delayed tiling changes if we have an object still
+		 * attached to the fence, otherwise just clear the fence.
+		 */
+		if (reg->obj) {
+			i915_gem_object_update_fence(reg->obj, reg,
+						     reg->obj->tiling_mode);
+		} else {
+			i915_gem_write_fence(dev, i, NULL);
+		}
+	}
+}
+
+/**
+ * DOC: tiling swizzling details
+ *
+ * The idea behind tiling is to increase cache hit rates by rearranging
+ * pixel data so that a group of pixel accesses are in the same cacheline.
+ * Performance improvement from doing this on the back/depth buffer are on
+ * the order of 30%.
+ *
+ * Intel architectures make this somewhat more complicated, though, by
+ * adjustments made to addressing of data when the memory is in interleaved
+ * mode (matched pairs of DIMMS) to improve memory bandwidth.
+ * For interleaved memory, the CPU sends every sequential 64 bytes
+ * to an alternate memory channel so it can get the bandwidth from both.
+ *
+ * The GPU also rearranges its accesses for increased bandwidth to interleaved
+ * memory, and it matches what the CPU does for non-tiled.  However, when tiled
+ * it does it a little differently, since one walks addresses not just in the
+ * X direction but also Y.  So, along with alternating channels when bit
+ * 6 of the address flips, it also alternates when other bits flip --  Bits 9
+ * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines)
+ * are common to both the 915 and 965-class hardware.
+ *
+ * The CPU also sometimes XORs in higher bits as well, to improve
+ * bandwidth doing strided access like we do so frequently in graphics.  This
+ * is called "Channel XOR Randomization" in the MCH documentation.  The result
+ * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address
+ * decode.
+ *
+ * All of this bit 6 XORing has an effect on our memory management,
+ * as we need to make sure that the 3d driver can correctly address object
+ * contents.
+ *
+ * If we don't have interleaved memory, all tiling is safe and no swizzling is
+ * required.
+ *
+ * When bit 17 is XORed in, we simply refuse to tile at all.  Bit
+ * 17 is not just a page offset, so as we page an objet out and back in,
+ * individual pages in it will have different bit 17 addresses, resulting in
+ * each 64 bytes being swapped with its neighbor!
+ *
+ * Otherwise, if interleaved, we have to tell the 3d driver what the address
+ * swizzling it needs to do is, since it's writing with the CPU to the pages
+ * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the
+ * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling
+ * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order
+ * to match what the GPU expects.
+ */
+
+/**
+ * i915_gem_detect_bit_6_swizzle - detect bit 6 swizzling pattern
+ * @dev: DRM device
+ *
+ * Detects bit 6 swizzling of address lookup between IGD access and CPU
+ * access through main memory.
+ */
+void
+i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
+	uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
+
+	if (INTEL_INFO(dev)->gen >= 8 || IS_VALLEYVIEW(dev)) {
+		/*
+		 * On BDW+, swizzling is not used. We leave the CPU memory
+		 * controller in charge of optimizing memory accesses without
+		 * the extra address manipulation GPU side.
+		 *
+		 * VLV and CHV don't have GPU swizzling.
+		 */
+		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+	} else if (INTEL_INFO(dev)->gen >= 6) {
+		if (dev_priv->preserve_bios_swizzle) {
+			if (I915_READ(DISP_ARB_CTL) &
+			    DISP_TILE_SURFACE_SWIZZLING) {
+				swizzle_x = I915_BIT_6_SWIZZLE_9_10;
+				swizzle_y = I915_BIT_6_SWIZZLE_9;
+			} else {
+				swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+				swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+			}
+		} else {
+			uint32_t dimm_c0, dimm_c1;
+			dimm_c0 = I915_READ(MAD_DIMM_C0);
+			dimm_c1 = I915_READ(MAD_DIMM_C1);
+			dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
+			dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
+			/* Enable swizzling when the channels are populated
+			 * with identically sized dimms. We don't need to check
+			 * the 3rd channel because no cpu with gpu attached
+			 * ships in that configuration. Also, swizzling only
+			 * makes sense for 2 channels anyway. */
+			if (dimm_c0 == dimm_c1) {
+				swizzle_x = I915_BIT_6_SWIZZLE_9_10;
+				swizzle_y = I915_BIT_6_SWIZZLE_9;
+			} else {
+				swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+				swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+			}
+		}
+	} else if (IS_GEN5(dev)) {
+		/* On Ironlake whatever DRAM config, GPU always do
+		 * same swizzling setup.
+		 */
+		swizzle_x = I915_BIT_6_SWIZZLE_9_10;
+		swizzle_y = I915_BIT_6_SWIZZLE_9;
+	} else if (IS_GEN2(dev)) {
+		/* As far as we know, the 865 doesn't have these bit 6
+		 * swizzling issues.
+		 */
+		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+	} else if (IS_MOBILE(dev) || (IS_GEN3(dev) && !IS_G33(dev))) {
+		uint32_t dcc;
+
+		/* On 9xx chipsets, channel interleave by the CPU is
+		 * determined by DCC.  For single-channel, neither the CPU
+		 * nor the GPU do swizzling.  For dual channel interleaved,
+		 * the GPU's interleave is bit 9 and 10 for X tiled, and bit
+		 * 9 for Y tiled.  The CPU's interleave is independent, and
+		 * can be based on either bit 11 (haven't seen this yet) or
+		 * bit 17 (common).
+		 */
+		dcc = I915_READ(DCC);
+		switch (dcc & DCC_ADDRESSING_MODE_MASK) {
+		case DCC_ADDRESSING_MODE_SINGLE_CHANNEL:
+		case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC:
+			swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+			swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+			break;
+		case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED:
+			if (dcc & DCC_CHANNEL_XOR_DISABLE) {
+				/* This is the base swizzling by the GPU for
+				 * tiled buffers.
+				 */
+				swizzle_x = I915_BIT_6_SWIZZLE_9_10;
+				swizzle_y = I915_BIT_6_SWIZZLE_9;
+			} else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) {
+				/* Bit 11 swizzling by the CPU in addition. */
+				swizzle_x = I915_BIT_6_SWIZZLE_9_10_11;
+				swizzle_y = I915_BIT_6_SWIZZLE_9_11;
+			} else {
+				/* Bit 17 swizzling by the CPU in addition. */
+				swizzle_x = I915_BIT_6_SWIZZLE_9_10_17;
+				swizzle_y = I915_BIT_6_SWIZZLE_9_17;
+			}
+			break;
+		}
+
+		/* check for L-shaped memory aka modified enhanced addressing */
+		if (IS_GEN4(dev)) {
+			uint32_t ddc2 = I915_READ(DCC2);
+
+			if (!(ddc2 & DCC2_MODIFIED_ENHANCED_DISABLE))
+				dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES;
+		}
+
+		if (dcc == 0xffffffff) {
+			DRM_ERROR("Couldn't read from MCHBAR.  "
+				  "Disabling tiling.\n");
+			swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
+			swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
+		}
+	} else {
+		/* The 965, G33, and newer, have a very flexible memory
+		 * configuration.  It will enable dual-channel mode
+		 * (interleaving) on as much memory as it can, and the GPU
+		 * will additionally sometimes enable different bit 6
+		 * swizzling for tiled objects from the CPU.
+		 *
+		 * Here's what I found on the G965:
+		 *    slot fill         memory size  swizzling
+		 * 0A   0B   1A   1B    1-ch   2-ch
+		 * 512  0    0    0     512    0     O
+		 * 512  0    512  0     16     1008  X
+		 * 512  0    0    512   16     1008  X
+		 * 0    512  0    512   16     1008  X
+		 * 1024 1024 1024 0     2048   1024  O
+		 *
+		 * We could probably detect this based on either the DRB
+		 * matching, which was the case for the swizzling required in
+		 * the table above, or from the 1-ch value being less than
+		 * the minimum size of a rank.
+		 */
+		if (I915_READ16(C0DRB3) != I915_READ16(C1DRB3)) {
+			swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+			swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+		} else {
+			swizzle_x = I915_BIT_6_SWIZZLE_9_10;
+			swizzle_y = I915_BIT_6_SWIZZLE_9;
+		}
+	}
+
+	dev_priv->mm.bit_6_swizzle_x = swizzle_x;
+	dev_priv->mm.bit_6_swizzle_y = swizzle_y;
+}
+
+/*
+ * Swap every 64 bytes of this page around, to account for it having a new
+ * bit 17 of its physical address and therefore being interpreted differently
+ * by the GPU.
+ */
+static void
+i915_gem_swizzle_page(struct page *page)
+{
+	char temp[64];
+	char *vaddr;
+	int i;
+
+	vaddr = kmap(page);
+
+	for (i = 0; i < PAGE_SIZE; i += 128) {
+		memcpy(temp, &vaddr[i], 64);
+		memcpy(&vaddr[i], &vaddr[i + 64], 64);
+		memcpy(&vaddr[i + 64], temp, 64);
+	}
+
+	kunmap(page);
+}
+
+/**
+ * i915_gem_object_do_bit_17_swizzle - fixup bit 17 swizzling
+ * @obj: i915 GEM buffer object
+ *
+ * This function fixes up the swizzling in case any page frame number for this
+ * object has changed in bit 17 since that state has been saved with
+ * i915_gem_object_save_bit_17_swizzle().
+ *
+ * This is called when pinning backing storage again, since the kernel is free
+ * to move unpinned backing storage around (either by directly moving pages or
+ * by swapping them out and back in again).
+ */
+void
+i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
+{
+	struct sg_page_iter sg_iter;
+	int i;
+
+	if (obj->bit_17 == NULL)
+		return;
+
+	i = 0;
+	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
+		struct page *page = sg_page_iter_page(&sg_iter);
+		char new_bit_17 = page_to_phys(page) >> 17;
+		if ((new_bit_17 & 0x1) !=
+		    (test_bit(i, obj->bit_17) != 0)) {
+			i915_gem_swizzle_page(page);
+			set_page_dirty(page);
+		}
+		i++;
+	}
+}
+
+/**
+ * i915_gem_object_save_bit_17_swizzle - save bit 17 swizzling
+ * @obj: i915 GEM buffer object
+ *
+ * This function saves the bit 17 of each page frame number so that swizzling
+ * can be fixed up later on with i915_gem_object_do_bit_17_swizzle(). This must
+ * be called before the backing storage can be unpinned.
+ */
+void
+i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
+{
+	struct sg_page_iter sg_iter;
+	int page_count = obj->base.size >> PAGE_SHIFT;
+	int i;
+
+	if (obj->bit_17 == NULL) {
+		obj->bit_17 = kcalloc(BITS_TO_LONGS(page_count),
+				      sizeof(long), GFP_KERNEL);
+		if (obj->bit_17 == NULL) {
+			DRM_ERROR("Failed to allocate memory for bit 17 "
+				  "record\n");
+			return;
+		}
+	}
+
+	i = 0;
+	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
+		if (page_to_phys(sg_page_iter_page(&sg_iter)) & (1 << 17))
+			__set_bit(i, obj->bit_17);
+		else
+			__clear_bit(i, obj->bit_17);
+		i++;
+	}
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index a0201fc94d25..5026a6267a88 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -73,6 +73,24 @@ free_gem:
 	return ret;
 }
 
+/*
+ * Macro to add commands to auxiliary batch.
+ * This macro only checks for page overflow before inserting the commands,
+ * this is sufficient as the null state generator makes the final batch
+ * with two passes to build command and state separately. At this point
+ * the size of both are known and it compacts them by relocating the state
+ * right after the commands taking care of aligment so we should sufficient
+ * space below them for adding new commands.
+ */
+#define OUT_BATCH(batch, i, val)				\
+	do {							\
+		if (WARN_ON((i) >= PAGE_SIZE / sizeof(u32))) {	\
+			ret = -ENOSPC;				\
+			goto err_out;				\
+		}						\
+		(batch)[(i)++] = (val);				\
+	} while(0)
+
 static int render_state_setup(struct render_state *so)
 {
 	const struct intel_renderstate_rodata *rodata = so->rodata;
@@ -96,8 +114,10 @@ static int render_state_setup(struct render_state *so)
 			s = lower_32_bits(r);
 			if (so->gen >= 8) {
 				if (i + 1 >= rodata->batch_items ||
-				    rodata->batch[i + 1] != 0)
-					return -EINVAL;
+				    rodata->batch[i + 1] != 0) {
+					ret = -EINVAL;
+					goto err_out;
+				}
 
 				d[i++] = s;
 				s = upper_32_bits(r);
@@ -108,6 +128,21 @@ static int render_state_setup(struct render_state *so)
 
 		d[i++] = s;
 	}
+
+	while (i % CACHELINE_DWORDS)
+		OUT_BATCH(d, i, MI_NOOP);
+
+	so->aux_batch_offset = i * sizeof(u32);
+
+	OUT_BATCH(d, i, MI_BATCH_BUFFER_END);
+	so->aux_batch_size = (i * sizeof(u32)) - so->aux_batch_offset;
+
+	/*
+	 * Since we are sending length, we need to strictly conform to
+	 * all requirements. For Gen2 this must be a multiple of 8.
+	 */
+	so->aux_batch_size = ALIGN(so->aux_batch_size, 8);
+
 	kunmap(page);
 
 	ret = i915_gem_object_set_to_gtt_domain(so->obj, false);
@@ -120,8 +155,14 @@ static int render_state_setup(struct render_state *so)
 	}
 
 	return 0;
+
+err_out:
+	kunmap(page);
+	return ret;
 }
 
+#undef OUT_BATCH
+
 void i915_gem_render_state_fini(struct render_state *so)
 {
 	i915_gem_object_ggtt_unpin(so->obj);
@@ -170,6 +211,16 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req)
 	if (ret)
 		goto out;
 
+	if (so.aux_batch_size > 8) {
+		ret = req->ring->dispatch_execbuffer(req,
+						     (so.ggtt_offset +
+						      so.aux_batch_offset),
+						     so.aux_batch_size,
+						     I915_DISPATCH_SECURE);
+		if (ret)
+			goto out;
+	}
+
 	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
 
 out:
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h b/drivers/gpu/drm/i915/i915_gem_render_state.h
index 7aa73728178a..e641bb093a90 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.h
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.h
@@ -37,6 +37,8 @@ struct render_state {
 	struct drm_i915_gem_object *obj;
 	u64 ggtt_offset;
 	int gen;
+	u32 aux_batch_size;
+	u32 aux_batch_offset;
 };
 
 int i915_gem_render_state_init(struct drm_i915_gem_request *req);
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 633bd1fcab69..ac3eb566c9d2 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -31,201 +31,32 @@
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 
-/** @file i915_gem_tiling.c
- *
- * Support for managing tiling state of buffer objects.
- *
- * The idea behind tiling is to increase cache hit rates by rearranging
- * pixel data so that a group of pixel accesses are in the same cacheline.
- * Performance improvement from doing this on the back/depth buffer are on
- * the order of 30%.
- *
- * Intel architectures make this somewhat more complicated, though, by
- * adjustments made to addressing of data when the memory is in interleaved
- * mode (matched pairs of DIMMS) to improve memory bandwidth.
- * For interleaved memory, the CPU sends every sequential 64 bytes
- * to an alternate memory channel so it can get the bandwidth from both.
- *
- * The GPU also rearranges its accesses for increased bandwidth to interleaved
- * memory, and it matches what the CPU does for non-tiled.  However, when tiled
- * it does it a little differently, since one walks addresses not just in the
- * X direction but also Y.  So, along with alternating channels when bit
- * 6 of the address flips, it also alternates when other bits flip --  Bits 9
- * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines)
- * are common to both the 915 and 965-class hardware.
- *
- * The CPU also sometimes XORs in higher bits as well, to improve
- * bandwidth doing strided access like we do so frequently in graphics.  This
- * is called "Channel XOR Randomization" in the MCH documentation.  The result
- * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address
- * decode.
+/**
+ * DOC: buffer object tiling
  *
- * All of this bit 6 XORing has an effect on our memory management,
- * as we need to make sure that the 3d driver can correctly address object
- * contents.
+ * i915_gem_set_tiling() and i915_gem_get_tiling() is the userspace interface to
+ * declare fence register requirements.
  *
- * If we don't have interleaved memory, all tiling is safe and no swizzling is
- * required.
+ * In principle GEM doesn't care at all about the internal data layout of an
+ * object, and hence it also doesn't care about tiling or swizzling. There's two
+ * exceptions:
  *
- * When bit 17 is XORed in, we simply refuse to tile at all.  Bit
- * 17 is not just a page offset, so as we page an objet out and back in,
- * individual pages in it will have different bit 17 addresses, resulting in
- * each 64 bytes being swapped with its neighbor!
+ * - For X and Y tiling the hardware provides detilers for CPU access, so called
+ *   fences. Since there's only a limited amount of them the kernel must manage
+ *   these, and therefore userspace must tell the kernel the object tiling if it
+ *   wants to use fences for detiling.
+ * - On gen3 and gen4 platforms have a swizzling pattern for tiled objects which
+ *   depends upon the physical page frame number. When swapping such objects the
+ *   page frame number might change and the kernel must be able to fix this up
+ *   and hence now the tiling. Note that on a subset of platforms with
+ *   asymmetric memory channel population the swizzling pattern changes in an
+ *   unknown way, and for those the kernel simply forbids swapping completely.
  *
- * Otherwise, if interleaved, we have to tell the 3d driver what the address
- * swizzling it needs to do is, since it's writing with the CPU to the pages
- * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the
- * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling
- * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order
- * to match what the GPU expects.
- */
-
-/**
- * Detects bit 6 swizzling of address lookup between IGD access and CPU
- * access through main memory.
+ * Since neither of this applies for new tiling layouts on modern platforms like
+ * W, Ys and Yf tiling GEM only allows object tiling to be set to X or Y tiled.
+ * Anything else can be handled in userspace entirely without the kernel's
+ * invovlement.
  */
-void
-i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
-	uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
-
-	if (INTEL_INFO(dev)->gen >= 8 || IS_VALLEYVIEW(dev)) {
-		/*
-		 * On BDW+, swizzling is not used. We leave the CPU memory
-		 * controller in charge of optimizing memory accesses without
-		 * the extra address manipulation GPU side.
-		 *
-		 * VLV and CHV don't have GPU swizzling.
-		 */
-		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
-		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
-	} else if (INTEL_INFO(dev)->gen >= 6) {
-		if (dev_priv->preserve_bios_swizzle) {
-			if (I915_READ(DISP_ARB_CTL) &
-			    DISP_TILE_SURFACE_SWIZZLING) {
-				swizzle_x = I915_BIT_6_SWIZZLE_9_10;
-				swizzle_y = I915_BIT_6_SWIZZLE_9;
-			} else {
-				swizzle_x = I915_BIT_6_SWIZZLE_NONE;
-				swizzle_y = I915_BIT_6_SWIZZLE_NONE;
-			}
-		} else {
-			uint32_t dimm_c0, dimm_c1;
-			dimm_c0 = I915_READ(MAD_DIMM_C0);
-			dimm_c1 = I915_READ(MAD_DIMM_C1);
-			dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
-			dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
-			/* Enable swizzling when the channels are populated
-			 * with identically sized dimms. We don't need to check
-			 * the 3rd channel because no cpu with gpu attached
-			 * ships in that configuration. Also, swizzling only
-			 * makes sense for 2 channels anyway. */
-			if (dimm_c0 == dimm_c1) {
-				swizzle_x = I915_BIT_6_SWIZZLE_9_10;
-				swizzle_y = I915_BIT_6_SWIZZLE_9;
-			} else {
-				swizzle_x = I915_BIT_6_SWIZZLE_NONE;
-				swizzle_y = I915_BIT_6_SWIZZLE_NONE;
-			}
-		}
-	} else if (IS_GEN5(dev)) {
-		/* On Ironlake whatever DRAM config, GPU always do
-		 * same swizzling setup.
-		 */
-		swizzle_x = I915_BIT_6_SWIZZLE_9_10;
-		swizzle_y = I915_BIT_6_SWIZZLE_9;
-	} else if (IS_GEN2(dev)) {
-		/* As far as we know, the 865 doesn't have these bit 6
-		 * swizzling issues.
-		 */
-		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
-		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
-	} else if (IS_MOBILE(dev) || (IS_GEN3(dev) && !IS_G33(dev))) {
-		uint32_t dcc;
-
-		/* On 9xx chipsets, channel interleave by the CPU is
-		 * determined by DCC.  For single-channel, neither the CPU
-		 * nor the GPU do swizzling.  For dual channel interleaved,
-		 * the GPU's interleave is bit 9 and 10 for X tiled, and bit
-		 * 9 for Y tiled.  The CPU's interleave is independent, and
-		 * can be based on either bit 11 (haven't seen this yet) or
-		 * bit 17 (common).
-		 */
-		dcc = I915_READ(DCC);
-		switch (dcc & DCC_ADDRESSING_MODE_MASK) {
-		case DCC_ADDRESSING_MODE_SINGLE_CHANNEL:
-		case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC:
-			swizzle_x = I915_BIT_6_SWIZZLE_NONE;
-			swizzle_y = I915_BIT_6_SWIZZLE_NONE;
-			break;
-		case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED:
-			if (dcc & DCC_CHANNEL_XOR_DISABLE) {
-				/* This is the base swizzling by the GPU for
-				 * tiled buffers.
-				 */
-				swizzle_x = I915_BIT_6_SWIZZLE_9_10;
-				swizzle_y = I915_BIT_6_SWIZZLE_9;
-			} else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) {
-				/* Bit 11 swizzling by the CPU in addition. */
-				swizzle_x = I915_BIT_6_SWIZZLE_9_10_11;
-				swizzle_y = I915_BIT_6_SWIZZLE_9_11;
-			} else {
-				/* Bit 17 swizzling by the CPU in addition. */
-				swizzle_x = I915_BIT_6_SWIZZLE_9_10_17;
-				swizzle_y = I915_BIT_6_SWIZZLE_9_17;
-			}
-			break;
-		}
-
-		/* check for L-shaped memory aka modified enhanced addressing */
-		if (IS_GEN4(dev)) {
-			uint32_t ddc2 = I915_READ(DCC2);
-
-			if (!(ddc2 & DCC2_MODIFIED_ENHANCED_DISABLE))
-				dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES;
-		}
-
-		if (dcc == 0xffffffff) {
-			DRM_ERROR("Couldn't read from MCHBAR.  "
-				  "Disabling tiling.\n");
-			swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
-			swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
-		}
-	} else {
-		/* The 965, G33, and newer, have a very flexible memory
-		 * configuration.  It will enable dual-channel mode
-		 * (interleaving) on as much memory as it can, and the GPU
-		 * will additionally sometimes enable different bit 6
-		 * swizzling for tiled objects from the CPU.
-		 *
-		 * Here's what I found on the G965:
-		 *    slot fill         memory size  swizzling
-		 * 0A   0B   1A   1B    1-ch   2-ch
-		 * 512  0    0    0     512    0     O
-		 * 512  0    512  0     16     1008  X
-		 * 512  0    0    512   16     1008  X
-		 * 0    512  0    512   16     1008  X
-		 * 1024 1024 1024 0     2048   1024  O
-		 *
-		 * We could probably detect this based on either the DRB
-		 * matching, which was the case for the swizzling required in
-		 * the table above, or from the 1-ch value being less than
-		 * the minimum size of a rank.
-		 */
-		if (I915_READ16(C0DRB3) != I915_READ16(C1DRB3)) {
-			swizzle_x = I915_BIT_6_SWIZZLE_NONE;
-			swizzle_y = I915_BIT_6_SWIZZLE_NONE;
-		} else {
-			swizzle_x = I915_BIT_6_SWIZZLE_9_10;
-			swizzle_y = I915_BIT_6_SWIZZLE_9;
-		}
-	}
-
-	dev_priv->mm.bit_6_swizzle_x = swizzle_x;
-	dev_priv->mm.bit_6_swizzle_y = swizzle_y;
-}
 
 /* Check pitch constriants for all chips & tiling formats */
 static bool
@@ -313,8 +144,18 @@ i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode)
 }
 
 /**
+ * i915_gem_set_tiling - IOCTL handler to set tiling mode
+ * @dev: DRM device
+ * @data: data pointer for the ioctl
+ * @file: DRM file for the ioctl call
+ *
  * Sets the tiling mode of an object, returning the required swizzling of
  * bit 6 of addresses in the object.
+ *
+ * Called by the user via ioctl.
+ *
+ * Returns:
+ * Zero on success, negative errno on failure.
  */
 int
 i915_gem_set_tiling(struct drm_device *dev, void *data,
@@ -432,7 +273,17 @@ err:
 }
 
 /**
+ * i915_gem_get_tiling - IOCTL handler to get tiling mode
+ * @dev: DRM device
+ * @data: data pointer for the ioctl
+ * @file: DRM file for the ioctl call
+ *
  * Returns the current tiling mode and required bit 6 swizzling for the object.
+ *
+ * Called by the user via ioctl.
+ *
+ * Returns:
+ * Zero on success, negative errno on failure.
  */
 int
 i915_gem_get_tiling(struct drm_device *dev, void *data,
@@ -475,75 +326,3 @@ i915_gem_get_tiling(struct drm_device *dev, void *data,
 
 	return 0;
 }
-
-/**
- * Swap every 64 bytes of this page around, to account for it having a new
- * bit 17 of its physical address and therefore being interpreted differently
- * by the GPU.
- */
-static void
-i915_gem_swizzle_page(struct page *page)
-{
-	char temp[64];
-	char *vaddr;
-	int i;
-
-	vaddr = kmap(page);
-
-	for (i = 0; i < PAGE_SIZE; i += 128) {
-		memcpy(temp, &vaddr[i], 64);
-		memcpy(&vaddr[i], &vaddr[i + 64], 64);
-		memcpy(&vaddr[i + 64], temp, 64);
-	}
-
-	kunmap(page);
-}
-
-void
-i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
-{
-	struct sg_page_iter sg_iter;
-	int i;
-
-	if (obj->bit_17 == NULL)
-		return;
-
-	i = 0;
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
-		struct page *page = sg_page_iter_page(&sg_iter);
-		char new_bit_17 = page_to_phys(page) >> 17;
-		if ((new_bit_17 & 0x1) !=
-		    (test_bit(i, obj->bit_17) != 0)) {
-			i915_gem_swizzle_page(page);
-			set_page_dirty(page);
-		}
-		i++;
-	}
-}
-
-void
-i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
-{
-	struct sg_page_iter sg_iter;
-	int page_count = obj->base.size >> PAGE_SHIFT;
-	int i;
-
-	if (obj->bit_17 == NULL) {
-		obj->bit_17 = kcalloc(BITS_TO_LONGS(page_count),
-				      sizeof(long), GFP_KERNEL);
-		if (obj->bit_17 == NULL) {
-			DRM_ERROR("Failed to allocate memory for bit 17 "
-				  "record\n");
-			return;
-		}
-	}
-
-	i = 0;
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
-		if (page_to_phys(sg_page_iter_page(&sg_iter)) & (1 << 17))
-			__set_bit(i, obj->bit_17);
-		else
-			__clear_bit(i, obj->bit_17);
-		i++;
-	}
-}
diff --git a/drivers/gpu/drm/i915/i915_guc_reg.h b/drivers/gpu/drm/i915/i915_guc_reg.h
new file mode 100644
index 000000000000..ccdc6c8ac20b
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_guc_reg.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+#ifndef _I915_GUC_REG_H_
+#define _I915_GUC_REG_H_
+
+/* Definitions of GuC H/W registers, bits, etc */
+
+#define GUC_STATUS			0xc000
+#define   GS_BOOTROM_SHIFT		1
+#define   GS_BOOTROM_MASK		  (0x7F << GS_BOOTROM_SHIFT)
+#define   GS_BOOTROM_RSA_FAILED		  (0x50 << GS_BOOTROM_SHIFT)
+#define   GS_UKERNEL_SHIFT		8
+#define   GS_UKERNEL_MASK		  (0xFF << GS_UKERNEL_SHIFT)
+#define   GS_UKERNEL_LAPIC_DONE		  (0x30 << GS_UKERNEL_SHIFT)
+#define   GS_UKERNEL_DPC_ERROR		  (0x60 << GS_UKERNEL_SHIFT)
+#define   GS_UKERNEL_READY		  (0xF0 << GS_UKERNEL_SHIFT)
+#define   GS_MIA_SHIFT			16
+#define   GS_MIA_MASK			  (0x07 << GS_MIA_SHIFT)
+
+#define GUC_WOPCM_SIZE			0xc050
+#define   GUC_WOPCM_SIZE_VALUE  	  (0x80 << 12)	/* 512KB */
+#define GUC_WOPCM_OFFSET		0x80000		/* 512KB */
+
+#define SOFT_SCRATCH(n)			(0xc180 + ((n) * 4))
+
+#define UOS_RSA_SCRATCH_0		0xc200
+#define DMA_ADDR_0_LOW			0xc300
+#define DMA_ADDR_0_HIGH			0xc304
+#define DMA_ADDR_1_LOW			0xc308
+#define DMA_ADDR_1_HIGH			0xc30c
+#define   DMA_ADDRESS_SPACE_WOPCM	  (7 << 16)
+#define   DMA_ADDRESS_SPACE_GTT		  (8 << 16)
+#define DMA_COPY_SIZE			0xc310
+#define DMA_CTRL			0xc314
+#define   UOS_MOVE			  (1<<4)
+#define   START_DMA			  (1<<0)
+#define DMA_GUC_WOPCM_OFFSET		0xc340
+
+#define GEN8_GT_PM_CONFIG		0x138140
+#define GEN9_GT_PM_CONFIG		0x13816c
+#define   GEN8_GT_DOORBELL_ENABLE	  (1<<0)
+
+#define GEN8_GTCR			0x4274
+#define   GEN8_GTCR_INVALIDATE		  (1<<0)
+
+#define GUC_ARAT_C6DIS			0xA178
+
+#define GUC_SHIM_CONTROL		0xc064
+#define   GUC_DISABLE_SRAM_INIT_TO_ZEROES	(1<<0)
+#define   GUC_ENABLE_READ_CACHE_LOGIC		(1<<1)
+#define   GUC_ENABLE_MIA_CACHING		(1<<2)
+#define   GUC_GEN10_MSGCH_ENABLE		(1<<4)
+#define   GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA	(1<<9)
+#define   GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA	(1<<10)
+#define   GUC_ENABLE_MIA_CLOCK_GATING		(1<<15)
+#define   GUC_GEN10_SHIM_WC_ENABLE		(1<<21)
+
+#define GUC_SHIM_CONTROL_VALUE	(GUC_DISABLE_SRAM_INIT_TO_ZEROES	| \
+				 GUC_ENABLE_READ_CACHE_LOGIC		| \
+				 GUC_ENABLE_MIA_CACHING			| \
+				 GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA	| \
+				 GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA)
+
+#define HOST2GUC_INTERRUPT		0xc4c8
+#define   HOST2GUC_TRIGGER		  (1<<0)
+
+#define DRBMISC1			0x1984
+#define   DOORBELL_ENABLE		  (1<<0)
+
+#define GEN8_DRBREGL(x)			(0x1000 + (x) * 8)
+#define   GEN8_DRB_VALID		  (1<<0)
+#define GEN8_DRBREGU(x)			(GEN8_DRBREGL(x) + 4)
+
+#define DE_GUCRMR			0x44054
+
+#define GUC_BCS_RCS_IER			0xC550
+#define GUC_VCS2_VCS1_IER		0xC554
+#define GUC_WD_VECS_IER			0xC558
+#define GUC_PM_P24C_IER			0xC55C
+
+#endif
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index d87f173a0179..1118c39281f9 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1227,6 +1227,22 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_i915_private *dev_priv,
 	return ret;
 }
 
+static bool bxt_port_hotplug_long_detect(enum port port, u32 val)
+{
+	switch (port) {
+	case PORT_A:
+		return val & BXT_PORTA_HOTPLUG_LONG_DETECT;
+	case PORT_B:
+		return val & PORTB_HOTPLUG_LONG_DETECT;
+	case PORT_C:
+		return val & PORTC_HOTPLUG_LONG_DETECT;
+	case PORT_D:
+		return val & PORTD_HOTPLUG_LONG_DETECT;
+	default:
+		return false;
+	}
+}
+
 static bool pch_port_hotplug_long_detect(enum port port, u32 val)
 {
 	switch (port) {
@@ -1256,9 +1272,10 @@ static bool i9xx_port_hotplug_long_detect(enum port port, u32 val)
 }
 
 /* Get a bit mask of pins that have triggered, and which ones may be long. */
-static void pch_get_hpd_pins(u32 *pin_mask, u32 *long_mask,
+static void intel_get_hpd_pins(u32 *pin_mask, u32 *long_mask,
 			     u32 hotplug_trigger, u32 dig_hotplug_reg,
-			     const u32 hpd[HPD_NUM_PINS])
+			     const u32 hpd[HPD_NUM_PINS],
+			     bool long_pulse_detect(enum port port, u32 val))
 {
 	enum port port;
 	int i;
@@ -1272,8 +1289,10 @@ static void pch_get_hpd_pins(u32 *pin_mask, u32 *long_mask,
 
 		*pin_mask |= BIT(i);
 
-		port = intel_hpd_pin_to_port(i);
-		if (pch_port_hotplug_long_detect(port, dig_hotplug_reg))
+		if (!intel_hpd_pin_to_port(i, &port))
+			continue;
+
+		if (long_pulse_detect(port, dig_hotplug_reg))
 			*long_mask |= BIT(i);
 	}
 
@@ -1282,34 +1301,6 @@ static void pch_get_hpd_pins(u32 *pin_mask, u32 *long_mask,
 
 }
 
-/* Get a bit mask of pins that have triggered, and which ones may be long. */
-static void i9xx_get_hpd_pins(u32 *pin_mask, u32 *long_mask,
-			      u32 hotplug_trigger, const u32 hpd[HPD_NUM_PINS])
-{
-	enum port port;
-	int i;
-
-	*pin_mask = 0;
-	*long_mask = 0;
-
-	if (!hotplug_trigger)
-		return;
-
-	for_each_hpd_pin(i) {
-		if ((hpd[i] & hotplug_trigger) == 0)
-			continue;
-
-		*pin_mask |= BIT(i);
-
-		port = intel_hpd_pin_to_port(i);
-		if (i9xx_port_hotplug_long_detect(port, hotplug_trigger))
-			*long_mask |= BIT(i);
-	}
-
-	DRM_DEBUG_DRIVER("hotplug event received, stat 0x%08x, pins 0x%08x\n",
-			 hotplug_trigger, *pin_mask);
-}
-
 static void gmbus_irq_handler(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1547,7 +1538,9 @@ static void i9xx_hpd_irq_handler(struct drm_device *dev)
 	if (IS_G4X(dev) || IS_VALLEYVIEW(dev)) {
 		u32 hotplug_trigger = hotplug_status & HOTPLUG_INT_STATUS_G4X;
 
-		i9xx_get_hpd_pins(&pin_mask, &long_mask, hotplug_trigger, hpd_status_g4x);
+		intel_get_hpd_pins(&pin_mask, &long_mask, hotplug_trigger,
+				   hotplug_trigger, hpd_status_g4x,
+				   i9xx_port_hotplug_long_detect);
 		intel_hpd_irq_handler(dev, pin_mask, long_mask);
 
 		if (hotplug_status & DP_AUX_CHANNEL_MASK_INT_STATUS_G4X)
@@ -1555,7 +1548,9 @@ static void i9xx_hpd_irq_handler(struct drm_device *dev)
 	} else {
 		u32 hotplug_trigger = hotplug_status & HOTPLUG_INT_STATUS_I915;
 
-		i9xx_get_hpd_pins(&pin_mask, &long_mask, hotplug_trigger, hpd_status_i915);
+		intel_get_hpd_pins(&pin_mask, &long_mask, hotplug_trigger,
+				   hotplug_trigger, hpd_status_g4x,
+				   i9xx_port_hotplug_long_detect);
 		intel_hpd_irq_handler(dev, pin_mask, long_mask);
 	}
 }
@@ -1662,8 +1657,9 @@ static void ibx_irq_handler(struct drm_device *dev, u32 pch_iir)
 		dig_hotplug_reg = I915_READ(PCH_PORT_HOTPLUG);
 		I915_WRITE(PCH_PORT_HOTPLUG, dig_hotplug_reg);
 
-		pch_get_hpd_pins(&pin_mask, &long_mask, hotplug_trigger,
-				 dig_hotplug_reg, hpd_ibx);
+		intel_get_hpd_pins(&pin_mask, &long_mask, hotplug_trigger,
+				   dig_hotplug_reg, hpd_ibx,
+				   pch_port_hotplug_long_detect);
 		intel_hpd_irq_handler(dev, pin_mask, long_mask);
 	}
 
@@ -1763,8 +1759,10 @@ static void cpt_irq_handler(struct drm_device *dev, u32 pch_iir)
 
 		dig_hotplug_reg = I915_READ(PCH_PORT_HOTPLUG);
 		I915_WRITE(PCH_PORT_HOTPLUG, dig_hotplug_reg);
-		pch_get_hpd_pins(&pin_mask, &long_mask, hotplug_trigger,
-				 dig_hotplug_reg, hpd_cpt);
+
+		intel_get_hpd_pins(&pin_mask, &long_mask, hotplug_trigger,
+				   dig_hotplug_reg, hpd_cpt,
+				   pch_port_hotplug_long_detect);
 		intel_hpd_irq_handler(dev, pin_mask, long_mask);
 	}
 
@@ -1981,7 +1979,8 @@ static void bxt_hpd_handler(struct drm_device *dev, uint32_t iir_status)
 	/* Clear sticky bits in hpd status */
 	I915_WRITE(BXT_HOTPLUG_CTL, hp_control);
 
-	pch_get_hpd_pins(&pin_mask, &long_mask, hp_trigger, hp_control, hpd_bxt);
+	intel_get_hpd_pins(&pin_mask, &long_mask, hp_trigger, hp_control,
+			   hpd_bxt, bxt_port_hotplug_long_detect);
 	intel_hpd_irq_handler(dev, pin_mask, long_mask);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 5f4e7295295f..5ae4b0aba564 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -52,6 +52,8 @@ struct i915_params i915 __read_mostly = {
 	.mmio_debug = 0,
 	.verbose_state_checks = 1,
 	.edp_vswing = 0,
+	.enable_guc_submission = false,
+	.guc_log_level = -1,
 };
 
 module_param_named(modeset, i915.modeset, int, 0400);
@@ -181,3 +183,10 @@ MODULE_PARM_DESC(edp_vswing,
 		 "Ignore/Override vswing pre-emph table selection from VBT "
 		 "(0=use value from vbt [default], 1=low power swing(200mV),"
 		 "2=default swing(400mV))");
+
+module_param_named_unsafe(enable_guc_submission, i915.enable_guc_submission, bool, 0400);
+MODULE_PARM_DESC(enable_guc_submission, "Enable GuC submission (default:false)");
+
+module_param_named(guc_log_level, i915.guc_log_level, int, 0400);
+MODULE_PARM_DESC(guc_log_level,
+	"GuC firmware logging level (-1:disabled (default), 0-3:enabled)");
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index e9a95df639f0..0f67f3da0762 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -5985,6 +5985,11 @@ enum skl_disp_power_wells {
 
 /* digital port hotplug */
 #define PCH_PORT_HOTPLUG        0xc4030		/* SHOTPLUG_CTL */
+#define BXT_PORTA_HOTPLUG_ENABLE	(1 << 28)
+#define BXT_PORTA_HOTPLUG_STATUS_MASK	(0x3 << 24)
+#define  BXT_PORTA_HOTPLUG_NO_DETECT	(0 << 24)
+#define  BXT_PORTA_HOTPLUG_SHORT_DETECT	(1 << 24)
+#define  BXT_PORTA_HOTPLUG_LONG_DETECT	(2 << 24)
 #define PORTD_HOTPLUG_ENABLE            (1 << 20)
 #define PORTD_PULSE_DURATION_2ms        (0)
 #define PORTD_PULSE_DURATION_4_5ms      (1 << 18)
@@ -6846,6 +6851,9 @@ enum skl_disp_power_wells {
 #define GEN7_MISCCPCTL			(0x9424)
 #define   GEN7_DOP_CLOCK_GATE_ENABLE	(1<<0)
 
+#define GEN8_GARBCNTL                   0xB004
+#define   GEN9_GAPS_TSV_CREDIT_DISABLE  (1<<7)
+
 /* IVYBRIDGE DPF */
 #define GEN7_L3CDERRST1			0xB008 /* L3CD Error Status 1 */
 #define HSW_L3CDERRST11			0xB208 /* L3CD Error Status register 1 slice 1 */
diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index 6d8a7bf06dfc..ba1ae031e6fd 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -244,7 +244,7 @@ void intel_csr_load_status_set(struct drm_i915_private *dev_priv,
 void intel_csr_load_program(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	__be32 *payload = dev_priv->csr.dmc_payload;
+	u32 *payload = dev_priv->csr.dmc_payload;
 	uint32_t i, fw_size;
 
 	if (!IS_GEN9(dev)) {
@@ -256,7 +256,7 @@ void intel_csr_load_program(struct drm_device *dev)
 	fw_size = dev_priv->csr.dmc_fw_size;
 	for (i = 0; i < fw_size; i++)
 		I915_WRITE(CSR_PROGRAM_BASE + i * 4,
-			(u32 __force)payload[i]);
+			payload[i]);
 
 	for (i = 0; i < dev_priv->csr.mmio_count; i++) {
 		I915_WRITE(dev_priv->csr.mmioaddr[i],
@@ -279,7 +279,7 @@ static void finish_csr_load(const struct firmware *fw, void *context)
 	char substepping = intel_get_substepping(dev);
 	uint32_t dmc_offset = CSR_DEFAULT_FW_OFFSET, readcount = 0, nbytes;
 	uint32_t i;
-	__be32 *dmc_payload;
+	uint32_t *dmc_payload;
 	bool fw_loaded = false;
 
 	if (!fw) {
@@ -375,15 +375,7 @@ static void finish_csr_load(const struct firmware *fw, void *context)
 	}
 
 	dmc_payload = csr->dmc_payload;
-	for (i = 0; i < dmc_header->fw_size; i++) {
-		uint32_t *tmp = (u32 *)&fw->data[readcount + i * 4];
-		/*
-		 * The firmware payload is an array of 32 bit words stored in
-		 * little-endian format in the firmware image and programmed
-		 * as 32 bit big-endian format to memory.
-		 */
-		dmc_payload[i] = cpu_to_be32(*tmp);
-	}
+	memcpy(dmc_payload, &fw->data[readcount], nbytes);
 
 	/* load csr program during system boot, as needed for DC states */
 	intel_csr_load_program(dev);
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 43b0f17ad1fa..a76751633af6 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1936,7 +1936,9 @@ static void intel_disable_shared_dpll(struct intel_crtc *crtc)
 	struct intel_shared_dpll *pll = intel_crtc_to_shared_dpll(crtc);
 
 	/* PCH only available on ILK+ */
-	BUG_ON(INTEL_INFO(dev)->gen < 5);
+	if (INTEL_INFO(dev)->gen < 5)
+		return;
+
 	if (pll == NULL)
 		return;
 
@@ -10764,15 +10766,12 @@ static void intel_unpin_work_fn(struct work_struct *__work)
 		container_of(__work, struct intel_unpin_work, work);
 	struct intel_crtc *crtc = to_intel_crtc(work->crtc);
 	struct drm_device *dev = crtc->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_plane *primary = crtc->base.primary;
 
 	mutex_lock(&dev->struct_mutex);
 	intel_unpin_fb_obj(work->old_fb, primary->state);
 	drm_gem_object_unreference(&work->pending_flip_obj->base);
 
-	intel_fbc_update(dev_priv);
-
 	if (work->flip_queued_req)
 		i915_gem_request_assign(&work->flip_queued_req, NULL);
 	mutex_unlock(&dev->struct_mutex);
@@ -11544,7 +11543,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 			  to_intel_plane(primary)->frontbuffer_bit);
 	mutex_unlock(&dev->struct_mutex);
 
-	intel_fbc_disable(dev_priv);
+	intel_fbc_disable_crtc(intel_crtc);
 	intel_frontbuffer_flip_prepare(dev,
 				       to_intel_plane(primary)->frontbuffer_bit);
 
@@ -14271,7 +14270,7 @@ static int intel_user_framebuffer_dirty(struct drm_framebuffer *fb,
 	struct drm_i915_gem_object *obj = intel_fb->obj;
 
 	mutex_lock(&dev->struct_mutex);
-	intel_fb_obj_flush(obj, false, ORIGIN_GTT);
+	intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
 	mutex_unlock(&dev->struct_mutex);
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index f1b9f939b435..df7e2cfef38d 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -1409,7 +1409,10 @@ intel_dp_compute_config(struct intel_encoder *encoder,
 	 * bpc in between. */
 	bpp = pipe_config->pipe_bpp;
 	if (is_edp(intel_dp)) {
-		if (dev_priv->vbt.edp_bpp && dev_priv->vbt.edp_bpp < bpp) {
+
+		/* Get bpp from vbt only for panels that dont have bpp in edid */
+		if (intel_connector->base.display_info.bpc == 0 &&
+			(dev_priv->vbt.edp_bpp && dev_priv->vbt.edp_bpp < bpp)) {
 			DRM_DEBUG_KMS("clamping bpp for eDP panel to BIOS-provided %i\n",
 				      dev_priv->vbt.edp_bpp);
 			bpp = dev_priv->vbt.edp_bpp;
@@ -3958,43 +3961,67 @@ intel_dp_probe_mst(struct intel_dp *intel_dp)
 	return intel_dp->is_mst;
 }
 
-int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc)
+static void intel_dp_sink_crc_stop(struct intel_dp *intel_dp)
 {
-	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
-	struct drm_device *dev = intel_dig_port->base.base.dev;
-	struct intel_crtc *intel_crtc =
-		to_intel_crtc(intel_dig_port->base.base.crtc);
+	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
+	struct intel_crtc *intel_crtc = to_intel_crtc(dig_port->base.base.crtc);
 	u8 buf;
-	int test_crc_count;
-	int attempts = 6;
-	int ret = 0;
 
-	hsw_disable_ips(intel_crtc);
-
-	if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK_MISC, &buf) < 0) {
-		ret = -EIO;
-		goto out;
+	if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK, &buf) < 0) {
+		DRM_DEBUG_KMS("Sink CRC couldn't be stopped properly\n");
+		return;
 	}
 
-	if (!(buf & DP_TEST_CRC_SUPPORTED)) {
-		ret = -ENOTTY;
-		goto out;
-	}
+	if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_SINK,
+			       buf & ~DP_TEST_SINK_START) < 0)
+		DRM_DEBUG_KMS("Sink CRC couldn't be stopped properly\n");
 
-	if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK, &buf) < 0) {
-		ret = -EIO;
-		goto out;
-	}
+	hsw_enable_ips(intel_crtc);
+}
+
+static int intel_dp_sink_crc_start(struct intel_dp *intel_dp)
+{
+	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
+	struct intel_crtc *intel_crtc = to_intel_crtc(dig_port->base.base.crtc);
+	u8 buf;
+
+	if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK_MISC, &buf) < 0)
+		return -EIO;
+
+	if (!(buf & DP_TEST_CRC_SUPPORTED))
+		return -ENOTTY;
+
+	if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK, &buf) < 0)
+		return -EIO;
+
+	hsw_disable_ips(intel_crtc);
 
 	if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_SINK,
-				buf | DP_TEST_SINK_START) < 0) {
-		ret = -EIO;
-		goto out;
+			       buf | DP_TEST_SINK_START) < 0) {
+		hsw_enable_ips(intel_crtc);
+		return -EIO;
 	}
 
+	return 0;
+}
+
+int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc)
+{
+	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
+	struct drm_device *dev = dig_port->base.base.dev;
+	struct intel_crtc *intel_crtc = to_intel_crtc(dig_port->base.base.crtc);
+	u8 buf;
+	int test_crc_count;
+	int attempts = 6;
+	int ret;
+
+	ret = intel_dp_sink_crc_start(intel_dp);
+	if (ret)
+		return ret;
+
 	if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK_MISC, &buf) < 0) {
 		ret = -EIO;
-		goto out;
+		goto stop;
 	}
 
 	test_crc_count = buf & DP_TEST_COUNT_MASK;
@@ -4003,7 +4030,7 @@ int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc)
 		if (drm_dp_dpcd_readb(&intel_dp->aux,
 				      DP_TEST_SINK_MISC, &buf) < 0) {
 			ret = -EIO;
-			goto out;
+			goto stop;
 		}
 		intel_wait_for_vblank(dev, intel_crtc->pipe);
 	} while (--attempts && (buf & DP_TEST_COUNT_MASK) == test_crc_count);
@@ -4011,25 +4038,13 @@ int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc)
 	if (attempts == 0) {
 		DRM_DEBUG_KMS("Panel is unable to calculate CRC after 6 vblanks\n");
 		ret = -ETIMEDOUT;
-		goto out;
-	}
-
-	if (drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_CRC_R_CR, crc, 6) < 0) {
-		ret = -EIO;
-		goto out;
+		goto stop;
 	}
 
-	if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK, &buf) < 0) {
+	if (drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_CRC_R_CR, crc, 6) < 0)
 		ret = -EIO;
-		goto out;
-	}
-	if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_SINK,
-			       buf & ~DP_TEST_SINK_START) < 0) {
-		ret = -EIO;
-		goto out;
-	}
-out:
-	hsw_enable_ips(intel_crtc);
+stop:
+	intel_dp_sink_crc_stop(intel_dp);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 320c9e6bd848..34ad042f18b5 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1243,7 +1243,7 @@ void intel_fbc_invalidate(struct drm_i915_private *dev_priv,
 			  unsigned int frontbuffer_bits,
 			  enum fb_op_origin origin);
 void intel_fbc_flush(struct drm_i915_private *dev_priv,
-		     unsigned int frontbuffer_bits);
+		     unsigned int frontbuffer_bits, enum fb_op_origin origin);
 const char *intel_no_fbc_reason_str(enum no_fbc_reason reason);
 void intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv);
 
diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
index c271af767981..1f97fb548c2a 100644
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
@@ -884,22 +884,23 @@ void intel_fbc_invalidate(struct drm_i915_private *dev_priv,
 }
 
 void intel_fbc_flush(struct drm_i915_private *dev_priv,
-		     unsigned int frontbuffer_bits)
+		     unsigned int frontbuffer_bits, enum fb_op_origin origin)
 {
 	if (!dev_priv->fbc.enable_fbc)
 		return;
 
-	mutex_lock(&dev_priv->fbc.lock);
+	if (origin == ORIGIN_GTT)
+		return;
 
-	if (!dev_priv->fbc.busy_bits)
-		goto out;
+	mutex_lock(&dev_priv->fbc.lock);
 
 	dev_priv->fbc.busy_bits &= ~frontbuffer_bits;
 
-	if (!dev_priv->fbc.busy_bits)
+	if (!dev_priv->fbc.busy_bits) {
+		__intel_fbc_disable(dev_priv);
 		__intel_fbc_update(dev_priv);
+	}
 
-out:
 	mutex_unlock(&dev_priv->fbc.lock);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c
index 777b1d3ccd41..ac85357010b4 100644
--- a/drivers/gpu/drm/i915/intel_frontbuffer.c
+++ b/drivers/gpu/drm/i915/intel_frontbuffer.c
@@ -129,7 +129,7 @@ static void intel_frontbuffer_flush(struct drm_device *dev,
 
 	intel_edp_drrs_flush(dev, frontbuffer_bits);
 	intel_psr_flush(dev, frontbuffer_bits, origin);
-	intel_fbc_flush(dev_priv, frontbuffer_bits);
+	intel_fbc_flush(dev_priv, frontbuffer_bits, origin);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h
new file mode 100644
index 000000000000..18d7f20936c8
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_guc_fwif.h
@@ -0,0 +1,245 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#ifndef _INTEL_GUC_FWIF_H
+#define _INTEL_GUC_FWIF_H
+
+/*
+ * This file is partially autogenerated, although currently with some manual
+ * fixups afterwards. In future, it should be entirely autogenerated, in order
+ * to ensure that the definitions herein remain in sync with those used by the
+ * GuC's own firmware.
+ *
+ * EDITING THIS FILE IS THEREFORE NOT RECOMMENDED - YOUR CHANGES MAY BE LOST.
+ */
+
+#define GFXCORE_FAMILY_GEN8		11
+#define GFXCORE_FAMILY_GEN9		12
+#define GFXCORE_FAMILY_FORCE_ULONG	0x7fffffff
+
+#define GUC_CTX_PRIORITY_CRITICAL	0
+#define GUC_CTX_PRIORITY_HIGH		1
+#define GUC_CTX_PRIORITY_NORMAL		2
+#define GUC_CTX_PRIORITY_LOW		3
+
+#define GUC_MAX_GPU_CONTEXTS		1024
+#define	GUC_INVALID_CTX_ID		(GUC_MAX_GPU_CONTEXTS + 1)
+
+/* Work queue item header definitions */
+#define WQ_STATUS_ACTIVE		1
+#define WQ_STATUS_SUSPENDED		2
+#define WQ_STATUS_CMD_ERROR		3
+#define WQ_STATUS_ENGINE_ID_NOT_USED	4
+#define WQ_STATUS_SUSPENDED_FROM_RESET	5
+#define WQ_TYPE_SHIFT			0
+#define   WQ_TYPE_BATCH_BUF		(0x1 << WQ_TYPE_SHIFT)
+#define   WQ_TYPE_PSEUDO		(0x2 << WQ_TYPE_SHIFT)
+#define   WQ_TYPE_INORDER		(0x3 << WQ_TYPE_SHIFT)
+#define WQ_TARGET_SHIFT			10
+#define WQ_LEN_SHIFT			16
+#define WQ_NO_WCFLUSH_WAIT		(1 << 27)
+#define WQ_PRESENT_WORKLOAD		(1 << 28)
+#define WQ_WORKLOAD_SHIFT		29
+#define   WQ_WORKLOAD_GENERAL		(0 << WQ_WORKLOAD_SHIFT)
+#define   WQ_WORKLOAD_GPGPU		(1 << WQ_WORKLOAD_SHIFT)
+#define   WQ_WORKLOAD_TOUCH		(2 << WQ_WORKLOAD_SHIFT)
+
+#define WQ_RING_TAIL_SHIFT		20
+#define WQ_RING_TAIL_MASK		(0x7FF << WQ_RING_TAIL_SHIFT)
+
+#define GUC_DOORBELL_ENABLED		1
+#define GUC_DOORBELL_DISABLED		0
+
+#define GUC_CTX_DESC_ATTR_ACTIVE	(1 << 0)
+#define GUC_CTX_DESC_ATTR_PENDING_DB	(1 << 1)
+#define GUC_CTX_DESC_ATTR_KERNEL	(1 << 2)
+#define GUC_CTX_DESC_ATTR_PREEMPT	(1 << 3)
+#define GUC_CTX_DESC_ATTR_RESET		(1 << 4)
+#define GUC_CTX_DESC_ATTR_WQLOCKED	(1 << 5)
+#define GUC_CTX_DESC_ATTR_PCH		(1 << 6)
+
+/* The guc control data is 10 DWORDs */
+#define GUC_CTL_CTXINFO			0
+#define   GUC_CTL_CTXNUM_IN16_SHIFT	0
+#define   GUC_CTL_BASE_ADDR_SHIFT	12
+#define GUC_CTL_ARAT_HIGH		1
+#define GUC_CTL_ARAT_LOW		2
+#define GUC_CTL_DEVICE_INFO		3
+#define   GUC_CTL_GTTYPE_SHIFT		0
+#define   GUC_CTL_COREFAMILY_SHIFT	7
+#define GUC_CTL_LOG_PARAMS		4
+#define   GUC_LOG_VALID			(1 << 0)
+#define   GUC_LOG_NOTIFY_ON_HALF_FULL	(1 << 1)
+#define   GUC_LOG_ALLOC_IN_MEGABYTE	(1 << 3)
+#define   GUC_LOG_CRASH_PAGES		1
+#define   GUC_LOG_CRASH_SHIFT		4
+#define   GUC_LOG_DPC_PAGES		3
+#define   GUC_LOG_DPC_SHIFT		6
+#define   GUC_LOG_ISR_PAGES		3
+#define   GUC_LOG_ISR_SHIFT		9
+#define   GUC_LOG_BUF_ADDR_SHIFT	12
+#define GUC_CTL_PAGE_FAULT_CONTROL	5
+#define GUC_CTL_WA			6
+#define   GUC_CTL_WA_UK_BY_DRIVER	(1 << 3)
+#define GUC_CTL_FEATURE			7
+#define   GUC_CTL_VCS2_ENABLED		(1 << 0)
+#define   GUC_CTL_KERNEL_SUBMISSIONS	(1 << 1)
+#define   GUC_CTL_FEATURE2		(1 << 2)
+#define   GUC_CTL_POWER_GATING		(1 << 3)
+#define   GUC_CTL_DISABLE_SCHEDULER	(1 << 4)
+#define   GUC_CTL_PREEMPTION_LOG	(1 << 5)
+#define   GUC_CTL_ENABLE_SLPC		(1 << 7)
+#define GUC_CTL_DEBUG			8
+#define   GUC_LOG_VERBOSITY_SHIFT	0
+#define   GUC_LOG_VERBOSITY_LOW		(0 << GUC_LOG_VERBOSITY_SHIFT)
+#define   GUC_LOG_VERBOSITY_MED		(1 << GUC_LOG_VERBOSITY_SHIFT)
+#define   GUC_LOG_VERBOSITY_HIGH	(2 << GUC_LOG_VERBOSITY_SHIFT)
+#define   GUC_LOG_VERBOSITY_ULTRA	(3 << GUC_LOG_VERBOSITY_SHIFT)
+/* Verbosity range-check limits, without the shift */
+#define	  GUC_LOG_VERBOSITY_MIN		0
+#define	  GUC_LOG_VERBOSITY_MAX		3
+
+#define GUC_CTL_MAX_DWORDS		(GUC_CTL_DEBUG + 1)
+
+struct guc_doorbell_info {
+	u32 db_status;
+	u32 cookie;
+	u32 reserved[14];
+} __packed;
+
+union guc_doorbell_qw {
+	struct {
+		u32 db_status;
+		u32 cookie;
+	};
+	u64 value_qw;
+} __packed;
+
+#define GUC_MAX_DOORBELLS		256
+#define GUC_INVALID_DOORBELL_ID		(GUC_MAX_DOORBELLS)
+
+#define GUC_DB_SIZE			(PAGE_SIZE)
+#define GUC_WQ_SIZE			(PAGE_SIZE * 2)
+
+/* Work item for submitting workloads into work queue of GuC. */
+struct guc_wq_item {
+	u32 header;
+	u32 context_desc;
+	u32 ring_tail;
+	u32 fence_id;
+} __packed;
+
+struct guc_process_desc {
+	u32 context_id;
+	u64 db_base_addr;
+	u32 head;
+	u32 tail;
+	u32 error_offset;
+	u64 wq_base_addr;
+	u32 wq_size_bytes;
+	u32 wq_status;
+	u32 engine_presence;
+	u32 priority;
+	u32 reserved[30];
+} __packed;
+
+/* engine id and context id is packed into guc_execlist_context.context_id*/
+#define GUC_ELC_CTXID_OFFSET		0
+#define GUC_ELC_ENGINE_OFFSET		29
+
+/* The execlist context including software and HW information */
+struct guc_execlist_context {
+	u32 context_desc;
+	u32 context_id;
+	u32 ring_status;
+	u32 ring_lcra;
+	u32 ring_begin;
+	u32 ring_end;
+	u32 ring_next_free_location;
+	u32 ring_current_tail_pointer_value;
+	u8 engine_state_submit_value;
+	u8 engine_state_wait_value;
+	u16 pagefault_count;
+	u16 engine_submit_queue_count;
+} __packed;
+
+/*Context descriptor for communicating between uKernel and Driver*/
+struct guc_context_desc {
+	u32 sched_common_area;
+	u32 context_id;
+	u32 pas_id;
+	u8 engines_used;
+	u64 db_trigger_cpu;
+	u32 db_trigger_uk;
+	u64 db_trigger_phy;
+	u16 db_id;
+
+	struct guc_execlist_context lrc[I915_NUM_RINGS];
+
+	u8 attribute;
+
+	u32 priority;
+
+	u32 wq_sampled_tail_offset;
+	u32 wq_total_submit_enqueues;
+
+	u32 process_desc;
+	u32 wq_addr;
+	u32 wq_size;
+
+	u32 engine_presence;
+
+	u32 reserved0[1];
+	u64 reserved1[1];
+
+	u64 desc_private;
+} __packed;
+
+/* This Action will be programmed in C180 - SOFT_SCRATCH_O_REG */
+enum host2guc_action {
+	HOST2GUC_ACTION_DEFAULT = 0x0,
+	HOST2GUC_ACTION_SAMPLE_FORCEWAKE = 0x6,
+	HOST2GUC_ACTION_ALLOCATE_DOORBELL = 0x10,
+	HOST2GUC_ACTION_DEALLOCATE_DOORBELL = 0x20,
+	HOST2GUC_ACTION_SLPC_REQUEST = 0x3003,
+	HOST2GUC_ACTION_LIMIT
+};
+
+/*
+ * The GuC sends its response to a command by overwriting the
+ * command in SS0. The response is distinguishable from a command
+ * by the fact that all the MASK bits are set. The remaining bits
+ * give more detail.
+ */
+#define	GUC2HOST_RESPONSE_MASK		((u32)0xF0000000)
+#define	GUC2HOST_IS_RESPONSE(x) 	((u32)(x) >= GUC2HOST_RESPONSE_MASK)
+#define	GUC2HOST_STATUS(x)		(GUC2HOST_RESPONSE_MASK | (x))
+
+/* GUC will return status back to SOFT_SCRATCH_O_REG */
+enum guc2host_status {
+	GUC2HOST_STATUS_SUCCESS = GUC2HOST_STATUS(0x0),
+	GUC2HOST_STATUS_ALLOCATE_DOORBELL_FAIL = GUC2HOST_STATUS(0x10),
+	GUC2HOST_STATUS_DEALLOCATE_DOORBELL_FAIL = GUC2HOST_STATUS(0x20),
+	GUC2HOST_STATUS_GENERIC_FAIL = GUC2HOST_STATUS(0x0000F000)
+};
+
+#endif
diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c
index 3c9171f11531..032a0bf75f3b 100644
--- a/drivers/gpu/drm/i915/intel_hotplug.c
+++ b/drivers/gpu/drm/i915/intel_hotplug.c
@@ -76,17 +76,23 @@
  * it will use i915_hotplug_work_func where this logic is handled.
  */
 
-enum port intel_hpd_pin_to_port(enum hpd_pin pin)
+bool intel_hpd_pin_to_port(enum hpd_pin pin, enum port *port)
 {
 	switch (pin) {
+	case HPD_PORT_A:
+		*port = PORT_A;
+		return true;
 	case HPD_PORT_B:
-		return PORT_B;
+		*port = PORT_B;
+		return true;
 	case HPD_PORT_C:
-		return PORT_C;
+		*port = PORT_C;
+		return true;
 	case HPD_PORT_D:
-		return PORT_D;
+		*port = PORT_D;
+		return true;
 	default:
-		return PORT_A; /* no hpd */
+		return false;	/* no hpd */
 	}
 }
 
@@ -369,8 +375,8 @@ void intel_hpd_irq_handler(struct drm_device *dev,
 		if (!(BIT(i) & pin_mask))
 			continue;
 
-		port = intel_hpd_pin_to_port(i);
-		is_dig_port = port && dev_priv->hotplug.irq_port[port];
+		is_dig_port = intel_hpd_pin_to_port(i, &port) &&
+			      dev_priv->hotplug.irq_port[port];
 
 		if (is_dig_port) {
 			bool long_hpd = long_mask & BIT(i);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 9faad82c42ec..99bba8ece464 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1740,6 +1740,12 @@ static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req)
 	if (ret)
 		goto out;
 
+	ret = req->ring->emit_bb_start(req,
+				       (so.ggtt_offset + so.aux_batch_offset),
+				       I915_DISPATCH_SECURE);
+	if (ret)
+		goto out;
+
 	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
 
 out:
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 5004c4a46a9e..fff0c22682ee 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -102,6 +102,12 @@ static void skl_init_clock_gating(struct drm_device *dev)
 		/* WaDisableLSQCROPERFforOCL:skl */
 		I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
 			   GEN8_LQSC_RO_PERF_DIS);
+
+	/* WaEnableGapsTsvCreditFix:skl */
+	if (IS_SKYLAKE(dev) && (INTEL_REVID(dev) >= SKL_REVID_C0)) {
+		I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
+					   GEN9_GAPS_TSV_CREDIT_DISABLE));
+	}
 }
 
 static void bxt_init_clock_gating(struct drm_device *dev)
@@ -4266,7 +4272,7 @@ static void ironlake_enable_drps(struct drm_device *dev)
 
 	if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
 		DRM_ERROR("stuck trying to change perf mode\n");
-	msleep(1);
+	mdelay(1);
 
 	ironlake_set_drps(dev, fstart);
 
@@ -4297,10 +4303,10 @@ static void ironlake_disable_drps(struct drm_device *dev)
 
 	/* Go back to the starting frequency */
 	ironlake_set_drps(dev, dev_priv->ips.fstart);
-	msleep(1);
+	mdelay(1);
 	rgvswctl |= MEMCTL_CMD_STS;
 	I915_WRITE(MEMSWCTL, rgvswctl);
-	msleep(1);
+	mdelay(1);
 
 	spin_unlock_irq(&mchdev_lock);
 }
diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c
index acd8ec859f71..a04b4dc5ed9b 100644
--- a/drivers/gpu/drm/i915/intel_psr.c
+++ b/drivers/gpu/drm/i915/intel_psr.c
@@ -698,6 +698,7 @@ void intel_psr_flush(struct drm_device *dev,
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_crtc *crtc;
 	enum pipe pipe;
+	int delay_ms = HAS_DDI(dev) ? 100 : 500;
 
 	mutex_lock(&dev_priv->psr.lock);
 	if (!dev_priv->psr.enabled) {
@@ -733,7 +734,7 @@ void intel_psr_flush(struct drm_device *dev,
 
 	if (!dev_priv->psr.active && !dev_priv->psr.busy_frontbuffer_bits)
 		schedule_delayed_work(&dev_priv->psr.work,
-				      msecs_to_jiffies(100));
+				      msecs_to_jiffies(delay_ms));
 	mutex_unlock(&dev_priv->psr.lock);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 177f7ed16cf0..1c14233d179f 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1041,13 +1041,6 @@ static int skl_init_workarounds(struct intel_engine_cs *ring)
 		WA_SET_BIT_MASKED(HIZ_CHICKEN,
 				  BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE);
 
-	if (INTEL_REVID(dev) == SKL_REVID_C0 ||
-	    INTEL_REVID(dev) == SKL_REVID_D0)
-		/* WaBarrierPerformanceFixDisable:skl */
-		WA_SET_BIT_MASKED(HDC_CHICKEN0,
-				  HDC_FENCE_DEST_SLM_DISABLE |
-				  HDC_BARRIER_PERFORMANCE_DISABLE);
-
 	if (INTEL_REVID(dev) <= SKL_REVID_D0) {
 		/*
 		 *Use Force Non-Coherent whenever executing a 3D context. This
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 6393b76f87ff..821644d1b544 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -68,6 +68,22 @@
 bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv,
 				    int power_well_id);
 
+static void intel_power_well_enable(struct drm_i915_private *dev_priv,
+				    struct i915_power_well *power_well)
+{
+	DRM_DEBUG_KMS("enabling %s\n", power_well->name);
+	power_well->ops->enable(dev_priv, power_well);
+	power_well->hw_enabled = true;
+}
+
+static void intel_power_well_disable(struct drm_i915_private *dev_priv,
+				     struct i915_power_well *power_well)
+{
+	DRM_DEBUG_KMS("disabling %s\n", power_well->name);
+	power_well->hw_enabled = false;
+	power_well->ops->disable(dev_priv, power_well);
+}
+
 /*
  * We should only use the power well if we explicitly asked the hardware to
  * enable it, so check if it's enabled and also check if we've requested it to
@@ -1104,11 +1120,8 @@ void intel_display_power_get(struct drm_i915_private *dev_priv,
 	mutex_lock(&power_domains->lock);
 
 	for_each_power_well(i, power_well, BIT(domain), power_domains) {
-		if (!power_well->count++) {
-			DRM_DEBUG_KMS("enabling %s\n", power_well->name);
-			power_well->ops->enable(dev_priv, power_well);
-			power_well->hw_enabled = true;
-		}
+		if (!power_well->count++)
+			intel_power_well_enable(dev_priv, power_well);
 	}
 
 	power_domains->domain_use_count[domain]++;
@@ -1142,11 +1155,8 @@ void intel_display_power_put(struct drm_i915_private *dev_priv,
 	for_each_power_well_rev(i, power_well, BIT(domain), power_domains) {
 		WARN_ON(!power_well->count);
 
-		if (!--power_well->count && i915.disable_power_well) {
-			DRM_DEBUG_KMS("disabling %s\n", power_well->name);
-			power_well->hw_enabled = false;
-			power_well->ops->disable(dev_priv, power_well);
-		}
+		if (!--power_well->count && i915.disable_power_well)
+			intel_power_well_disable(dev_priv, power_well);
 	}
 
 	mutex_unlock(&power_domains->lock);