summary refs log tree commit diff
path: root/drivers/gpu/drm/i915/i915_perf.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_perf.c')
-rw-r--r-- drivers/gpu/drm/i915/i915_perf.c 550
1 files changed, 443 insertions, 107 deletions
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 004074936300..050b8ae7b8e7 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -192,6 +192,7 @@
*/
#include <linux/anon_inodes.h>
+#include <linux/nospec.h>
#include <linux/sizes.h>
#include <linux/uuid.h>
@@ -208,6 +209,7 @@
#include "gt/intel_gt_regs.h"
#include "gt/intel_lrc.h"
#include "gt/intel_lrc_reg.h"
+#include "gt/intel_rc6.h"
#include "gt/intel_ring.h"
#include "gt/uc/intel_guc_slpc.h"
@@ -326,6 +328,12 @@ static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = {
[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
[I915_OAR_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
[I915_OA_FORMAT_A24u40_A14u32_B8_C8] = { 5, 256 },
+ [I915_OAM_FORMAT_MPEC8u64_B8_C8] = { 1, 192, TYPE_OAM, HDR_64_BIT },
+ [I915_OAM_FORMAT_MPEC8u32_B8_C8] = { 2, 128, TYPE_OAM, HDR_64_BIT },
+};
+
+static const u32 mtl_oa_base[] = {
+ [PERF_GROUP_OAM_SAMEDIA_0] = 0x393000,
};
#define SAMPLE_OA_REPORT (1<<0)
@@ -418,11 +426,17 @@ static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo)
kfree(oa_bo);
}
+/*
+ * Return the OA register description of the perf group that the stream's
+ * engine is attached to (stream->engine->oa_group). The group pointer is
+ * dereferenced unconditionally.
+ */
+static inline const struct i915_perf_regs *
+__oa_regs(struct i915_perf_stream *stream)
+{
+	const struct i915_perf_group *group = stream->engine->oa_group;
+
+	return &group->regs;
+}
+
static u32 gen12_oa_hw_tail_read(struct i915_perf_stream *stream)
{
struct intel_uncore *uncore = stream->uncore;
- return intel_uncore_read(uncore, GEN12_OAG_OATAILPTR) &
+ return intel_uncore_read(uncore, __oa_regs(stream)->oa_tail_ptr) &
GEN12_OAG_OATAILPTR_MASK;
}
@@ -441,6 +455,67 @@ static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream)
return oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
}
+/* True when the OA format selected for stream @__s has a HDR_64_BIT header. */
+#define oa_report_header_64bit(__s) \
+ ((__s)->oa_buffer.format->header == HDR_64_BIT)
+
+/*
+ * Read the report id stored at the very start of @report: the first qword
+ * for formats with a 64-bit header, the first dword otherwise.
+ */
+static u64 oa_report_id(struct i915_perf_stream *stream, void *report)
+{
+	if (oa_report_header_64bit(stream))
+		return *(u64 *)report;
+
+	return *(u32 *)report;
+}
+
+/*
+ * Extract the reason field from the report id of @report. The id is shifted
+ * down by OAREPORT_REASON_SHIFT and masked with the extended mask on graphics
+ * version 12, or with the regular mask on every other version.
+ */
+static u64 oa_report_reason(struct i915_perf_stream *stream, void *report)
+{
+	u64 id = oa_report_id(stream, report);
+	u64 mask = OAREPORT_REASON_MASK;
+
+	if (GRAPHICS_VER(stream->perf->i915) == 12)
+		mask = OAREPORT_REASON_MASK_EXTENDED;
+
+	return (id >> OAREPORT_REASON_SHIFT) & mask;
+}
+
+/*
+ * Zero the report id of @report: one dword for 32-bit headers, the whole
+ * first qword for 64-bit headers.
+ */
+static void oa_report_id_clear(struct i915_perf_stream *stream, u32 *report)
+{
+	if (!oa_report_header_64bit(stream)) {
+		*report = 0;
+		return;
+	}
+
+	*(u64 *)report = 0;
+}
+
+/*
+ * A report's context id is treated as invalid only when the report id lacks
+ * the gen8_valid_ctx_bit and the graphics version is 11 or lower.
+ */
+static bool oa_report_ctx_invalid(struct i915_perf_stream *stream, void *report)
+{
+	u64 id = oa_report_id(stream, report);
+
+	if (id & stream->perf->gen8_valid_ctx_bit)
+		return false;
+
+	return GRAPHICS_VER(stream->perf->i915) <= 11;
+}
+
+/*
+ * Read the timestamp that follows the report id in @report: the second qword
+ * for formats with a 64-bit header, the second dword otherwise.
+ */
+static u64 oa_timestamp(struct i915_perf_stream *stream, void *report)
+{
+	if (oa_report_header_64bit(stream))
+		return ((u64 *)report)[1];
+
+	return ((u32 *)report)[1];
+}
+
+/*
+ * Zero the timestamp of @report: dword 1 for 32-bit headers, the qword
+ * starting at dword 2 for 64-bit headers.
+ */
+static void oa_timestamp_clear(struct i915_perf_stream *stream, u32 *report)
+{
+	if (!oa_report_header_64bit(stream)) {
+		report[1] = 0;
+		return;
+	}
+
+	*(u64 *)(report + 2) = 0;
+}
+
+/*
+ * Return the context id stored in @report, masked with the stream's
+ * specific_ctx_id_mask. The id is read from dword 4 for formats with a 64-bit
+ * header and from dword 2 otherwise.
+ */
+static u32 oa_context_id(struct i915_perf_stream *stream, u32 *report)
+{
+	const int dword = oa_report_header_64bit(stream) ? 4 : 2;
+
+	return report[dword] & stream->specific_ctx_id_mask;
+}
+
+/*
+ * Overwrite the context id of @report with INVALID_CTX_ID. The id is written
+ * to dword 4 for formats with a 64-bit header and to dword 2 otherwise.
+ */
+static void oa_context_id_squash(struct i915_perf_stream *stream, u32 *report)
+{
+	const int dword = oa_report_header_64bit(stream) ? 4 : 2;
+
+	report[dword] = INVALID_CTX_ID;
+}
+
/**
* oa_buffer_check_unlocked - check for data and update tail ptr state
* @stream: i915 stream instance
@@ -473,6 +548,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
bool pollin;
u32 hw_tail;
u64 now;
+ u32 partial_report_size;
/* We have to consider the (unlikely) possibility that read() errors
* could result in an OA buffer reset which might reset the head and
@@ -482,10 +558,15 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
hw_tail = stream->perf->ops.oa_hw_tail_read(stream);
- /* The tail pointer increases in 64 byte increments,
- * not in report_size steps...
+ /* The tail pointer increases in 64 byte increments, not in report_size
+ * steps. Also the report size may not be a power of 2. Compute the size
+ * of the potentially partially landed report at the tail of the OA buffer.
*/
- hw_tail &= ~(report_size - 1);
+ partial_report_size = OA_TAKEN(hw_tail, stream->oa_buffer.tail);
+ partial_report_size %= report_size;
+
+ /* Subtract partial amount off the tail */
+ hw_tail = gtt_offset + OA_TAKEN(hw_tail, partial_report_size);
now = ktime_get_mono_fast_ns();
@@ -509,21 +590,22 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
hw_tail -= gtt_offset;
tail = hw_tail;
- /* Walk the stream backward until we find a report with dword 0
- * & 1 not at 0. Since the circular buffer pointers progress by
- * increments of 64 bytes and that reports can be up to 256
- * bytes long, we can't tell whether a report has fully landed
- * in memory before the first 2 dwords of the following report
- * have effectively landed.
+ /* Walk the stream backward until we find a report with report
+ * id and timestamp not at 0. Since the circular buffer pointers
+ * progress by increments of 64 bytes and that reports can be up
+ * to 256 bytes long, we can't tell whether a report has fully
+ * landed in memory before the report id and timestamp of the
+ * following report have effectively landed.
*
* This is assuming that the writes of the OA unit land in
* memory in the order they were written to.
* If not : (╯°□°)╯︵ ┻━┻
*/
while (OA_TAKEN(tail, aged_tail) >= report_size) {
- u32 *report32 = (void *)(stream->oa_buffer.vaddr + tail);
+ void *report = stream->oa_buffer.vaddr + tail;
- if (report32[0] != 0 || report32[1] != 0)
+ if (oa_report_id(stream, report) ||
+ oa_timestamp(stream, report))
break;
tail = (tail - report_size) & (OA_BUFFER_SIZE - 1);
@@ -607,6 +689,8 @@ static int append_oa_sample(struct i915_perf_stream *stream,
{
int report_size = stream->oa_buffer.format->size;
struct drm_i915_perf_record_header header;
+ int report_size_partial;
+ u8 *oa_buf_end;
header.type = DRM_I915_PERF_RECORD_SAMPLE;
header.pad = 0;
@@ -620,8 +704,20 @@ static int append_oa_sample(struct i915_perf_stream *stream,
return -EFAULT;
buf += sizeof(header);
- if (copy_to_user(buf, report, report_size))
+ oa_buf_end = stream->oa_buffer.vaddr + OA_BUFFER_SIZE;
+ report_size_partial = oa_buf_end - report;
+
+ if (report_size_partial < report_size) {
+ if (copy_to_user(buf, report, report_size_partial))
+ return -EFAULT;
+ buf += report_size_partial;
+
+ if (copy_to_user(buf, stream->oa_buffer.vaddr,
+ report_size - report_size_partial))
+ return -EFAULT;
+ } else if (copy_to_user(buf, report, report_size)) {
return -EFAULT;
+ }
(*offset) += header.size;
@@ -685,12 +781,11 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
* An out of bounds or misaligned head or tail pointer implies a driver
* bug since we validate + align the tail pointers we read from the
* hardware and we are in full control of the head pointer which should
- * only be incremented by multiples of the report size (notably also
- * all a power of two).
+ * only be incremented by multiples of the report size.
*/
if (drm_WARN_ONCE(&uncore->i915->drm,
- head > OA_BUFFER_SIZE || head % report_size ||
- tail > OA_BUFFER_SIZE || tail % report_size,
+ head > OA_BUFFER_SIZE ||
+ tail > OA_BUFFER_SIZE,
"Inconsistent OA buffer pointers: head = %u, tail = %u\n",
head, tail))
return -EIO;
@@ -702,39 +797,19 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
u8 *report = oa_buf_base + head;
u32 *report32 = (void *)report;
u32 ctx_id;
- u32 reason;
-
- /*
- * All the report sizes factor neatly into the buffer
- * size so we never expect to see a report split
- * between the beginning and end of the buffer.
- *
- * Given the initial alignment check a misalignment
- * here would imply a driver bug that would result
- * in an overrun.
- */
- if (drm_WARN_ON(&uncore->i915->drm,
- (OA_BUFFER_SIZE - head) < report_size)) {
- drm_err(&uncore->i915->drm,
- "Spurious OA head ptr: non-integral report offset\n");
- break;
- }
+ u64 reason;
/*
* The reason field includes flags identifying what
* triggered this specific report (mostly timer
* triggered or e.g. due to a context switch).
*
- * This field is never expected to be zero so we can
- * check that the report isn't invalid before copying
- * it to userspace...
+ * In MMIO triggered reports, some platforms do not set the
+ * reason bit in this field and it is valid to have a reason
+ * field of zero.
*/
- reason = ((report32[0] >> OAREPORT_REASON_SHIFT) &
- (GRAPHICS_VER(stream->perf->i915) == 12 ?
- OAREPORT_REASON_MASK_EXTENDED :
- OAREPORT_REASON_MASK));
-
- ctx_id = report32[2] & stream->specific_ctx_id_mask;
+ reason = oa_report_reason(stream, report);
+ ctx_id = oa_context_id(stream, report32);
/*
* Squash whatever is in the CTX_ID field if it's marked as
@@ -744,9 +819,10 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
* Note: that we don't clear the valid_ctx_bit so userspace can
* understand that the ID has been squashed by the kernel.
*/
- if (!(report32[0] & stream->perf->gen8_valid_ctx_bit) &&
- GRAPHICS_VER(stream->perf->i915) <= 11)
- ctx_id = report32[2] = INVALID_CTX_ID;
+ if (oa_report_ctx_invalid(stream, report)) {
+ ctx_id = INVALID_CTX_ID;
+ oa_context_id_squash(stream, report32);
+ }
/*
* NB: For Gen 8 the OA unit no longer supports clock gating
@@ -790,7 +866,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
*/
if (stream->ctx &&
stream->specific_ctx_id != ctx_id) {
- report32[2] = INVALID_CTX_ID;
+ oa_context_id_squash(stream, report32);
}
ret = append_oa_sample(stream, buf, count, offset,
@@ -802,18 +878,19 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
}
/*
- * Clear out the first 2 dword as a mean to detect unlanded
+ * Clear out the report id and timestamp as a means to detect unlanded
* reports.
*/
- report32[0] = 0;
- report32[1] = 0;
+ oa_report_id_clear(stream, report32);
+ oa_timestamp_clear(stream, report32);
}
if (start_offset != *offset) {
i915_reg_t oaheadptr;
oaheadptr = GRAPHICS_VER(stream->perf->i915) == 12 ?
- GEN12_OAG_OAHEADPTR : GEN8_OAHEADPTR;
+ __oa_regs(stream)->oa_head_ptr :
+ GEN8_OAHEADPTR;
spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
@@ -866,7 +943,8 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
return -EIO;
oastatus_reg = GRAPHICS_VER(stream->perf->i915) == 12 ?
- GEN12_OAG_OASTATUS : GEN8_OASTATUS;
+ __oa_regs(stream)->oa_status :
+ GEN8_OASTATUS;
oastatus = intel_uncore_read(uncore, oastatus_reg);
@@ -1570,12 +1648,23 @@ free_noa_wait(struct i915_perf_stream *stream)
i915_vma_unpin_and_release(&stream->noa_wait, 0);
}
+/* An engine supports OA when it has an OA perf group attached to it. */
+static bool engine_supports_oa(const struct intel_engine_cs *engine)
+{
+	return engine->oa_group != NULL;
+}
+
+/*
+ * An engine supports an OA format of the given @type when it has an OA perf
+ * group attached and that group's type equals @type.
+ */
+static bool engine_supports_oa_format(struct intel_engine_cs *engine, int type)
+{
+	const struct i915_perf_group *group = engine->oa_group;
+
+	if (!group)
+		return false;
+
+	return group->type == type;
+}
+
static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
{
struct i915_perf *perf = stream->perf;
struct intel_gt *gt = stream->engine->gt;
+ struct i915_perf_group *g = stream->engine->oa_group;
- if (WARN_ON(stream != gt->perf.exclusive_stream))
+ if (WARN_ON(stream != g->exclusive_stream))
return;
/*
@@ -1584,7 +1673,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
*
* See i915_oa_init_reg_state() and lrc_configure_all_contexts()
*/
- WRITE_ONCE(gt->perf.exclusive_stream, NULL);
+ WRITE_ONCE(g->exclusive_stream, NULL);
perf->ops.disable_metric_set(stream);
free_oa_buffer(stream);
@@ -1720,8 +1809,8 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
- intel_uncore_write(uncore, GEN12_OAG_OASTATUS, 0);
- intel_uncore_write(uncore, GEN12_OAG_OAHEADPTR,
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_status, 0);
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_head_ptr,
gtt_offset & GEN12_OAG_OAHEADPTR_MASK);
stream->oa_buffer.head = gtt_offset;
@@ -1733,9 +1822,9 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
* to enable proper functionality of the overflow
* bit."
*/
- intel_uncore_write(uncore, GEN12_OAG_OABUFFER, gtt_offset |
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_buffer, gtt_offset |
OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
- intel_uncore_write(uncore, GEN12_OAG_OATAILPTR,
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_tail_ptr,
gtt_offset & GEN12_OAG_OATAILPTR_MASK);
/* Mark that we need updated tail pointers to read from... */
@@ -2495,7 +2584,8 @@ err_add_request:
return err;
}
-static int gen8_configure_context(struct i915_gem_context *ctx,
+static int gen8_configure_context(struct i915_perf_stream *stream,
+ struct i915_gem_context *ctx,
struct flex *flex, unsigned int count)
{
struct i915_gem_engines_iter it;
@@ -2636,7 +2726,7 @@ oa_configure_all_contexts(struct i915_perf_stream *stream,
spin_unlock(&i915->gem.contexts.lock);
- err = gen8_configure_context(ctx, regs, num_regs);
+ err = gen8_configure_context(stream, ctx, regs, num_regs);
if (err) {
i915_gem_context_put(ctx);
return err;
@@ -2681,6 +2771,9 @@ gen12_configure_all_contexts(struct i915_perf_stream *stream,
},
};
+ if (stream->engine->class != RENDER_CLASS)
+ return 0;
+
return oa_configure_all_contexts(stream,
regs, ARRAY_SIZE(regs),
active);
@@ -2810,7 +2903,7 @@ gen12_enable_metric_set(struct i915_perf_stream *stream,
_MASKED_BIT_ENABLE(GEN12_DISABLE_DOP_GATING));
}
- intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG,
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_debug,
/* Disable clk ratio reports, like previous Gens. */
_MASKED_BIT_ENABLE(GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO) |
@@ -2820,7 +2913,7 @@ gen12_enable_metric_set(struct i915_perf_stream *stream,
*/
oag_report_ctx_switches(stream));
- intel_uncore_write(uncore, GEN12_OAG_OAGLBCTXCTRL, periodic ?
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_ctx_ctrl, periodic ?
(GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME |
GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE |
(period_exponent << GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT))
@@ -2974,8 +3067,8 @@ static void gen8_oa_enable(struct i915_perf_stream *stream)
static void gen12_oa_enable(struct i915_perf_stream *stream)
{
- struct intel_uncore *uncore = stream->uncore;
- u32 report_format = stream->oa_buffer.format->format;
+ const struct i915_perf_regs *regs;
+ u32 val;
/*
* If we don't want OA reports from the OA buffer, then we don't even
@@ -2986,9 +3079,11 @@ static void gen12_oa_enable(struct i915_perf_stream *stream)
gen12_init_oa_buffer(stream);
- intel_uncore_write(uncore, GEN12_OAG_OACONTROL,
- (report_format << GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT) |
- GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE);
+ regs = __oa_regs(stream);
+ val = (stream->oa_buffer.format->format << regs->oa_ctrl_counter_format_shift) |
+ GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE;
+
+ intel_uncore_write(stream->uncore, regs->oa_ctrl, val);
}
/**
@@ -3040,9 +3135,9 @@ static void gen12_oa_disable(struct i915_perf_stream *stream)
{
struct intel_uncore *uncore = stream->uncore;
- intel_uncore_write(uncore, GEN12_OAG_OACONTROL, 0);
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_ctrl, 0);
if (intel_wait_for_register(uncore,
- GEN12_OAG_OACONTROL,
+ __oa_regs(stream)->oa_ctrl,
GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE, 0,
50))
drm_err(&stream->perf->i915->drm,
@@ -3182,6 +3277,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
{
struct drm_i915_private *i915 = stream->perf->i915;
struct i915_perf *perf = stream->perf;
+ struct i915_perf_group *g;
struct intel_gt *gt;
int ret;
@@ -3191,6 +3287,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
return -EINVAL;
}
gt = props->engine->gt;
+ g = props->engine->oa_group;
/*
* If the sysfs metrics/ directory wasn't registered for some
@@ -3221,7 +3318,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
* counter reports and marshal to the appropriate client
* we currently only allow exclusive access
*/
- if (gt->perf.exclusive_stream) {
+ if (g->exclusive_stream) {
drm_dbg(&stream->perf->i915->drm,
"OA unit already in use\n");
return -EBUSY;
@@ -3316,7 +3413,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
stream->ops = &i915_oa_stream_ops;
stream->engine->gt->perf.sseu = props->sseu;
- WRITE_ONCE(gt->perf.exclusive_stream, stream);
+ WRITE_ONCE(g->exclusive_stream, stream);
ret = i915_perf_stream_enable_sync(stream);
if (ret) {
@@ -3339,7 +3436,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
return 0;
err_enable:
- WRITE_ONCE(gt->perf.exclusive_stream, NULL);
+ WRITE_ONCE(g->exclusive_stream, NULL);
perf->ops.disable_metric_set(stream);
free_oa_buffer(stream);
@@ -3373,7 +3470,7 @@ void i915_oa_init_reg_state(const struct intel_context *ce,
return;
/* perf.exclusive_stream serialised by lrc_configure_all_contexts() */
- stream = READ_ONCE(engine->gt->perf.exclusive_stream);
+ stream = READ_ONCE(engine->oa_group->exclusive_stream);
if (stream && GRAPHICS_VER(stream->perf->i915) < 12)
gen8_update_reg_state_unlocked(ce, stream);
}
@@ -3940,41 +4037,35 @@ static int read_properties_unlocked(struct i915_perf *perf,
u32 n_props,
struct perf_open_properties *props)
{
+ struct drm_i915_gem_context_param_sseu user_sseu;
+ const struct i915_oa_format *f;
u64 __user *uprop = uprops;
+ bool config_instance = false;
+ bool config_class = false;
+ bool config_sseu = false;
+ u8 class, instance;
u32 i;
int ret;
memset(props, 0, sizeof(struct perf_open_properties));
props->poll_oa_period = DEFAULT_POLL_PERIOD_NS;
- if (!n_props) {
- drm_dbg(&perf->i915->drm,
- "No i915 perf properties given\n");
- return -EINVAL;
- }
-
- /* At the moment we only support using i915-perf on the RCS. */
- props->engine = intel_engine_lookup_user(perf->i915,
- I915_ENGINE_CLASS_RENDER,
- 0);
- if (!props->engine) {
- drm_dbg(&perf->i915->drm,
- "No RENDER-capable engines\n");
- return -EINVAL;
- }
-
/* Considering that ID = 0 is reserved and assuming that we don't
* (currently) expect any configurations to ever specify duplicate
* values for a particular property ID then the last _PROP_MAX value is
* one greater than the maximum number of properties we expect to get
* from userspace.
*/
- if (n_props >= DRM_I915_PERF_PROP_MAX) {
+ if (!n_props || n_props >= DRM_I915_PERF_PROP_MAX) {
drm_dbg(&perf->i915->drm,
- "More i915 perf properties specified than exist\n");
+ "Invalid number of i915 perf properties given\n");
return -EINVAL;
}
+ /* Defaults when class:instance is not passed */
+ class = I915_ENGINE_CLASS_RENDER;
+ instance = 0;
+
for (i = 0; i < n_props; i++) {
u64 oa_period, oa_freq_hz;
u64 id, value;
@@ -4069,8 +4160,6 @@ static int read_properties_unlocked(struct i915_perf *perf,
props->hold_preemption = !!value;
break;
case DRM_I915_PERF_PROP_GLOBAL_SSEU: {
- struct drm_i915_gem_context_param_sseu user_sseu;
-
if (GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 50)) {
drm_dbg(&perf->i915->drm,
"SSEU config not supported on gfx %x\n",
@@ -4085,14 +4174,7 @@ static int read_properties_unlocked(struct i915_perf *perf,
"Unable to copy global sseu parameter\n");
return -EFAULT;
}
-
- ret = get_sseu_config(&props->sseu, props->engine, &user_sseu);
- if (ret) {
- drm_dbg(&perf->i915->drm,
- "Invalid SSEU configuration\n");
- return ret;
- }
- props->has_sseu = true;
+ config_sseu = true;
break;
}
case DRM_I915_PERF_PROP_POLL_OA_PERIOD:
@@ -4104,7 +4186,15 @@ static int read_properties_unlocked(struct i915_perf *perf,
}
props->poll_oa_period = value;
break;
- case DRM_I915_PERF_PROP_MAX:
+ case DRM_I915_PERF_PROP_OA_ENGINE_CLASS:
+ class = (u8)value;
+ config_class = true;
+ break;
+ case DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE:
+ instance = (u8)value;
+ config_instance = true;
+ break;
+ default:
MISSING_CASE(id);
return -EINVAL;
}
@@ -4112,6 +4202,60 @@ static int read_properties_unlocked(struct i915_perf *perf,
uprop += 2;
}
+ if ((config_class && !config_instance) ||
+ (config_instance && !config_class)) {
+ drm_dbg(&perf->i915->drm,
+ "OA engine-class and engine-instance parameters must be passed together\n");
+ return -EINVAL;
+ }
+
+ props->engine = intel_engine_lookup_user(perf->i915, class, instance);
+ if (!props->engine) {
+ drm_dbg(&perf->i915->drm,
+ "OA engine class and instance invalid %d:%d\n",
+ class, instance);
+ return -EINVAL;
+ }
+
+ if (!engine_supports_oa(props->engine)) {
+ drm_dbg(&perf->i915->drm,
+ "Engine not supported by OA %d:%d\n",
+ class, instance);
+ return -EINVAL;
+ }
+
+ /*
+ * Wa_14017512683: mtl[a0..c0): Use of OAM must be preceded with Media
+ * C6 disable in BIOS. Fail if Media C6 is enabled on steppings where OAM
+ * does not work as expected.
+ */
+ if (IS_MTL_MEDIA_STEP(props->engine->i915, STEP_A0, STEP_C0) &&
+ props->engine->oa_group->type == TYPE_OAM &&
+ intel_check_bios_c6_setup(&props->engine->gt->rc6)) {
+ drm_dbg(&perf->i915->drm,
+ "OAM requires media C6 to be disabled in BIOS\n");
+ return -EINVAL;
+ }
+
+ i = array_index_nospec(props->oa_format, I915_OA_FORMAT_MAX);
+ f = &perf->oa_formats[i];
+ if (!engine_supports_oa_format(props->engine, f->type)) {
+ drm_dbg(&perf->i915->drm,
+ "Invalid OA format %d for class %d\n",
+ f->type, props->engine->class);
+ return -EINVAL;
+ }
+
+ if (config_sseu) {
+ ret = get_sseu_config(&props->sseu, props->engine, &user_sseu);
+ if (ret) {
+ drm_dbg(&perf->i915->drm,
+ "Invalid SSEU configuration\n");
+ return ret;
+ }
+ props->has_sseu = true;
+ }
+
return 0;
}
@@ -4282,6 +4426,14 @@ static const struct i915_range gen12_oa_b_counters[] = {
{}
};
+/*
+ * Register address ranges that mtl_is_valid_oam_b_counter_addr() looks up
+ * through reg_in_range_table(). The trailing empty entry presumably
+ * terminates the table - confirm against reg_in_range_table().
+ */
+static const struct i915_range mtl_oam_b_counters[] = {
+ { .start = 0x393000, .end = 0x39301c }, /* GEN12_OAM_STARTTRIG1[1-8] */
+ { .start = 0x393020, .end = 0x39303c }, /* GEN12_OAM_REPORTTRIG1[1-8] */
+ { .start = 0x393040, .end = 0x39307c }, /* GEN12_OAM_CEC[0-7][0-1] */
+ { .start = 0x393200, .end = 0x39323C }, /* MPES[0-7] */
+ {}
+};
+
static const struct i915_range xehp_oa_b_counters[] = {
{ .start = 0xdc48, .end = 0xdc48 }, /* OAA_ENABLE_REG */
{ .start = 0xdd00, .end = 0xdd48 }, /* OAG_LCE0_0 - OAA_LENABLE_REG */
@@ -4335,6 +4487,8 @@ static const struct i915_range mtl_oa_mux_regs[] = {
{ .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */
{ .start = 0x9840, .end = 0x9840 }, /* GDT_CHICKEN_BITS */
{ .start = 0x9884, .end = 0x9888 }, /* NOA_WRITE */
+ { .start = 0x38d100, .end = 0x38d114}, /* VISACTL */
+ {}
};
static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
@@ -4372,10 +4526,20 @@ static bool gen12_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
return reg_in_range_table(addr, gen12_oa_b_counters);
}
+/*
+ * Check @addr against the OAM B-counter ranges. The lookup is only done on
+ * devices that have OAM and a full graphics version of at least 12.70; on
+ * anything else every address is rejected.
+ */
+static bool mtl_is_valid_oam_b_counter_addr(struct i915_perf *perf, u32 addr)
+{
+	if (!HAS_OAM(perf->i915))
+		return false;
+
+	if (GRAPHICS_VER_FULL(perf->i915) < IP_VER(12, 70))
+		return false;
+
+	return reg_in_range_table(addr, mtl_oam_b_counters);
+}
+
static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
{
return reg_in_range_table(addr, xehp_oa_b_counters) ||
- reg_in_range_table(addr, gen12_oa_b_counters);
+ reg_in_range_table(addr, gen12_oa_b_counters) ||
+ mtl_is_valid_oam_b_counter_addr(perf, addr);
}
static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
@@ -4740,6 +4904,136 @@ static struct ctl_table oa_table[] = {
{}
};
+/*
+ * Number of perf groups that oa_init_gt() allocates for a GT. This is
+ * currently always 1; @gt is not consulted.
+ */
+static u32 num_perf_groups_per_gt(struct intel_gt *gt)
+{
+ return 1;
+}
+
+/*
+ * Map a media engine to its OAM perf group index. Devices with a full
+ * graphics version below 12.70 have no OAM group and get PERF_GROUP_INVALID.
+ */
+static u32 __oam_engine_group(struct intel_engine_cs *engine)
+{
+	if (GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))
+		return PERF_GROUP_INVALID;
+
+	/*
+	 * There's 1 SAMEDIA gt and 1 OAM per SAMEDIA gt. All media slices
+	 * within the gt use the same OAM. All MTL SKUs list 1 SA MEDIA.
+	 * Warn if the engine is not on a media GT, but still report the
+	 * SAMEDIA group.
+	 */
+	drm_WARN_ON(&engine->i915->drm, engine->gt->type != GT_MEDIA);
+
+	return PERF_GROUP_OAM_SAMEDIA_0;
+}
+
+/*
+ * Map an engine to its perf group index by engine class: render engines use
+ * the OAG group, video decode/enhancement engines use whatever
+ * __oam_engine_group() returns, and every other class has no group.
+ */
+static u32 __oa_engine_group(struct intel_engine_cs *engine)
+{
+	if (engine->class == RENDER_CLASS)
+		return PERF_GROUP_OAG;
+
+	if (engine->class == VIDEO_DECODE_CLASS ||
+	    engine->class == VIDEO_ENHANCEMENT_CLASS)
+		return __oam_engine_group(engine);
+
+	return PERF_GROUP_INVALID;
+}
+
+/*
+ * Build the register description of an OAM unit whose registers are derived
+ * from @base.
+ *
+ * Members are initialized by name so that the register-to-member mapping does
+ * not depend on the member order of struct i915_perf_regs.
+ */
+static struct i915_perf_regs __oam_regs(u32 base)
+{
+	return (struct i915_perf_regs) {
+		.base = base,
+		.oa_head_ptr = GEN12_OAM_HEAD_POINTER(base),
+		.oa_tail_ptr = GEN12_OAM_TAIL_POINTER(base),
+		.oa_buffer = GEN12_OAM_BUFFER(base),
+		.oa_ctx_ctrl = GEN12_OAM_CONTEXT_CONTROL(base),
+		.oa_ctrl = GEN12_OAM_CONTROL(base),
+		.oa_debug = GEN12_OAM_DEBUG(base),
+		.oa_status = GEN12_OAM_STATUS(base),
+		.oa_ctrl_counter_format_shift =
+			GEN12_OAM_CONTROL_COUNTER_FORMAT_SHIFT,
+	};
+}
+
+/*
+ * Build the register description of the OAG unit.
+ *
+ * Members are initialized by name so that the register-to-member mapping does
+ * not depend on the member order of struct i915_perf_regs. The leading member,
+ * which is 0 for OAG, is not named here and is therefore zero-initialized.
+ */
+static struct i915_perf_regs __oag_regs(void)
+{
+	return (struct i915_perf_regs) {
+		.oa_head_ptr = GEN12_OAG_OAHEADPTR,
+		.oa_tail_ptr = GEN12_OAG_OATAILPTR,
+		.oa_buffer = GEN12_OAG_OABUFFER,
+		.oa_ctx_ctrl = GEN12_OAG_OAGLBCTXCTRL,
+		.oa_ctrl = GEN12_OAG_OACONTROL,
+		.oa_debug = GEN12_OAG_OA_DEBUG,
+		.oa_status = GEN12_OAG_OASTATUS,
+		.oa_ctrl_counter_format_shift =
+			GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT,
+	};
+}
+
+/*
+ * Fill in the register description and type of every perf group of @gt that
+ * has at least one engine. The PERF_GROUP_OAG slot of a non-media GT becomes
+ * an OAG group. Any other populated slot becomes an OAM group when the full
+ * graphics version is at least 12.70, and is left untouched otherwise.
+ */
+static void oa_init_groups(struct intel_gt *gt)
+{
+	int num_groups = gt->perf.num_perf_groups;
+	int i;
+
+	for (i = 0; i < num_groups; i++) {
+		struct i915_perf_group *g = &gt->perf.group[i];
+
+		/* Fused off engines can result in a group with num_engines == 0 */
+		if (!g->num_engines)
+			continue;
+
+		if (i == PERF_GROUP_OAG && gt->type != GT_MEDIA) {
+			g->regs = __oag_regs();
+			g->type = TYPE_OAG;
+			continue;
+		}
+
+		if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) {
+			g->regs = __oam_regs(mtl_oa_base[i]);
+			g->type = TYPE_OAM;
+		}
+	}
+}
+
+/*
+ * Allocate the perf groups of @gt and attach every engine of the GT to its
+ * group. Engines whose group index falls outside the allocated range get a
+ * NULL oa_group. The groups are then published in gt->perf and initialized
+ * by oa_init_groups().
+ *
+ * Returns 0 on success or -ENOMEM if the group array cannot be allocated.
+ */
+static int oa_init_gt(struct intel_gt *gt)
+{
+	u32 num_groups = num_perf_groups_per_gt(gt);
+	struct i915_perf_group *groups;
+	struct intel_engine_cs *engine;
+	intel_engine_mask_t tmp;
+
+	groups = kcalloc(num_groups, sizeof(*groups), GFP_KERNEL);
+	if (!groups)
+		return -ENOMEM;
+
+	for_each_engine_masked(engine, gt, ALL_ENGINES, tmp) {
+		u32 index = __oa_engine_group(engine);
+
+		if (index < num_groups) {
+			groups[index].num_engines++;
+			engine->oa_group = &groups[index];
+		} else {
+			engine->oa_group = NULL;
+		}
+	}
+
+	gt->perf.num_perf_groups = num_groups;
+	gt->perf.group = groups;
+
+	oa_init_groups(gt);
+
+	return 0;
+}
+
+/*
+ * Run oa_init_gt() on every GT of the device, stopping at the first failure.
+ *
+ * Returns 0 on success or the error code of the failing oa_init_gt() call.
+ */
+static int oa_init_engine_groups(struct i915_perf *perf)
+{
+	struct intel_gt *gt;
+	int i;
+
+	for_each_gt(gt, perf->i915, i) {
+		int err = oa_init_gt(gt);
+
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
static void oa_init_supported_formats(struct i915_perf *perf)
{
struct drm_i915_private *i915 = perf->i915;
@@ -4780,9 +5074,15 @@ static void oa_init_supported_formats(struct i915_perf *perf)
break;
case INTEL_DG2:
+ oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8);
+ oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8);
+ break;
+
case INTEL_METEORLAKE:
oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8);
oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8);
+ oa_format_add(perf, I915_OAM_FORMAT_MPEC8u64_B8_C8);
+ oa_format_add(perf, I915_OAM_FORMAT_MPEC8u32_B8_C8);
break;
default:
@@ -4830,7 +5130,7 @@ static void i915_perf_init_info(struct drm_i915_private *i915)
* Note: i915-perf initialization is split into an 'init' and 'register'
* phase with the i915_perf_register() exposing state to userspace.
*/
-void i915_perf_init(struct drm_i915_private *i915)
+int i915_perf_init(struct drm_i915_private *i915)
{
struct i915_perf *perf = &i915->perf;
@@ -4906,7 +5206,7 @@ void i915_perf_init(struct drm_i915_private *i915)
if (perf->ops.enable_metric_set) {
struct intel_gt *gt;
- int i;
+ int i, ret;
for_each_gt(gt, i915, i)
mutex_init(&gt->perf.lock);
@@ -4945,8 +5245,17 @@ void i915_perf_init(struct drm_i915_private *i915)
perf->i915 = i915;
+ ret = oa_init_engine_groups(perf);
+ if (ret) {
+ drm_err(&i915->drm,
+ "OA initialization failed %d\n", ret);
+ return ret;
+ }
+
oa_init_supported_formats(perf);
}
+
+ return 0;
}
static int destroy_config(int id, void *p, void *data)
@@ -4973,10 +5282,15 @@ void i915_perf_sysctl_unregister(void)
void i915_perf_fini(struct drm_i915_private *i915)
{
struct i915_perf *perf = &i915->perf;
+ struct intel_gt *gt;
+ int i;
if (!perf->i915)
return;
+ for_each_gt(gt, perf->i915, i)
+ kfree(gt->perf.group);
+
idr_for_each(&perf->metrics_idr, destroy_config, perf);
idr_destroy(&perf->metrics_idr);
@@ -4989,7 +5303,7 @@ void i915_perf_fini(struct drm_i915_private *i915)
*
* This version number is used by userspace to detect available features.
*/
-int i915_perf_ioctl_version(void)
+int i915_perf_ioctl_version(struct drm_i915_private *i915)
{
/*
* 1: Initial version
@@ -5010,8 +5324,30 @@ int i915_perf_ioctl_version(void)
*
* 5: Add DRM_I915_PERF_PROP_POLL_OA_PERIOD parameter that controls the
* interval for the hrtimer used to check for OA data.
+ *
+ * 6: Add DRM_I915_PERF_PROP_OA_ENGINE_CLASS and
+ * DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE
+ *
+ * 7: Add support for video decode and enhancement classes.
+ */
+
+ /*
+ * Wa_14017512683: mtl[a0..c0): Use of OAM must be preceded with Media
+ * C6 disable in BIOS. If Media C6 is enabled in BIOS, return version 6
+ * to indicate that OA media is not supported.
*/
- return 5;
+ if (IS_MTL_MEDIA_STEP(i915, STEP_A0, STEP_C0)) {
+ struct intel_gt *gt;
+ int i;
+
+ for_each_gt(gt, i915, i) {
+ if (gt->type == GT_MEDIA &&
+ intel_check_bios_c6_setup(&gt->rc6))
+ return 6;
+ }
+ }
+
+ return 7;
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)