From 8f5e39ce9214888464d34ef7949e56bdc999b46c Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Mon, 28 Sep 2015 14:16:51 -0400 Subject: i40e/i40evf: refactor IRQ enable function This change moves a multi-line register setting into a function which simplifies reading the flow of the enable function. This also fixes a bug where the enable function was enabling the interrupt twice while trying to update the two interrupt throttle rate thresholds for Rx and Tx. Change-ID: Ie308f9d0d48540204590cb9d7a5a7b1196f959bb Signed-off-by: Jesse Brandeburg Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 108 +++++++++++++++----------- 1 file changed, 62 insertions(+), 46 deletions(-) (limited to 'drivers/net/ethernet/intel/i40evf/i40e_txrx.c') diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 97493a4164eb..597e0964c232 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -318,6 +318,8 @@ static void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector * i40e_set_new_dynamic_itr - Find new ITR level * @rc: structure containing ring performance data * + * Returns true if ITR changed, false if not + * * Stores a new ITR value based on packets and byte counts during * the last interrupt. The advantage of per interrupt computation * is faster updates and more accurate ITR for the current traffic @@ -326,14 +328,14 @@ static void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector * testing data as well as attempting to minimize response time * while increasing bulk throughput. **/ -static void i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) +static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) { enum i40e_latency_range new_latency_range = rc->latency_range; u32 new_itr = rc->itr; int bytes_per_int; if (rc->total_packets == 0 || !rc->itr) - return; + return false; /* simple throttlerate management * 0-10MB/s lowest (100000 ints/s) @@ -377,11 +379,15 @@ static void i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) break; } - if (new_itr != rc->itr) - rc->itr = new_itr; - rc->total_bytes = 0; rc->total_packets = 0; + + if (new_itr != rc->itr) { + rc->itr = new_itr; + return true; + } + + return false; } /* @@ -1187,6 +1193,21 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget) return total_rx_packets; } +static u32 i40e_buildreg_itr(const int type, const u16 itr) +{ + u32 val; + + val = I40E_VFINT_DYN_CTLN1_INTENA_MASK | + I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK | + (type << I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) | + (itr << I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT); + + return val; +} + +/* a small macro to shorten up some long lines */ +#define INTREG I40E_VFINT_DYN_CTLN1 + /** * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt * @vsi: the VSI we care about @@ -1197,55 +1218,50 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) { struct i40e_hw *hw = &vsi->back->hw; - u16 old_itr; + bool rx = false, tx = false; + u32 rxval, txval; int vector; - u32 val; vector = (q_vector->v_idx + vsi->base_vector); + rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0); + if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) { - old_itr = q_vector->rx.itr; - i40e_set_new_dynamic_itr(&q_vector->rx); - if (old_itr != q_vector->rx.itr) { - val = I40E_VFINT_DYN_CTLN1_INTENA_MASK | - I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK | - (I40E_RX_ITR << - I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) | - (q_vector->rx.itr << - I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT); - } else { - val = I40E_VFINT_DYN_CTLN1_INTENA_MASK | - I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK | - (I40E_ITR_NONE << - I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT); - } - if (!test_bit(__I40E_DOWN, &vsi->state)) - wr32(hw, I40E_VFINT_DYN_CTLN1(vector - 1), val); - } else { - i40evf_irq_enable_queues(vsi->back, 1 - << q_vector->v_idx); + rx = i40e_set_new_dynamic_itr(&q_vector->rx); + rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr); } if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) { - old_itr = q_vector->tx.itr; - i40e_set_new_dynamic_itr(&q_vector->tx); - if (old_itr != q_vector->tx.itr) { - val = I40E_VFINT_DYN_CTLN1_INTENA_MASK | - I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK | - (I40E_TX_ITR << - I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) | - (q_vector->tx.itr << - I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT); + tx = i40e_set_new_dynamic_itr(&q_vector->tx); + txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr); + } + if (rx || tx) { + /* get the higher of the two ITR adjustments and + * use the same value for both ITR registers + * when in adaptive mode (Rx and/or Tx) + */ + u16 itr = max(q_vector->tx.itr, q_vector->rx.itr); - } else { - val = I40E_VFINT_DYN_CTLN1_INTENA_MASK | - I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK | - (I40E_ITR_NONE << - I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT); - } - if (!test_bit(__I40E_DOWN, &vsi->state)) - wr32(hw, I40E_VFINT_DYN_CTLN1(vector - 1), val); - } else { - i40evf_irq_enable_queues(vsi->back, BIT(q_vector->v_idx)); + q_vector->tx.itr = q_vector->rx.itr = itr; + txval = i40e_buildreg_itr(I40E_TX_ITR, itr); + tx = true; + rxval = i40e_buildreg_itr(I40E_RX_ITR, itr); + rx = true; } + + /* only need to enable the interrupt once, but need + * to possibly update both ITR values + */ + if (rx) { + /* set the INTENA_MSK_MASK so that this first write + * won't actually enable the interrupt, instead just + * updating the ITR (it's bit 31 PF and VF) + */ + rxval |= BIT(31); + /* don't check _DOWN because interrupt isn't being enabled */ + wr32(hw, INTREG(vector - 1), rxval); + } + + if (!test_bit(__I40E_DOWN, &vsi->state)) + wr32(hw, INTREG(vector - 1), txval); } /** -- cgit v1.2.3 From 51cc6d9fccde27310b7dfba2be268ff5b8dcf52d Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Mon, 28 Sep 2015 14:16:52 -0400 Subject: i40e/i40evf: fix bug in throttle rate math The driver was using a value expressed in 2us increments for the divisor to figure out our bytes/usec values. Fix the usecs variable to contain a value in microseconds. Change-ID: I5c20493103c295d6f201947bb908add7040b7c41 Signed-off-by: Jesse Brandeburg Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 9 ++++++++- drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet/intel/i40evf/i40e_txrx.c') diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 889d25d6fde2..c96e581cc2b7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -830,6 +830,7 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) enum i40e_latency_range new_latency_range = rc->latency_range; u32 new_itr = rc->itr; int bytes_per_int; + int usecs; if (rc->total_packets == 0 || !rc->itr) return false; @@ -838,8 +839,14 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) * 0-10MB/s lowest (100000 ints/s) * 10-20MB/s low (20000 ints/s) * 20-1249MB/s bulk (8000 ints/s) + * + * The math works out because the divisor is in 10^(-6) which + * turns the bytes/us input value into MB/s values, but + * make sure to use usecs, as the register values written + * are in 2 usec increments in the ITR registers. */ - bytes_per_int = rc->total_bytes / rc->itr; + usecs = (rc->itr << 1); + bytes_per_int = rc->total_bytes / usecs; switch (new_latency_range) { case I40E_LOWEST_LATENCY: if (bytes_per_int > 10) diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 597e0964c232..7d98042338e0 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -333,6 +333,7 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) enum i40e_latency_range new_latency_range = rc->latency_range; u32 new_itr = rc->itr; int bytes_per_int; + int usecs; if (rc->total_packets == 0 || !rc->itr) return false; @@ -341,8 +342,14 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) * 0-10MB/s lowest (100000 ints/s) * 10-20MB/s low (20000 ints/s) * 20-1249MB/s bulk (8000 ints/s) + * + * The math works out because the divisor is in 10^(-6) which + * turns the bytes/us input value into MB/s values, but + * make sure to use usecs, as the register values written + * are in 2 usec increments in the ITR registers. */ - bytes_per_int = rc->total_bytes / rc->itr; + usecs = (rc->itr << 1); + bytes_per_int = rc->total_bytes / usecs; switch (new_latency_range) { case I40E_LOWEST_LATENCY: if (bytes_per_int > 10) -- cgit v1.2.3 From c56625d59726ee2dc4dfd91c8b6c22098abe1ac4 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Mon, 28 Sep 2015 14:16:53 -0400 Subject: i40e/i40evf: change dynamic interrupt thresholds The dynamic algorithm, while now working, doesn't have good performance in 40G mode. One part of this patch addresses the high CPU utilization of some small streaming workloads that the driver should reduce CPU in. It also changes the minimum ITR that the dynamic algorithm will settle on, causing our minimum latency to go from 12us to about 14us, when using adaptive mode. It also changes the BULK interrupt rate to allow maximum throughput on a 40Gb connection with a single thread of transmit, clamping interrupt rate to 8000 for TX makes single thread traffic go too slow. The new ULTRA bulk setting is introduced and is used when the Rx packet rate on this queue exceeds 40000 packets per second. This value of 40000 was chosen because the automatic tuning of minimum ITR=20us means that a single queue can't quite achieve that many packets per second from a round-robin test. Change-ID: Icce8faa128688ca5fd2c4229bdd9726877a92ea2 Signed-off-by: Jesse Brandeburg Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 27 +++++++++++++++++++++------ drivers/net/ethernet/intel/i40e/i40e_txrx.h | 3 +++ drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 27 +++++++++++++++++++++------ drivers/net/ethernet/intel/i40evf/i40e_txrx.h | 3 +++ 4 files changed, 48 insertions(+), 12 deletions(-) (limited to 'drivers/net/ethernet/intel/i40evf/i40e_txrx.c') diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index c96e581cc2b7..7aea14389d7b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -828,6 +828,7 @@ void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) { enum i40e_latency_range new_latency_range = rc->latency_range; + struct i40e_q_vector *qv = rc->ring->q_vector; u32 new_itr = rc->itr; int bytes_per_int; int usecs; @@ -836,9 +837,10 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) return false; /* simple throttlerate management - * 0-10MB/s lowest (100000 ints/s) + * 0-10MB/s lowest (50000 ints/s) * 10-20MB/s low (20000 ints/s) - * 20-1249MB/s bulk (8000 ints/s) + * 20-1249MB/s bulk (18000 ints/s) + * > 40000 Rx packets per second (8000 ints/s) * * The math works out because the divisor is in 10^(-6) which * turns the bytes/us input value into MB/s values, but @@ -859,24 +861,37 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) new_latency_range = I40E_LOWEST_LATENCY; break; case I40E_BULK_LATENCY: - if (bytes_per_int <= 20) - new_latency_range = I40E_LOW_LATENCY; - break; + case I40E_ULTRA_LATENCY: default: if (bytes_per_int <= 20) new_latency_range = I40E_LOW_LATENCY; break; } + + /* this is to adjust RX more aggressively when streaming small + * packets. The value of 40000 was picked as it is just beyond + * what the hardware can receive per second if in low latency + * mode. + */ +#define RX_ULTRA_PACKET_RATE 40000 + + if ((((rc->total_packets * 1000000) / usecs) > RX_ULTRA_PACKET_RATE) && + (&qv->rx == rc)) + new_latency_range = I40E_ULTRA_LATENCY; + rc->latency_range = new_latency_range; switch (new_latency_range) { case I40E_LOWEST_LATENCY: - new_itr = I40E_ITR_100K; + new_itr = I40E_ITR_50K; break; case I40E_LOW_LATENCY: new_itr = I40E_ITR_20K; break; case I40E_BULK_LATENCY: + new_itr = I40E_ITR_18K; + break; + case I40E_ULTRA_LATENCY: new_itr = I40E_ITR_8K; break; default: diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 7c0ed84b296d..0fe7eb77cae5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -32,7 +32,9 @@ #define I40E_MAX_ITR 0x0FF0 /* reg uses 2 usec resolution */ #define I40E_MIN_ITR 0x0001 /* reg uses 2 usec resolution */ #define I40E_ITR_100K 0x0005 +#define I40E_ITR_50K 0x000A #define I40E_ITR_20K 0x0019 +#define I40E_ITR_18K 0x001B #define I40E_ITR_8K 0x003E #define I40E_ITR_4K 0x007A #define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */ @@ -296,6 +298,7 @@ enum i40e_latency_range { I40E_LOWEST_LATENCY = 0, I40E_LOW_LATENCY = 1, I40E_BULK_LATENCY = 2, + I40E_ULTRA_LATENCY = 3, }; struct i40e_ring_container { diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 7d98042338e0..d76fe4a948df 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -331,6 +331,7 @@ static void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) { enum i40e_latency_range new_latency_range = rc->latency_range; + struct i40e_q_vector *qv = rc->ring->q_vector; u32 new_itr = rc->itr; int bytes_per_int; int usecs; @@ -339,9 +340,10 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) return false; /* simple throttlerate management - * 0-10MB/s lowest (100000 ints/s) + * 0-10MB/s lowest (50000 ints/s) * 10-20MB/s low (20000 ints/s) - * 20-1249MB/s bulk (8000 ints/s) + * 20-1249MB/s bulk (18000 ints/s) + * > 40000 Rx packets per second (8000 ints/s) * * The math works out because the divisor is in 10^(-6) which * turns the bytes/us input value into MB/s values, but @@ -362,24 +364,37 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) new_latency_range = I40E_LOWEST_LATENCY; break; case I40E_BULK_LATENCY: - if (bytes_per_int <= 20) - new_latency_range = I40E_LOW_LATENCY; - break; + case I40E_ULTRA_LATENCY: default: if (bytes_per_int <= 20) new_latency_range = I40E_LOW_LATENCY; break; } + + /* this is to adjust RX more aggressively when streaming small + * packets. The value of 40000 was picked as it is just beyond + * what the hardware can receive per second if in low latency + * mode. + */ +#define RX_ULTRA_PACKET_RATE 40000 + + if ((((rc->total_packets * 1000000) / usecs) > RX_ULTRA_PACKET_RATE) && + (&qv->rx == rc)) + new_latency_range = I40E_ULTRA_LATENCY; + rc->latency_range = new_latency_range; switch (new_latency_range) { case I40E_LOWEST_LATENCY: - new_itr = I40E_ITR_100K; + new_itr = I40E_ITR_50K; break; case I40E_LOW_LATENCY: new_itr = I40E_ITR_20K; break; case I40E_BULK_LATENCY: + new_itr = I40E_ITR_18K; + break; + case I40E_ULTRA_LATENCY: new_itr = I40E_ITR_8K; break; default: diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index 03275eb54cc4..e2352b886b77 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -32,7 +32,9 @@ #define I40E_MAX_ITR 0x0FF0 /* reg uses 2 usec resolution */ #define I40E_MIN_ITR 0x0001 /* reg uses 2 usec resolution */ #define I40E_ITR_100K 0x0005 +#define I40E_ITR_50K 0x000A #define I40E_ITR_20K 0x0019 +#define I40E_ITR_18K 0x001B #define I40E_ITR_8K 0x003E #define I40E_ITR_4K 0x007A #define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */ @@ -291,6 +293,7 @@ enum i40e_latency_range { I40E_LOWEST_LATENCY = 0, I40E_LOW_LATENCY = 1, I40E_BULK_LATENCY = 2, + I40E_ULTRA_LATENCY = 3, }; struct i40e_ring_container { -- cgit v1.2.3 From ee2319cf17ee64bbd0096f2f8f3f8390c93b1e39 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Mon, 28 Sep 2015 14:16:54 -0400 Subject: i40e/i40evf: adjust interrupt throttle less frequently The adaptive ITR (interrupt throttle rate) algorithm was adjusting the hardware's interrupt rate too frequently. This caused a lot of variation in the interrupt rate for fairly constant workloads. Change the code to have a counter and adjust only once every N number of interrupts. Change-ID: I0460f1f86571037484eca5aca36ac4d889cb8389 Signed-off-by: Jesse Brandeburg Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 2 ++ drivers/net/ethernet/intel/i40e/i40e_main.c | 2 ++ drivers/net/ethernet/intel/i40e/i40e_txrx.c | 22 ++++++++++++++++++++-- drivers/net/ethernet/intel/i40e/i40e_txrx.h | 4 ++-- drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 23 +++++++++++++++++++++-- drivers/net/ethernet/intel/i40evf/i40e_txrx.h | 4 ++-- drivers/net/ethernet/intel/i40evf/i40evf.h | 2 ++ drivers/net/ethernet/intel/i40evf/i40evf_main.c | 2 ++ 8 files changed, 53 insertions(+), 8 deletions(-) (limited to 'drivers/net/ethernet/intel/i40evf/i40e_txrx.c') diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index b7818be0f06d..7c2b2e891f62 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -577,6 +577,8 @@ struct i40e_q_vector { struct rcu_head rcu; /* to avoid race with update stats on free */ char name[I40E_INT_NAME_STR_LEN]; bool arm_wb_state; +#define ITR_COUNTDOWN_START 100 + u8 itr_countdown; /* when 0 should adjust ITR */ } ____cacheline_internodealigned_in_smp; /* lan device */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 7d52092947dc..e0142de018e6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -3087,6 +3087,7 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi) for (i = 0; i < vsi->num_q_vectors; i++, vector++) { struct i40e_q_vector *q_vector = vsi->q_vectors[i]; + q_vector->itr_countdown = ITR_COUNTDOWN_START; q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting); q_vector->rx.latency_range = I40E_LOW_LATENCY; wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1), @@ -3182,6 +3183,7 @@ static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi) u32 val; /* set the ITR configuration */ + q_vector->itr_countdown = ITR_COUNTDOWN_START; q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting); q_vector->rx.latency_range = I40E_LOW_LATENCY; wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.itr); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 7aea14389d7b..006f0fb4720c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -845,10 +845,12 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) * The math works out because the divisor is in 10^(-6) which * turns the bytes/us input value into MB/s values, but * make sure to use usecs, as the register values written - * are in 2 usec increments in the ITR registers. + * are in 2 usec increments in the ITR registers, and make sure + * to use the smoothed values that the countdown timer gives us. */ - usecs = (rc->itr << 1); + usecs = (rc->itr << 1) * ITR_COUNTDOWN_START; bytes_per_int = rc->total_bytes / usecs; + switch (new_latency_range) { case I40E_LOWEST_LATENCY: if (bytes_per_int > 10) @@ -1806,8 +1808,17 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, vector = (q_vector->v_idx + vsi->base_vector); + /* avoid dynamic calculation if in countdown mode OR if + * all dynamic is disabled + */ rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0); + if (q_vector->itr_countdown > 0 || + (!ITR_IS_DYNAMIC(vsi->rx_itr_setting) && + !ITR_IS_DYNAMIC(vsi->tx_itr_setting))) { + goto enable_int; + } + if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) { rx = i40e_set_new_dynamic_itr(&q_vector->rx); rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr); @@ -1845,8 +1856,15 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, wr32(hw, INTREG(vector - 1), rxval); } +enable_int: if (!test_bit(__I40E_DOWN, &vsi->state)) wr32(hw, INTREG(vector - 1), txval); + + if (q_vector->itr_countdown) + q_vector->itr_countdown--; + else + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 0fe7eb77cae5..6779fb771d6a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -38,8 +38,8 @@ #define I40E_ITR_8K 0x003E #define I40E_ITR_4K 0x007A #define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */ -#define I40E_ITR_RX_DEF I40E_ITR_8K -#define I40E_ITR_TX_DEF I40E_ITR_4K +#define I40E_ITR_RX_DEF I40E_ITR_20K +#define I40E_ITR_TX_DEF I40E_ITR_20K #define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */ #define I40E_MIN_INT_RATE 250 /* ~= 1000000 / (I40E_MAX_ITR * 2) */ #define I40E_MAX_INT_RATE 500000 /* == 1000000 / (I40E_MIN_ITR * 2) */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index d76fe4a948df..47e9a90d6b10 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -348,10 +348,12 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) * The math works out because the divisor is in 10^(-6) which * turns the bytes/us input value into MB/s values, but * make sure to use usecs, as the register values written - * are in 2 usec increments in the ITR registers. + * are in 2 usec increments in the ITR registers, and make sure + * to use the smoothed values that the countdown timer gives us. */ - usecs = (rc->itr << 1); + usecs = (rc->itr << 1) * ITR_COUNTDOWN_START; bytes_per_int = rc->total_bytes / usecs; + switch (new_latency_range) { case I40E_LOWEST_LATENCY: if (bytes_per_int > 10) @@ -1245,8 +1247,18 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, int vector; vector = (q_vector->v_idx + vsi->base_vector); + + /* avoid dynamic calculation if in countdown mode OR if + * all dynamic is disabled + */ rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0); + if (q_vector->itr_countdown > 0 || + (!ITR_IS_DYNAMIC(vsi->rx_itr_setting) && + !ITR_IS_DYNAMIC(vsi->tx_itr_setting))) { + goto enable_int; + } + if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) { rx = i40e_set_new_dynamic_itr(&q_vector->rx); rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr); @@ -1282,8 +1294,15 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, wr32(hw, INTREG(vector - 1), rxval); } +enable_int: if (!test_bit(__I40E_DOWN, &vsi->state)) wr32(hw, INTREG(vector - 1), txval); + + if (q_vector->itr_countdown) + q_vector->itr_countdown--; + else + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } /** diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index e2352b886b77..ebc1bf77f036 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -38,8 +38,8 @@ #define I40E_ITR_8K 0x003E #define I40E_ITR_4K 0x007A #define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */ -#define I40E_ITR_RX_DEF I40E_ITR_8K -#define I40E_ITR_TX_DEF I40E_ITR_4K +#define I40E_ITR_RX_DEF I40E_ITR_20K +#define I40E_ITR_TX_DEF I40E_ITR_20K #define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */ #define I40E_MIN_INT_RATE 250 /* ~= 1000000 / (I40E_MAX_ITR * 2) */ #define I40E_MAX_INT_RATE 500000 /* == 1000000 / (I40E_MIN_ITR * 2) */ diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index 22841c619f37..22fc3d49c4b9 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -112,6 +112,8 @@ struct i40e_q_vector { struct i40e_ring_container tx; u32 ring_mask; u8 num_ringpairs; /* total number of ring pairs in vector */ +#define ITR_COUNTDOWN_START 100 + u8 itr_countdown; /* when 0 or 1 update ITR */ int v_idx; /* vector index in list */ char name[IFNAMSIZ + 9]; bool arm_wb_state; diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index ce997902f5a3..a44a42bb83c0 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -357,6 +357,7 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx) q_vector->rx.ring = rx_ring; q_vector->rx.count++; q_vector->rx.latency_range = I40E_LOW_LATENCY; + q_vector->itr_countdown = ITR_COUNTDOWN_START; } /** @@ -377,6 +378,7 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx) q_vector->tx.ring = tx_ring; q_vector->tx.count++; q_vector->tx.latency_range = I40E_LOW_LATENCY; + q_vector->itr_countdown = ITR_COUNTDOWN_START; q_vector->num_ringpairs++; q_vector->ring_mask |= BIT(t_idx); } -- cgit v1.2.3