diff options
Diffstat (limited to 'drivers/net/ethernet/intel/i40e/i40e_txrx.c')
-rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e_txrx.c | 215 |
1 files changed, 102 insertions, 113 deletions
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index b936febc315a..4566d66ffc7c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -759,7 +759,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, break; /* prevent any other reads prior to eop_desc */ - read_barrier_depends(); + smp_rmb(); i40e_trace(clean_tx_irq, tx_ring, tx_desc, tx_buf); /* we have caught up to head, no work left to do */ @@ -860,7 +860,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, netdev_tx_completed_queue(txring_txq(tx_ring), total_packets, total_bytes); -#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) +#define TX_WAKE_THRESHOLD ((s16)(DESC_NEEDED * 2)) if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) && (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) { /* Make sure that anybody stopping the queue after this @@ -959,19 +959,31 @@ void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) { enum i40e_latency_range new_latency_range = rc->latency_range; - struct i40e_q_vector *qv = rc->ring->q_vector; u32 new_itr = rc->itr; - int bytes_per_int; - int usecs; + int bytes_per_usec; + unsigned int usecs, estimated_usecs; if (rc->total_packets == 0 || !rc->itr) return false; + usecs = (rc->itr << 1) * ITR_COUNTDOWN_START; + bytes_per_usec = rc->total_bytes / usecs; + + /* The calculations in this algorithm depend on interrupts actually + * firing at the ITR rate. This may not happen if the packet rate is + * really low, or if we've been napi polling. Check to make sure + * that's not the case before we continue. + */ + estimated_usecs = jiffies_to_usecs(jiffies - rc->last_itr_update); + if (estimated_usecs > usecs) { + new_latency_range = I40E_LOW_LATENCY; + goto reset_latency; + } + /* simple throttlerate management * 0-10MB/s lowest (50000 ints/s) * 10-20MB/s low (20000 ints/s) * 20-1249MB/s bulk (18000 ints/s) - * > 40000 Rx packets per second (8000 ints/s) * * The math works out because the divisor is in 10^(-6) which * turns the bytes/us input value into MB/s values, but @@ -979,39 +991,25 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) * are in 2 usec increments in the ITR registers, and make sure * to use the smoothed values that the countdown timer gives us. */ - usecs = (rc->itr << 1) * ITR_COUNTDOWN_START; - bytes_per_int = rc->total_bytes / usecs; - switch (new_latency_range) { case I40E_LOWEST_LATENCY: - if (bytes_per_int > 10) + if (bytes_per_usec > 10) new_latency_range = I40E_LOW_LATENCY; break; case I40E_LOW_LATENCY: - if (bytes_per_int > 20) + if (bytes_per_usec > 20) new_latency_range = I40E_BULK_LATENCY; - else if (bytes_per_int <= 10) + else if (bytes_per_usec <= 10) new_latency_range = I40E_LOWEST_LATENCY; break; case I40E_BULK_LATENCY: - case I40E_ULTRA_LATENCY: default: - if (bytes_per_int <= 20) + if (bytes_per_usec <= 20) new_latency_range = I40E_LOW_LATENCY; break; } - /* this is to adjust RX more aggressively when streaming small - * packets. The value of 40000 was picked as it is just beyond - * what the hardware can receive per second if in low latency - * mode. - */ -#define RX_ULTRA_PACKET_RATE 40000 - - if ((((rc->total_packets * 1000000) / usecs) > RX_ULTRA_PACKET_RATE) && - (&qv->rx == rc)) - new_latency_range = I40E_ULTRA_LATENCY; - +reset_latency: rc->latency_range = new_latency_range; switch (new_latency_range) { @@ -1024,25 +1022,48 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) case I40E_BULK_LATENCY: new_itr = I40E_ITR_18K; break; - case I40E_ULTRA_LATENCY: - new_itr = I40E_ITR_8K; - break; default: break; } rc->total_bytes = 0; rc->total_packets = 0; + rc->last_itr_update = jiffies; if (new_itr != rc->itr) { rc->itr = new_itr; return true; } - return false; } /** + * i40e_reuse_rx_page - page flip buffer and store it back on the ring + * @rx_ring: rx descriptor ring to store buffers on + * @old_buff: donor buffer to have page reused + * + * Synchronizes page for reuse by the adapter + **/ +static void i40e_reuse_rx_page(struct i40e_ring *rx_ring, + struct i40e_rx_buffer *old_buff) +{ + struct i40e_rx_buffer *new_buff; + u16 nta = rx_ring->next_to_alloc; + + new_buff = &rx_ring->rx_bi[nta]; + + /* update, and store next to alloc */ + nta++; + rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; + + /* transfer page from old buffer to new buffer */ + new_buff->dma = old_buff->dma; + new_buff->page = old_buff->page; + new_buff->page_offset = old_buff->page_offset; + new_buff->pagecnt_bias = old_buff->pagecnt_bias; +} + +/** * i40e_rx_is_programming_status - check for programming status descriptor * @qw: qword representing status_error_len in CPU ordering * @@ -1076,15 +1097,24 @@ static void i40e_clean_programming_status(struct i40e_ring *rx_ring, union i40e_rx_desc *rx_desc, u64 qw) { - u32 ntc = rx_ring->next_to_clean + 1; + struct i40e_rx_buffer *rx_buffer; + u32 ntc = rx_ring->next_to_clean; u8 id; /* fetch, update, and store next to clean */ + rx_buffer = &rx_ring->rx_bi[ntc++]; ntc = (ntc < rx_ring->count) ? ntc : 0; rx_ring->next_to_clean = ntc; prefetch(I40E_RX_DESC(rx_ring, ntc)); + /* place unused page back on the ring */ + i40e_reuse_rx_page(rx_ring, rx_buffer); + rx_ring->rx_stats.page_reuse_count++; + + /* clear contents of buffer_info */ + rx_buffer->page = NULL; + id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >> I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT; @@ -1113,6 +1143,8 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring) if (!tx_ring->tx_bi) goto err; + u64_stats_init(&tx_ring->syncp); + /* round up to nearest 4K */ tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc); /* add u32 for head writeback, align after this takes care of @@ -1642,32 +1674,6 @@ static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb, } /** - * i40e_reuse_rx_page - page flip buffer and store it back on the ring - * @rx_ring: rx descriptor ring to store buffers on - * @old_buff: donor buffer to have page reused - * - * Synchronizes page for reuse by the adapter - **/ -static void i40e_reuse_rx_page(struct i40e_ring *rx_ring, - struct i40e_rx_buffer *old_buff) -{ - struct i40e_rx_buffer *new_buff; - u16 nta = rx_ring->next_to_alloc; - - new_buff = &rx_ring->rx_bi[nta]; - - /* update, and store next to alloc */ - nta++; - rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; - - /* transfer page from old buffer to new buffer */ - new_buff->dma = old_buff->dma; - new_buff->page = old_buff->page; - new_buff->page_offset = old_buff->page_offset; - new_buff->pagecnt_bias = old_buff->pagecnt_bias; -} - -/** * i40e_page_is_reusable - check if any reuse is possible * @page: page struct to check * @@ -2063,7 +2069,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); bool failure = false, xdp_xmit = false; - while (likely(total_rx_packets < budget)) { + while (likely(total_rx_packets < (unsigned int)budget)) { struct i40e_rx_buffer *rx_buffer; union i40e_rx_desc *rx_desc; struct xdp_buff xdp; @@ -2096,6 +2102,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) if (unlikely(i40e_rx_is_programming_status(qword))) { i40e_clean_programming_status(rx_ring, rx_desc, qword); + cleaned_count++; continue; } size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> @@ -2110,6 +2117,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) if (!skb) { xdp.data = page_address(rx_buffer->page) + rx_buffer->page_offset; + xdp_set_data_meta_invalid(&xdp); xdp.data_hard_start = xdp.data - i40e_rx_offset(rx_ring); xdp.data_end = xdp.data + size; @@ -2196,7 +2204,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) rx_ring->q_vector->rx.total_bytes += total_rx_bytes; /* guarantee a trip back through this routine if there was a failure */ - return failure ? budget : total_rx_packets; + return failure ? budget : (int)total_rx_packets; } static u32 i40e_buildreg_itr(const int type, const u16 itr) @@ -2204,9 +2212,7 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr) u32 val; val = I40E_PFINT_DYN_CTLN_INTENA_MASK | - /* Don't clear PBA because that can cause lost interrupts that - * came in while we were cleaning/polling - */ + I40E_PFINT_DYN_CTLN_CLEARPBA_MASK | (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) | (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT); @@ -2241,6 +2247,12 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, int idx = q_vector->v_idx; int rx_itr_setting, tx_itr_setting; + /* If we don't have MSIX, then we only need to re-enable icr0 */ + if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) { + i40e_irq_dynamic_enable_icr0(vsi->back); + return; + } + vector = (q_vector->v_idx + vsi->base_vector); /* avoid dynamic calculation if in countdown mode OR if @@ -2257,7 +2269,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, goto enable_int; } - if (ITR_IS_DYNAMIC(tx_itr_setting)) { + if (ITR_IS_DYNAMIC(rx_itr_setting)) { rx = i40e_set_new_dynamic_itr(&q_vector->rx); rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr); } @@ -2361,7 +2373,6 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) /* If work not completed, return budget and polling will return */ if (!clean_complete) { - const cpumask_t *aff_mask = &q_vector->affinity_mask; int cpu_id = smp_processor_id(); /* It is possible that the interrupt affinity has changed but, @@ -2371,15 +2382,22 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) * continue to poll, otherwise we must stop polling so the * interrupt can move to the correct cpu. */ - if (likely(cpumask_test_cpu(cpu_id, aff_mask) || - !(vsi->back->flags & I40E_FLAG_MSIX_ENABLED))) { + if (!cpumask_test_cpu(cpu_id, &q_vector->affinity_mask)) { + /* Tell napi that we are done polling */ + napi_complete_done(napi, work_done); + + /* Force an interrupt */ + i40e_force_wb(vsi, q_vector); + + /* Return budget-1 so that polling stops */ + return budget - 1; + } tx_only: - if (arm_wb) { - q_vector->tx.ring[0].tx_stats.tx_force_wb++; - i40e_enable_wb_on_itr(vsi, q_vector); - } - return budget; + if (arm_wb) { + q_vector->tx.ring[0].tx_stats.tx_force_wb++; + i40e_enable_wb_on_itr(vsi, q_vector); } + return budget; } if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR) @@ -2388,16 +2406,7 @@ tx_only: /* Work is done so exit the polling mode and re-enable the interrupt */ napi_complete_done(napi, work_done); - /* If we're prematurely stopping polling to fix the interrupt - * affinity we want to make sure polling starts back up so we - * issue a call to i40e_force_wb which triggers a SW interrupt. - */ - if (!clean_complete) - i40e_force_wb(vsi, q_vector); - else if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) - i40e_irq_dynamic_enable_icr0(vsi->back, false); - else - i40e_update_enable_itr(vsi, q_vector); + i40e_update_enable_itr(vsi, q_vector); return min(work_done, budget - 1); } @@ -2451,9 +2460,15 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, hlen = (hdr.network[0] & 0x0F) << 2; l4_proto = hdr.ipv4->protocol; } else { - hlen = hdr.network - skb->data; - l4_proto = ipv6_find_hdr(skb, &hlen, IPPROTO_TCP, NULL, NULL); - hlen -= hdr.network - skb->data; + /* find the start of the innermost ipv6 header */ + unsigned int inner_hlen = hdr.network - skb->data; + unsigned int h_offset = inner_hlen; + + /* this function updates h_offset to the end of the header */ + l4_proto = + ipv6_find_hdr(skb, &h_offset, IPPROTO_TCP, NULL, NULL); + /* hlen will contain our best estimate of the tcp header */ + hlen = h_offset - inner_hlen; } if (l4_proto != IPPROTO_TCP) @@ -3160,38 +3175,12 @@ static inline int i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, /* write last descriptor with EOP bit */ td_cmd |= I40E_TX_DESC_CMD_EOP; - /* We can OR these values together as they both are checked against - * 4 below and at this point desc_count will be used as a boolean value - * after this if/else block. + /* We OR these values together to check both against 4 (WB_STRIDE) + * below. This is safe since we don't re-use desc_count afterwards. */ desc_count |= ++tx_ring->packet_stride; - /* Algorithm to optimize tail and RS bit setting: - * if queue is stopped - * mark RS bit - * reset packet counter - * else if xmit_more is supported and is true - * advance packet counter to 4 - * reset desc_count to 0 - * - * if desc_count >= 4 - * mark RS bit - * reset packet counter - * if desc_count > 0 - * update tail - * - * Note: If there are less than 4 descriptors - * pending and interrupts were disabled the service task will - * trigger a force WB. - */ - if (netif_xmit_stopped(txring_txq(tx_ring))) { - goto do_rs; - } else if (skb->xmit_more) { - /* set stride to arm on next packet and reset desc_count */ - tx_ring->packet_stride = WB_STRIDE; - desc_count = 0; - } else if (desc_count >= WB_STRIDE) { -do_rs: + if (desc_count >= WB_STRIDE) { /* write last descriptor with RS bit set */ td_cmd |= I40E_TX_DESC_CMD_RS; tx_ring->packet_stride = 0; @@ -3212,7 +3201,7 @@ do_rs: first->next_to_watch = tx_desc; /* notify HW of packet */ - if (desc_count) { + if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) { writel(i, tx_ring->tail); /* we need this if more than one processor can write to our tail |