Diffstat (limited to 'drivers/net/ethernet/intel/i40e/i40e_txrx.c')
-rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e_txrx.c | 506
1 file changed, 262 insertions, 244 deletions
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 6287bf63c43c..97d46058d71d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -122,14 +122,10 @@ static int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, struct device *dev; dma_addr_t dma; u32 td_cmd = 0; - u16 delay = 0; u16 i; /* find existing FDIR VSI */ - vsi = NULL; - for (i = 0; i < pf->num_alloc_vsi; i++) - if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR) - vsi = pf->vsi[i]; + vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR); if (!vsi) return -ENOENT; @@ -137,15 +133,11 @@ static int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, dev = tx_ring->dev; /* we need two descriptors to add/del a filter and we can wait */ - do { - if (I40E_DESC_UNUSED(tx_ring) > 1) - break; + for (i = I40E_FD_CLEAN_DELAY; I40E_DESC_UNUSED(tx_ring) < 2; i--) { + if (!i) + return -EAGAIN; msleep_interruptible(1); - delay++; - } while (delay < I40E_FD_CLEAN_DELAY); - - if (!(I40E_DESC_UNUSED(tx_ring) > 1)) - return -EAGAIN; + } dma = dma_map_single(dev, raw_packet, I40E_FDIR_MAX_RAW_PACKET_SIZE, DMA_TO_DEVICE); @@ -335,22 +327,6 @@ static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi, return err ? -EOPNOTSUPP : 0; } -/** - * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for - * a specific flow spec - * @vsi: pointer to the targeted VSI - * @fd_data: the flow director data required for the FDir descriptor - * @add: true adds a filter, false removes it - * - * Returns 0 if the filters were successfully added or removed - **/ -static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi, - struct i40e_fdir_filter *fd_data, - bool add) -{ - return -EOPNOTSUPP; -} - #define I40E_IP_DUMMY_PACKET_LEN 34 /** * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for @@ -433,12 +409,6 @@ int i40e_add_del_fdir(struct i40e_vsi *vsi, case UDP_V4_FLOW: ret = i40e_add_del_fdir_udpv4(vsi, input, add); break; - case SCTP_V4_FLOW: - ret = i40e_add_del_fdir_sctpv4(vsi, input, add); - break; - case IPV4_FLOW: - ret = i40e_add_del_fdir_ipv4(vsi, input, add); - break; case IP_USER_FLOW: switch (input->ip4_proto) { case IPPROTO_TCP: @@ -447,21 +417,27 @@ int i40e_add_del_fdir(struct i40e_vsi *vsi, case IPPROTO_UDP: ret = i40e_add_del_fdir_udpv4(vsi, input, add); break; - case IPPROTO_SCTP: - ret = i40e_add_del_fdir_sctpv4(vsi, input, add); - break; - default: + case IPPROTO_IP: ret = i40e_add_del_fdir_ipv4(vsi, input, add); break; + default: + /* We cannot support masking based on protocol */ + goto unsupported_flow; } break; default: +unsupported_flow: dev_info(&pf->pdev->dev, "Could not specify spec type %d\n", input->flow_type); ret = -EINVAL; } - /* The buffer allocated here is freed by the i40e_clean_tx_ring() */ + /* The buffer allocated here will be normally be freed by + * i40e_clean_fdir_tx_irq() as it reclaims resources after transmit + * completion. In the event of an error adding the buffer to the FDIR + * ring, it will immediately be freed. It may also be freed by + * i40e_clean_tx_ring() when closing the VSI. 
+ */ return ret; } @@ -645,7 +621,7 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw) return 0; } -#define WB_STRIDE 0x3 +#define WB_STRIDE 4 /** * i40e_clean_tx_irq - Reclaim resources after transmit completes @@ -761,7 +737,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, unsigned int j = i40e_get_tx_pending(tx_ring, false); if (budget && - ((j / (WB_STRIDE + 1)) == 0) && (j != 0) && + ((j / WB_STRIDE) == 0) && (j > 0) && !test_bit(__I40E_DOWN, &vsi->state) && (I40E_DESC_UNUSED(tx_ring) != tx_ring->count)) tx_ring->arm_wb = true; @@ -1042,14 +1018,15 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring) if (!rx_ring->rx_bi) return; + if (rx_ring->skb) { + dev_kfree_skb(rx_ring->skb); + rx_ring->skb = NULL; + } + /* Free all the Rx ring sk_buffs */ for (i = 0; i < rx_ring->count; i++) { struct i40e_rx_buffer *rx_bi = &rx_ring->rx_bi[i]; - if (rx_bi->skb) { - dev_kfree_skb(rx_bi->skb); - rx_bi->skb = NULL; - } if (!rx_bi->page) continue; @@ -1246,7 +1223,6 @@ bool i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count) * because each write-back erases this info. */ rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); - rx_desc->read.hdr_addr = 0; rx_desc++; bi++; @@ -1437,13 +1413,12 @@ void i40e_process_skb_fields(struct i40e_ring *rx_ring, u64 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); u32 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >> I40E_RXD_QW1_STATUS_SHIFT; - u32 rsyn = (rx_status & I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >> + u32 tsynvalid = rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK; + u32 tsyn = (rx_status & I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >> I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT; - if (unlikely(rsyn)) { - i40e_ptp_rx_hwtstamp(rx_ring->vsi->back, skb, rsyn); - rx_ring->last_rx_timestamp = jiffies; - } + if (unlikely(tsynvalid)) + i40e_ptp_rx_hwtstamp(rx_ring->vsi->back, skb, tsyn); i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype); @@ -1456,45 +1431,6 @@ void i40e_process_skb_fields(struct i40e_ring *rx_ring, } /** - * i40e_pull_tail - i40e specific version of skb_pull_tail - * @rx_ring: rx descriptor ring packet is being transacted on - * @skb: pointer to current skb being adjusted - * - * This function is an i40e specific version of __pskb_pull_tail. The - * main difference between this version and the original function is that - * this function can make several assumptions about the state of things - * that allow for significant optimizations versus the standard function. - * As a result we can do things like drop a frag and maintain an accurate - * truesize for the skb. - */ -static void i40e_pull_tail(struct i40e_ring *rx_ring, struct sk_buff *skb) -{ - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; - unsigned char *va; - unsigned int pull_len; - - /* it is valid to use page_address instead of kmap since we are - * working with pages allocated out of the lomem pool per - * alloc_page(GFP_ATOMIC) - */ - va = skb_frag_address(frag); - - /* we need the header to contain the greater of either ETH_HLEN or - * 60 bytes if the skb->len is less than 60 for skb_pad. 
- */ - pull_len = eth_get_headlen(va, I40E_RX_HDR_SIZE); - - /* align pull length to size of long to optimize memcpy performance */ - skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); - - /* update all of the pointers */ - skb_frag_size_sub(frag, pull_len); - frag->page_offset += pull_len; - skb->data_len -= pull_len; - skb->tail += pull_len; -} - -/** * i40e_cleanup_headers - Correct empty headers * @rx_ring: rx descriptor ring packet is being transacted on * @skb: pointer to current skb being fixed @@ -1509,10 +1445,6 @@ static void i40e_pull_tail(struct i40e_ring *rx_ring, struct sk_buff *skb) **/ static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb) { - /* place header in linear portion of buffer */ - if (skb_is_nonlinear(skb)) - i40e_pull_tail(rx_ring, skb); - /* if eth_skb_pad returns an error the skb was freed */ if (eth_skb_pad(skb)) return true; @@ -1544,19 +1476,85 @@ static void i40e_reuse_rx_page(struct i40e_ring *rx_ring, } /** - * i40e_page_is_reserved - check if reuse is possible + * i40e_page_is_reusable - check if any reuse is possible * @page: page struct to check + * + * A page is not reusable if it was allocated under low memory + * conditions, or it's not in the same NUMA node as this CPU. */ -static inline bool i40e_page_is_reserved(struct page *page) +static inline bool i40e_page_is_reusable(struct page *page) { - return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); + return (page_to_nid(page) == numa_mem_id()) && + !page_is_pfmemalloc(page); +} + +/** + * i40e_can_reuse_rx_page - Determine if this page can be reused by + * the adapter for another receive + * + * @rx_buffer: buffer containing the page + * @page: page address from rx_buffer + * @truesize: actual size of the buffer in this page + * + * If page is reusable, rx_buffer->page_offset is adjusted to point to + * an unused region in the page. + * + * For small pages, @truesize will be a constant value, half the size + * of the memory at page. We'll attempt to alternate between high and + * low halves of the page, with one half ready for use by the hardware + * and the other half being consumed by the stack. We use the page + * ref count to determine whether the stack has finished consuming the + * portion of this page that was passed up with a previous packet. If + * the page ref count is >1, we'll assume the "other" half page is + * still busy, and this page cannot be reused. + * + * For larger pages, @truesize will be the actual space used by the + * received packet (adjusted upward to an even multiple of the cache + * line size). This will advance through the page by the amount + * actually consumed by the received packets while there is still + * space for a buffer. Each region of larger pages will be used at + * most once, after which the page will not be reused. + * + * In either case, if the page is reusable its refcount is increased. + **/ +static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer, + struct page *page, + const unsigned int truesize) +{ +#if (PAGE_SIZE >= 8192) + unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048; +#endif + + /* Is any reuse possible? 
*/ + if (unlikely(!i40e_page_is_reusable(page))) + return false; + +#if (PAGE_SIZE < 8192) + /* if we are only owner of page we can reuse it */ + if (unlikely(page_count(page) != 1)) + return false; + + /* flip page offset to other buffer */ + rx_buffer->page_offset ^= truesize; +#else + /* move offset up to the next cache line */ + rx_buffer->page_offset += truesize; + + if (rx_buffer->page_offset > last_offset) + return false; +#endif + + /* Inc ref count on page before passing it up to the stack */ + get_page(page); + + return true; } /** * i40e_add_rx_frag - Add contents of Rx buffer to sk_buff * @rx_ring: rx descriptor ring to transact packets on * @rx_buffer: buffer containing page to add - * @rx_desc: descriptor containing length of buffer written by hardware + * @size: packet length from rx_desc * @skb: sk_buff to place the data into * * This function will add the data contained in rx_buffer->page to the skb. @@ -1569,30 +1567,29 @@ static inline bool i40e_page_is_reserved(struct page *page) **/ static bool i40e_add_rx_frag(struct i40e_ring *rx_ring, struct i40e_rx_buffer *rx_buffer, - union i40e_rx_desc *rx_desc, + unsigned int size, struct sk_buff *skb) { struct page *page = rx_buffer->page; - u64 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - unsigned int size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> - I40E_RXD_QW1_LENGTH_PBUF_SHIFT; + unsigned char *va = page_address(page) + rx_buffer->page_offset; #if (PAGE_SIZE < 8192) unsigned int truesize = I40E_RXBUFFER_2048; #else unsigned int truesize = ALIGN(size, L1_CACHE_BYTES); - unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048; #endif + unsigned int pull_len; + + if (unlikely(skb_is_nonlinear(skb))) + goto add_tail_frag; /* will the data fit in the skb we allocated? if so, just * copy it as it is pretty small anyway */ - if ((size <= I40E_RX_HDR_SIZE) && !skb_is_nonlinear(skb)) { - unsigned char *va = page_address(page) + rx_buffer->page_offset; - + if (size <= I40E_RX_HDR_SIZE) { memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); - /* page is not reserved, we can reuse buffer as-is */ - if (likely(!i40e_page_is_reserved(page))) + /* page is reusable, we can reuse buffer as-is */ + if (likely(i40e_page_is_reusable(page))) return true; /* this page cannot be reused so discard it */ @@ -1600,34 +1597,26 @@ static bool i40e_add_rx_frag(struct i40e_ring *rx_ring, return false; } - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, - rx_buffer->page_offset, size, truesize); - - /* avoid re-using remote pages */ - if (unlikely(i40e_page_is_reserved(page))) - return false; - -#if (PAGE_SIZE < 8192) - /* if we are only owner of page we can reuse it */ - if (unlikely(page_count(page) != 1)) - return false; + /* we need the header to contain the greater of either + * ETH_HLEN or 60 bytes if the skb->len is less than + * 60 for skb_pad. + */ + pull_len = eth_get_headlen(va, I40E_RX_HDR_SIZE); - /* flip page offset to other buffer */ - rx_buffer->page_offset ^= truesize; -#else - /* move offset up to the next cache line */ - rx_buffer->page_offset += truesize; + /* align pull length to size of long to optimize + * memcpy performance + */ + memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long))); - if (rx_buffer->page_offset > last_offset) - return false; -#endif + /* update all of the pointers */ + va += pull_len; + size -= pull_len; - /* Even if we own the page, we are not allowed to use atomic_set() - * This would break get_page_unless_zero() users. 
- */ - get_page(rx_buffer->page); +add_tail_frag: + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + (unsigned long)va & ~PAGE_MASK, size, truesize); - return true; + return i40e_can_reuse_rx_page(rx_buffer, page, truesize); } /** @@ -1642,18 +1631,21 @@ static bool i40e_add_rx_frag(struct i40e_ring *rx_ring, */ static inline struct sk_buff *i40e_fetch_rx_buffer(struct i40e_ring *rx_ring, - union i40e_rx_desc *rx_desc) + union i40e_rx_desc *rx_desc, + struct sk_buff *skb) { + u64 local_status_error_len = + le64_to_cpu(rx_desc->wb.qword1.status_error_len); + unsigned int size = + (local_status_error_len & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> + I40E_RXD_QW1_LENGTH_PBUF_SHIFT; struct i40e_rx_buffer *rx_buffer; - struct sk_buff *skb; struct page *page; rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean]; page = rx_buffer->page; prefetchw(page); - skb = rx_buffer->skb; - if (likely(!skb)) { void *page_addr = page_address(page) + rx_buffer->page_offset; @@ -1677,19 +1669,17 @@ struct sk_buff *i40e_fetch_rx_buffer(struct i40e_ring *rx_ring, * it now to avoid a possible cache miss */ prefetchw(skb->data); - } else { - rx_buffer->skb = NULL; } /* we are reusing so sync this buffer for CPU use */ dma_sync_single_range_for_cpu(rx_ring->dev, rx_buffer->dma, rx_buffer->page_offset, - I40E_RXBUFFER_2048, + size, DMA_FROM_DEVICE); /* pull page into skb */ - if (i40e_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) { + if (i40e_add_rx_frag(rx_ring, rx_buffer, size, skb)) { /* hand second half of page back to the ring */ i40e_reuse_rx_page(rx_ring, rx_buffer); rx_ring->rx_stats.page_reuse_count++; @@ -1731,7 +1721,6 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring, #define staterrlen rx_desc->wb.qword1.status_error_len if (unlikely(i40e_rx_is_programming_status(le64_to_cpu(staterrlen)))) { i40e_clean_programming_status(rx_ring, rx_desc); - rx_ring->rx_bi[ntc].skb = skb; return true; } /* if we are the last buffer then there is nothing else to do */ @@ -1739,8 +1728,6 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring, if (likely(i40e_test_staterr(rx_desc, I40E_RXD_EOF))) return false; - /* place skb in next buffer to be received */ - rx_ring->rx_bi[ntc].skb = skb; rx_ring->rx_stats.non_eop_descs++; return true; @@ -1761,13 +1748,12 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring, static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) { unsigned int total_rx_bytes = 0, total_rx_packets = 0; + struct sk_buff *skb = rx_ring->skb; u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); bool failure = false; while (likely(total_rx_packets < budget)) { union i40e_rx_desc *rx_desc; - struct sk_buff *skb; - u32 rx_status; u16 vlan_tag; u8 rx_ptype; u64 qword; @@ -1781,21 +1767,13 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) rx_desc = I40E_RX_DESC(rx_ring, rx_ring->next_to_clean); - qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> - I40E_RXD_QW1_PTYPE_SHIFT; - rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >> - I40E_RXD_QW1_STATUS_SHIFT; - - if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT))) - break; - /* status_error_len will always be zero for unused descriptors * because it's cleared in cleanup, and overlaps with hdr_addr * which is always zero because packet split isn't used, if the * hardware wrote DD then it will be non-zero */ - if (!rx_desc->wb.qword1.status_error_len) + if (!i40e_test_staterr(rx_desc, + BIT(I40E_RX_DESC_STATUS_DD_SHIFT))) break; /* This memory barrier is needed to keep us 
from reading @@ -1804,7 +1782,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) */ dma_rmb(); - skb = i40e_fetch_rx_buffer(rx_ring, rx_desc); + skb = i40e_fetch_rx_buffer(rx_ring, rx_desc, skb); if (!skb) break; @@ -1823,12 +1801,18 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) continue; } - if (i40e_cleanup_headers(rx_ring, skb)) + if (i40e_cleanup_headers(rx_ring, skb)) { + skb = NULL; continue; + } /* probably a little skewed due to removing CRC */ total_rx_bytes += skb->len; + qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); + rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> + I40E_RXD_QW1_PTYPE_SHIFT; + /* populate checksum, VLAN, and protocol */ i40e_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype); @@ -1845,11 +1829,14 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) : 0; i40e_receive_skb(rx_ring, skb, vlan_tag); + skb = NULL; /* update budget accounting */ total_rx_packets++; } + rx_ring->skb = skb; + u64_stats_update_begin(&rx_ring->syncp); rx_ring->stats.packets += total_rx_packets; rx_ring->stats.bytes += total_rx_bytes; @@ -1877,14 +1864,14 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr) /* a small macro to shorten up some long lines */ #define INTREG I40E_PFINT_DYN_CTLN -static inline int get_rx_itr_enabled(struct i40e_vsi *vsi, int idx) +static inline int get_rx_itr(struct i40e_vsi *vsi, int idx) { - return !!(vsi->rx_rings[idx]->rx_itr_setting); + return vsi->rx_rings[idx]->rx_itr_setting; } -static inline int get_tx_itr_enabled(struct i40e_vsi *vsi, int idx) +static inline int get_tx_itr(struct i40e_vsi *vsi, int idx) { - return !!(vsi->tx_rings[idx]->tx_itr_setting); + return vsi->tx_rings[idx]->tx_itr_setting; } /** @@ -1910,8 +1897,8 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, */ rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0); - rx_itr_setting = get_rx_itr_enabled(vsi, idx); - tx_itr_setting = get_tx_itr_enabled(vsi, idx); + rx_itr_setting = get_rx_itr(vsi, idx); + tx_itr_setting = get_tx_itr(vsi, idx); if (q_vector->itr_countdown > 0 || (!ITR_IS_DYNAMIC(rx_itr_setting) && @@ -2025,12 +2012,25 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) /* If work not completed, return budget and polling will return */ if (!clean_complete) { + const cpumask_t *aff_mask = &q_vector->affinity_mask; + int cpu_id = smp_processor_id(); + + /* It is possible that the interrupt affinity has changed but, + * if the cpu is pegged at 100%, polling will never exit while + * traffic continues and the interrupt will be stuck on this + * cpu. We check to make sure affinity is correct before we + * continue to poll, otherwise we must stop polling so the + * interrupt can move to the correct cpu. 
+ */ + if (likely(cpumask_test_cpu(cpu_id, aff_mask) || + !(vsi->back->flags & I40E_FLAG_MSIX_ENABLED))) { tx_only: - if (arm_wb) { - q_vector->tx.ring[0].tx_stats.tx_force_wb++; - i40e_enable_wb_on_itr(vsi, q_vector); + if (arm_wb) { + q_vector->tx.ring[0].tx_stats.tx_force_wb++; + i40e_enable_wb_on_itr(vsi, q_vector); + } + return budget; } - return budget; } if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR) @@ -2038,12 +2038,19 @@ tx_only: /* Work is done so exit the polling mode and re-enable the interrupt */ napi_complete_done(napi, work_done); - if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) { - i40e_update_enable_itr(vsi, q_vector); - } else { /* Legacy mode */ + + /* If we're prematurely stopping polling to fix the interrupt + * affinity we want to make sure polling starts back up so we + * issue a call to i40e_force_wb which triggers a SW interrupt. + */ + if (!clean_complete) + i40e_force_wb(vsi, q_vector); + else if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) i40e_irq_dynamic_enable_icr0(vsi->back, false); - } - return 0; + else + i40e_update_enable_itr(vsi, q_vector); + + return min(work_done, budget - 1); } /** @@ -2267,14 +2274,16 @@ out: /** * i40e_tso - set up the tso context descriptor - * @skb: ptr to the skb we're sending + * @first: pointer to first Tx buffer for xmit * @hdr_len: ptr to the size of the packet header * @cd_type_cmd_tso_mss: Quad Word 1 * * Returns 0 if no TSO can happen, 1 if tso is going, or error **/ -static int i40e_tso(struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss) +static int i40e_tso(struct i40e_tx_buffer *first, u8 *hdr_len, + u64 *cd_type_cmd_tso_mss) { + struct sk_buff *skb = first->skb; u64 cd_cmd, cd_tso_len, cd_mss; union { struct iphdr *v4; @@ -2287,6 +2296,7 @@ static int i40e_tso(struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss) unsigned char *hdr; } l4; u32 paylen, l4_offset; + u16 gso_segs, gso_size; int err; if (skb->ip_summed != CHECKSUM_PARTIAL) @@ -2325,7 +2335,8 @@ static int i40e_tso(struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss) /* remove payload length from outer checksum */ paylen = skb->len - l4_offset; - csum_replace_by_diff(&l4.udp->check, htonl(paylen)); + csum_replace_by_diff(&l4.udp->check, + (__force __wsum)htonl(paylen)); } /* reset pointers to inner headers */ @@ -2346,15 +2357,23 @@ static int i40e_tso(struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss) /* remove payload length from inner checksum */ paylen = skb->len - l4_offset; - csum_replace_by_diff(&l4.tcp->check, htonl(paylen)); + csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen)); /* compute length of segmentation header */ *hdr_len = (l4.tcp->doff * 4) + l4_offset; + /* pull values out of skb_shinfo */ + gso_size = skb_shinfo(skb)->gso_size; + gso_segs = skb_shinfo(skb)->gso_segs; + + /* update GSO size and bytecount with header size */ + first->gso_segs = gso_segs; + first->bytecount += (first->gso_segs - 1) * *hdr_len; + /* find the field values */ cd_cmd = I40E_TX_CTX_DESC_TSO; cd_tso_len = skb->len - *hdr_len; - cd_mss = skb_shinfo(skb)->gso_size; + cd_mss = gso_size; *cd_type_cmd_tso_mss |= (cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) | (cd_tso_len << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) | (cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT); @@ -2715,10 +2734,7 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, u16 i = tx_ring->next_to_use; u32 td_tag = 0; dma_addr_t dma; - u16 gso_segs; - u16 desc_count = 0; - bool tail_bump = true; - bool do_rs = false; + u16 desc_count = 1; if 
(tx_flags & I40E_TX_FLAGS_HW_VLAN) { td_cmd |= I40E_TX_DESC_CMD_IL2TAG1; @@ -2726,15 +2742,6 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, I40E_TX_FLAGS_VLAN_SHIFT; } - if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) - gso_segs = skb_shinfo(skb)->gso_segs; - else - gso_segs = 1; - - /* multiply data chunks by size of headers */ - first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len); - first->gso_segs = gso_segs; - first->skb = skb; first->tx_flags = tx_flags; dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); @@ -2801,8 +2808,7 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_bi = &tx_ring->tx_bi[i]; } - /* set next_to_watch value indicating a packet is present */ - first->next_to_watch = tx_desc; + netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); i++; if (i == tx_ring->count) @@ -2810,66 +2816,72 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_ring->next_to_use = i; - netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); i40e_maybe_stop_tx(tx_ring, DESC_NEEDED); + /* write last descriptor with EOP bit */ + td_cmd |= I40E_TX_DESC_CMD_EOP; + + /* We can OR these values together as they both are checked against + * 4 below and at this point desc_count will be used as a boolean value + * after this if/else block. + */ + desc_count |= ++tx_ring->packet_stride; + /* Algorithm to optimize tail and RS bit setting: - * if xmit_more is supported - * if xmit_more is true - * do not update tail and do not mark RS bit. - * if xmit_more is false and last xmit_more was false - * if every packet spanned less than 4 desc - * then set RS bit on 4th packet and update tail - * on every packet - * else - * update tail and set RS bit on every packet. - * if xmit_more is false and last_xmit_more was true - * update tail and set RS bit. + * if queue is stopped + * mark RS bit + * reset packet counter + * else if xmit_more is supported and is true + * advance packet counter to 4 + * reset desc_count to 0 * - * Optimization: wmb to be issued only in case of tail update. - * Also optimize the Descriptor WB path for RS bit with the same - * algorithm. + * if desc_count >= 4 + * mark RS bit + * reset packet counter + * if desc_count > 0 + * update tail * - * Note: If there are less than 4 packets + * Note: If there are less than 4 descriptors * pending and interrupts were disabled the service task will * trigger a force WB. */ - if (skb->xmit_more && - !netif_xmit_stopped(txring_txq(tx_ring))) { - tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET; - tail_bump = false; - } else if (!skb->xmit_more && - !netif_xmit_stopped(txring_txq(tx_ring)) && - (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) && - (tx_ring->packet_stride < WB_STRIDE) && - (desc_count < WB_STRIDE)) { - tx_ring->packet_stride++; - } else { + if (netif_xmit_stopped(txring_txq(tx_ring))) { + goto do_rs; + } else if (skb->xmit_more) { + /* set stride to arm on next packet and reset desc_count */ + tx_ring->packet_stride = WB_STRIDE; + desc_count = 0; + } else if (desc_count >= WB_STRIDE) { +do_rs: + /* write last descriptor with RS bit set */ + td_cmd |= I40E_TX_DESC_CMD_RS; tx_ring->packet_stride = 0; - tx_ring->flags &= ~I40E_TXR_FLAGS_LAST_XMIT_MORE_SET; - do_rs = true; } - if (do_rs) - tx_ring->packet_stride = 0; tx_desc->cmd_type_offset_bsz = - build_ctob(td_cmd, td_offset, size, td_tag) | - cpu_to_le64((u64)(do_rs ? 
I40E_TXD_CMD : - I40E_TX_DESC_CMD_EOP) << - I40E_TXD_QW1_CMD_SHIFT); + build_ctob(td_cmd, td_offset, size, td_tag); + + /* Force memory writes to complete before letting h/w know there + * are new descriptors to fetch. + * + * We also use this memory barrier to make certain all of the + * status bits have been updated before next_to_watch is written. + */ + wmb(); + + /* set next_to_watch value indicating a packet is present */ + first->next_to_watch = tx_desc; /* notify HW of packet */ - if (!tail_bump) { - prefetchw(tx_desc + 1); - } else { - /* Force memory writes to complete before letting h/w - * know there are new descriptors to fetch. (Only - * applicable for weak-ordered memory model archs, - * such as IA-64). - */ - wmb(); + if (desc_count) { writel(i, tx_ring->tail); + + /* we need this if more than one processor can write to our tail + * at a time, it synchronizes IO on IA64/Altix systems + */ + mmiowb(); } + return; dma_error: @@ -2915,8 +2927,10 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, count = i40e_xmit_descriptor_count(skb); if (i40e_chk_linearize(skb, count)) { - if (__skb_linearize(skb)) - goto out_drop; + if (__skb_linearize(skb)) { + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } count = i40e_txd_use_count(skb->len); tx_ring->tx_stats.tx_linearize++; } @@ -2932,6 +2946,12 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, return NETDEV_TX_BUSY; } + /* record the location of the first descriptor for this packet */ + first = &tx_ring->tx_bi[tx_ring->next_to_use]; + first->skb = skb; + first->bytecount = skb->len; + first->gso_segs = 1; + /* prepare the xmit flags */ if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags)) goto out_drop; @@ -2939,16 +2959,13 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, /* obtain protocol of skb */ protocol = vlan_get_protocol(skb); - /* record the location of the first descriptor for this packet */ - first = &tx_ring->tx_bi[tx_ring->next_to_use]; - /* setup IPv4/IPv6 offloads */ if (protocol == htons(ETH_P_IP)) tx_flags |= I40E_TX_FLAGS_IPV4; else if (protocol == htons(ETH_P_IPV6)) tx_flags |= I40E_TX_FLAGS_IPV6; - tso = i40e_tso(skb, &hdr_len, &cd_type_cmd_tso_mss); + tso = i40e_tso(first, &hdr_len, &cd_type_cmd_tso_mss); if (tso < 0) goto out_drop; @@ -2986,7 +3003,8 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, return NETDEV_TX_OK; out_drop: - dev_kfree_skb_any(skb); + dev_kfree_skb_any(first->skb); + first->skb = NULL; return NETDEV_TX_OK; } |
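
The first hunk replaces the open-coded delay counter in i40e_program_fdir_filter() with a count-down for loop: poll once per millisecond for up to I40E_FD_CLEAN_DELAY iterations until at least two descriptors are free, and give up with -EAGAIN otherwise. A minimal userspace sketch of that bounded wait follows; desc_unused(), FD_CLEAN_DELAY and wait_for_two_descriptors() are stand-ins invented for the example, not driver symbols.

#include <errno.h>
#include <stdio.h>

#define FD_CLEAN_DELAY 10	/* stand-in for I40E_FD_CLEAN_DELAY */

/* Stand-in for I40E_DESC_UNUSED(tx_ring): pretend descriptors complete
 * one at a time so the example terminates quickly.
 */
static int desc_unused(void)
{
	static int free_descs;

	return free_descs++;
}

/* Same shape as the reworked wait: check for two free descriptors (one
 * Flow Director filter needs two), back off up to FD_CLEAN_DELAY times,
 * then report -EAGAIN.
 */
static int wait_for_two_descriptors(void)
{
	int i;

	for (i = FD_CLEAN_DELAY; desc_unused() < 2; i--) {
		if (!i)
			return -EAGAIN;
		/* the driver sleeps here via msleep_interruptible(1) */
	}

	return 0;
}

int main(void)
{
	printf("wait_for_two_descriptors() = %d\n", wait_for_two_descriptors());
	return 0;
}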
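
The Rx buffer changes concentrate the page-recycling test in i40e_can_reuse_rx_page(): with 4K pages the 2K buffer offset is XOR-flipped between the two halves of the page whenever the driver still holds the only reference, while on larger pages the offset advances by truesize until less than one buffer remains. Below is a small standalone model of the 4K-page half-flip, assuming the page state can be reduced to an offset plus a reference count; struct model_rx_buffer and can_reuse_rx_page_model() are illustrative names only.

#include <stdbool.h>
#include <stdio.h>

#define RXBUFFER_2048 2048u	/* half of a 4K page, as in I40E_RXBUFFER_2048 */

struct model_rx_buffer {
	unsigned int page_offset;
	unsigned int page_refcount;	/* stand-in for page_count(page) */
};

/* Model of the PAGE_SIZE < 8192 branch: the page is reusable only while
 * the driver is the sole owner, and reuse flips the offset to the other
 * half so hardware and stack never touch the same 2K region.
 */
static bool can_reuse_rx_page_model(struct model_rx_buffer *rx_buffer)
{
	if (rx_buffer->page_refcount != 1)
		return false;

	rx_buffer->page_offset ^= RXBUFFER_2048;	/* flip 0 <-> 2048 */
	rx_buffer->page_refcount++;			/* get_page() */

	return true;
}

int main(void)
{
	struct model_rx_buffer buf = { .page_offset = 0, .page_refcount = 1 };
	int i;

	for (i = 0; i < 4; i++) {
		bool reused = can_reuse_rx_page_model(&buf);

		printf("packet %d: reused=%d next_offset=%u refcount=%u\n",
		       i, reused, buf.page_offset, buf.page_refcount);

		/* pretend the stack consumed and freed its half */
		buf.page_refcount--;
	}

	return 0;
}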
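
i40e_napi_poll() now checks interrupt affinity before staying in polling mode: if work remains but the current CPU is no longer in the vector's affinity_mask (and MSI-X is enabled), it stops polling and relies on the forced write-back to raise the interrupt on the correct CPU. The sketch below reduces this to the bare decision, with the CPU mask represented as a plain bitmask; the enum and napi_poll_decision() are hypothetical helpers for illustration.

#include <stdbool.h>
#include <stdio.h>

enum poll_action {
	DONE,		/* all work finished, re-enable the interrupt        */
	KEEP_POLLING,	/* return the full budget so NAPI polls again        */
	STOP_AND_KICK,	/* stop polling, force a SW interrupt on another CPU */
};

/* Condensed decision from the end of i40e_napi_poll(): keep polling only
 * if work is left over AND this CPU may still service the vector (or the
 * device is not using MSI-X at all).
 */
static enum poll_action napi_poll_decision(bool clean_complete,
					   unsigned long affinity_mask,
					   int cpu_id, bool msix_enabled)
{
	if (clean_complete)
		return DONE;

	if ((affinity_mask & (1UL << cpu_id)) || !msix_enabled)
		return KEEP_POLLING;

	return STOP_AND_KICK;
}

int main(void)
{
	unsigned long mask = (1UL << 2) | (1UL << 3);	/* affined to CPUs 2-3 */

	printf("busy, on cpu 2: %d (keep polling)\n",
	       napi_poll_decision(false, mask, 2, true));
	printf("busy, on cpu 0: %d (stop and force a SW interrupt)\n",
	       napi_poll_decision(false, mask, 0, true));
	printf("idle, on cpu 0: %d (done, re-enable the interrupt)\n",
	       napi_poll_decision(true, mask, 0, true));

	return 0;
}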
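
i40e_tso() now receives the first Tx buffer so it can correct the byte count itself: a TSO packet is charged skb->len plus one extra copy of the headers for every segment after the first, which matches what goes on the wire and what netdev_tx_sent_queue() is now handed. A short arithmetic sketch with made-up numbers (tso_bytecount() is not a driver function):

#include <stdio.h>

/* Wire-level byte accounting for a TSO send: every segment after the
 * first repeats the protocol headers, so the byte count is skb->len
 * plus (gso_segs - 1) * hdr_len, as i40e_tso() now applies to
 * first->bytecount.
 */
static unsigned int tso_bytecount(unsigned int skb_len, unsigned int hdr_len,
				  unsigned int gso_size)
{
	unsigned int payload = skb_len - hdr_len;
	unsigned int gso_segs = (payload + gso_size - 1) / gso_size;	/* round up */

	return skb_len + (gso_segs - 1) * hdr_len;
}

int main(void)
{
	/* hypothetical bulk TCP send: 66-byte headers, 1448-byte MSS,
	 * 45 full segments worth of payload in a single TSO skb
	 */
	unsigned int skb_len = 66 + 45 * 1448;
	unsigned int wire_bytes = tso_bytecount(skb_len, 66, 1448);

	printf("skb of %u bytes -> %u bytes on the wire\n", skb_len, wire_bytes);
	return 0;
}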
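
The rewritten tail of i40e_tx_map() drops the old xmit_more bookkeeping flags in favor of a single packet_stride counter and the new WB_STRIDE value of 4: the RS bit is requested when the queue was just stopped or roughly every fourth packet, and an xmit_more packet arms the stride for the next send while skipping the tail write. The model below keeps only that decision and assumes a one-descriptor packet, so desc_count starts at 1 as in the driver; struct model_ring, tx_decision and tx_map_tail_rs() are names invented for the example.

#include <stdbool.h>
#include <stdio.h>

#define WB_STRIDE 4

struct model_ring {
	unsigned int packet_stride;
};

struct tx_decision {
	bool set_rs;		/* request a descriptor write-back (RS bit) */
	bool bump_tail;		/* tell hardware about the new descriptors  */
};

/* Condensed version of the end of i40e_tx_map(): decide, per packet,
 * whether to set the RS bit and whether to write the tail register.
 */
static struct tx_decision tx_map_tail_rs(struct model_ring *ring,
					 bool queue_stopped, bool xmit_more)
{
	struct tx_decision d = { .set_rs = false, .bump_tail = false };
	unsigned int desc_count = 1;	/* one descriptor in this model */

	desc_count |= ++ring->packet_stride;

	if (queue_stopped) {
		d.set_rs = true;
		ring->packet_stride = 0;
	} else if (xmit_more) {
		/* arm RS for the next packet and skip the tail write */
		ring->packet_stride = WB_STRIDE;
		desc_count = 0;
	} else if (desc_count >= WB_STRIDE) {
		d.set_rs = true;
		ring->packet_stride = 0;
	}

	d.bump_tail = desc_count != 0;
	return d;
}

int main(void)
{
	struct model_ring ring = { .packet_stride = 0 };
	bool more[6] = { true, true, false, false, false, false };
	int i;

	for (i = 0; i < 6; i++) {
		struct tx_decision d = tx_map_tail_rs(&ring, false, more[i]);

		printf("pkt %d xmit_more=%d -> RS=%d tail=%d stride=%u\n",
		       i, more[i], d.set_rs, d.bump_tail, ring.packet_stride);
	}

	return 0;
}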
