author    David S. Miller <davem@davemloft.net>  2015-09-28 20:56:02 -0700
committer David S. Miller <davem@davemloft.net>  2015-09-28 20:56:02 -0700
commit    eae93fe4ff88ec0979a00c440a1aa63f92c8f367 (patch)
tree      174c92ecfef8b0030684b90a98edd0222f2f4ad6 /drivers/net/ethernet/intel/i40e/i40e_txrx.c
parent    34c2d9fb0498c066afbe610b15e18995fd8be792 (diff)
parent    cbfe360a1541a32e9e28f8f8ac925d2b7979d767 (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue
Jeff Kirsher says:

====================
Intel Wired LAN Driver Updates 2015-09-28

This series contains updates to i40e, i40evf and igb to resolve issues
seen and reported by Red Hat.

Kiran moves i40e_get_head() in preparation for the refactor of the Tx
timeout logic, so that it can be used in other areas of the driver. He
then refactored the timeout logic by issuing a writeback request via a
software interrupt to the hardware the first time the driver detects a
hang; the driver had previously been too aggressive in resetting a hung
queue.

Shannon adds the GRE protocol to the transmit checksum encoding.

Anjali fixes an issue where writeback was forced too often, which kept
us from benefiting from NAPI. We now disable force writeback in the
clean routine for X710 and XL710 adapters. The X722 adapters do not
enable an interrupt to force a writeback and benefit from WB_ON_ITR, so
force WB is left enabled for those adapters. She also fixed a possible
deadlock where sync_vsi_filters() can be called either directly under
RTNL or through the timer subtask without RTNL; the flow now checks
whether we are already under RTNL before trying to grab it (see the
sketch below).

Stefan Assmann provides a fix for igb where SR-IOV was not getting
enabled properly and we ran into a NULL pointer dereference if the
max_vfs module parameter is specified. This is prevented by setting the
IGB_FLAG_HAS_MSIX bit before calling igb_probe_vfs().

v2: added the "i40e: Fix for recursive RTNL lock during PROMISC change"
patch to the series, as it resolves another issue seen and reported by
Red Hat.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
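[Editor's note] The RTNL fix described above follows a common pattern:
rather than taking RTNL unconditionally, the filter-sync routine lets
callers say whether the lock still needs to be acquired. A minimal
sketch of that pattern; the parameter name and helper below are
illustrative, not necessarily the driver's exact signature:

    #include <linux/rtnetlink.h>

    /* Callers that already hold RTNL (e.g. ndo callbacks) pass
     * grab_rtnl = false; async paths such as the timer subtask pass
     * true so the lock is taken here, avoiding a recursive lock.
     */
    static int sync_filters(struct i40e_vsi *vsi, bool grab_rtnl)
    {
            int ret;

            if (grab_rtnl)
                    rtnl_lock();

            ret = apply_filter_changes(vsi);  /* hypothetical helper */

            if (grab_rtnl)
                    rtnl_unlock();
            return ret;
    }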
Diffstat (limited to 'drivers/net/ethernet/intel/i40e/i40e_txrx.c')
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_txrx.c  214
1 file changed, 87 insertions(+), 127 deletions(-)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 738aca68f665..3ce4900c0c43 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -601,27 +601,13 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring)
}
/**
- * i40e_get_head - Retrieve head from head writeback
- * @tx_ring: tx ring to fetch head of
- *
- * Returns value of Tx ring head based on value stored
- * in head write-back location
- **/
-static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
-{
- void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;
-
- return le32_to_cpu(*(volatile __le32 *)head);
-}
-
-/**
* i40e_get_tx_pending - how many tx descriptors not processed
* @tx_ring: the ring of descriptors
*
* Since there is no access to the ring head register
* in XL710, we need to use our local copies
**/
-static u32 i40e_get_tx_pending(struct i40e_ring *ring)
+u32 i40e_get_tx_pending(struct i40e_ring *ring)
{
u32 head, tail;
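[Editor's note] The function being un-staticed here pairs two positions:
the hardware's consumed position, read from the head write-back slot
(i40e_get_head() above reads the 32-bit value the hardware DMAs to the
memory just past the last descriptor), and the driver's producer
position from the tail register. The rest of the body is elided by the
diff; a plausible reconstruction of the pending-descriptor arithmetic,
shown only for illustration:

    head = i40e_get_head(ring);
    tail = readl(ring->tail);

    /* distance from head to tail, modulo the ring size */
    if (head != tail)
            return (head < tail) ?
                    tail - head : (tail + ring->count - head);

    return 0;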
@@ -635,50 +621,6 @@ static u32 i40e_get_tx_pending(struct i40e_ring *ring)
return 0;
}
-/**
- * i40e_check_tx_hang - Is there a hang in the Tx queue
- * @tx_ring: the ring of descriptors
- **/
-static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
-{
- u32 tx_done = tx_ring->stats.packets;
- u32 tx_done_old = tx_ring->tx_stats.tx_done_old;
- u32 tx_pending = i40e_get_tx_pending(tx_ring);
- struct i40e_pf *pf = tx_ring->vsi->back;
- bool ret = false;
-
- clear_check_for_tx_hang(tx_ring);
-
- /* Check for a hung queue, but be thorough. This verifies
- * that a transmit has been completed since the previous
- * check AND there is at least one packet pending. The
- * ARMED bit is set to indicate a potential hang. The
- * bit is cleared if a pause frame is received to remove
- * false hang detection due to PFC or 802.3x frames. By
- * requiring this to fail twice we avoid races with
- * PFC clearing the ARMED bit and conditions where we
- * run the check_tx_hang logic with a transmit completion
- * pending but without time to complete it yet.
- */
- if ((tx_done_old == tx_done) && tx_pending) {
- /* make sure it is true for two checks in a row */
- ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED,
- &tx_ring->state);
- } else if (tx_done_old == tx_done &&
- (tx_pending < I40E_MIN_DESC_PENDING) && (tx_pending > 0)) {
- if (I40E_DEBUG_FLOW & pf->hw.debug_mask)
- dev_info(tx_ring->dev, "HW needs some more descs to do a cacheline flush. tx_pending %d, queue %d",
- tx_pending, tx_ring->queue_index);
- pf->tx_sluggish_count++;
- } else {
- /* update completed stats and disarm the hang check */
- tx_ring->tx_stats.tx_done_old = tx_done;
- clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state);
- }
-
- return ret;
-}
-
#define WB_STRIDE 0x3
/**
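[Editor's note] WB_STRIDE = 0x3 encodes "four descriptors per 64-byte
cacheline", and both the old RS-bit placement and the new WB_ON_ITR
check in the hunks below build on it. The two idioms reduce to simple
range checks, modeled standalone here for clarity (not part of the
commit):

    #include <stdbool.h>

    #define WB_STRIDE 0x3

    /* old RS-bit placement: is i the last descriptor of its cacheline? */
    static bool ends_cacheline(unsigned int i)
    {
            return (i & WB_STRIDE) == WB_STRIDE;    /* i % 4 == 3 */
    }

    /* new WB_ON_ITR check: 1..3 descriptors pending, i.e. less than a
     * full cacheline still waiting to be written back
     */
    static bool partial_cacheline(unsigned int j)
    {
            return ((j / (WB_STRIDE + 1)) == 0) && (j != 0);  /* 0 < j < 4 */
    }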
@@ -784,42 +726,21 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
tx_ring->q_vector->tx.total_bytes += total_bytes;
tx_ring->q_vector->tx.total_packets += total_packets;
- /* check to see if there are any non-cache aligned descriptors
- * waiting to be written back, and kick the hardware to force
- * them to be written back in case of napi polling
- */
- if (budget &&
- !((i & WB_STRIDE) == WB_STRIDE) &&
- !test_bit(__I40E_DOWN, &tx_ring->vsi->state) &&
- (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
- tx_ring->arm_wb = true;
- else
- tx_ring->arm_wb = false;
-
- if (check_for_tx_hang(tx_ring) && i40e_check_tx_hang(tx_ring)) {
- /* schedule immediate reset if we believe we hung */
- dev_info(tx_ring->dev, "Detected Tx Unit Hang\n"
- " VSI <%d>\n"
- " Tx Queue <%d>\n"
- " next_to_use <%x>\n"
- " next_to_clean <%x>\n",
- tx_ring->vsi->seid,
- tx_ring->queue_index,
- tx_ring->next_to_use, i);
-
- netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
-
- dev_info(tx_ring->dev,
- "tx hang detected on queue %d, reset requested\n",
- tx_ring->queue_index);
-
- /* do not fire the reset immediately, wait for the stack to
- * decide we are truly stuck, also prevents every queue from
- * simultaneously requesting a reset
+ if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) {
+ unsigned int j = 0;
+
+ /* check to see if there are < 4 descriptors
+ * waiting to be written back, then kick the hardware to force
+ * them to be written back in case we stay in NAPI.
+ * In this mode on X722 we do not enable Interrupt.
*/
+ j = i40e_get_tx_pending(tx_ring);
- /* the adapter is about to reset, no point in enabling polling */
- budget = 1;
+ if (budget &&
+ ((j / (WB_STRIDE + 1)) == 0) && (j != 0) &&
+ !test_bit(__I40E_DOWN, &tx_ring->vsi->state) &&
+ (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
+ tx_ring->arm_wb = true;
}
netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
@@ -851,7 +772,7 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
* @q_vector: the vector on which to force writeback
*
**/
-static void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
+void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
{
u16 flags = q_vector->tx.ring[0].flags;
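[Editor's note] i40e_force_wb()'s body is elided above; conceptually it
branches on I40E_TXR_FLAGS_WB_ON_ITR. For the X710/XL710 case the cover
letter describes, write-back is forced by triggering a software
interrupt on the queue's vector. A hedged sketch of that register write,
using names from the i40e register map (the driver's exact bit recipe
may differ):

    u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
              I40E_PFINT_DYN_CTLN_ITR_INDX_MASK |       /* no ITR update */
              I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
              I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK;

    wr32(&vsi->back->hw,
         I40E_PFINT_DYN_CTLN(q_vector->v_idx + vsi->base_vector - 1),
         val);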
@@ -2324,6 +2245,9 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
l4_tunnel = I40E_TXD_CTX_UDP_TUNNELING;
*tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL;
break;
+ case IPPROTO_GRE:
+ l4_tunnel = I40E_TXD_CTX_GRE_TUNNELING;
+ break;
default:
return;
}
@@ -2581,6 +2505,9 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
u32 td_tag = 0;
dma_addr_t dma;
u16 gso_segs;
+ u16 desc_count = 0;
+ bool tail_bump = true;
+ bool do_rs = false;
if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
@@ -2621,6 +2548,8 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
tx_desc++;
i++;
+ desc_count++;
+
if (i == tx_ring->count) {
tx_desc = I40E_TX_DESC(tx_ring, 0);
i = 0;
@@ -2640,6 +2569,8 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
tx_desc++;
i++;
+ desc_count++;
+
if (i == tx_ring->count) {
tx_desc = I40E_TX_DESC(tx_ring, 0);
i = 0;
@@ -2654,34 +2585,6 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
tx_bi = &tx_ring->tx_bi[i];
}
- /* Place RS bit on last descriptor of any packet that spans across the
- * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
- */
- if (((i & WB_STRIDE) != WB_STRIDE) &&
- (first <= &tx_ring->tx_bi[i]) &&
- (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
- tx_desc->cmd_type_offset_bsz =
- build_ctob(td_cmd, td_offset, size, td_tag) |
- cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP <<
- I40E_TXD_QW1_CMD_SHIFT);
- } else {
- tx_desc->cmd_type_offset_bsz =
- build_ctob(td_cmd, td_offset, size, td_tag) |
- cpu_to_le64((u64)I40E_TXD_CMD <<
- I40E_TXD_QW1_CMD_SHIFT);
- }
-
- netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
- tx_ring->queue_index),
- first->bytecount);
-
- /* Force memory writes to complete before letting h/w
- * know there are new descriptors to fetch. (Only
- * applicable for weak-ordered memory model archs,
- * such as IA-64).
- */
- wmb();
-
/* set next_to_watch value indicating a packet is present */
first->next_to_watch = tx_desc;
@@ -2691,15 +2594,72 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
tx_ring->next_to_use = i;
+ netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
+ tx_ring->queue_index),
+ first->bytecount);
i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
+
+ /* Algorithm to optimize tail and RS bit setting:
+ * if xmit_more is supported
+ * if xmit_more is true
+ * do not update tail and do not mark RS bit.
+ * if xmit_more is false and last xmit_more was false
+ * if every packet spanned less than 4 desc
+ * then set RS bit on 4th packet and update tail
+ * on every packet
+ * else
+ * update tail and set RS bit on every packet.
+ * if xmit_more is false and last_xmit_more was true
+ * update tail and set RS bit.
+ *
+ * Optimization: wmb to be issued only in case of tail update.
+ * Also optimize the Descriptor WB path for RS bit with the same
+ * algorithm.
+ *
+ * Note: If there are less than 4 packets
+ * pending and interrupts were disabled the service task will
+ * trigger a force WB.
+ */
+ if (skb->xmit_more &&
+ !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
+ tx_ring->queue_index))) {
+ tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
+ tail_bump = false;
+ } else if (!skb->xmit_more &&
+ !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
+ tx_ring->queue_index)) &&
+ (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) &&
+ (tx_ring->packet_stride < WB_STRIDE) &&
+ (desc_count < WB_STRIDE)) {
+ tx_ring->packet_stride++;
+ } else {
+ tx_ring->packet_stride = 0;
+ tx_ring->flags &= ~I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
+ do_rs = true;
+ }
+ if (do_rs)
+ tx_ring->packet_stride = 0;
+
+ tx_desc->cmd_type_offset_bsz =
+ build_ctob(td_cmd, td_offset, size, td_tag) |
+ cpu_to_le64((u64)(do_rs ? I40E_TXD_CMD :
+ I40E_TX_DESC_CMD_EOP) <<
+ I40E_TXD_QW1_CMD_SHIFT);
+
/* notify HW of packet */
- if (!skb->xmit_more ||
- netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
- tx_ring->queue_index)))
- writel(i, tx_ring->tail);
- else
+ if (!tail_bump)
prefetchw(tx_desc + 1);
+ if (tail_bump) {
+ /* Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch. (Only
+ * applicable for weak-ordered memory model archs,
+ * such as IA-64).
+ */
+ wmb();
+ writel(i, tx_ring->tail);
+ }
+
return;
dma_error:
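[Editor's note] To make the tail/RS algorithm in the comment above
concrete, here is a compact standalone model of the decision logic, with
the ring state reduced to the two fields the algorithm reads
(illustrative only, mirroring the added code in the final hunk):

    #include <stdbool.h>

    #define WB_STRIDE 0x3

    struct model_ring {
            bool last_xmit_more;      /* I40E_TXR_FLAGS_LAST_XMIT_MORE_SET */
            unsigned int packet_stride;
    };

    /* Returns true if the tail register should be bumped; *do_rs says
     * whether the last descriptor gets RS (request write-back) in
     * addition to EOP, or EOP alone.
     */
    static bool tail_and_rs(struct model_ring *r, bool xmit_more,
                            bool queue_stopped, unsigned int desc_count,
                            bool *do_rs)
    {
            *do_rs = false;

            if (xmit_more && !queue_stopped) {
                    r->last_xmit_more = true;
                    return false;             /* defer both tail and RS */
            }
            if (!xmit_more && !queue_stopped && !r->last_xmit_more &&
                r->packet_stride < WB_STRIDE && desc_count < WB_STRIDE) {
                    r->packet_stride++;       /* EOP only, but bump tail */
                    return true;
            }
            r->packet_stride = 0;             /* 4th packet, long packet, or */
            r->last_xmit_more = false;        /* trailing edge of an         */
            *do_rs = true;                    /* xmit_more burst: set RS     */
            return true;
    }

So a steady stream of short packets bumps the tail every time but only
requests a descriptor write-back on every fourth packet, while an
xmit_more burst defers the tail bump, and with it the wmb(), to the
final packet of the burst.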