diff options
43 files changed, 773 insertions, 403 deletions
diff --git a/Documentation/devicetree/bindings/net/apm-xgene-enet.txt b/Documentation/devicetree/bindings/net/apm-xgene-enet.txt index dc7961b33076..f55aa280d34f 100644 --- a/Documentation/devicetree/bindings/net/apm-xgene-enet.txt +++ b/Documentation/devicetree/bindings/net/apm-xgene-enet.txt @@ -14,7 +14,10 @@ Required properties for all the ethernet interfaces: - "enet_csr": Ethernet control and status register address space - "ring_csr": Descriptor ring control and status register address space - "ring_cmd": Descriptor ring command register address space -- interrupts: Ethernet main interrupt +- interrupts: Two interrupt specifiers can be specified. + - First is the Rx interrupt. This irq is mandatory. + - Second is the Tx completion interrupt. + This is supported only on SGMII based 1GbE and 10GbE interfaces. - port-id: Port number (0 or 1) - clocks: Reference to the clock entry. - local-mac-address: MAC address assigned to this device diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi index c1eb6911e539..e74f6e0a208c 100644 --- a/arch/arm64/boot/dts/apm/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi @@ -638,7 +638,8 @@ <0x0 0x1f200000 0x0 0Xc300>, <0x0 0x1B000000 0x0 0X200>; reg-names = "enet_csr", "ring_csr", "ring_cmd"; - interrupts = <0x0 0xA0 0x4>; + interrupts = <0x0 0xA0 0x4>, + <0x0 0xA1 0x4>; dma-coherent; clocks = <&sge0clk 0>; local-mac-address = [00 00 00 00 00 00]; @@ -652,7 +653,8 @@ <0x0 0x1f200000 0x0 0Xc300>, <0x0 0x1B000000 0x0 0X8000>; reg-names = "enet_csr", "ring_csr", "ring_cmd"; - interrupts = <0x0 0xAC 0x4>; + interrupts = <0x0 0xAC 0x4>, + <0x0 0xAD 0x4>; port-id = <1>; dma-coherent; clocks = <&sge1clk 0>; @@ -667,7 +669,8 @@ <0x0 0x1f600000 0x0 0Xc300>, <0x0 0x18000000 0x0 0X200>; reg-names = "enet_csr", "ring_csr", "ring_cmd"; - interrupts = <0x0 0x60 0x4>; + interrupts = <0x0 0x60 0x4>, + <0x0 0x61 0x4>; dma-coherent; clocks = <&xge0clk 0>; /* mac address will be overwritten by the bootloader */ diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index 343ea7c987aa..ff95d15a2384 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -57,7 +57,6 @@ enum interruption_class { IRQIO_TAP, IRQIO_VMR, IRQIO_LCS, - IRQIO_CLW, IRQIO_CTC, IRQIO_APB, IRQIO_ADM, diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 8276f21ea7be..60496d405ebf 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -537,7 +537,7 @@ static int skcipher_recvmsg_async(struct socket *sock, struct msghdr *msg, while (iov_iter_count(&msg->msg_iter)) { struct skcipher_async_rsgl *rsgl; - unsigned long used; + int used; if (!ctx->used) { err = skcipher_wait_for_data(sk, flags); diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 6146a993a136..40d3530d7f30 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -428,13 +428,23 @@ static int xgene_enet_register_irq(struct net_device *ndev) { struct xgene_enet_pdata *pdata = netdev_priv(ndev); struct device *dev = ndev_to_dev(ndev); + struct xgene_enet_desc_ring *ring; int ret; - ret = devm_request_irq(dev, pdata->rx_ring->irq, xgene_enet_rx_irq, - IRQF_SHARED, ndev->name, pdata->rx_ring); - if (ret) { - netdev_err(ndev, "rx%d interrupt request failed\n", - pdata->rx_ring->irq); + ring = pdata->rx_ring; + ret = devm_request_irq(dev, ring->irq, xgene_enet_rx_irq, + IRQF_SHARED, ring->irq_name, ring); + if (ret) + netdev_err(ndev, "Failed to request irq %s\n", ring->irq_name); + + if (pdata->cq_cnt) { + ring = pdata->tx_ring->cp_ring; + ret = devm_request_irq(dev, ring->irq, xgene_enet_rx_irq, + IRQF_SHARED, ring->irq_name, ring); + if (ret) { + netdev_err(ndev, "Failed to request irq %s\n", + ring->irq_name); + } } return ret; @@ -448,6 +458,37 @@ static void xgene_enet_free_irq(struct net_device *ndev) pdata = netdev_priv(ndev); dev = ndev_to_dev(ndev); devm_free_irq(dev, pdata->rx_ring->irq, pdata->rx_ring); + + if (pdata->cq_cnt) { + devm_free_irq(dev, pdata->tx_ring->cp_ring->irq, + pdata->tx_ring->cp_ring); + } +} + +static void xgene_enet_napi_enable(struct xgene_enet_pdata *pdata) +{ + struct napi_struct *napi; + + napi = &pdata->rx_ring->napi; + napi_enable(napi); + + if (pdata->cq_cnt) { + napi = &pdata->tx_ring->cp_ring->napi; + napi_enable(napi); + } +} + +static void xgene_enet_napi_disable(struct xgene_enet_pdata *pdata) +{ + struct napi_struct *napi; + + napi = &pdata->rx_ring->napi; + napi_disable(napi); + + if (pdata->cq_cnt) { + napi = &pdata->tx_ring->cp_ring->napi; + napi_disable(napi); + } } static int xgene_enet_open(struct net_device *ndev) @@ -462,7 +503,7 @@ static int xgene_enet_open(struct net_device *ndev) ret = xgene_enet_register_irq(ndev); if (ret) return ret; - napi_enable(&pdata->rx_ring->napi); + xgene_enet_napi_enable(pdata); if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) phy_start(pdata->phy_dev); @@ -486,7 +527,7 @@ static int xgene_enet_close(struct net_device *ndev) else cancel_delayed_work_sync(&pdata->link_work); - napi_disable(&pdata->rx_ring->napi); + xgene_enet_napi_disable(pdata); xgene_enet_free_irq(ndev); xgene_enet_process_ring(pdata->rx_ring, -1); @@ -580,6 +621,8 @@ static void xgene_enet_free_desc_rings(struct xgene_enet_pdata *pdata) if (ring) { if (ring->cp_ring && ring->cp_ring->cp_skb) devm_kfree(dev, ring->cp_ring->cp_skb); + if (ring->cp_ring && pdata->cq_cnt) + xgene_enet_free_desc_ring(ring->cp_ring); xgene_enet_free_desc_ring(ring); } @@ -673,6 +716,12 @@ static int xgene_enet_create_desc_rings(struct net_device *ndev) rx_ring->nbufpool = NUM_BUFPOOL; rx_ring->buf_pool = buf_pool; rx_ring->irq = pdata->rx_irq; + if (!pdata->cq_cnt) { + snprintf(rx_ring->irq_name, IRQ_ID_SIZE, "%s-rx-txc", + ndev->name); + } else { + snprintf(rx_ring->irq_name, IRQ_ID_SIZE, "%s-rx", ndev->name); + } buf_pool->rx_skb = devm_kcalloc(dev, buf_pool->slots, sizeof(struct sk_buff *), GFP_KERNEL); if (!buf_pool->rx_skb) { @@ -694,7 +743,22 @@ static int xgene_enet_create_desc_rings(struct net_device *ndev) } pdata->tx_ring = tx_ring; - cp_ring = pdata->rx_ring; + if (!pdata->cq_cnt) { + cp_ring = pdata->rx_ring; + } else { + /* allocate tx completion descriptor ring */ + ring_id = xgene_enet_get_ring_id(RING_OWNER_CPU, cpu_bufnum++); + cp_ring = xgene_enet_create_desc_ring(ndev, ring_num++, + RING_CFGSIZE_16KB, + ring_id); + if (!cp_ring) { + ret = -ENOMEM; + goto err; + } + cp_ring->irq = pdata->txc_irq; + snprintf(cp_ring->irq_name, IRQ_ID_SIZE, "%s-txc", ndev->name); + } + cp_ring->cp_skb = devm_kcalloc(dev, tx_ring->slots, sizeof(struct sk_buff *), GFP_KERNEL); if (!cp_ring->cp_skb) { @@ -853,14 +917,6 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata) return -ENOMEM; } - ret = platform_get_irq(pdev, 0); - if (ret <= 0) { - dev_err(dev, "Unable to get ENET Rx IRQ\n"); - ret = ret ? : -ENXIO; - return ret; - } - pdata->rx_irq = ret; - ret = xgene_get_port_id(dev, pdata); if (ret) return ret; @@ -882,6 +938,24 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata) return -ENODEV; } + ret = platform_get_irq(pdev, 0); + if (ret <= 0) { + dev_err(dev, "Unable to get ENET Rx IRQ\n"); + ret = ret ? : -ENXIO; + return ret; + } + pdata->rx_irq = ret; + + if (pdata->phy_mode != PHY_INTERFACE_MODE_RGMII) { + ret = platform_get_irq(pdev, 1); + if (ret <= 0) { + dev_err(dev, "Unable to get ENET Tx completion IRQ\n"); + ret = ret ? : -ENXIO; + return ret; + } + pdata->txc_irq = ret; + } + pdata->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(pdata->clk)) { /* Firmware may have set up the clock already. */ @@ -950,11 +1024,13 @@ static void xgene_enet_setup_ops(struct xgene_enet_pdata *pdata) pdata->mac_ops = &xgene_sgmac_ops; pdata->port_ops = &xgene_sgport_ops; pdata->rm = RM1; + pdata->cq_cnt = XGENE_MAX_TXC_RINGS; break; default: pdata->mac_ops = &xgene_xgmac_ops; pdata->port_ops = &xgene_xgport_ops; pdata->rm = RM0; + pdata->cq_cnt = XGENE_MAX_TXC_RINGS; break; } @@ -977,12 +1053,38 @@ static void xgene_enet_setup_ops(struct xgene_enet_pdata *pdata) } +static void xgene_enet_napi_add(struct xgene_enet_pdata *pdata) +{ + struct napi_struct *napi; + + napi = &pdata->rx_ring->napi; + netif_napi_add(pdata->ndev, napi, xgene_enet_napi, NAPI_POLL_WEIGHT); + + if (pdata->cq_cnt) { + napi = &pdata->tx_ring->cp_ring->napi; + netif_napi_add(pdata->ndev, napi, xgene_enet_napi, + NAPI_POLL_WEIGHT); + } +} + +static void xgene_enet_napi_del(struct xgene_enet_pdata *pdata) +{ + struct napi_struct *napi; + + napi = &pdata->rx_ring->napi; + netif_napi_del(napi); + + if (pdata->cq_cnt) { + napi = &pdata->tx_ring->cp_ring->napi; + netif_napi_del(napi); + } +} + static int xgene_enet_probe(struct platform_device *pdev) { struct net_device *ndev; struct xgene_enet_pdata *pdata; struct device *dev = &pdev->dev; - struct napi_struct *napi; struct xgene_mac_ops *mac_ops; int ret; @@ -1024,8 +1126,7 @@ static int xgene_enet_probe(struct platform_device *pdev) if (ret) goto err; - napi = &pdata->rx_ring->napi; - netif_napi_add(ndev, napi, xgene_enet_napi, NAPI_POLL_WEIGHT); + xgene_enet_napi_add(pdata); mac_ops = pdata->mac_ops; if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) ret = xgene_enet_mdio_config(pdata); @@ -1052,7 +1153,7 @@ static int xgene_enet_remove(struct platform_device *pdev) mac_ops->rx_disable(pdata); mac_ops->tx_disable(pdata); - netif_napi_del(&pdata->rx_ring->napi); + xgene_enet_napi_del(pdata); xgene_enet_mdio_remove(pdata); xgene_enet_delete_desc_rings(pdata); unregister_netdev(ndev); diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h index b93ed21a157f..8f3d232b09bc 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h @@ -51,6 +51,9 @@ #define START_BP_BUFNUM_1 0x2A #define START_RING_NUM_1 264 +#define IRQ_ID_SIZE 16 +#define XGENE_MAX_TXC_RINGS 1 + #define PHY_POLL_LINK_ON (10 * HZ) #define PHY_POLL_LINK_OFF (PHY_POLL_LINK_ON / 5) @@ -63,6 +66,7 @@ struct xgene_enet_desc_ring { u16 tail; u16 slots; u16 irq; + char irq_name[IRQ_ID_SIZE]; u32 size; u32 state[NUM_RING_CONFIG]; void __iomem *cmd_base; @@ -117,6 +121,8 @@ struct xgene_enet_pdata { u32 cp_qcnt_hi; u32 cp_qcnt_low; u32 rx_irq; + u32 txc_irq; + u8 cq_cnt; void __iomem *eth_csr_addr; void __iomem *eth_ring_if_addr; void __iomem *eth_diag_csr_addr; diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index c38d5429e27a..31e14079e1d7 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -964,36 +964,58 @@ static void bcmgenet_free_cb(struct enet_cb *cb) dma_unmap_addr_set(cb, dma_addr, 0); } -static inline void bcmgenet_tx_ring16_int_disable(struct bcmgenet_priv *priv, - struct bcmgenet_tx_ring *ring) +static inline void bcmgenet_rx_ring16_int_disable(struct bcmgenet_rx_ring *ring) { - bcmgenet_intrl2_0_writel(priv, + bcmgenet_intrl2_0_writel(ring->priv, + UMAC_IRQ_RXDMA_BDONE | UMAC_IRQ_RXDMA_PDONE, + INTRL2_CPU_MASK_SET); +} + +static inline void bcmgenet_rx_ring16_int_enable(struct bcmgenet_rx_ring *ring) +{ + bcmgenet_intrl2_0_writel(ring->priv, + UMAC_IRQ_RXDMA_BDONE | UMAC_IRQ_RXDMA_PDONE, + INTRL2_CPU_MASK_CLEAR); +} + +static inline void bcmgenet_rx_ring_int_disable(struct bcmgenet_rx_ring *ring) +{ + bcmgenet_intrl2_1_writel(ring->priv, + 1 << (UMAC_IRQ1_RX_INTR_SHIFT + ring->index), + INTRL2_CPU_MASK_SET); +} + +static inline void bcmgenet_rx_ring_int_enable(struct bcmgenet_rx_ring *ring) +{ + bcmgenet_intrl2_1_writel(ring->priv, + 1 << (UMAC_IRQ1_RX_INTR_SHIFT + ring->index), + INTRL2_CPU_MASK_CLEAR); +} + +static inline void bcmgenet_tx_ring16_int_disable(struct bcmgenet_tx_ring *ring) +{ + bcmgenet_intrl2_0_writel(ring->priv, UMAC_IRQ_TXDMA_BDONE | UMAC_IRQ_TXDMA_PDONE, INTRL2_CPU_MASK_SET); } -static inline void bcmgenet_tx_ring16_int_enable(struct bcmgenet_priv *priv, - struct bcmgenet_tx_ring *ring) +static inline void bcmgenet_tx_ring16_int_enable(struct bcmgenet_tx_ring *ring) { - bcmgenet_intrl2_0_writel(priv, + bcmgenet_intrl2_0_writel(ring->priv, UMAC_IRQ_TXDMA_BDONE | UMAC_IRQ_TXDMA_PDONE, INTRL2_CPU_MASK_CLEAR); } -static inline void bcmgenet_tx_ring_int_enable(struct bcmgenet_priv *priv, - struct bcmgenet_tx_ring *ring) +static inline void bcmgenet_tx_ring_int_enable(struct bcmgenet_tx_ring *ring) { - bcmgenet_intrl2_1_writel(priv, (1 << ring->index), + bcmgenet_intrl2_1_writel(ring->priv, 1 << ring->index, INTRL2_CPU_MASK_CLEAR); - priv->int1_mask &= ~(1 << ring->index); } -static inline void bcmgenet_tx_ring_int_disable(struct bcmgenet_priv *priv, - struct bcmgenet_tx_ring *ring) +static inline void bcmgenet_tx_ring_int_disable(struct bcmgenet_tx_ring *ring) { - bcmgenet_intrl2_1_writel(priv, (1 << ring->index), + bcmgenet_intrl2_1_writel(ring->priv, 1 << ring->index, INTRL2_CPU_MASK_SET); - priv->int1_mask |= (1 << ring->index); } /* Unlocked version of the reclaim routine */ @@ -1085,7 +1107,7 @@ static int bcmgenet_tx_poll(struct napi_struct *napi, int budget) if (work_done == 0) { napi_complete(napi); - ring->int_enable(ring->priv, ring); + ring->int_enable(ring); return 0; } @@ -1396,11 +1418,10 @@ static struct sk_buff *bcmgenet_rx_refill(struct bcmgenet_priv *priv, /* bcmgenet_desc_rx - descriptor based rx process. * this could be called from bottom half, or from NAPI polling method. */ -static unsigned int bcmgenet_desc_rx(struct bcmgenet_priv *priv, - unsigned int index, +static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, unsigned int budget) { - struct bcmgenet_rx_ring *ring = &priv->rx_rings[index]; + struct bcmgenet_priv *priv = ring->priv; struct net_device *dev = priv->dev; struct enet_cb *cb; struct sk_buff *skb; @@ -1412,7 +1433,7 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_priv *priv, unsigned int discards; unsigned int chksum_ok = 0; - p_index = bcmgenet_rdma_ring_readl(priv, index, RDMA_PROD_INDEX); + p_index = bcmgenet_rdma_ring_readl(priv, ring->index, RDMA_PROD_INDEX); discards = (p_index >> DMA_P_INDEX_DISCARD_CNT_SHIFT) & DMA_P_INDEX_DISCARD_CNT_MASK; @@ -1425,7 +1446,7 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_priv *priv, /* Clear HW register when we reach 75% of maximum 0xFFFF */ if (ring->old_discards >= 0xC000) { ring->old_discards = 0; - bcmgenet_rdma_ring_writel(priv, index, 0, + bcmgenet_rdma_ring_writel(priv, ring->index, 0, RDMA_PROD_INDEX); } } @@ -1533,7 +1554,7 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_priv *priv, dev->stats.multicast++; /* Notify kernel */ - napi_gro_receive(&priv->napi, skb); + napi_gro_receive(&ring->napi, skb); netif_dbg(priv, rx_status, dev, "pushed up to kernel\n"); next: @@ -1544,12 +1565,29 @@ next: ring->read_ptr = ring->cb_ptr; ring->c_index = (ring->c_index + 1) & DMA_C_INDEX_MASK; - bcmgenet_rdma_ring_writel(priv, index, ring->c_index, RDMA_CONS_INDEX); + bcmgenet_rdma_ring_writel(priv, ring->index, ring->c_index, RDMA_CONS_INDEX); } return rxpktprocessed; } +/* Rx NAPI polling method */ +static int bcmgenet_rx_poll(struct napi_struct *napi, int budget) +{ + struct bcmgenet_rx_ring *ring = container_of(napi, + struct bcmgenet_rx_ring, napi); + unsigned int work_done; + + work_done = bcmgenet_desc_rx(ring, budget); + + if (work_done < budget) { + napi_complete(napi); + ring->int_enable(ring); + } + + return work_done; +} + /* Assign skb to RX DMA descriptor. */ static int bcmgenet_alloc_rx_buffers(struct bcmgenet_priv *priv, struct bcmgenet_rx_ring *ring) @@ -1658,8 +1696,10 @@ static int init_umac(struct bcmgenet_priv *priv) { struct device *kdev = &priv->pdev->dev; int ret; - u32 reg, cpu_mask_clear; - int index; + u32 reg; + u32 int0_enable = 0; + u32 int1_enable = 0; + int i; dev_dbg(&priv->pdev->dev, "bcmgenet: init_umac\n"); @@ -1686,15 +1726,17 @@ static int init_umac(struct bcmgenet_priv *priv) bcmgenet_intr_disable(priv); - cpu_mask_clear = UMAC_IRQ_RXDMA_BDONE | UMAC_IRQ_TXDMA_BDONE; + /* Enable Rx default queue 16 interrupts */ + int0_enable |= (UMAC_IRQ_RXDMA_BDONE | UMAC_IRQ_RXDMA_PDONE); - dev_dbg(kdev, "%s:Enabling RXDMA_BDONE interrupt\n", __func__); + /* Enable Tx default queue 16 interrupts */ + int0_enable |= (UMAC_IRQ_TXDMA_BDONE | UMAC_IRQ_TXDMA_PDONE); /* Monitor cable plug/unplugged event for internal PHY */ if (phy_is_internal(priv->phydev)) { - cpu_mask_clear |= (UMAC_IRQ_LINK_DOWN | UMAC_IRQ_LINK_UP); + int0_enable |= (UMAC_IRQ_LINK_DOWN | UMAC_IRQ_LINK_UP); } else if (priv->ext_phy) { - cpu_mask_clear |= (UMAC_IRQ_LINK_DOWN | UMAC_IRQ_LINK_UP); + int0_enable |= (UMAC_IRQ_LINK_DOWN | UMAC_IRQ_LINK_UP); } else if (priv->phy_interface == PHY_INTERFACE_MODE_MOCA) { reg = bcmgenet_bp_mc_get(priv); reg |= BIT(priv->hw_params->bp_in_en_shift); @@ -1709,13 +1751,18 @@ static int init_umac(struct bcmgenet_priv *priv) /* Enable MDIO interrupts on GENET v3+ */ if (priv->hw_params->flags & GENET_HAS_MDIO_INTR) - cpu_mask_clear |= UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR; + int0_enable |= (UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR); + + /* Enable Rx priority queue interrupts */ + for (i = 0; i < priv->hw_params->rx_queues; ++i) + int1_enable |= (1 << (UMAC_IRQ1_RX_INTR_SHIFT + i)); - bcmgenet_intrl2_0_writel(priv, cpu_mask_clear, INTRL2_CPU_MASK_CLEAR); + /* Enable Tx priority queue interrupts */ + for (i = 0; i < priv->hw_params->tx_queues; ++i) + int1_enable |= (1 << i); - for (index = 0; index < priv->hw_params->tx_queues; index++) - bcmgenet_intrl2_1_writel(priv, (1 << index), - INTRL2_CPU_MASK_CLEAR); + bcmgenet_intrl2_0_writel(priv, int0_enable, INTRL2_CPU_MASK_CLEAR); + bcmgenet_intrl2_1_writel(priv, int1_enable, INTRL2_CPU_MASK_CLEAR); /* Enable rx/tx engine.*/ dev_dbg(kdev, "done init umac\n"); @@ -1734,7 +1781,6 @@ static void bcmgenet_init_tx_ring(struct bcmgenet_priv *priv, spin_lock_init(&ring->lock); ring->priv = priv; - netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64); ring->index = index; if (index == DESC_INDEX) { ring->queue = 0; @@ -1778,17 +1824,6 @@ static void bcmgenet_init_tx_ring(struct bcmgenet_priv *priv, TDMA_WRITE_PTR); bcmgenet_tdma_ring_writel(priv, index, end_ptr * words_per_bd - 1, DMA_END_ADDR); - - napi_enable(&ring->napi); -} - -static void bcmgenet_fini_tx_ring(struct bcmgenet_priv *priv, - unsigned int index) -{ - struct bcmgenet_tx_ring *ring = &priv->tx_rings[index]; - - napi_disable(&ring->napi); - netif_napi_del(&ring->napi); } /* Initialize a RDMA ring */ @@ -1800,7 +1835,15 @@ static int bcmgenet_init_rx_ring(struct bcmgenet_priv *priv, u32 words_per_bd = WORDS_PER_BD(priv); int ret; + ring->priv = priv; ring->index = index; + if (index == DESC_INDEX) { + ring->int_enable = bcmgenet_rx_ring16_int_enable; + ring->int_disable = bcmgenet_rx_ring16_int_disable; + } else { + ring->int_enable = bcmgenet_rx_ring_int_enable; + ring->int_disable = bcmgenet_rx_ring_int_disable; + } ring->cbs = priv->rx_cbs + start_ptr; ring->size = size; ring->c_index = 0; @@ -1836,6 +1879,62 @@ static int bcmgenet_init_rx_ring(struct bcmgenet_priv *priv, return ret; } +static void bcmgenet_init_tx_napi(struct bcmgenet_priv *priv) +{ + unsigned int i; + struct bcmgenet_tx_ring *ring; + + for (i = 0; i < priv->hw_params->tx_queues; ++i) { + ring = &priv->tx_rings[i]; + netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64); + } + + ring = &priv->tx_rings[DESC_INDEX]; + netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64); +} + +static void bcmgenet_enable_tx_napi(struct bcmgenet_priv *priv) +{ + unsigned int i; + struct bcmgenet_tx_ring *ring; + + for (i = 0; i < priv->hw_params->tx_queues; ++i) { + ring = &priv->tx_rings[i]; + napi_enable(&ring->napi); + } + + ring = &priv->tx_rings[DESC_INDEX]; + napi_enable(&ring->napi); +} + +static void bcmgenet_disable_tx_napi(struct bcmgenet_priv *priv) +{ + unsigned int i; + struct bcmgenet_tx_ring *ring; + + for (i = 0; i < priv->hw_params->tx_queues; ++i) { + ring = &priv->tx_rings[i]; + napi_disable(&ring->napi); + } + + ring = &priv->tx_rings[DESC_INDEX]; + napi_disable(&ring->napi); +} + +static void bcmgenet_fini_tx_napi(struct bcmgenet_priv *priv) +{ + unsigned int i; + struct bcmgenet_tx_ring *ring; + + for (i = 0; i < priv->hw_params->tx_queues; ++i) { + ring = &priv->tx_rings[i]; + netif_napi_del(&ring->napi); + } + + ring = &priv->tx_rings[DESC_INDEX]; + netif_napi_del(&ring->napi); +} + /* Initialize Tx queues * * Queues 0-3 are priority-based, each one has 32 descriptors, @@ -1896,6 +1995,9 @@ static void bcmgenet_init_tx_queues(struct net_device *dev) bcmgenet_tdma_writel(priv, dma_priority[1], DMA_PRIORITY_1); bcmgenet_tdma_writel(priv, dma_priority[2], DMA_PRIORITY_2); + /* Initialize Tx NAPI */ + bcmgenet_init_tx_napi(priv); + /* Enable Tx queues */ bcmgenet_tdma_writel(priv, ring_cfg, DMA_RING_CFG); @@ -1905,6 +2007,62 @@ static void bcmgenet_init_tx_queues(struct net_device *dev) bcmgenet_tdma_writel(priv, dma_ctrl, DMA_CTRL); } +static void bcmgenet_init_rx_napi(struct bcmgenet_priv *priv) +{ + unsigned int i; + struct bcmgenet_rx_ring *ring; + + for (i = 0; i < priv->hw_params->rx_queues; ++i) { + ring = &priv->rx_rings[i]; + netif_napi_add(priv->dev, &ring->napi, bcmgenet_rx_poll, 64); + } + + ring = &priv->rx_rings[DESC_INDEX]; + netif_napi_add(priv->dev, &ring->napi, bcmgenet_rx_poll, 64); +} + +static void bcmgenet_enable_rx_napi(struct bcmgenet_priv *priv) +{ + unsigned int i; + struct bcmgenet_rx_ring *ring; + + for (i = 0; i < priv->hw_params->rx_queues; ++i) { + ring = &priv->rx_rings[i]; + napi_enable(&ring->napi); + } + + ring = &priv->rx_rings[DESC_INDEX]; + napi_enable(&ring->napi); +} + +static void bcmgenet_disable_rx_napi(struct bcmgenet_priv *priv) +{ + unsigned int i; + struct bcmgenet_rx_ring *ring; + + for (i = 0; i < priv->hw_params->rx_queues; ++i) { + ring = &priv->rx_rings[i]; + napi_disable(&ring->napi); + } + + ring = &priv->rx_rings[DESC_INDEX]; + napi_disable(&ring->napi); +} + +static void bcmgenet_fini_rx_napi(struct bcmgenet_priv *priv) +{ + unsigned int i; + struct bcmgenet_rx_ring *ring; + + for (i = 0; i < priv->hw_params->rx_queues; ++i) { + ring = &priv->rx_rings[i]; + netif_napi_del(&ring->napi); + } + + ring = &priv->rx_rings[DESC_INDEX]; + netif_napi_del(&ring->napi); +} + /* Initialize Rx queues * * Queues 0-15 are priority queues. Hardware Filtering Block (HFB) can be @@ -1954,6 +2112,9 @@ static int bcmgenet_init_rx_queues(struct net_device *dev) ring_cfg |= (1 << DESC_INDEX); dma_ctrl |= (1 << (DESC_INDEX + DMA_RING_BUF_EN_SHIFT)); + /* Initialize Rx NAPI */ + bcmgenet_init_rx_napi(priv); + /* Enable rings */ bcmgenet_rdma_writel(priv, ring_cfg, DMA_RING_CFG); @@ -2037,12 +2198,8 @@ static void __bcmgenet_fini_dma(struct bcmgenet_priv *priv) static void bcmgenet_fini_dma(struct bcmgenet_priv *priv) { - int i; - - bcmgenet_fini_tx_ring(priv, DESC_INDEX); - - for (i = 0; i < priv->hw_params->tx_queues; i++) - bcmgenet_fini_tx_ring(priv, i); + bcmgenet_fini_rx_napi(priv); + bcmgenet_fini_tx_napi(priv); __bcmgenet_fini_dma(priv); } @@ -2056,9 +2213,6 @@ static int bcmgenet_init_dma(struct bcmgenet_priv *priv) netif_dbg(priv, hw, priv->dev, "%s\n", __func__); - /* Init rDma */ - bcmgenet_rdma_writel(priv, DMA_MAX_BURST_LENGTH, DMA_SCB_BURST_SIZE); - /* Initialize common Rx ring structures */ priv->rx_bds = priv->base + priv->hw_params->rdma_offset; priv->num_rx_bds = TOTAL_DESC; @@ -2072,25 +2226,13 @@ static int bcmgenet_init_dma(struct bcmgenet_priv *priv) cb->bd_addr = priv->rx_bds + i * DMA_DESC_SIZE; } - /* Initialize Rx queues */ - ret = bcmgenet_init_rx_queues(priv->dev); - if (ret) { - netdev_err(priv->dev, "failed to initialize Rx queues\n"); - bcmgenet_free_rx_buffers(priv); - kfree(priv->rx_cbs); - return ret; - } - - /* Init tDma */ - bcmgenet_tdma_writel(priv, DMA_MAX_BURST_LENGTH, DMA_SCB_BURST_SIZE); - /* Initialize common TX ring structures */ priv->tx_bds = priv->base + priv->hw_params->tdma_offset; priv->num_tx_bds = TOTAL_DESC; priv->tx_cbs = kcalloc(priv->num_tx_bds, sizeof(struct enet_cb), GFP_KERNEL); if (!priv->tx_cbs) { - __bcmgenet_fini_dma(priv); + kfree(priv->rx_cbs); return -ENOMEM; } @@ -2099,28 +2241,26 @@ static int bcmgenet_init_dma(struct bcmgenet_priv *priv) cb->bd_addr = priv->tx_bds + i * DMA_DESC_SIZE; } - /* Initialize Tx queues */ - bcmgenet_init_tx_queues(priv->dev); - - return 0; -} + /* Init rDma */ + bcmgenet_rdma_writel(priv, DMA_MAX_BURST_LENGTH, DMA_SCB_BURST_SIZE); -/* NAPI polling method*/ -static int bcmgenet_poll(struct napi_struct *napi, int budget) -{ - struct bcmgenet_priv *priv = container_of(napi, - struct bcmgenet_priv, napi); - unsigned int work_done; + /* Initialize Rx queues */ + ret = bcmgenet_init_rx_queues(priv->dev); + if (ret) { + netdev_err(priv->dev, "failed to initialize Rx queues\n"); + bcmgenet_free_rx_buffers(priv); + kfree(priv->rx_cbs); + kfree(priv->tx_cbs); + return ret; + } - work_done = bcmgenet_desc_rx(priv, DESC_INDEX, budget); + /* Init tDma */ + bcmgenet_tdma_writel(priv, DMA_MAX_BURST_LENGTH, DMA_SCB_BURST_SIZE); - if (work_done < budget) { - napi_complete(napi); - bcmgenet_intrl2_0_writel(priv, UMAC_IRQ_RXDMA_BDONE, - INTRL2_CPU_MASK_CLEAR); - } + /* Initialize Tx queues */ + bcmgenet_init_tx_queues(priv->dev); - return work_done; + return 0; } /* Interrupt bottom half */ @@ -2147,50 +2287,66 @@ static void bcmgenet_irq_task(struct work_struct *work) } } -/* bcmgenet_isr1: interrupt handler for ring buffer. */ +/* bcmgenet_isr1: handle Rx and Tx priority queues */ static irqreturn_t bcmgenet_isr1(int irq, void *dev_id) { struct bcmgenet_priv *priv = dev_id; - struct bcmgenet_tx_ring *ring; + struct bcmgenet_rx_ring *rx_ring; + struct bcmgenet_tx_ring *tx_ring; unsigned int index; /* Save irq status for bottom-half processing. */ priv->irq1_stat = bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_STAT) & ~bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_MASK_STATUS); + /* clear interrupts */ bcmgenet_intrl2_1_writel(priv, priv->irq1_stat, INTRL2_CPU_CLEAR); netif_dbg(priv, intr, priv->dev, "%s: IRQ=0x%x\n", __func__, priv->irq1_stat); - /* Check the MBDONE interrupts. - * packet is done, reclaim descriptors - */ + /* Check Rx priority queue interrupts */ + for (index = 0; index < priv->hw_params->rx_queues; index++) { + if (!(priv->irq1_stat & BIT(UMAC_IRQ1_RX_INTR_SHIFT + index))) + continue; + + rx_ring = &priv->rx_rings[index]; + + if (likely(napi_schedule_prep(&rx_ring->napi))) { + rx_ring->int_disable(rx_ring); + __napi_schedule(&rx_ring->napi); + } + } + + /* Check Tx priority queue interrupts */ for (index = 0; index < priv->hw_params->tx_queues; index++) { if (!(priv->irq1_stat & BIT(index))) continue; - ring = &priv->tx_rings[index]; + tx_ring = &priv->tx_rings[index]; - if (likely(napi_schedule_prep(&ring->napi))) { - ring->int_disable(priv, ring); - __napi_schedule(&ring->napi); + if (likely(napi_schedule_prep(&tx_ring->napi))) { + tx_ring->int_disable(tx_ring); + __napi_schedule(&tx_ring->napi); } } return IRQ_HANDLED; } -/* bcmgenet_isr0: Handle various interrupts. */ +/* bcmgenet_isr0: handle Rx and Tx default queues + other stuff */ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id) { struct bcmgenet_priv *priv = dev_id; + struct bcmgenet_rx_ring *rx_ring; + struct bcmgenet_tx_ring *tx_ring; /* Save irq status for bottom-half processing. */ priv->irq0_stat = bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_STAT) & ~bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_MASK_STATUS); + /* clear interrupts */ bcmgenet_intrl2_0_writel(priv, priv->irq0_stat, INTRL2_CPU_CLEAR); @@ -2198,25 +2354,23 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id) "IRQ=0x%x\n", priv->irq0_stat); if (priv->irq0_stat & (UMAC_IRQ_RXDMA_BDONE | UMAC_IRQ_RXDMA_PDONE)) { - /* We use NAPI(software interrupt throttling, if - * Rx Descriptor throttling is not used. - * Disable interrupt, will be enabled in the poll method. - */ - if (likely(napi_schedule_prep(&priv->napi))) { - bcmgenet_intrl2_0_writel(priv, UMAC_IRQ_RXDMA_BDONE, - INTRL2_CPU_MASK_SET); - __napi_schedule(&priv->napi); + rx_ring = &priv->rx_rings[DESC_INDEX]; + + if (likely(napi_schedule_prep(&rx_ring->napi))) { + rx_ring->int_disable(rx_ring); + __napi_schedule(&rx_ring->napi); } } - if (priv->irq0_stat & - (UMAC_IRQ_TXDMA_BDONE | UMAC_IRQ_TXDMA_PDONE)) { - struct bcmgenet_tx_ring *ring = &priv->tx_rings[DESC_INDEX]; - if (likely(napi_schedule_prep(&ring->napi))) { - ring->int_disable(priv, ring); - __napi_schedule(&ring->napi); + if (priv->irq0_stat & (UMAC_IRQ_TXDMA_BDONE | UMAC_IRQ_TXDMA_PDONE)) { + tx_ring = &priv->tx_rings[DESC_INDEX]; + + if (likely(napi_schedule_prep(&tx_ring->napi))) { + tx_ring->int_disable(tx_ring); + __napi_schedule(&tx_ring->napi); } } + if (priv->irq0_stat & (UMAC_IRQ_PHY_DET_R | UMAC_IRQ_PHY_DET_F | UMAC_IRQ_LINK_UP | @@ -2463,7 +2617,8 @@ static void bcmgenet_netif_start(struct net_device *dev) struct bcmgenet_priv *priv = netdev_priv(dev); /* Start the network engine */ - napi_enable(&priv->napi); + bcmgenet_enable_rx_napi(priv); + bcmgenet_enable_tx_napi(priv); umac_enable_set(priv, CMD_TX_EN | CMD_RX_EN, true); @@ -2568,10 +2723,10 @@ static void bcmgenet_netif_stop(struct net_device *dev) struct bcmgenet_priv *priv = netdev_priv(dev); netif_tx_stop_all_queues(dev); - napi_disable(&priv->napi); phy_stop(priv->phydev); - bcmgenet_intr_disable(priv); + bcmgenet_disable_rx_napi(priv); + bcmgenet_disable_tx_napi(priv); /* Wait for pending work items to complete. Since interrupts are * disabled no new work will be scheduled. @@ -2972,7 +3127,6 @@ static int bcmgenet_probe(struct platform_device *pdev) dev->watchdog_timeo = 2 * HZ; dev->ethtool_ops = &bcmgenet_ethtool_ops; dev->netdev_ops = &bcmgenet_netdev_ops; - netif_napi_add(dev, &priv->napi, bcmgenet_poll, 64); priv->msg_enable = netif_msg_init(-1, GENET_MSG_DEFAULT); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 7a59879d441f..a834da1dfe4c 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -310,6 +310,11 @@ struct bcmgenet_mib_counters { #define UMAC_IRQ_MDIO_DONE (1 << 23) #define UMAC_IRQ_MDIO_ERROR (1 << 24) +/* INTRL2 instance 1 definitions */ +#define UMAC_IRQ1_TX_INTR_MASK 0xFFFF +#define UMAC_IRQ1_RX_INTR_MASK 0xFFFF +#define UMAC_IRQ1_RX_INTR_SHIFT 16 + /* Register block offsets */ #define GENET_SYS_OFF 0x0000 #define GENET_GR_BRIDGE_OFF 0x0040 @@ -535,14 +540,13 @@ struct bcmgenet_tx_ring { unsigned int prod_index; /* Tx ring producer index SW copy */ unsigned int cb_ptr; /* Tx ring initial CB ptr */ unsigned int end_ptr; /* Tx ring end CB ptr */ - void (*int_enable)(struct bcmgenet_priv *priv, - struct bcmgenet_tx_ring *); - void (*int_disable)(struct bcmgenet_priv *priv, - struct bcmgenet_tx_ring *); + void (*int_enable)(struct bcmgenet_tx_ring *); + void (*int_disable)(struct bcmgenet_tx_ring *); struct bcmgenet_priv *priv; }; struct bcmgenet_rx_ring { + struct napi_struct napi; /* Rx NAPI struct */ unsigned int index; /* Rx ring index */ struct enet_cb *cbs; /* Rx ring buffer control block */ unsigned int size; /* Rx ring size */ @@ -551,6 +555,9 @@ struct bcmgenet_rx_ring { unsigned int cb_ptr; /* Rx ring initial CB ptr */ unsigned int end_ptr; /* Rx ring end CB ptr */ unsigned int old_discards; + void (*int_enable)(struct bcmgenet_rx_ring *); + void (*int_disable)(struct bcmgenet_rx_ring *); + struct bcmgenet_priv *priv; }; /* device context */ @@ -558,11 +565,6 @@ struct bcmgenet_priv { void __iomem *base; enum bcmgenet_version version; struct net_device *dev; - u32 int0_mask; - u32 int1_mask; - - /* NAPI for descriptor based rx */ - struct napi_struct napi ____cacheline_aligned; /* transmit variables */ void __iomem *tx_bds; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_fcoe.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_fcoe.c index 062d3c0b5818..6c8a62eefe51 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_fcoe.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_fcoe.c @@ -46,17 +46,17 @@ bool cxgb_fcoe_sof_eof_supported(struct adapter *adap, struct sk_buff *skb) if ((sof != FC_SOF_I3) && (sof != FC_SOF_N3)) { dev_err(adap->pdev_dev, "Unsupported SOF 0x%x\n", sof); - return 0; + return false; } skb_copy_bits(skb, skb->len - 4, &eof, 1); if ((eof != FC_EOF_N) && (eof != FC_EOF_T)) { dev_err(adap->pdev_dev, "Unsupported EOF 0x%x\n", eof); - return 0; + return false; } - return 1; + return true; } /** diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index eb39673ed6a6..4b0494b9cc7c 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -30,11 +30,12 @@ #include <linux/firmware.h> #include <linux/slab.h> #include <linux/u64_stats_sync.h> +#include <linux/cpumask.h> #include "be_hw.h" #include "be_roce.h" -#define DRV_VER "10.4u" +#define DRV_VER "10.6.0.1" #define DRV_NAME "be2net" #define BE_NAME "Emulex BladeEngine2" #define BE3_NAME "Emulex BladeEngine3" @@ -183,6 +184,7 @@ struct be_eq_obj { u16 spurious_intr; struct napi_struct napi; struct be_adapter *adapter; + cpumask_var_t affinity_mask; #ifdef CONFIG_NET_RX_BUSY_POLL #define BE_EQ_IDLE 0 diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index d8df78b6554d..5ff7fba9b67c 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2342,6 +2342,7 @@ static void be_evt_queues_destroy(struct be_adapter *adapter) napi_hash_del(&eqo->napi); netif_napi_del(&eqo->napi); } + free_cpumask_var(eqo->affinity_mask); be_queue_free(adapter, &eqo->q); } } @@ -2357,6 +2358,11 @@ static int be_evt_queues_create(struct be_adapter *adapter) adapter->cfg_num_qs); for_all_evt_queues(adapter, eqo, i) { + if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL)) + return -ENOMEM; + cpumask_set_cpu_local_first(i, dev_to_node(&adapter->pdev->dev), + eqo->affinity_mask); + netif_napi_add(adapter->netdev, &eqo->napi, be_poll, BE_NAPI_WEIGHT); napi_hash_add(&eqo->napi); @@ -2448,8 +2454,9 @@ static void be_tx_queues_destroy(struct be_adapter *adapter) static int be_tx_qs_create(struct be_adapter *adapter) { - struct be_queue_info *cq, *eq; + struct be_queue_info *cq; struct be_tx_obj *txo; + struct be_eq_obj *eqo; int status, i; adapter->num_tx_qs = min(adapter->num_evt_qs, be_max_txqs(adapter)); @@ -2467,8 +2474,8 @@ static int be_tx_qs_create(struct be_adapter *adapter) /* If num_evt_qs is less than num_tx_qs, then more than * one txq share an eq */ - eq = &adapter->eq_obj[i % adapter->num_evt_qs].q; - status = be_cmd_cq_create(adapter, cq, eq, false, 3); + eqo = &adapter->eq_obj[i % adapter->num_evt_qs]; + status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3); if (status) return status; @@ -2480,6 +2487,9 @@ static int be_tx_qs_create(struct be_adapter *adapter) status = be_cmd_txq_create(adapter, txo); if (status) return status; + + netif_set_xps_queue(adapter->netdev, eqo->affinity_mask, + eqo->idx); } dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n", @@ -3028,6 +3038,8 @@ static int be_msix_register(struct be_adapter *adapter) status = request_irq(vec, be_msix, 0, eqo->desc, eqo); if (status) goto err_msix; + + irq_set_affinity_hint(vec, eqo->affinity_mask); } return 0; @@ -3072,7 +3084,7 @@ static void be_irq_unregister(struct be_adapter *adapter) { struct net_device *netdev = adapter->netdev; struct be_eq_obj *eqo; - int i; + int i, vec; if (!adapter->isr_registered) return; @@ -3084,8 +3096,11 @@ static void be_irq_unregister(struct be_adapter *adapter) } /* MSIx */ - for_all_evt_queues(adapter, eqo, i) - free_irq(be_msix_vec_get(adapter, eqo), eqo); + for_all_evt_queues(adapter, eqo, i) { + vec = be_msix_vec_get(adapter, eqo); + irq_set_affinity_hint(vec, NULL); + free_irq(vec, eqo); + } done: adapter->isr_registered = false; diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index 1dc628ffce2b..e3bfbd4d0136 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -281,7 +281,7 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb) nf_reset(skb); skb->ip_summed = CHECKSUM_NONE; - ip_select_ident(skb, NULL); + ip_select_ident(sock_net(sk), skb, NULL); ip_send_check(iph); ip_local_out(skb); diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 4e84236b62ce..a829930dac15 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -749,9 +749,9 @@ static int virtnet_poll(struct napi_struct *napi, int budget) { struct receive_queue *rq = container_of(napi, struct receive_queue, napi); - unsigned int r, received = 0; + unsigned int r, received; - received += virtnet_receive(rq, budget - received); + received = virtnet_receive(rq, budget); /* Out of packets? */ if (received < budget) { diff --git a/include/net/ip.h b/include/net/ip.h index 025c61c0dffb..d0808a323763 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -318,9 +318,10 @@ static inline unsigned int ip_skb_dst_mtu(const struct sk_buff *skb) } u32 ip_idents_reserve(u32 hash, int segs); -void __ip_select_ident(struct iphdr *iph, int segs); +void __ip_select_ident(struct net *net, struct iphdr *iph, int segs); -static inline void ip_select_ident_segs(struct sk_buff *skb, struct sock *sk, int segs) +static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb, + struct sock *sk, int segs) { struct iphdr *iph = ip_hdr(skb); @@ -337,13 +338,14 @@ static inline void ip_select_ident_segs(struct sk_buff *skb, struct sock *sk, in iph->id = 0; } } else { - __ip_select_ident(iph, segs); + __ip_select_ident(net, iph, segs); } } -static inline void ip_select_ident(struct sk_buff *skb, struct sock *sk) +static inline void ip_select_ident(struct net *net, struct sk_buff *skb, + struct sock *sk) { - ip_select_ident_segs(skb, sk, 1); + ip_select_ident_segs(net, skb, sk, 1); } static inline __wsum inet_compute_pseudo(struct sk_buff *skb, int proto) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index e7ba9758a345..65142e6af440 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -671,8 +671,9 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr)); } -void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt); -void ipv6_proxy_select_ident(struct sk_buff *skb); +void ipv6_select_ident(struct net *net, struct frag_hdr *fhdr, + struct rt6_info *rt); +void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb); int ip6_dst_hoplimit(struct dst_entry *dst); diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 856f01cb51dd..c56a438c3a1e 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -166,6 +166,9 @@ void sctp_remaddr_proc_exit(struct net *net); */ extern struct kmem_cache *sctp_chunk_cachep __read_mostly; extern struct kmem_cache *sctp_bucket_cachep __read_mostly; +extern long sysctl_sctp_mem[3]; +extern int sysctl_sctp_rmem[3]; +extern int sysctl_sctp_wmem[3]; /* * Section: Macros, externs, and inlines diff --git a/include/net/tcp.h b/include/net/tcp.h index 42690daa924e..963303fb96ae 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -529,8 +529,6 @@ int tcp_write_wakeup(struct sock *); void tcp_send_fin(struct sock *sk); void tcp_send_active_reset(struct sock *sk, gfp_t priority); int tcp_send_synack(struct sock *); -bool tcp_syn_flood_action(struct sock *sk, const struct sk_buff *skb, - const char *proto); void tcp_push_one(struct sock *, unsigned int mss_now); void tcp_send_ack(struct sock *sk); void tcp_send_delayed_ack(struct sock *sk); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index f0af7aa331c1..39555f3f263b 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -830,12 +830,13 @@ static bool dsa_slave_dev_check(struct net_device *dev) static int dsa_slave_master_changed(struct net_device *dev) { struct net_device *master = netdev_master_upper_dev_get(dev); + struct dsa_slave_priv *p = netdev_priv(dev); int err = 0; if (master && master->rtnl_link_ops && !strcmp(master->rtnl_link_ops->kind, "bridge")) err = dsa_slave_bridge_port_join(dev, master); - else + else if (dsa_port_is_bridged(p)) err = dsa_slave_bridge_port_leave(dev); return err; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index ad3f866085de..ad09213ac5b2 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -370,7 +370,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu) pip->saddr = fl4.saddr; pip->protocol = IPPROTO_IGMP; pip->tot_len = 0; /* filled in later */ - ip_select_ident(skb, NULL); + ip_select_ident(net, skb, NULL); ((u8 *)&pip[1])[0] = IPOPT_RA; ((u8 *)&pip[1])[1] = 4; ((u8 *)&pip[1])[2] = 0; @@ -714,7 +714,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, iph->daddr = dst; iph->saddr = fl4.saddr; iph->protocol = IPPROTO_IGMP; - ip_select_ident(skb, NULL); + ip_select_ident(net, skb, NULL); ((u8 *)&iph[1])[0] = IPOPT_RA; ((u8 *)&iph[1])[1] = 4; ((u8 *)&iph[1])[2] = 0; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 90b49e88e84a..8259e777b249 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -148,7 +148,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr); iph->saddr = saddr; iph->protocol = sk->sk_protocol; - ip_select_ident(skb, sk); + ip_select_ident(sock_net(sk), skb, sk); if (opt && opt->opt.optlen) { iph->ihl += opt->opt.optlen>>2; @@ -430,7 +430,8 @@ packet_routed: ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0); } - ip_select_ident_segs(skb, sk, skb_shinfo(skb)->gso_segs ?: 1); + ip_select_ident_segs(sock_net(sk), skb, sk, + skb_shinfo(skb)->gso_segs ?: 1); /* TODO : should we use skb->sk here instead of sk ? */ skb->priority = sk->sk_priority; @@ -1379,7 +1380,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, iph->ttl = ttl; iph->protocol = sk->sk_protocol; ip_copy_addrs(iph, fl4); - ip_select_ident(skb, sk); + ip_select_ident(net, skb, sk); if (opt) { iph->ihl += opt->optlen>>2; diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 88c386cf7d85..8c4dcc46acd2 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -74,7 +74,7 @@ int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, iph->daddr = dst; iph->saddr = src; iph->ttl = ttl; - __ip_select_ident(iph, skb_shinfo(skb)->gso_segs ?: 1); + __ip_select_ident(sock_net(sk), iph, skb_shinfo(skb)->gso_segs ?: 1); err = ip_local_out_sk(sk, skb); if (unlikely(net_xmit_eval(err))) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 5b188832800f..c688cd1b2110 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1642,7 +1642,8 @@ static struct notifier_block ip_mr_notifier = { * important for multicast video. */ -static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) +static void ip_encap(struct net *net, struct sk_buff *skb, + __be32 saddr, __be32 daddr) { struct iphdr *iph; const struct iphdr *old_iph = ip_hdr(skb); @@ -1661,7 +1662,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) iph->protocol = IPPROTO_IPIP; iph->ihl = 5; iph->tot_len = htons(skb->len); - ip_select_ident(skb, NULL); + ip_select_ident(net, skb, NULL); ip_send_check(iph); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); @@ -1758,7 +1759,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, * What do we do with netfilter? -- RR */ if (vif->flags & VIFF_TUNNEL) { - ip_encap(skb, vif->local, vif->remote); + ip_encap(net, skb, vif->local, vif->remote); /* FIXME: extra output firewall step used to be here. --RR */ vif->dev->stats.tx_packets++; vif->dev->stats.tx_bytes += skb->len; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 923cf538fce1..56946f47d446 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -404,7 +404,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, iph->check = 0; iph->tot_len = htons(length); if (!iph->id) - ip_select_ident(skb, NULL); + ip_select_ident(net, skb, NULL); iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 649c8a3f0189..be8703d02ef0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -482,7 +482,7 @@ u32 ip_idents_reserve(u32 hash, int segs) } EXPORT_SYMBOL(ip_idents_reserve); -void __ip_select_ident(struct iphdr *iph, int segs) +void __ip_select_ident(struct net *net, struct iphdr *iph, int segs) { static u32 ip_idents_hashrnd __read_mostly; u32 hash, id; @@ -491,7 +491,7 @@ void __ip_select_ident(struct iphdr *iph, int segs) hash = jhash_3words((__force u32)iph->daddr, (__force u32)iph->saddr, - iph->protocol, + iph->protocol ^ net_hash_mix(net), ip_idents_hashrnd); id = ip_idents_reserve(hash, segs); iph->id = htons(id); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 023196f7ec37..18b80e8bc533 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5987,6 +5987,35 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, } EXPORT_SYMBOL(inet_reqsk_alloc); +/* + * Return true if a syncookie should be sent + */ +static bool tcp_syn_flood_action(struct sock *sk, + const struct sk_buff *skb, + const char *proto) +{ + const char *msg = "Dropping request"; + bool want_cookie = false; + struct listen_sock *lopt; + +#ifdef CONFIG_SYN_COOKIES + if (sysctl_tcp_syncookies) { + msg = "Sending cookies"; + want_cookie = true; + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); + } else +#endif + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); + + lopt = inet_csk(sk)->icsk_accept_queue.listen_opt; + if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) { + lopt->synflood_warned = 1; + pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", + proto, ntohs(tcp_hdr(skb)->dest), msg); + } + return want_cookie; +} + int tcp_conn_request(struct request_sock_ops *rsk_ops, const struct tcp_request_sock_ops *af_ops, struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e073517b2cc7..5aababa20a21 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -856,35 +856,6 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) kfree(inet_rsk(req)->opt); } -/* - * Return true if a syncookie should be sent - */ -bool tcp_syn_flood_action(struct sock *sk, - const struct sk_buff *skb, - const char *proto) -{ - const char *msg = "Dropping request"; - bool want_cookie = false; - struct listen_sock *lopt; - -#ifdef CONFIG_SYN_COOKIES - if (sysctl_tcp_syncookies) { - msg = "Sending cookies"; - want_cookie = true; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); - } else -#endif - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); - - lopt = inet_csk(sk)->icsk_accept_queue.listen_opt; - if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) { - lopt->synflood_warned = 1; - pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", - proto, ntohs(tcp_hdr(skb)->dest), msg); - } - return want_cookie; -} -EXPORT_SYMBOL(tcp_syn_flood_action); #ifdef CONFIG_TCP_MD5SIG /* diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 91771a7c802f..35feda676464 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -63,7 +63,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->saddr = x->props.saddr.a4; top_iph->daddr = x->id.daddr.a4; - ip_select_ident(skb, NULL); + ip_select_ident(dev_net(dst->dev), skb, NULL); return 0; } diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 27ca79682efb..273eb26cd6d4 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -299,19 +299,16 @@ static int __net_init fib6_rules_net_init(struct net *net) ops = fib_rules_register(&fib6_rules_ops_template, net); if (IS_ERR(ops)) return PTR_ERR(ops); - net->ipv6.fib6_rules_ops = ops; - - err = fib_default_rule_add(net->ipv6.fib6_rules_ops, 0, - RT6_TABLE_LOCAL, 0); + err = fib_default_rule_add(ops, 0, RT6_TABLE_LOCAL, 0); if (err) goto out_fib6_rules_ops; - err = fib_default_rule_add(net->ipv6.fib6_rules_ops, - 0x7FFE, RT6_TABLE_MAIN, 0); + err = fib_default_rule_add(ops, 0x7FFE, RT6_TABLE_MAIN, 0); if (err) goto out_fib6_rules_ops; + net->ipv6.fib6_rules_ops = ops; out: return err; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 7e80b61b51ff..b06ad00048d5 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -628,7 +628,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) skb_reset_network_header(skb); memcpy(skb_network_header(skb), tmp_hdr, hlen); - ipv6_select_ident(fh, rt); + ipv6_select_ident(net, fh, rt); fh->nexthdr = nexthdr; fh->reserved = 0; fh->frag_off = htons(IP6_MF); @@ -775,7 +775,7 @@ slow_path: fh->nexthdr = nexthdr; fh->reserved = 0; if (!frag_id) { - ipv6_select_ident(fh, rt); + ipv6_select_ident(net, fh, rt); frag_id = fh->identification; } else fh->identification = frag_id; @@ -1079,7 +1079,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - sizeof(struct frag_hdr)) & ~7; skb_shinfo(skb)->gso_type = SKB_GSO_UDP; - ipv6_select_ident(&fhdr, rt); + ipv6_select_ident(sock_net(sk), &fhdr, rt); skb_shinfo(skb)->ip6_frag_id = fhdr.identification; append: diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 74581f706c4d..4016a6ef9d61 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -9,13 +9,14 @@ #include <net/addrconf.h> #include <net/secure_seq.h> -static u32 __ipv6_select_ident(u32 hashrnd, struct in6_addr *dst, - struct in6_addr *src) +static u32 __ipv6_select_ident(struct net *net, u32 hashrnd, + struct in6_addr *dst, struct in6_addr *src) { u32 hash, id; hash = __ipv6_addr_jhash(dst, hashrnd); hash = __ipv6_addr_jhash(src, hash); + hash ^= net_hash_mix(net); /* Treat id of 0 as unset and if we get 0 back from ip_idents_reserve, * set the hight order instead thus minimizing possible future @@ -36,7 +37,7 @@ static u32 __ipv6_select_ident(u32 hashrnd, struct in6_addr *dst, * * The network header must be set before calling this. */ -void ipv6_proxy_select_ident(struct sk_buff *skb) +void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb) { static u32 ip6_proxy_idents_hashrnd __read_mostly; struct in6_addr buf[2]; @@ -53,20 +54,21 @@ void ipv6_proxy_select_ident(struct sk_buff *skb) net_get_random_once(&ip6_proxy_idents_hashrnd, sizeof(ip6_proxy_idents_hashrnd)); - id = __ipv6_select_ident(ip6_proxy_idents_hashrnd, + id = __ipv6_select_ident(net, ip6_proxy_idents_hashrnd, &addrs[1], &addrs[0]); skb_shinfo(skb)->ip6_frag_id = htonl(id); } EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); -void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) +void ipv6_select_ident(struct net *net, struct frag_hdr *fhdr, + struct rt6_info *rt) { static u32 ip6_idents_hashrnd __read_mostly; u32 id; net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); - id = __ipv6_select_ident(ip6_idents_hashrnd, &rt->rt6i_dst.addr, + id = __ipv6_select_ident(net, ip6_idents_hashrnd, &rt->rt6i_dst.addr, &rt->rt6i_src.addr); fhdr->identification = htonl(id); } diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index be2c0ba82c85..7441e1e63893 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -54,7 +54,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, /* Set the IPv6 fragment id if not set yet */ if (!skb_shinfo(skb)->ip6_frag_id) - ipv6_proxy_select_ident(skb); + ipv6_proxy_select_ident(dev_net(skb->dev), skb); segs = NULL; goto out; @@ -113,7 +113,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, fptr->nexthdr = nexthdr; fptr->reserved = 0; if (!skb_shinfo(skb)->ip6_frag_id) - ipv6_proxy_select_ident(skb); + ipv6_proxy_select_ident(dev_net(skb->dev), skb); fptr->identification = skb_shinfo(skb)->ip6_frag_id; /* Fragment the skb. ipv6 header and the remaining fields of the diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index f35c15b0de6b..bf02932b7188 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -924,7 +924,8 @@ int ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { - struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); + struct net *net = skb_net(skb); + struct netns_ipvs *ipvs = net_ipvs(net); struct rtable *rt; /* Route to the other host */ __be32 saddr; /* Source for tunnel */ struct net_device *tdev; /* Device to other host */ @@ -991,7 +992,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, iph->daddr = cp->daddr.ip; iph->saddr = saddr; iph->ttl = ttl; - ip_select_ident(skb, NULL); + ip_select_ident(net, skb, NULL); /* Another hack: avoid icmp_send in ip_fragment */ skb->ignore_df = 1; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f1a65398f311..f09de7fac2e6 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -102,11 +102,6 @@ static int sctp_autobind(struct sock *sk); static void sctp_sock_migrate(struct sock *, struct sock *, struct sctp_association *, sctp_socket_type_t); -extern struct kmem_cache *sctp_bucket_cachep; -extern long sysctl_sctp_mem[3]; -extern int sysctl_sctp_rmem[3]; -extern int sysctl_sctp_wmem[3]; - static int sctp_memory_pressure; static atomic_long_t sctp_memory_allocated; struct percpu_counter sctp_sockets_allocated; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 2e9ada10fd84..26d50c565f54 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -58,10 +58,6 @@ static unsigned long max_autoclose_max = (MAX_SCHEDULE_TIMEOUT / HZ > UINT_MAX) ? UINT_MAX : MAX_SCHEDULE_TIMEOUT / HZ; -extern long sysctl_sctp_mem[3]; -extern int sysctl_sctp_rmem[3]; -extern int sysctl_sctp_wmem[3]; - static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos); diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 5aff0844d4d3..ae558dd7f8ee 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -62,21 +62,8 @@ static void tipc_bclink_lock(struct net *net) static void tipc_bclink_unlock(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_node *node = NULL; - if (likely(!tn->bclink->flags)) { - spin_unlock_bh(&tn->bclink->lock); - return; - } - - if (tn->bclink->flags & TIPC_BCLINK_RESET) { - tn->bclink->flags &= ~TIPC_BCLINK_RESET; - node = tipc_bclink_retransmit_to(net); - } spin_unlock_bh(&tn->bclink->lock); - - if (node) - tipc_link_reset_all(node); } void tipc_bclink_input(struct net *net) @@ -91,13 +78,6 @@ uint tipc_bclink_get_mtu(void) return MAX_PKT_DEFAULT_MCAST; } -void tipc_bclink_set_flags(struct net *net, unsigned int flags) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - - tn->bclink->flags |= flags; -} - static u32 bcbuf_acks(struct sk_buff *buf) { return (u32)(unsigned long)TIPC_SKB_CB(buf)->handle; @@ -156,7 +136,6 @@ static void bclink_update_last_sent(struct tipc_node *node, u32 seqno) seqno : node->bclink.last_sent; } - /** * tipc_bclink_retransmit_to - get most recent node to request retransmission * @@ -350,13 +329,12 @@ static void bclink_peek_nack(struct net *net, struct tipc_msg *msg) return; tipc_node_lock(n_ptr); - if (n_ptr->bclink.recv_permitted && (n_ptr->bclink.last_in != n_ptr->bclink.last_sent) && (n_ptr->bclink.last_in == msg_bcgap_after(msg))) n_ptr->bclink.oos_state = 2; - tipc_node_unlock(n_ptr); + tipc_node_put(n_ptr); } /* tipc_bclink_xmit - deliver buffer chain to all nodes in cluster @@ -476,17 +454,18 @@ void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) goto unlock; if (msg_destnode(msg) == tn->own_addr) { tipc_bclink_acknowledge(node, msg_bcast_ack(msg)); - tipc_node_unlock(node); tipc_bclink_lock(net); bcl->stats.recv_nacks++; tn->bclink->retransmit_to = node; bclink_retransmit_pkt(tn, msg_bcgap_after(msg), msg_bcgap_to(msg)); tipc_bclink_unlock(net); + tipc_node_unlock(node); } else { tipc_node_unlock(node); bclink_peek_nack(net, msg); } + tipc_node_put(node); goto exit; } @@ -523,11 +502,13 @@ receive: tipc_bclink_unlock(net); tipc_node_unlock(node); } else if (msg_user(msg) == MSG_FRAGMENTER) { - tipc_buf_append(&node->bclink.reasm_buf, &buf); - if (unlikely(!buf && !node->bclink.reasm_buf)) - goto unlock; tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); + tipc_buf_append(&node->bclink.reasm_buf, &buf); + if (unlikely(!buf && !node->bclink.reasm_buf)) { + tipc_bclink_unlock(net); + goto unlock; + } bcl->stats.recv_fragments++; if (buf) { bcl->stats.recv_fragmented++; @@ -589,6 +570,7 @@ receive: unlock: tipc_node_unlock(node); + tipc_node_put(node); exit: kfree_skb(buf); } @@ -829,7 +811,7 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP); if (!prop) goto attr_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->queue_limit[0])) + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->window)) goto prop_msg_full; nla_nest_end(msg->skb, prop); diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 43f397fbac55..4bdc12277d33 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -55,7 +55,6 @@ struct tipc_bcbearer_pair { struct tipc_bearer *secondary; }; -#define TIPC_BCLINK_RESET 1 #define BCBEARER MAX_BEARERS /** @@ -86,7 +85,6 @@ struct tipc_bcbearer { * @lock: spinlock governing access to structure * @link: (non-standard) broadcast link structure * @node: (non-standard) node structure representing b'cast link's peer node - * @flags: represent bclink states * @bcast_nodes: map of broadcast-capable nodes * @retransmit_to: node that most recently requested a retransmit * @@ -96,7 +94,6 @@ struct tipc_bclink { spinlock_t lock; struct tipc_link link; struct tipc_node node; - unsigned int flags; struct sk_buff_head arrvq; struct sk_buff_head inputq; struct tipc_node_map bcast_nodes; @@ -117,7 +114,6 @@ static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, int tipc_bclink_init(struct net *net); void tipc_bclink_stop(struct net *net); -void tipc_bclink_set_flags(struct net *tn, unsigned int flags); void tipc_bclink_add_node(struct net *net, u32 addr); void tipc_bclink_remove_node(struct net *net, u32 addr); struct tipc_node *tipc_bclink_retransmit_to(struct net *tn); diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 169f3dd038b9..967e292f53c8 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -260,6 +260,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *buf, } } tipc_node_unlock(node); + tipc_node_put(node); } /** diff --git a/net/tipc/link.c b/net/tipc/link.c index 8c98c4d00ad6..514466efc25c 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -139,6 +139,13 @@ static void tipc_link_put(struct tipc_link *l_ptr) kref_put(&l_ptr->ref, tipc_link_release); } +static struct tipc_link *tipc_parallel_link(struct tipc_link *l) +{ + if (l->owner->active_links[0] != l) + return l->owner->active_links[0]; + return l->owner->active_links[1]; +} + static void link_init_max_pkt(struct tipc_link *l_ptr) { struct tipc_node *node = l_ptr->owner; @@ -310,7 +317,6 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, link_init_max_pkt(l_ptr); l_ptr->priority = b_ptr->priority; tipc_link_set_queue_limits(l_ptr, b_ptr->window); - l_ptr->next_out_no = 1; __skb_queue_head_init(&l_ptr->transmq); __skb_queue_head_init(&l_ptr->backlogq); @@ -368,28 +374,43 @@ void tipc_link_delete_list(struct net *net, unsigned int bearer_id, } /** - * link_schedule_user - schedule user for wakeup after congestion + * link_schedule_user - schedule a message sender for wakeup after congestion * @link: congested link - * @oport: sending port - * @chain_sz: size of buffer chain that was attempted sent - * @imp: importance of message attempted sent + * @list: message that was attempted sent * Create pseudo msg to send back to user when congestion abates + * Only consumes message if there is an error */ -static bool link_schedule_user(struct tipc_link *link, u32 oport, - uint chain_sz, uint imp) +static int link_schedule_user(struct tipc_link *link, struct sk_buff_head *list) { - struct sk_buff *buf; + struct tipc_msg *msg = buf_msg(skb_peek(list)); + int imp = msg_importance(msg); + u32 oport = msg_origport(msg); + u32 addr = link_own_addr(link); + struct sk_buff *skb; - buf = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0, - link_own_addr(link), link_own_addr(link), - oport, 0, 0); - if (!buf) - return false; - TIPC_SKB_CB(buf)->chain_sz = chain_sz; - TIPC_SKB_CB(buf)->chain_imp = imp; - skb_queue_tail(&link->wakeupq, buf); + /* This really cannot happen... */ + if (unlikely(imp > TIPC_CRITICAL_IMPORTANCE)) { + pr_warn("%s<%s>, send queue full", link_rst_msg, link->name); + tipc_link_reset(link); + goto err; + } + /* Non-blocking sender: */ + if (TIPC_SKB_CB(skb_peek(list))->wakeup_pending) + return -ELINKCONG; + + /* Create and schedule wakeup pseudo message */ + skb = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0, + addr, addr, oport, 0, 0); + if (!skb) + goto err; + TIPC_SKB_CB(skb)->chain_sz = skb_queue_len(list); + TIPC_SKB_CB(skb)->chain_imp = imp; + skb_queue_tail(&link->wakeupq, skb); link->stats.link_congs++; - return true; + return -ELINKCONG; +err: + __skb_queue_purge(list); + return -ENOBUFS; } /** @@ -398,19 +419,22 @@ static bool link_schedule_user(struct tipc_link *link, u32 oport, * Move a number of waiting users, as permitted by available space in * the send queue, from link wait queue to node wait queue for wakeup */ -void link_prepare_wakeup(struct tipc_link *link) +void link_prepare_wakeup(struct tipc_link *l) { - uint pend_qsz = skb_queue_len(&link->backlogq); + int pnd[TIPC_SYSTEM_IMPORTANCE + 1] = {0,}; + int imp, lim; struct sk_buff *skb, *tmp; - skb_queue_walk_safe(&link->wakeupq, skb, tmp) { - if (pend_qsz >= link->queue_limit[TIPC_SKB_CB(skb)->chain_imp]) + skb_queue_walk_safe(&l->wakeupq, skb, tmp) { + imp = TIPC_SKB_CB(skb)->chain_imp; + lim = l->window + l->backlog[imp].limit; + pnd[imp] += TIPC_SKB_CB(skb)->chain_sz; + if ((pnd[imp] + l->backlog[imp].len) >= lim) break; - pend_qsz += TIPC_SKB_CB(skb)->chain_sz; - skb_unlink(skb, &link->wakeupq); - skb_queue_tail(&link->inputq, skb); - link->owner->inputq = &link->inputq; - link->owner->action_flags |= TIPC_MSG_EVT; + skb_unlink(skb, &l->wakeupq); + skb_queue_tail(&l->inputq, skb); + l->owner->inputq = &l->inputq; + l->owner->action_flags |= TIPC_MSG_EVT; } } @@ -424,6 +448,16 @@ void tipc_link_reset_fragments(struct tipc_link *l_ptr) l_ptr->reasm_buf = NULL; } +static void tipc_link_purge_backlog(struct tipc_link *l) +{ + __skb_queue_purge(&l->backlogq); + l->backlog[TIPC_LOW_IMPORTANCE].len = 0; + l->backlog[TIPC_MEDIUM_IMPORTANCE].len = 0; + l->backlog[TIPC_HIGH_IMPORTANCE].len = 0; + l->backlog[TIPC_CRITICAL_IMPORTANCE].len = 0; + l->backlog[TIPC_SYSTEM_IMPORTANCE].len = 0; +} + /** * tipc_link_purge_queues - purge all pkt queues associated with link * @l_ptr: pointer to link @@ -432,7 +466,7 @@ void tipc_link_purge_queues(struct tipc_link *l_ptr) { __skb_queue_purge(&l_ptr->deferdq); __skb_queue_purge(&l_ptr->transmq); - __skb_queue_purge(&l_ptr->backlogq); + tipc_link_purge_backlog(l_ptr); tipc_link_reset_fragments(l_ptr); } @@ -466,13 +500,13 @@ void tipc_link_reset(struct tipc_link *l_ptr) /* Clean up all queues, except inputq: */ __skb_queue_purge(&l_ptr->transmq); - __skb_queue_purge(&l_ptr->backlogq); __skb_queue_purge(&l_ptr->deferdq); if (!owner->inputq) owner->inputq = &l_ptr->inputq; skb_queue_splice_init(&l_ptr->wakeupq, owner->inputq); if (!skb_queue_empty(owner->inputq)) owner->action_flags |= TIPC_MSG_EVT; + tipc_link_purge_backlog(l_ptr); l_ptr->rcv_unacked = 0; l_ptr->checkpoint = 1; l_ptr->next_out_no = 1; @@ -696,48 +730,15 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) } } -/* tipc_link_cong: determine return value and how to treat the - * sent buffer during link congestion. - * - For plain, errorless user data messages we keep the buffer and - * return -ELINKONG. - * - For all other messages we discard the buffer and return -EHOSTUNREACH - * - For TIPC internal messages we also reset the link - */ -static int tipc_link_cong(struct tipc_link *link, struct sk_buff_head *list) -{ - struct sk_buff *skb = skb_peek(list); - struct tipc_msg *msg = buf_msg(skb); - int imp = msg_importance(msg); - u32 oport = msg_tot_origport(msg); - - if (unlikely(imp > TIPC_CRITICAL_IMPORTANCE)) { - pr_warn("%s<%s>, send queue full", link_rst_msg, link->name); - tipc_link_reset(link); - goto drop; - } - if (unlikely(msg_errcode(msg))) - goto drop; - if (unlikely(msg_reroute_cnt(msg))) - goto drop; - if (TIPC_SKB_CB(skb)->wakeup_pending) - return -ELINKCONG; - if (link_schedule_user(link, oport, skb_queue_len(list), imp)) - return -ELINKCONG; -drop: - __skb_queue_purge(list); - return -EHOSTUNREACH; -} - /** * __tipc_link_xmit(): same as tipc_link_xmit, but destlink is known & locked * @link: link to use * @list: chain of buffers containing message * - * Consumes the buffer chain, except when returning -ELINKCONG - * Returns 0 if success, otherwise errno: -ELINKCONG, -EMSGSIZE (plain socket - * user data messages) or -EHOSTUNREACH (all other messages/senders) - * Only the socket functions tipc_send_stream() and tipc_send_packet() need - * to act on the return value, since they may need to do more send attempts. + * Consumes the buffer chain, except when returning -ELINKCONG, + * since the caller then may want to make more send attempts. + * Returns 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS + * Messages at TIPC_SYSTEM_IMPORTANCE are always accepted */ int __tipc_link_xmit(struct net *net, struct tipc_link *link, struct sk_buff_head *list) @@ -754,16 +755,14 @@ int __tipc_link_xmit(struct net *net, struct tipc_link *link, struct sk_buff_head *backlogq = &link->backlogq; struct sk_buff *skb, *tmp; - /* Match queue limit against msg importance: */ - if (unlikely(skb_queue_len(backlogq) >= link->queue_limit[imp])) - return tipc_link_cong(link, list); + /* Match backlog limit against msg importance: */ + if (unlikely(link->backlog[imp].len >= link->backlog[imp].limit)) + return link_schedule_user(link, list); - /* Has valid packet limit been used ? */ if (unlikely(msg_size(msg) > mtu)) { __skb_queue_purge(list); return -EMSGSIZE; } - /* Prepare each packet for sending, and add to relevant queue: */ skb_queue_walk_safe(list, skb, tmp) { __skb_unlink(skb, list); @@ -786,8 +785,10 @@ int __tipc_link_xmit(struct net *net, struct tipc_link *link, if (tipc_msg_make_bundle(&skb, mtu, link->addr)) { link->stats.sent_bundled++; link->stats.sent_bundles++; + imp = msg_importance(buf_msg(skb)); } __skb_queue_tail(backlogq, skb); + link->backlog[imp].len++; seqno++; } link->next_out_no = seqno; @@ -808,13 +809,25 @@ static int __tipc_link_xmit_skb(struct tipc_link *link, struct sk_buff *skb) return __tipc_link_xmit(link->owner->net, link, &head); } +/* tipc_link_xmit_skb(): send single buffer to destination + * Buffers sent via this functon are generally TIPC_SYSTEM_IMPORTANCE + * messages, which will not be rejected + * The only exception is datagram messages rerouted after secondary + * lookup, which are rare and safe to dispose of anyway. + * TODO: Return real return value, and let callers use + * tipc_wait_for_sendpkt() where applicable + */ int tipc_link_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, u32 selector) { struct sk_buff_head head; + int rc; skb2list(skb, &head); - return tipc_link_xmit(net, &head, dnode, selector); + rc = tipc_link_xmit(net, &head, dnode, selector); + if (rc == -ELINKCONG) + kfree_skb(skb); + return 0; } /** @@ -841,6 +854,7 @@ int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, if (link) rc = __tipc_link_xmit(net, link, list); tipc_node_unlock(node); + tipc_node_put(node); } if (link) return rc; @@ -914,6 +928,7 @@ void tipc_link_push_packets(struct tipc_link *link) if (!skb) break; msg = buf_msg(skb); + link->backlog[msg_importance(msg)].len--; msg_set_ack(msg, ack); msg_set_bcast_ack(msg, link->owner->bclink.last_in); link->rcv_unacked = 0; @@ -966,7 +981,6 @@ static void link_retransmit_failure(struct tipc_link *l_ptr, (unsigned long) TIPC_SKB_CB(buf)->handle); n_ptr = tipc_bclink_retransmit_to(net); - tipc_node_lock(n_ptr); tipc_addr_string_fill(addr_string, n_ptr->addr); pr_info("Broadcast link info for %s\n", addr_string); @@ -978,9 +992,7 @@ static void link_retransmit_failure(struct tipc_link *l_ptr, n_ptr->bclink.oos_state, n_ptr->bclink.last_sent); - tipc_node_unlock(n_ptr); - - tipc_bclink_set_flags(net, TIPC_BCLINK_RESET); + n_ptr->action_flags |= TIPC_BCAST_RESET; l_ptr->stale_count = 0; } } @@ -1019,6 +1031,32 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *skb, } } +/* link_synch(): check if all packets arrived before the synch + * point have been consumed + * Returns true if the parallel links are synched, otherwise false + */ +static bool link_synch(struct tipc_link *l) +{ + unsigned int post_synch; + struct tipc_link *pl; + + pl = tipc_parallel_link(l); + if (pl == l) + goto synched; + + /* Was last pre-synch packet added to input queue ? */ + if (less_eq(pl->next_in_no, l->synch_point)) + return false; + + /* Is it still in the input queue ? */ + post_synch = mod(pl->next_in_no - l->synch_point) - 1; + if (skb_queue_len(&pl->inputq) > post_synch) + return false; +synched: + l->flags &= ~LINK_SYNCHING; + return true; +} + static void link_retrieve_defq(struct tipc_link *link, struct sk_buff_head *list) { @@ -1079,8 +1117,8 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) n_ptr = tipc_node_find(net, msg_prevnode(msg)); if (unlikely(!n_ptr)) goto discard; - tipc_node_lock(n_ptr); + tipc_node_lock(n_ptr); /* Locate unicast link endpoint that should handle message */ l_ptr = n_ptr->links[b_ptr->identity]; if (unlikely(!l_ptr)) @@ -1149,6 +1187,14 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) skb = NULL; goto unlock; } + /* Synchronize with parallel link if applicable */ + if (unlikely((l_ptr->flags & LINK_SYNCHING) && !msg_dup(msg))) { + link_handle_out_of_seq_msg(l_ptr, skb); + if (link_synch(l_ptr)) + link_retrieve_defq(l_ptr, &head); + skb = NULL; + goto unlock; + } l_ptr->next_in_no++; if (unlikely(!skb_queue_empty(&l_ptr->deferdq))) link_retrieve_defq(l_ptr, &head); @@ -1160,6 +1206,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) skb = NULL; unlock: tipc_node_unlock(n_ptr); + tipc_node_put(n_ptr); discard: if (unlikely(skb)) kfree_skb(skb); @@ -1224,6 +1271,10 @@ static void tipc_link_input(struct tipc_link *link, struct sk_buff *skb) switch (msg_user(msg)) { case CHANGEOVER_PROTOCOL: + if (msg_dup(msg)) { + link->flags |= LINK_SYNCHING; + link->synch_point = msg_seqno(msg_get_wrapped(msg)); + } if (!tipc_link_tunnel_rcv(node, &skb)) break; if (msg_user(buf_msg(skb)) != MSG_BUNDLER) { @@ -1610,6 +1661,7 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr) tipc_msg_init(link_own_addr(l_ptr), &tunnel_hdr, CHANGEOVER_PROTOCOL, ORIGINAL_MSG, INT_H_SIZE, l_ptr->addr); skb_queue_splice_tail_init(&l_ptr->backlogq, &l_ptr->transmq); + tipc_link_purge_backlog(l_ptr); msgcount = skb_queue_len(&l_ptr->transmq); msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); msg_set_msgcnt(&tunnel_hdr, msgcount); @@ -1817,11 +1869,11 @@ void tipc_link_set_queue_limits(struct tipc_link *l, u32 win) int max_bulk = TIPC_MAX_PUBLICATIONS / (l->max_pkt / ITEM_SIZE); l->window = win; - l->queue_limit[TIPC_LOW_IMPORTANCE] = win / 2; - l->queue_limit[TIPC_MEDIUM_IMPORTANCE] = win; - l->queue_limit[TIPC_HIGH_IMPORTANCE] = win / 2 * 3; - l->queue_limit[TIPC_CRITICAL_IMPORTANCE] = win * 2; - l->queue_limit[TIPC_SYSTEM_IMPORTANCE] = max_bulk; + l->backlog[TIPC_LOW_IMPORTANCE].limit = win / 2; + l->backlog[TIPC_MEDIUM_IMPORTANCE].limit = win; + l->backlog[TIPC_HIGH_IMPORTANCE].limit = win / 2 * 3; + l->backlog[TIPC_CRITICAL_IMPORTANCE].limit = win * 2; + l->backlog[TIPC_SYSTEM_IMPORTANCE].limit = max_bulk; } /* tipc_link_find_owner - locate owner node of link by link's name @@ -2120,7 +2172,7 @@ static int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, link->tolerance)) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, - link->queue_limit[TIPC_LOW_IMPORTANCE])) + link->window)) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, link->priority)) goto prop_msg_full; @@ -2186,7 +2238,6 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) msg.seq = cb->nlh->nlmsg_seq; rcu_read_lock(); - if (prev_node) { node = tipc_node_find(net, prev_node); if (!node) { @@ -2199,6 +2250,7 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->prev_seq = 1; goto out; } + tipc_node_put(node); list_for_each_entry_continue_rcu(node, &tn->node_list, list) { @@ -2206,6 +2258,7 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) err = __tipc_nl_add_node_links(net, &msg, node, &prev_link); tipc_node_unlock(node); + tipc_node_put(node); if (err) goto out; diff --git a/net/tipc/link.h b/net/tipc/link.h index eec3ecf2d450..d2b5663643da 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -60,6 +60,7 @@ */ #define LINK_STARTED 0x0001 #define LINK_STOPPED 0x0002 +#define LINK_SYNCHING 0x0004 /* Starting value for maximum packet size negotiation on unicast links * (unless bearer MTU is less) @@ -118,7 +119,7 @@ struct tipc_stats { * @pmsg: convenience pointer to "proto_msg" field * @priority: current link priority * @net_plane: current link network plane ('A' through 'H') - * @queue_limit: outbound message queue congestion thresholds (indexed by user) + * @backlog_limit: backlog queue congestion thresholds (indexed by importance) * @exp_msg_count: # of tunnelled messages expected during link changeover * @reset_checkpoint: seq # of last acknowledged message at time of link reset * @max_pkt: current maximum packet size for this link @@ -166,11 +167,11 @@ struct tipc_link { struct tipc_msg *pmsg; u32 priority; char net_plane; - u32 queue_limit[15]; /* queue_limit[0]==window limit */ /* Changeover */ u32 exp_msg_count; u32 reset_checkpoint; + u32 synch_point; /* Max packet negotiation */ u32 max_pkt; @@ -180,6 +181,10 @@ struct tipc_link { /* Sending */ struct sk_buff_head transmq; struct sk_buff_head backlogq; + struct { + u16 len; + u16 limit; + } backlog[5]; u32 next_out_no; u32 window; u32 last_retransmitted; diff --git a/net/tipc/msg.h b/net/tipc/msg.h index bd3969a80dd4..d273207ede28 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -240,6 +240,15 @@ static inline void msg_set_size(struct tipc_msg *m, u32 sz) m->hdr[0] = htonl((msg_word(m, 0) & ~0x1ffff) | sz); } +static inline unchar *msg_data(struct tipc_msg *m) +{ + return ((unchar *)m) + msg_hdr_sz(m); +} + +static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) +{ + return (struct tipc_msg *)msg_data(m); +} /* * Word 1 @@ -372,6 +381,8 @@ static inline void msg_set_prevnode(struct tipc_msg *m, u32 a) static inline u32 msg_origport(struct tipc_msg *m) { + if (msg_user(m) == MSG_FRAGMENTER) + m = msg_get_wrapped(m); return msg_word(m, 4); } @@ -467,16 +478,6 @@ static inline void msg_set_nameupper(struct tipc_msg *m, u32 n) msg_set_word(m, 10, n); } -static inline unchar *msg_data(struct tipc_msg *m) -{ - return ((unchar *)m) + msg_hdr_sz(m); -} - -static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) -{ - return (struct tipc_msg *)msg_data(m); -} - /* * Constants and routines used to read and write TIPC internal message headers */ @@ -553,6 +554,14 @@ static inline void msg_set_node_capabilities(struct tipc_msg *m, u32 n) msg_set_bits(m, 1, 15, 0x1fff, n); } +static inline bool msg_dup(struct tipc_msg *m) +{ + if (likely(msg_user(m) != CHANGEOVER_PROTOCOL)) + return false; + if (msg_type(m) != DUPLICATE_MSG) + return false; + return true; +} /* * Word 2 @@ -753,13 +762,6 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n) msg_set_bits(m, 9, 0, 0xffff, n); } -static inline u32 msg_tot_origport(struct tipc_msg *m) -{ - if ((msg_user(m) == MSG_FRAGMENTER) && (msg_type(m) == FIRST_FRAGMENT)) - return msg_origport(msg_get_wrapped(m)); - return msg_origport(m); -} - struct sk_buff *tipc_buf_acquire(u32 size); bool tipc_msg_validate(struct sk_buff *skb); bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode, diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 506aaa565da7..41e7b7e4dda0 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -244,6 +244,7 @@ static void tipc_publ_subscribe(struct net *net, struct publication *publ, tipc_node_lock(node); list_add_tail(&publ->nodesub_list, &node->publ_list); tipc_node_unlock(node); + tipc_node_put(node); } static void tipc_publ_unsubscribe(struct net *net, struct publication *publ, @@ -258,6 +259,7 @@ static void tipc_publ_unsubscribe(struct net *net, struct publication *publ, tipc_node_lock(node); list_del_init(&publ->nodesub_list); tipc_node_unlock(node); + tipc_node_put(node); } /** diff --git a/net/tipc/node.c b/net/tipc/node.c index 26d1de1bf34d..3e4f04897c03 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -42,6 +42,7 @@ static void node_lost_contact(struct tipc_node *n_ptr); static void node_established_contact(struct tipc_node *n_ptr); +static void tipc_node_delete(struct tipc_node *node); struct tipc_sock_conn { u32 port; @@ -67,6 +68,23 @@ static unsigned int tipc_hashfn(u32 addr) return addr & (NODE_HTABLE_SIZE - 1); } +static void tipc_node_kref_release(struct kref *kref) +{ + struct tipc_node *node = container_of(kref, struct tipc_node, kref); + + tipc_node_delete(node); +} + +void tipc_node_put(struct tipc_node *node) +{ + kref_put(&node->kref, tipc_node_kref_release); +} + +static void tipc_node_get(struct tipc_node *node) +{ + kref_get(&node->kref); +} + /* * tipc_node_find - locate specified node object, if it exists */ @@ -82,6 +100,7 @@ struct tipc_node *tipc_node_find(struct net *net, u32 addr) hlist_for_each_entry_rcu(node, &tn->node_htable[tipc_hashfn(addr)], hash) { if (node->addr == addr) { + tipc_node_get(node); rcu_read_unlock(); return node; } @@ -106,6 +125,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr) } n_ptr->addr = addr; n_ptr->net = net; + kref_init(&n_ptr->kref); spin_lock_init(&n_ptr->lock); INIT_HLIST_NODE(&n_ptr->hash); INIT_LIST_HEAD(&n_ptr->list); @@ -120,16 +140,17 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr) list_add_tail_rcu(&n_ptr->list, &temp_node->list); n_ptr->action_flags = TIPC_WAIT_PEER_LINKS_DOWN; n_ptr->signature = INVALID_NODE_SIG; + tipc_node_get(n_ptr); exit: spin_unlock_bh(&tn->node_list_lock); return n_ptr; } -static void tipc_node_delete(struct tipc_net *tn, struct tipc_node *n_ptr) +static void tipc_node_delete(struct tipc_node *node) { - list_del_rcu(&n_ptr->list); - hlist_del_rcu(&n_ptr->hash); - kfree_rcu(n_ptr, rcu); + list_del_rcu(&node->list); + hlist_del_rcu(&node->hash); + kfree_rcu(node, rcu); } void tipc_node_stop(struct net *net) @@ -139,7 +160,7 @@ void tipc_node_stop(struct net *net) spin_lock_bh(&tn->node_list_lock); list_for_each_entry_safe(node, t_node, &tn->node_list, list) - tipc_node_delete(tn, node); + tipc_node_put(node); spin_unlock_bh(&tn->node_list_lock); } @@ -147,6 +168,7 @@ int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port) { struct tipc_node *node; struct tipc_sock_conn *conn; + int err = 0; if (in_own_node(net, dnode)) return 0; @@ -157,8 +179,10 @@ int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port) return -EHOSTUNREACH; } conn = kmalloc(sizeof(*conn), GFP_ATOMIC); - if (!conn) - return -EHOSTUNREACH; + if (!conn) { + err = -EHOSTUNREACH; + goto exit; + } conn->peer_node = dnode; conn->port = port; conn->peer_port = peer_port; @@ -166,7 +190,9 @@ int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port) tipc_node_lock(node); list_add_tail(&conn->list, &node->conn_sks); tipc_node_unlock(node); - return 0; +exit: + tipc_node_put(node); + return err; } void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port) @@ -189,6 +215,7 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port) kfree(conn); } tipc_node_unlock(node); + tipc_node_put(node); } /** @@ -417,19 +444,25 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr, char *linkname, size_t len) { struct tipc_link *link; + int err = -EINVAL; struct tipc_node *node = tipc_node_find(net, addr); - if ((bearer_id >= MAX_BEARERS) || !node) - return -EINVAL; + if (!node) + return err; + + if (bearer_id >= MAX_BEARERS) + goto exit; + tipc_node_lock(node); link = node->links[bearer_id]; if (link) { strncpy(linkname, link->name, len); - tipc_node_unlock(node); - return 0; + err = 0; } +exit: tipc_node_unlock(node); - return -EINVAL; + tipc_node_put(node); + return err; } void tipc_node_unlock(struct tipc_node *node) @@ -459,7 +492,7 @@ void tipc_node_unlock(struct tipc_node *node) TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP | TIPC_WAKEUP_BCAST_USERS | TIPC_BCAST_MSG_EVT | - TIPC_NAMED_MSG_EVT); + TIPC_NAMED_MSG_EVT | TIPC_BCAST_RESET); spin_unlock_bh(&node->lock); @@ -488,6 +521,9 @@ void tipc_node_unlock(struct tipc_node *node) if (flags & TIPC_BCAST_MSG_EVT) tipc_bclink_input(net); + + if (flags & TIPC_BCAST_RESET) + tipc_link_reset_all(node); } /* Caller should hold node lock for the passed node */ @@ -542,17 +578,21 @@ int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb) msg.seq = cb->nlh->nlmsg_seq; rcu_read_lock(); - - if (last_addr && !tipc_node_find(net, last_addr)) { - rcu_read_unlock(); - /* We never set seq or call nl_dump_check_consistent() this - * means that setting prev_seq here will cause the consistence - * check to fail in the netlink callback handler. Resulting in - * the NLMSG_DONE message having the NLM_F_DUMP_INTR flag set if - * the node state changed while we released the lock. - */ - cb->prev_seq = 1; - return -EPIPE; + if (last_addr) { + node = tipc_node_find(net, last_addr); + if (!node) { + rcu_read_unlock(); + /* We never set seq or call nl_dump_check_consistent() + * this means that setting prev_seq here will cause the + * consistence check to fail in the netlink callback + * handler. Resulting in the NLMSG_DONE message having + * the NLM_F_DUMP_INTR flag set if the node state + * changed while we released the lock. + */ + cb->prev_seq = 1; + return -EPIPE; + } + tipc_node_put(node); } list_for_each_entry_rcu(node, &tn->node_list, list) { diff --git a/net/tipc/node.h b/net/tipc/node.h index e89ac04ec2c3..02d5c20dc551 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -64,7 +64,8 @@ enum { TIPC_NOTIFY_LINK_UP = (1 << 6), TIPC_NOTIFY_LINK_DOWN = (1 << 7), TIPC_NAMED_MSG_EVT = (1 << 8), - TIPC_BCAST_MSG_EVT = (1 << 9) + TIPC_BCAST_MSG_EVT = (1 << 9), + TIPC_BCAST_RESET = (1 << 10) }; /** @@ -93,6 +94,7 @@ struct tipc_node_bclink { /** * struct tipc_node - TIPC node structure * @addr: network address of node + * @ref: reference counter to node object * @lock: spinlock governing access to structure * @net: the applicable net namespace * @hash: links to adjacent nodes in unsorted hash chain @@ -114,6 +116,7 @@ struct tipc_node_bclink { */ struct tipc_node { u32 addr; + struct kref kref; spinlock_t lock; struct net *net; struct hlist_node hash; @@ -136,6 +139,7 @@ struct tipc_node { }; struct tipc_node *tipc_node_find(struct net *net, u32 addr); +void tipc_node_put(struct tipc_node *node); struct tipc_node *tipc_node_create(struct net *net, u32 addr); void tipc_node_stop(struct net *net); void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); @@ -170,10 +174,12 @@ static inline uint tipc_node_get_mtu(struct net *net, u32 addr, u32 selector) node = tipc_node_find(net, addr); - if (likely(node)) + if (likely(node)) { mtu = node->act_mtus[selector & 1]; - else + tipc_node_put(node); + } else { mtu = MAX_MSG_SIZE; + } return mtu; } |