// SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause

/* Packet transmit logic for Mellanox Gigabit Ethernet driver
 *
 * Copyright (C) 2020-2021 NVIDIA CORPORATION & AFFILIATES
 */

#include <linux/skbuff.h>

#include "mlxbf_gige.h"
#include "mlxbf_gige_regs.h"

/* Transmit Initialization
 * 1) Allocates TX WQE array using coherent DMA mapping
 * 2) Allocates TX completion counter using coherent DMA mapping
 * 3) Programs the WQE base address, completion-counter address and
 *    log2 ring size into the corresponding MMIO registers
 */
int mlxbf_gige_tx_init(struct mlxbf_gige *priv)
{
    size_t size;

    size = MLXBF_GIGE_TX_WQE_SZ * priv->tx_q_entries;
    priv->tx_wqe_base = dma_alloc_coherent(priv->dev, size,
                                           &priv->tx_wqe_base_dma,
                                           GFP_KERNEL);
    if (!priv->tx_wqe_base)
        return -ENOMEM;

    priv->tx_wqe_next = priv->tx_wqe_base;

    /* Write TX WQE base address into MMIO reg */
    writeq(priv->tx_wqe_base_dma, priv->base + MLXBF_GIGE_TX_WQ_BASE);

    /* Allocate address for TX completion count */
    priv->tx_cc = dma_alloc_coherent(priv->dev, MLXBF_GIGE_TX_CC_SZ,
                                     &priv->tx_cc_dma, GFP_KERNEL);
    if (!priv->tx_cc) {
        dma_free_coherent(priv->dev, size,
                          priv->tx_wqe_base, priv->tx_wqe_base_dma);
        return -ENOMEM;
    }

    /* Write TX CC base address into MMIO reg */
    writeq(priv->tx_cc_dma, priv->base + MLXBF_GIGE_TX_CI_UPDATE_ADDRESS);

    writeq(ilog2(priv->tx_q_entries),
           priv->base + MLXBF_GIGE_TX_WQ_SIZE_LOG2);

    priv->prev_tx_ci = 0;
    priv->tx_pi = 0;

    return 0;
}

/* Transmit Deinitialization
 * This routine frees the allocations done by mlxbf_gige_tx_init(),
 * namely the TX WQE array and the TX completion counter, after
 * unmapping and freeing any SKBs still pending transmission, and
 * clears the TX base-address MMIO registers.
 */
void mlxbf_gige_tx_deinit(struct mlxbf_gige *priv)
{
    u64 *tx_wqe_addr;
    size_t size;
    int i;

    tx_wqe_addr = priv->tx_wqe_base;

    for (i = 0; i < priv->tx_q_entries; i++) {
        if (priv->tx_skb[i]) {
            dma_unmap_single(priv->dev, *tx_wqe_addr,
                             priv->tx_skb[i]->len, DMA_TO_DEVICE);
            dev_kfree_skb(priv->tx_skb[i]);
            priv->tx_skb[i] = NULL;
        }
        tx_wqe_addr += 2;
    }

    size = MLXBF_GIGE_TX_WQE_SZ * priv->tx_q_entries;
    dma_free_coherent(priv->dev, size,
                      priv->tx_wqe_base, priv->tx_wqe_base_dma);

    dma_free_coherent(priv->dev, MLXBF_GIGE_TX_CC_SZ,
                      priv->tx_cc, priv->tx_cc_dma);

    priv->tx_wqe_base = NULL;
    priv->tx_wqe_base_dma = 0;
    priv->tx_cc = NULL;
    priv->tx_cc_dma = 0;
    priv->tx_wqe_next = NULL;
    writeq(0, priv->base + MLXBF_GIGE_TX_WQ_BASE);
    writeq(0, priv->base + MLXBF_GIGE_TX_CI_UPDATE_ADDRESS);
}

/* Function that returns status of TX ring:
 *          0: TX ring is full, i.e. there are no
 *             unused entries left in the TX ring.
 *   non-zero: TX ring is not full, i.e. there are
 *             some available entries in the TX ring.
 *             The non-zero value is a measure of
 *             how many TX entries are available, but
 *             it is not the exact number of available
 *             entries (see below).
 *
 * The algorithm assumes that if (prev_tx_ci == tx_pi)
 * then the TX ring is empty.  An empty ring therefore
 * reports (tx_q_entries - 1) available entries, i.e. the
 * usable capacity is one less than the ring size, which
 * is what allows an empty ring to be distinguished from
 * a full one.
 */
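/* Worked example (illustrative ring size of 128 entries):
 * with prev_tx_ci = 5 and tx_pi = 68 there are 63 WQEs in flight,
 * so ((128 + 5 - 68) % 128) - 1 = 64 entries are still available;
 * with tx_pi = 132 (127 in flight, ring full) the same expression
 * yields ((128 + 5 - 132) % 128) - 1 = 0.
 */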
static u16 mlxbf_gige_tx_buffs_avail(struct mlxbf_gige *priv)
{
    unsigned long flags;
    u16 avail;

    spin_lock_irqsave(&priv->lock, flags);

    if (priv->prev_tx_ci == priv->tx_pi)
        avail = priv->tx_q_entries - 1;
    else
        avail = ((priv->tx_q_entries + priv->prev_tx_ci - priv->tx_pi)
                 % priv->tx_q_entries) - 1;

    spin_unlock_irqrestore(&priv->lock, flags);

    return avail;
}

bool mlxbf_gige_handle_tx_complete(struct mlxbf_gige *priv)
{
    struct net_device_stats *stats;
    u16 tx_wqe_index;
    u64 *tx_wqe_addr;
    u64 tx_status;
    u16 tx_ci;

    tx_status = readq(priv->base + MLXBF_GIGE_TX_STATUS);
    if (tx_status & MLXBF_GIGE_TX_STATUS_DATA_FIFO_FULL)
        priv->stats.tx_fifo_full++;
    tx_ci = readq(priv->base + MLXBF_GIGE_TX_CONSUMER_INDEX);
    stats = &priv->netdev->stats;

    /* Transmit completion logic needs to loop until the completion
     * index (in SW) equals TX consumer index (from HW).  These
     * parameters are unsigned 16-bit values and the wrap case needs
     * to be supported, that is TX consumer index wrapped from 0xFFFF
     * to 0 while TX completion index is still < 0xFFFF.
     */
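    /* For example, if prev_tx_ci is 0xFFFE and the HW consumer index
     * has advanced to 0x0001, the loop below processes entries 0xFFFE,
     * 0xFFFF and 0x0000, with prev_tx_ci wrapping naturally as a u16.
     */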
    for (; priv->prev_tx_ci != tx_ci; priv->prev_tx_ci++) {
        tx_wqe_index = priv->prev_tx_ci % priv->tx_q_entries;
        /* Each TX WQE is 16 bytes: the first 64-bit word holds the
         * DMA address of the 2KB TX buffer and the second word holds
         * TX WQE metadata such as the packet length.
         */
        tx_wqe_addr = priv->tx_wqe_base +
                      (tx_wqe_index * MLXBF_GIGE_TX_WQE_SZ_QWORDS);

        stats->tx_packets++;
        stats->tx_bytes += MLXBF_GIGE_TX_WQE_PKT_LEN(tx_wqe_addr);

        dma_unmap_single(priv->dev, *tx_wqe_addr,
                         priv->tx_skb[tx_wqe_index]->len, DMA_TO_DEVICE);
        dev_consume_skb_any(priv->tx_skb[tx_wqe_index]);
        priv->tx_skb[tx_wqe_index] = NULL;

        /* Ensure completion of updates across all cores */
        mb();
    }

    /* Since the TX ring was likely just drained, check if TX queue
     * had previously been stopped and now that there are TX buffers
     * available the TX queue can be awakened.
     */
    if (netif_queue_stopped(priv->netdev) &&
        mlxbf_gige_tx_buffs_avail(priv))
        netif_wake_queue(priv->netdev);

    return true;
}

/* Function to advance the tx_wqe_next pointer to next TX WQE */
void mlxbf_gige_update_tx_wqe_next(struct mlxbf_gige *priv)
{
    /* Advance tx_wqe_next pointer */
    priv->tx_wqe_next += MLXBF_GIGE_TX_WQE_SZ_QWORDS;

    /* Check if 'next' pointer is beyond end of TX ring */
    /* If so, set 'next' back to 'base' pointer of ring */
    if (priv->tx_wqe_next == (priv->tx_wqe_base +
                              (priv->tx_q_entries * MLXBF_GIGE_TX_WQE_SZ_QWORDS)))
        priv->tx_wqe_next = priv->tx_wqe_base;
}

netdev_tx_t mlxbf_gige_start_xmit(struct sk_buff *skb,
                                  struct net_device *netdev)
{
    struct mlxbf_gige *priv = netdev_priv(netdev);
    long buff_addr, start_dma_page, end_dma_page;
    struct sk_buff *tx_skb;
    dma_addr_t tx_buf_dma;
    unsigned long flags;
    u64 *tx_wqe_addr;
    u64 word2;

    /* If needed, linearize the TX SKB, since the hardware DMA engine
     * expects a single contiguous buffer; drop SKBs that are larger
     * than the TX buffer or that cannot be linearized.
     */
    if (skb->len > MLXBF_GIGE_DEFAULT_BUF_SZ || skb_linearize(skb)) {
        dev_kfree_skb(skb);
        netdev->stats.tx_dropped++;
        return NETDEV_TX_OK;
    }

    buff_addr = (long)skb->data;
    start_dma_page = buff_addr >> MLXBF_GIGE_DMA_PAGE_SHIFT;
    end_dma_page   = (buff_addr + skb->len - 1) >> MLXBF_GIGE_DMA_PAGE_SHIFT;

    /* Verify that the payload pointer and data length of the SKB to be
     * transmitted do not violate the hardware DMA limitation: the
     * buffer must not cross a DMA page boundary.
     */
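    /* Illustration: a payload that starts just below a DMA page
     * boundary and extends past it yields start_dma_page !=
     * end_dma_page, so it is copied below into a newly allocated
     * aligned buffer obtained via mlxbf_gige_alloc_skb().
     */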
    if (start_dma_page != end_dma_page) {
        /* DMA operation would fail as-is, alloc new aligned SKB */
        tx_skb = mlxbf_gige_alloc_skb(priv, skb->len,
                                      &tx_buf_dma, DMA_TO_DEVICE);
        if (!tx_skb) {
            /* Free original skb, could not alloc new aligned SKB */
            dev_kfree_skb(skb);
            netdev->stats.tx_dropped++;
            return NETDEV_TX_OK;
        }

        skb_put_data(tx_skb, skb->data, skb->len);

        /* Free the original SKB */
        dev_kfree_skb(skb);
    } else {
        tx_skb = skb;
        tx_buf_dma = dma_map_single(priv->dev, skb->data,
                                    skb->len, DMA_TO_DEVICE);
        if (dma_mapping_error(priv->dev, tx_buf_dma)) {
            dev_kfree_skb(skb);
            netdev->stats.tx_dropped++;
            return NETDEV_TX_OK;
        }
    }

    /* Get address of TX WQE */
    tx_wqe_addr = priv->tx_wqe_next;

    mlxbf_gige_update_tx_wqe_next(priv);

    /* Put the buffer's DMA address into the first 64-bit word of the TX WQE */
    *tx_wqe_addr = tx_buf_dma;

    /* Set TX WQE pkt_len appropriately
     * NOTE: GigE silicon will automatically pad up to
     *       minimum packet length if needed.
     */
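    /* The completion path reads this length back out of the WQE via
     * MLXBF_GIGE_TX_WQE_PKT_LEN() for tx_bytes accounting.
     */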
    word2 = tx_skb->len & MLXBF_GIGE_TX_WQE_PKT_LEN_MASK;

    /* Write entire 2nd word of TX WQE */
    *(tx_wqe_addr + 1) = word2;

    spin_lock_irqsave(&priv->lock, flags);
    priv->tx_skb[priv->tx_pi % priv->tx_q_entries] = tx_skb;
    priv->tx_pi++;
    spin_unlock_irqrestore(&priv->lock, flags);

    if (!netdev_xmit_more()) {
        /* Memory barrier ensures the WQE writes above are visible to
         * HW before the TX producer index (doorbell) is written; the
         * doorbell is deferred while the stack indicates more packets
         * are about to follow.
         */
        wmb();
        writeq(priv->tx_pi, priv->base + MLXBF_GIGE_TX_PRODUCER_INDEX);
    }

    /* Check if the last TX entry was just used */
    if (!mlxbf_gige_tx_buffs_avail(priv)) {
        /* TX ring is full, inform stack */
        netif_stop_queue(netdev);

        /* Since there is no separate "TX complete" interrupt, need
         * to explicitly schedule NAPI poll.  This will trigger logic
         * which processes TX completions, and will hopefully drain
         * the TX ring allowing the TX queue to be awakened.
         */
        napi_schedule(&priv->napi);
    }

    return NETDEV_TX_OK;
}