0001 // SPDX-License-Identifier: GPL-2.0
0002 /* Copyright (c)  2018 Intel Corporation */
0003 
0004 #include <linux/module.h>
0005 #include <linux/types.h>
0006 #include <linux/if_vlan.h>
0007 #include <linux/aer.h>
0008 #include <linux/tcp.h>
0009 #include <linux/udp.h>
0010 #include <linux/ip.h>
0011 #include <linux/pm_runtime.h>
0012 #include <net/pkt_sched.h>
0013 #include <linux/bpf_trace.h>
0014 #include <net/xdp_sock_drv.h>
0015 #include <linux/pci.h>
0016 
0017 #include <net/ipv6.h>
0018 
0019 #include "igc.h"
0020 #include "igc_hw.h"
0021 #include "igc_tsn.h"
0022 #include "igc_xdp.h"
0023 
0024 #define DRV_SUMMARY "Intel(R) 2.5G Ethernet Linux Driver"
0025 
0026 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
0027 
0028 #define IGC_XDP_PASS        0
0029 #define IGC_XDP_CONSUMED    BIT(0)
0030 #define IGC_XDP_TX      BIT(1)
0031 #define IGC_XDP_REDIRECT    BIT(2)
0032 
0033 static int debug = -1;
0034 
0035 MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
0036 MODULE_DESCRIPTION(DRV_SUMMARY);
0037 MODULE_LICENSE("GPL v2");
0038 module_param(debug, int, 0);
0039 MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
0040 
0041 char igc_driver_name[] = "igc";
0042 static const char igc_driver_string[] = DRV_SUMMARY;
0043 static const char igc_copyright[] =
0044     "Copyright(c) 2018 Intel Corporation.";
0045 
0046 static const struct igc_info *igc_info_tbl[] = {
0047     [board_base] = &igc_base_info,
0048 };
0049 
0050 static const struct pci_device_id igc_pci_tbl[] = {
0051     { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LM), board_base },
0052     { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_V), board_base },
0053     { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_I), board_base },
0054     { PCI_VDEVICE(INTEL, IGC_DEV_ID_I220_V), board_base },
0055     { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K), board_base },
0056     { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K2), board_base },
0057     { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_K), board_base },
0058     { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LMVP), board_base },
0059     { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LMVP), board_base },
0060     { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_IT), board_base },
0061     { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LM), board_base },
0062     { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_V), board_base },
0063     { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_IT), board_base },
0064     { PCI_VDEVICE(INTEL, IGC_DEV_ID_I221_V), board_base },
0065     { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_BLANK_NVM), board_base },
0066     { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_BLANK_NVM), board_base },
0067     /* required last entry */
0068     {0, }
0069 };
0070 
0071 MODULE_DEVICE_TABLE(pci, igc_pci_tbl);
0072 
0073 enum latency_range {
0074     lowest_latency = 0,
0075     low_latency = 1,
0076     bulk_latency = 2,
0077     latency_invalid = 255
0078 };
0079 
0080 void igc_reset(struct igc_adapter *adapter)
0081 {
0082     struct net_device *dev = adapter->netdev;
0083     struct igc_hw *hw = &adapter->hw;
0084     struct igc_fc_info *fc = &hw->fc;
0085     u32 pba, hwm;
0086 
0087     /* Repartition PBA for greater than 9k MTU if required */
0088     pba = IGC_PBA_34K;
0089 
0090     /* flow control settings
0091      * The high water mark must be low enough to fit one full frame
0092      * after transmitting the pause frame.  As such we must have enough
0093      * space to allow for us to complete our current transmit and then
0094      * receive the frame that is in progress from the link partner.
0095      * Set it to:
0096      * - the full Rx FIFO size minus one full Tx plus one full Rx frame
0097      */
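         /* Note: the PBA value is expressed in KB, so (pba << 10) converts it
          * to bytes before subtracting room for one max-size Tx frame and one
          * max-size (jumbo) Rx frame, as described above.
          */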
0098     hwm = (pba << 10) - (adapter->max_frame_size + MAX_JUMBO_FRAME_SIZE);
0099 
0100     fc->high_water = hwm & 0xFFFFFFF0;  /* 16-byte granularity */
0101     fc->low_water = fc->high_water - 16;
0102     fc->pause_time = 0xFFFF;
0103     fc->send_xon = 1;
0104     fc->current_mode = fc->requested_mode;
0105 
0106     hw->mac.ops.reset_hw(hw);
0107 
0108     if (hw->mac.ops.init_hw(hw))
0109         netdev_err(dev, "Error on hardware initialization\n");
0110 
0111     /* Re-establish EEE setting */
0112     igc_set_eee_i225(hw, true, true, true);
0113 
0114     if (!netif_running(adapter->netdev))
0115         igc_power_down_phy_copper_base(&adapter->hw);
0116 
0117     /* Enable HW to recognize an 802.1Q VLAN Ethernet packet */
0118     wr32(IGC_VET, ETH_P_8021Q);
0119 
0120     /* Re-enable PTP, where applicable. */
0121     igc_ptp_reset(adapter);
0122 
0123     /* Re-enable TSN offloading, where applicable. */
0124     igc_tsn_reset(adapter);
0125 
0126     igc_get_phy_info(hw);
0127 }
0128 
0129 /**
0130  * igc_power_up_link - Power up the phy link
0131  * @adapter: address of board private structure
0132  */
0133 static void igc_power_up_link(struct igc_adapter *adapter)
0134 {
0135     igc_reset_phy(&adapter->hw);
0136 
0137     igc_power_up_phy_copper(&adapter->hw);
0138 
0139     igc_setup_link(&adapter->hw);
0140 }
0141 
0142 /**
0143  * igc_release_hw_control - release control of the h/w to f/w
0144  * @adapter: address of board private structure
0145  *
0146  * igc_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
0147  * For ASF and Pass Through versions of f/w this means that the
0148  * driver is no longer loaded.
0149  */
0150 static void igc_release_hw_control(struct igc_adapter *adapter)
0151 {
0152     struct igc_hw *hw = &adapter->hw;
0153     u32 ctrl_ext;
0154 
0155     if (!pci_device_is_present(adapter->pdev))
0156         return;
0157 
0158     /* Let firmware take over control of h/w */
0159     ctrl_ext = rd32(IGC_CTRL_EXT);
0160     wr32(IGC_CTRL_EXT,
0161          ctrl_ext & ~IGC_CTRL_EXT_DRV_LOAD);
0162 }
0163 
0164 /**
0165  * igc_get_hw_control - get control of the h/w from f/w
0166  * @adapter: address of board private structure
0167  *
0168  * igc_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
0169  * For ASF and Pass Through versions of f/w this means that
0170  * the driver is loaded.
0171  */
0172 static void igc_get_hw_control(struct igc_adapter *adapter)
0173 {
0174     struct igc_hw *hw = &adapter->hw;
0175     u32 ctrl_ext;
0176 
0177     /* Let firmware know the driver has taken over */
0178     ctrl_ext = rd32(IGC_CTRL_EXT);
0179     wr32(IGC_CTRL_EXT,
0180          ctrl_ext | IGC_CTRL_EXT_DRV_LOAD);
0181 }
0182 
0183 static void igc_unmap_tx_buffer(struct device *dev, struct igc_tx_buffer *buf)
0184 {
0185     dma_unmap_single(dev, dma_unmap_addr(buf, dma),
0186              dma_unmap_len(buf, len), DMA_TO_DEVICE);
0187 
0188     dma_unmap_len_set(buf, len, 0);
0189 }
0190 
0191 /**
0192  * igc_clean_tx_ring - Free Tx Buffers
0193  * @tx_ring: ring to be cleaned
0194  */
0195 static void igc_clean_tx_ring(struct igc_ring *tx_ring)
0196 {
0197     u16 i = tx_ring->next_to_clean;
0198     struct igc_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i];
0199     u32 xsk_frames = 0;
0200 
0201     while (i != tx_ring->next_to_use) {
0202         union igc_adv_tx_desc *eop_desc, *tx_desc;
0203 
0204         switch (tx_buffer->type) {
0205         case IGC_TX_BUFFER_TYPE_XSK:
0206             xsk_frames++;
0207             break;
0208         case IGC_TX_BUFFER_TYPE_XDP:
0209             xdp_return_frame(tx_buffer->xdpf);
0210             igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
0211             break;
0212         case IGC_TX_BUFFER_TYPE_SKB:
0213             dev_kfree_skb_any(tx_buffer->skb);
0214             igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
0215             break;
0216         default:
0217             netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n");
0218             break;
0219         }
0220 
0221         /* check for eop_desc to determine the end of the packet */
0222         eop_desc = tx_buffer->next_to_watch;
0223         tx_desc = IGC_TX_DESC(tx_ring, i);
0224 
0225         /* unmap remaining buffers */
0226         while (tx_desc != eop_desc) {
0227             tx_buffer++;
0228             tx_desc++;
0229             i++;
0230             if (unlikely(i == tx_ring->count)) {
0231                 i = 0;
0232                 tx_buffer = tx_ring->tx_buffer_info;
0233                 tx_desc = IGC_TX_DESC(tx_ring, 0);
0234             }
0235 
0236             /* unmap any remaining paged data */
0237             if (dma_unmap_len(tx_buffer, len))
0238                 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
0239         }
0240 
0241         tx_buffer->next_to_watch = NULL;
0242 
0243         /* move us one more past the eop_desc for start of next pkt */
0244         tx_buffer++;
0245         i++;
0246         if (unlikely(i == tx_ring->count)) {
0247             i = 0;
0248             tx_buffer = tx_ring->tx_buffer_info;
0249         }
0250     }
0251 
0252     if (tx_ring->xsk_pool && xsk_frames)
0253         xsk_tx_completed(tx_ring->xsk_pool, xsk_frames);
0254 
0255     /* reset BQL for queue */
0256     netdev_tx_reset_queue(txring_txq(tx_ring));
0257 
0258     /* reset next_to_use and next_to_clean */
0259     tx_ring->next_to_use = 0;
0260     tx_ring->next_to_clean = 0;
0261 }
0262 
0263 /**
0264  * igc_free_tx_resources - Free Tx Resources per Queue
0265  * @tx_ring: Tx descriptor ring for a specific queue
0266  *
0267  * Free all transmit software resources
0268  */
0269 void igc_free_tx_resources(struct igc_ring *tx_ring)
0270 {
0271     igc_clean_tx_ring(tx_ring);
0272 
0273     vfree(tx_ring->tx_buffer_info);
0274     tx_ring->tx_buffer_info = NULL;
0275 
0276     /* if not set, then don't free */
0277     if (!tx_ring->desc)
0278         return;
0279 
0280     dma_free_coherent(tx_ring->dev, tx_ring->size,
0281               tx_ring->desc, tx_ring->dma);
0282 
0283     tx_ring->desc = NULL;
0284 }
0285 
0286 /**
0287  * igc_free_all_tx_resources - Free Tx Resources for All Queues
0288  * @adapter: board private structure
0289  *
0290  * Free all transmit software resources
0291  */
0292 static void igc_free_all_tx_resources(struct igc_adapter *adapter)
0293 {
0294     int i;
0295 
0296     for (i = 0; i < adapter->num_tx_queues; i++)
0297         igc_free_tx_resources(adapter->tx_ring[i]);
0298 }
0299 
0300 /**
0301  * igc_clean_all_tx_rings - Free Tx Buffers for all queues
0302  * @adapter: board private structure
0303  */
0304 static void igc_clean_all_tx_rings(struct igc_adapter *adapter)
0305 {
0306     int i;
0307 
0308     for (i = 0; i < adapter->num_tx_queues; i++)
0309         if (adapter->tx_ring[i])
0310             igc_clean_tx_ring(adapter->tx_ring[i]);
0311 }
0312 
0313 /**
0314  * igc_setup_tx_resources - allocate Tx resources (Descriptors)
0315  * @tx_ring: tx descriptor ring (for a specific queue) to setup
0316  *
0317  * Return 0 on success, negative on failure
0318  */
0319 int igc_setup_tx_resources(struct igc_ring *tx_ring)
0320 {
0321     struct net_device *ndev = tx_ring->netdev;
0322     struct device *dev = tx_ring->dev;
0323     int size = 0;
0324 
0325     size = sizeof(struct igc_tx_buffer) * tx_ring->count;
0326     tx_ring->tx_buffer_info = vzalloc(size);
0327     if (!tx_ring->tx_buffer_info)
0328         goto err;
0329 
0330     /* round up to nearest 4K */
0331     tx_ring->size = tx_ring->count * sizeof(union igc_adv_tx_desc);
0332     tx_ring->size = ALIGN(tx_ring->size, 4096);
0333 
0334     tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
0335                        &tx_ring->dma, GFP_KERNEL);
0336 
0337     if (!tx_ring->desc)
0338         goto err;
0339 
0340     tx_ring->next_to_use = 0;
0341     tx_ring->next_to_clean = 0;
0342 
0343     return 0;
0344 
0345 err:
0346     vfree(tx_ring->tx_buffer_info);
0347     netdev_err(ndev, "Unable to allocate memory for Tx descriptor ring\n");
0348     return -ENOMEM;
0349 }
0350 
0351 /**
0352  * igc_setup_all_tx_resources - wrapper to allocate Tx resources for all queues
0353  * @adapter: board private structure
0354  *
0355  * Return 0 on success, negative on failure
0356  */
0357 static int igc_setup_all_tx_resources(struct igc_adapter *adapter)
0358 {
0359     struct net_device *dev = adapter->netdev;
0360     int i, err = 0;
0361 
0362     for (i = 0; i < adapter->num_tx_queues; i++) {
0363         err = igc_setup_tx_resources(adapter->tx_ring[i]);
0364         if (err) {
0365             netdev_err(dev, "Error on Tx queue %u setup\n", i);
0366             for (i--; i >= 0; i--)
0367                 igc_free_tx_resources(adapter->tx_ring[i]);
0368             break;
0369         }
0370     }
0371 
0372     return err;
0373 }
0374 
0375 static void igc_clean_rx_ring_page_shared(struct igc_ring *rx_ring)
0376 {
0377     u16 i = rx_ring->next_to_clean;
0378 
0379     dev_kfree_skb(rx_ring->skb);
0380     rx_ring->skb = NULL;
0381 
0382     /* Free all the Rx ring sk_buffs */
0383     while (i != rx_ring->next_to_alloc) {
0384         struct igc_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
0385 
0386         /* Invalidate cache lines that may have been written to by
0387          * device so that we avoid corrupting memory.
0388          */
0389         dma_sync_single_range_for_cpu(rx_ring->dev,
0390                           buffer_info->dma,
0391                           buffer_info->page_offset,
0392                           igc_rx_bufsz(rx_ring),
0393                           DMA_FROM_DEVICE);
0394 
0395         /* free resources associated with mapping */
0396         dma_unmap_page_attrs(rx_ring->dev,
0397                      buffer_info->dma,
0398                      igc_rx_pg_size(rx_ring),
0399                      DMA_FROM_DEVICE,
0400                      IGC_RX_DMA_ATTR);
0401         __page_frag_cache_drain(buffer_info->page,
0402                     buffer_info->pagecnt_bias);
0403 
0404         i++;
0405         if (i == rx_ring->count)
0406             i = 0;
0407     }
0408 }
0409 
0410 static void igc_clean_rx_ring_xsk_pool(struct igc_ring *ring)
0411 {
0412     struct igc_rx_buffer *bi;
0413     u16 i;
0414 
0415     for (i = 0; i < ring->count; i++) {
0416         bi = &ring->rx_buffer_info[i];
0417         if (!bi->xdp)
0418             continue;
0419 
0420         xsk_buff_free(bi->xdp);
0421         bi->xdp = NULL;
0422     }
0423 }
0424 
0425 /**
0426  * igc_clean_rx_ring - Free Rx Buffers per Queue
0427  * @ring: ring to free buffers from
0428  */
0429 static void igc_clean_rx_ring(struct igc_ring *ring)
0430 {
0431     if (ring->xsk_pool)
0432         igc_clean_rx_ring_xsk_pool(ring);
0433     else
0434         igc_clean_rx_ring_page_shared(ring);
0435 
0436     clear_ring_uses_large_buffer(ring);
0437 
0438     ring->next_to_alloc = 0;
0439     ring->next_to_clean = 0;
0440     ring->next_to_use = 0;
0441 }
0442 
0443 /**
0444  * igc_clean_all_rx_rings - Free Rx Buffers for all queues
0445  * @adapter: board private structure
0446  */
0447 static void igc_clean_all_rx_rings(struct igc_adapter *adapter)
0448 {
0449     int i;
0450 
0451     for (i = 0; i < adapter->num_rx_queues; i++)
0452         if (adapter->rx_ring[i])
0453             igc_clean_rx_ring(adapter->rx_ring[i]);
0454 }
0455 
0456 /**
0457  * igc_free_rx_resources - Free Rx Resources
0458  * @rx_ring: ring to clean the resources from
0459  *
0460  * Free all receive software resources
0461  */
0462 void igc_free_rx_resources(struct igc_ring *rx_ring)
0463 {
0464     igc_clean_rx_ring(rx_ring);
0465 
0466     xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
0467 
0468     vfree(rx_ring->rx_buffer_info);
0469     rx_ring->rx_buffer_info = NULL;
0470 
0471     /* if not set, then don't free */
0472     if (!rx_ring->desc)
0473         return;
0474 
0475     dma_free_coherent(rx_ring->dev, rx_ring->size,
0476               rx_ring->desc, rx_ring->dma);
0477 
0478     rx_ring->desc = NULL;
0479 }
0480 
0481 /**
0482  * igc_free_all_rx_resources - Free Rx Resources for All Queues
0483  * @adapter: board private structure
0484  *
0485  * Free all receive software resources
0486  */
0487 static void igc_free_all_rx_resources(struct igc_adapter *adapter)
0488 {
0489     int i;
0490 
0491     for (i = 0; i < adapter->num_rx_queues; i++)
0492         igc_free_rx_resources(adapter->rx_ring[i]);
0493 }
0494 
0495 /**
0496  * igc_setup_rx_resources - allocate Rx resources (Descriptors)
0497  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
0498  *
0499  * Returns 0 on success, negative on failure
0500  */
0501 int igc_setup_rx_resources(struct igc_ring *rx_ring)
0502 {
0503     struct net_device *ndev = rx_ring->netdev;
0504     struct device *dev = rx_ring->dev;
0505     u8 index = rx_ring->queue_index;
0506     int size, desc_len, res;
0507 
0508     /* XDP RX-queue info */
0509     if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
0510         xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
0511     res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, ndev, index,
0512                    rx_ring->q_vector->napi.napi_id);
0513     if (res < 0) {
0514         netdev_err(ndev, "Failed to register xdp_rxq index %u\n",
0515                index);
0516         return res;
0517     }
0518 
0519     size = sizeof(struct igc_rx_buffer) * rx_ring->count;
0520     rx_ring->rx_buffer_info = vzalloc(size);
0521     if (!rx_ring->rx_buffer_info)
0522         goto err;
0523 
0524     desc_len = sizeof(union igc_adv_rx_desc);
0525 
0526     /* Round up to nearest 4K */
0527     rx_ring->size = rx_ring->count * desc_len;
0528     rx_ring->size = ALIGN(rx_ring->size, 4096);
0529 
0530     rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
0531                        &rx_ring->dma, GFP_KERNEL);
0532 
0533     if (!rx_ring->desc)
0534         goto err;
0535 
0536     rx_ring->next_to_alloc = 0;
0537     rx_ring->next_to_clean = 0;
0538     rx_ring->next_to_use = 0;
0539 
0540     return 0;
0541 
0542 err:
0543     xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
0544     vfree(rx_ring->rx_buffer_info);
0545     rx_ring->rx_buffer_info = NULL;
0546     netdev_err(ndev, "Unable to allocate memory for Rx descriptor ring\n");
0547     return -ENOMEM;
0548 }
0549 
0550 /**
0551  * igc_setup_all_rx_resources - wrapper to allocate Rx resources
0552  *                                (Descriptors) for all queues
0553  * @adapter: board private structure
0554  *
0555  * Return 0 on success, negative on failure
0556  */
0557 static int igc_setup_all_rx_resources(struct igc_adapter *adapter)
0558 {
0559     struct net_device *dev = adapter->netdev;
0560     int i, err = 0;
0561 
0562     for (i = 0; i < adapter->num_rx_queues; i++) {
0563         err = igc_setup_rx_resources(adapter->rx_ring[i]);
0564         if (err) {
0565             netdev_err(dev, "Error on Rx queue %u setup\n", i);
0566             for (i--; i >= 0; i--)
0567                 igc_free_rx_resources(adapter->rx_ring[i]);
0568             break;
0569         }
0570     }
0571 
0572     return err;
0573 }
0574 
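     /* Return the AF_XDP buffer pool bound to this queue, or NULL when XDP is
      * not enabled or the ring has not been flagged for zero-copy use.
      */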
0575 static struct xsk_buff_pool *igc_get_xsk_pool(struct igc_adapter *adapter,
0576                           struct igc_ring *ring)
0577 {
0578     if (!igc_xdp_is_enabled(adapter) ||
0579         !test_bit(IGC_RING_FLAG_AF_XDP_ZC, &ring->flags))
0580         return NULL;
0581 
0582     return xsk_get_pool_from_qid(ring->netdev, ring->queue_index);
0583 }
0584 
0585 /**
0586  * igc_configure_rx_ring - Configure a receive ring after Reset
0587  * @adapter: board private structure
0588  * @ring: receive ring to be configured
0589  *
0590  * Configure the Rx unit of the MAC after a reset.
0591  */
0592 static void igc_configure_rx_ring(struct igc_adapter *adapter,
0593                   struct igc_ring *ring)
0594 {
0595     struct igc_hw *hw = &adapter->hw;
0596     union igc_adv_rx_desc *rx_desc;
0597     int reg_idx = ring->reg_idx;
0598     u32 srrctl = 0, rxdctl = 0;
0599     u64 rdba = ring->dma;
0600     u32 buf_size;
0601 
0602     xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
0603     ring->xsk_pool = igc_get_xsk_pool(adapter, ring);
0604     if (ring->xsk_pool) {
0605         WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
0606                            MEM_TYPE_XSK_BUFF_POOL,
0607                            NULL));
0608         xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
0609     } else {
0610         WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
0611                            MEM_TYPE_PAGE_SHARED,
0612                            NULL));
0613     }
0614 
0615     if (igc_xdp_is_enabled(adapter))
0616         set_ring_uses_large_buffer(ring);
0617 
0618     /* disable the queue */
0619     wr32(IGC_RXDCTL(reg_idx), 0);
0620 
0621     /* Set DMA base address registers */
0622     wr32(IGC_RDBAL(reg_idx),
0623          rdba & 0x00000000ffffffffULL);
0624     wr32(IGC_RDBAH(reg_idx), rdba >> 32);
0625     wr32(IGC_RDLEN(reg_idx),
0626          ring->count * sizeof(union igc_adv_rx_desc));
0627 
0628     /* initialize head and tail */
0629     ring->tail = adapter->io_addr + IGC_RDT(reg_idx);
0630     wr32(IGC_RDH(reg_idx), 0);
0631     writel(0, ring->tail);
0632 
0633     /* reset next-to-use/clean to place SW in sync with hardware */
0634     ring->next_to_clean = 0;
0635     ring->next_to_use = 0;
0636 
0637     if (ring->xsk_pool)
0638         buf_size = xsk_pool_get_rx_frame_size(ring->xsk_pool);
0639     else if (ring_uses_large_buffer(ring))
0640         buf_size = IGC_RXBUFFER_3072;
0641     else
0642         buf_size = IGC_RXBUFFER_2048;
0643 
0644     srrctl = IGC_RX_HDR_LEN << IGC_SRRCTL_BSIZEHDRSIZE_SHIFT;
0645     srrctl |= buf_size >> IGC_SRRCTL_BSIZEPKT_SHIFT;
0646     srrctl |= IGC_SRRCTL_DESCTYPE_ADV_ONEBUF;
0647 
0648     wr32(IGC_SRRCTL(reg_idx), srrctl);
0649 
0650     rxdctl |= IGC_RX_PTHRESH;
0651     rxdctl |= IGC_RX_HTHRESH << 8;
0652     rxdctl |= IGC_RX_WTHRESH << 16;
0653 
0654     /* initialize rx_buffer_info */
0655     memset(ring->rx_buffer_info, 0,
0656            sizeof(struct igc_rx_buffer) * ring->count);
0657 
0658     /* initialize Rx descriptor 0 */
0659     rx_desc = IGC_RX_DESC(ring, 0);
0660     rx_desc->wb.upper.length = 0;
0661 
0662     /* enable receive descriptor fetching */
0663     rxdctl |= IGC_RXDCTL_QUEUE_ENABLE;
0664 
0665     wr32(IGC_RXDCTL(reg_idx), rxdctl);
0666 }
0667 
0668 /**
0669  * igc_configure_rx - Configure receive Unit after Reset
0670  * @adapter: board private structure
0671  *
0672  * Configure the Rx unit of the MAC after a reset.
0673  */
0674 static void igc_configure_rx(struct igc_adapter *adapter)
0675 {
0676     int i;
0677 
0678     /* Setup the HW Rx Head and Tail Descriptor Pointers and
0679      * the Base and Length of the Rx Descriptor Ring
0680      */
0681     for (i = 0; i < adapter->num_rx_queues; i++)
0682         igc_configure_rx_ring(adapter, adapter->rx_ring[i]);
0683 }
0684 
0685 /**
0686  * igc_configure_tx_ring - Configure transmit ring after Reset
0687  * @adapter: board private structure
0688  * @ring: tx ring to configure
0689  *
0690  * Configure a transmit ring after a reset.
0691  */
0692 static void igc_configure_tx_ring(struct igc_adapter *adapter,
0693                   struct igc_ring *ring)
0694 {
0695     struct igc_hw *hw = &adapter->hw;
0696     int reg_idx = ring->reg_idx;
0697     u64 tdba = ring->dma;
0698     u32 txdctl = 0;
0699 
0700     ring->xsk_pool = igc_get_xsk_pool(adapter, ring);
0701 
0702     /* disable the queue */
0703     wr32(IGC_TXDCTL(reg_idx), 0);
0704     wrfl();
0705     mdelay(10);
0706 
0707     wr32(IGC_TDLEN(reg_idx),
0708          ring->count * sizeof(union igc_adv_tx_desc));
0709     wr32(IGC_TDBAL(reg_idx),
0710          tdba & 0x00000000ffffffffULL);
0711     wr32(IGC_TDBAH(reg_idx), tdba >> 32);
0712 
0713     ring->tail = adapter->io_addr + IGC_TDT(reg_idx);
0714     wr32(IGC_TDH(reg_idx), 0);
0715     writel(0, ring->tail);
0716 
0717     txdctl |= IGC_TX_PTHRESH;
0718     txdctl |= IGC_TX_HTHRESH << 8;
0719     txdctl |= IGC_TX_WTHRESH << 16;
0720 
0721     txdctl |= IGC_TXDCTL_QUEUE_ENABLE;
0722     wr32(IGC_TXDCTL(reg_idx), txdctl);
0723 }
0724 
0725 /**
0726  * igc_configure_tx - Configure transmit Unit after Reset
0727  * @adapter: board private structure
0728  *
0729  * Configure the Tx unit of the MAC after a reset.
0730  */
0731 static void igc_configure_tx(struct igc_adapter *adapter)
0732 {
0733     int i;
0734 
0735     for (i = 0; i < adapter->num_tx_queues; i++)
0736         igc_configure_tx_ring(adapter, adapter->tx_ring[i]);
0737 }
0738 
0739 /**
0740  * igc_setup_mrqc - configure the multiple receive queue control registers
0741  * @adapter: Board private structure
0742  */
0743 static void igc_setup_mrqc(struct igc_adapter *adapter)
0744 {
0745     struct igc_hw *hw = &adapter->hw;
0746     u32 j, num_rx_queues;
0747     u32 mrqc, rxcsum;
0748     u32 rss_key[10];
0749 
0750     netdev_rss_key_fill(rss_key, sizeof(rss_key));
0751     for (j = 0; j < 10; j++)
0752         wr32(IGC_RSSRK(j), rss_key[j]);
0753 
0754     num_rx_queues = adapter->rss_queues;
0755 
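         /* Spread the RETA indirection table entries evenly across the enabled
          * Rx queues; the table only needs rebuilding when the queue count
          * changes.
          */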
0756     if (adapter->rss_indir_tbl_init != num_rx_queues) {
0757         for (j = 0; j < IGC_RETA_SIZE; j++)
0758             adapter->rss_indir_tbl[j] =
0759             (j * num_rx_queues) / IGC_RETA_SIZE;
0760         adapter->rss_indir_tbl_init = num_rx_queues;
0761     }
0762     igc_write_rss_indir_tbl(adapter);
0763 
0764     /* Disable raw packet checksumming so that RSS hash is placed in
0765      * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
0766      * offloads as they are enabled by default
0767      */
0768     rxcsum = rd32(IGC_RXCSUM);
0769     rxcsum |= IGC_RXCSUM_PCSD;
0770 
0771     /* Enable Receive Checksum Offload for SCTP */
0772     rxcsum |= IGC_RXCSUM_CRCOFL;
0773 
0774     /* Don't need to set TUOFL or IPOFL, they default to 1 */
0775     wr32(IGC_RXCSUM, rxcsum);
0776 
0777     /* Generate RSS hash based on packet types, TCP/UDP
0778      * port numbers and/or IPv4/v6 src and dst addresses
0779      */
0780     mrqc = IGC_MRQC_RSS_FIELD_IPV4 |
0781            IGC_MRQC_RSS_FIELD_IPV4_TCP |
0782            IGC_MRQC_RSS_FIELD_IPV6 |
0783            IGC_MRQC_RSS_FIELD_IPV6_TCP |
0784            IGC_MRQC_RSS_FIELD_IPV6_TCP_EX;
0785 
0786     if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV4_UDP)
0787         mrqc |= IGC_MRQC_RSS_FIELD_IPV4_UDP;
0788     if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV6_UDP)
0789         mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP;
0790 
0791     mrqc |= IGC_MRQC_ENABLE_RSS_MQ;
0792 
0793     wr32(IGC_MRQC, mrqc);
0794 }
0795 
0796 /**
0797  * igc_setup_rctl - configure the receive control registers
0798  * @adapter: Board private structure
0799  */
0800 static void igc_setup_rctl(struct igc_adapter *adapter)
0801 {
0802     struct igc_hw *hw = &adapter->hw;
0803     u32 rctl;
0804 
0805     rctl = rd32(IGC_RCTL);
0806 
0807     rctl &= ~(3 << IGC_RCTL_MO_SHIFT);
0808     rctl &= ~(IGC_RCTL_LBM_TCVR | IGC_RCTL_LBM_MAC);
0809 
0810     rctl |= IGC_RCTL_EN | IGC_RCTL_BAM | IGC_RCTL_RDMTS_HALF |
0811         (hw->mac.mc_filter_type << IGC_RCTL_MO_SHIFT);
0812 
0813     /* enable stripping of CRC. Newer features require
0814      * that the HW strips the CRC.
0815      */
0816     rctl |= IGC_RCTL_SECRC;
0817 
0818     /* disable store bad packets and clear size bits. */
0819     rctl &= ~(IGC_RCTL_SBP | IGC_RCTL_SZ_256);
0820 
0821     /* enable LPE to allow for reception of jumbo frames */
0822     rctl |= IGC_RCTL_LPE;
0823 
0824     /* disable queue 0 to prevent tail write w/o re-config */
0825     wr32(IGC_RXDCTL(0), 0);
0826 
0827     /* This is useful for sniffing bad packets. */
0828     if (adapter->netdev->features & NETIF_F_RXALL) {
0829         /* UPE and MPE will be handled by normal PROMISC logic
0830          * in set_rx_mode
0831          */
0832         rctl |= (IGC_RCTL_SBP | /* Receive bad packets */
0833              IGC_RCTL_BAM | /* RX All Bcast Pkts */
0834              IGC_RCTL_PMCF); /* RX All MAC Ctrl Pkts */
0835 
0836         rctl &= ~(IGC_RCTL_DPF | /* Allow filtered pause */
0837               IGC_RCTL_CFIEN); /* Disable VLAN CFIEN Filter */
0838     }
0839 
0840     wr32(IGC_RCTL, rctl);
0841 }
0842 
0843 /**
0844  * igc_setup_tctl - configure the transmit control registers
0845  * @adapter: Board private structure
0846  */
0847 static void igc_setup_tctl(struct igc_adapter *adapter)
0848 {
0849     struct igc_hw *hw = &adapter->hw;
0850     u32 tctl;
0851 
0852     /* disable queue 0 which could be enabled by default */
0853     wr32(IGC_TXDCTL(0), 0);
0854 
0855     /* Program the Transmit Control Register */
0856     tctl = rd32(IGC_TCTL);
0857     tctl &= ~IGC_TCTL_CT;
0858     tctl |= IGC_TCTL_PSP | IGC_TCTL_RTLC |
0859         (IGC_COLLISION_THRESHOLD << IGC_CT_SHIFT);
0860 
0861     /* Enable transmits */
0862     tctl |= IGC_TCTL_EN;
0863 
0864     wr32(IGC_TCTL, tctl);
0865 }
0866 
0867 /**
0868  * igc_set_mac_filter_hw() - Set MAC address filter in hardware
0869  * @adapter: Pointer to adapter where the filter should be set
0870  * @index: Filter index
0871  * @type: MAC address filter type (source or destination)
0872  * @addr: MAC address
0873  * @queue: If non-negative, queue assignment feature is enabled and frames
0874  *         matching the filter are enqueued onto 'queue'. Otherwise, queue
0875  *         assignment is disabled.
0876  */
0877 static void igc_set_mac_filter_hw(struct igc_adapter *adapter, int index,
0878                   enum igc_mac_filter_type type,
0879                   const u8 *addr, int queue)
0880 {
0881     struct net_device *dev = adapter->netdev;
0882     struct igc_hw *hw = &adapter->hw;
0883     u32 ral, rah;
0884 
0885     if (WARN_ON(index >= hw->mac.rar_entry_count))
0886         return;
0887 
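         /* The first four bytes of the MAC address are written to RAL; the
          * remaining two bytes go into RAH together with the address-select,
          * queue-select and address-valid control bits.
          */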
0888     ral = le32_to_cpup((__le32 *)(addr));
0889     rah = le16_to_cpup((__le16 *)(addr + 4));
0890 
0891     if (type == IGC_MAC_FILTER_TYPE_SRC) {
0892         rah &= ~IGC_RAH_ASEL_MASK;
0893         rah |= IGC_RAH_ASEL_SRC_ADDR;
0894     }
0895 
0896     if (queue >= 0) {
0897         rah &= ~IGC_RAH_QSEL_MASK;
0898         rah |= (queue << IGC_RAH_QSEL_SHIFT);
0899         rah |= IGC_RAH_QSEL_ENABLE;
0900     }
0901 
0902     rah |= IGC_RAH_AV;
0903 
0904     wr32(IGC_RAL(index), ral);
0905     wr32(IGC_RAH(index), rah);
0906 
0907     netdev_dbg(dev, "MAC address filter set in HW: index %d", index);
0908 }
0909 
0910 /**
0911  * igc_clear_mac_filter_hw() - Clear MAC address filter in hardware
0912  * @adapter: Pointer to adapter where the filter should be cleared
0913  * @index: Filter index
0914  */
0915 static void igc_clear_mac_filter_hw(struct igc_adapter *adapter, int index)
0916 {
0917     struct net_device *dev = adapter->netdev;
0918     struct igc_hw *hw = &adapter->hw;
0919 
0920     if (WARN_ON(index >= hw->mac.rar_entry_count))
0921         return;
0922 
0923     wr32(IGC_RAL(index), 0);
0924     wr32(IGC_RAH(index), 0);
0925 
0926     netdev_dbg(dev, "MAC address filter cleared in HW: index %d", index);
0927 }
0928 
0929 /* Set default MAC address for the PF in the first RAR entry */
0930 static void igc_set_default_mac_filter(struct igc_adapter *adapter)
0931 {
0932     struct net_device *dev = adapter->netdev;
0933     u8 *addr = adapter->hw.mac.addr;
0934 
0935     netdev_dbg(dev, "Set default MAC address filter: address %pM", addr);
0936 
0937     igc_set_mac_filter_hw(adapter, 0, IGC_MAC_FILTER_TYPE_DST, addr, -1);
0938 }
0939 
0940 /**
0941  * igc_set_mac - Change the Ethernet Address of the NIC
0942  * @netdev: network interface device structure
0943  * @p: pointer to an address structure
0944  *
0945  * Returns 0 on success, negative on failure
0946  */
0947 static int igc_set_mac(struct net_device *netdev, void *p)
0948 {
0949     struct igc_adapter *adapter = netdev_priv(netdev);
0950     struct igc_hw *hw = &adapter->hw;
0951     struct sockaddr *addr = p;
0952 
0953     if (!is_valid_ether_addr(addr->sa_data))
0954         return -EADDRNOTAVAIL;
0955 
0956     eth_hw_addr_set(netdev, addr->sa_data);
0957     memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
0958 
0959     /* set the correct pool for the new PF MAC address in entry 0 */
0960     igc_set_default_mac_filter(adapter);
0961 
0962     return 0;
0963 }
0964 
0965 /**
0966  *  igc_write_mc_addr_list - write multicast addresses to MTA
0967  *  @netdev: network interface device structure
0968  *
0969  *  Writes multicast address list to the MTA hash table.
0970  *  Returns: -ENOMEM on failure
0971  *           0 on no addresses written
0972  *           X on writing X addresses to MTA
0973  **/
0974 static int igc_write_mc_addr_list(struct net_device *netdev)
0975 {
0976     struct igc_adapter *adapter = netdev_priv(netdev);
0977     struct igc_hw *hw = &adapter->hw;
0978     struct netdev_hw_addr *ha;
0979     u8  *mta_list;
0980     int i;
0981 
0982     if (netdev_mc_empty(netdev)) {
0983         /* nothing to program, so clear mc list */
0984         igc_update_mc_addr_list(hw, NULL, 0);
0985         return 0;
0986     }
0987 
0988     mta_list = kcalloc(netdev_mc_count(netdev), 6, GFP_ATOMIC);
0989     if (!mta_list)
0990         return -ENOMEM;
0991 
0992     /* The shared function expects a packed array of only addresses. */
0993     i = 0;
0994     netdev_for_each_mc_addr(ha, netdev)
0995         memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
0996 
0997     igc_update_mc_addr_list(hw, mta_list, i);
0998     kfree(mta_list);
0999 
1000     return netdev_mc_count(netdev);
1001 }
1002 
1003 static __le32 igc_tx_launchtime(struct igc_adapter *adapter, ktime_t txtime)
1004 {
1005     ktime_t cycle_time = adapter->cycle_time;
1006     ktime_t base_time = adapter->base_time;
1007     u32 launchtime;
1008 
1009     /* FIXME: when using ETF together with taprio, we may have a
1010      * case where 'delta' is larger than the cycle_time, this may
1011      * cause problems if we don't read the current value of
1012      * IGC_BASET, as the value written into the launchtime
1013      * descriptor field may be misinterpreted.
1014      */
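         /* The value programmed into the descriptor is txtime's offset within
          * the current cycle, i.e. (txtime - base_time) mod cycle_time.
          */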
1015     div_s64_rem(ktime_sub_ns(txtime, base_time), cycle_time, &launchtime);
1016 
1017     return cpu_to_le32(launchtime);
1018 }
1019 
1020 static void igc_tx_ctxtdesc(struct igc_ring *tx_ring,
1021                 struct igc_tx_buffer *first,
1022                 u32 vlan_macip_lens, u32 type_tucmd,
1023                 u32 mss_l4len_idx)
1024 {
1025     struct igc_adv_tx_context_desc *context_desc;
1026     u16 i = tx_ring->next_to_use;
1027 
1028     context_desc = IGC_TX_CTXTDESC(tx_ring, i);
1029 
1030     i++;
1031     tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
1032 
1033     /* set bits to identify this as an advanced context descriptor */
1034     type_tucmd |= IGC_TXD_CMD_DEXT | IGC_ADVTXD_DTYP_CTXT;
1035 
1036     /* For i225, context index must be unique per ring. */
1037     if (test_bit(IGC_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
1038         mss_l4len_idx |= tx_ring->reg_idx << 4;
1039 
1040     context_desc->vlan_macip_lens   = cpu_to_le32(vlan_macip_lens);
1041     context_desc->type_tucmd_mlhl   = cpu_to_le32(type_tucmd);
1042     context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
1043 
1044     /* We assume there is always a valid Tx time available. Invalid times
1045      * should have been handled by the upper layers.
1046      */
1047     if (tx_ring->launchtime_enable) {
1048         struct igc_adapter *adapter = netdev_priv(tx_ring->netdev);
1049         ktime_t txtime = first->skb->tstamp;
1050 
1051         skb_txtime_consumed(first->skb);
1052         context_desc->launch_time = igc_tx_launchtime(adapter,
1053                                   txtime);
1054     } else {
1055         context_desc->launch_time = 0;
1056     }
1057 }
1058 
1059 static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first)
1060 {
1061     struct sk_buff *skb = first->skb;
1062     u32 vlan_macip_lens = 0;
1063     u32 type_tucmd = 0;
1064 
1065     if (skb->ip_summed != CHECKSUM_PARTIAL) {
1066 csum_failed:
1067         if (!(first->tx_flags & IGC_TX_FLAGS_VLAN) &&
1068             !tx_ring->launchtime_enable)
1069             return;
1070         goto no_csum;
1071     }
1072 
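         /* The case labels below are checksum-field offsets; they differ for
          * TCP and UDP, so the TCP case sets its L4 type and falls through to
          * the shared break while UDP leaves type_tucmd at zero.
          */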
1073     switch (skb->csum_offset) {
1074     case offsetof(struct tcphdr, check):
1075         type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP;
1076         fallthrough;
1077     case offsetof(struct udphdr, check):
1078         break;
1079     case offsetof(struct sctphdr, checksum):
1080         /* validate that this is actually an SCTP request */
1081         if (skb_csum_is_sctp(skb)) {
1082             type_tucmd = IGC_ADVTXD_TUCMD_L4T_SCTP;
1083             break;
1084         }
1085         fallthrough;
1086     default:
1087         skb_checksum_help(skb);
1088         goto csum_failed;
1089     }
1090 
1091     /* update TX checksum flag */
1092     first->tx_flags |= IGC_TX_FLAGS_CSUM;
1093     vlan_macip_lens = skb_checksum_start_offset(skb) -
1094               skb_network_offset(skb);
1095 no_csum:
1096     vlan_macip_lens |= skb_network_offset(skb) << IGC_ADVTXD_MACLEN_SHIFT;
1097     vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK;
1098 
1099     igc_tx_ctxtdesc(tx_ring, first, vlan_macip_lens, type_tucmd, 0);
1100 }
1101 
1102 static int __igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
1103 {
1104     struct net_device *netdev = tx_ring->netdev;
1105 
1106     netif_stop_subqueue(netdev, tx_ring->queue_index);
1107 
1108     /* Memory barrier: ensure the queue stop is visible before re-checking for free descriptors */
1109     smp_mb();
1110 
1111     /* We need to check again in a case another CPU has just
1112      * made room available.
1113      */
1114     if (igc_desc_unused(tx_ring) < size)
1115         return -EBUSY;
1116 
1117     /* A reprieve! */
1118     netif_wake_subqueue(netdev, tx_ring->queue_index);
1119 
1120     u64_stats_update_begin(&tx_ring->tx_syncp2);
1121     tx_ring->tx_stats.restart_queue2++;
1122     u64_stats_update_end(&tx_ring->tx_syncp2);
1123 
1124     return 0;
1125 }
1126 
1127 static inline int igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
1128 {
1129     if (igc_desc_unused(tx_ring) >= size)
1130         return 0;
1131     return __igc_maybe_stop_tx(tx_ring, size);
1132 }
1133 
1134 #define IGC_SET_FLAG(_input, _flag, _result) \
1135     (((_flag) <= (_result)) ?               \
1136      ((u32)((_input) & (_flag)) * ((_result) / (_flag))) :  \
1137      ((u32)((_input) & (_flag)) / ((_flag) / (_result))))
1138 
1139 static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags)
1140 {
1141     /* set type for advanced descriptor with frame checksum insertion */
1142     u32 cmd_type = IGC_ADVTXD_DTYP_DATA |
1143                IGC_ADVTXD_DCMD_DEXT |
1144                IGC_ADVTXD_DCMD_IFCS;
1145 
1146     /* set HW vlan bit if vlan is present */
1147     cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_VLAN,
1148                  IGC_ADVTXD_DCMD_VLE);
1149 
1150     /* set segmentation bits for TSO */
1151     cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSO,
1152                  (IGC_ADVTXD_DCMD_TSE));
1153 
1154     /* set timestamp bit if present */
1155     cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP,
1156                  (IGC_ADVTXD_MAC_TSTAMP));
1157 
1158     /* insert frame checksum */
1159     cmd_type ^= IGC_SET_FLAG(skb->no_fcs, 1, IGC_ADVTXD_DCMD_IFCS);
1160 
1161     return cmd_type;
1162 }
1163 
1164 static void igc_tx_olinfo_status(struct igc_ring *tx_ring,
1165                  union igc_adv_tx_desc *tx_desc,
1166                  u32 tx_flags, unsigned int paylen)
1167 {
1168     u32 olinfo_status = paylen << IGC_ADVTXD_PAYLEN_SHIFT;
1169 
1170     /* insert L4 checksum */
1171     olinfo_status |= (tx_flags & IGC_TX_FLAGS_CSUM) *
1172               ((IGC_TXD_POPTS_TXSM << 8) /
1173               IGC_TX_FLAGS_CSUM);
1174 
1175     /* insert IPv4 checksum */
1176     olinfo_status |= (tx_flags & IGC_TX_FLAGS_IPV4) *
1177               (((IGC_TXD_POPTS_IXSM << 8)) /
1178               IGC_TX_FLAGS_IPV4);
1179 
1180     tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
1181 }
1182 
1183 static int igc_tx_map(struct igc_ring *tx_ring,
1184               struct igc_tx_buffer *first,
1185               const u8 hdr_len)
1186 {
1187     struct sk_buff *skb = first->skb;
1188     struct igc_tx_buffer *tx_buffer;
1189     union igc_adv_tx_desc *tx_desc;
1190     u32 tx_flags = first->tx_flags;
1191     skb_frag_t *frag;
1192     u16 i = tx_ring->next_to_use;
1193     unsigned int data_len, size;
1194     dma_addr_t dma;
1195     u32 cmd_type;
1196 
1197     cmd_type = igc_tx_cmd_type(skb, tx_flags);
1198     tx_desc = IGC_TX_DESC(tx_ring, i);
1199 
1200     igc_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len);
1201 
1202     size = skb_headlen(skb);
1203     data_len = skb->data_len;
1204 
1205     dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
1206 
1207     tx_buffer = first;
1208 
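         /* Walk the linear data and then each frag: chunks larger than
          * IGC_MAX_DATA_PER_TXD are split across several descriptors, and the
          * ring index wraps to zero whenever it reaches tx_ring->count.
          */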
1209     for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
1210         if (dma_mapping_error(tx_ring->dev, dma))
1211             goto dma_error;
1212 
1213         /* record length, and DMA address */
1214         dma_unmap_len_set(tx_buffer, len, size);
1215         dma_unmap_addr_set(tx_buffer, dma, dma);
1216 
1217         tx_desc->read.buffer_addr = cpu_to_le64(dma);
1218 
1219         while (unlikely(size > IGC_MAX_DATA_PER_TXD)) {
1220             tx_desc->read.cmd_type_len =
1221                 cpu_to_le32(cmd_type ^ IGC_MAX_DATA_PER_TXD);
1222 
1223             i++;
1224             tx_desc++;
1225             if (i == tx_ring->count) {
1226                 tx_desc = IGC_TX_DESC(tx_ring, 0);
1227                 i = 0;
1228             }
1229             tx_desc->read.olinfo_status = 0;
1230 
1231             dma += IGC_MAX_DATA_PER_TXD;
1232             size -= IGC_MAX_DATA_PER_TXD;
1233 
1234             tx_desc->read.buffer_addr = cpu_to_le64(dma);
1235         }
1236 
1237         if (likely(!data_len))
1238             break;
1239 
1240         tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size);
1241 
1242         i++;
1243         tx_desc++;
1244         if (i == tx_ring->count) {
1245             tx_desc = IGC_TX_DESC(tx_ring, 0);
1246             i = 0;
1247         }
1248         tx_desc->read.olinfo_status = 0;
1249 
1250         size = skb_frag_size(frag);
1251         data_len -= size;
1252 
1253         dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
1254                        size, DMA_TO_DEVICE);
1255 
1256         tx_buffer = &tx_ring->tx_buffer_info[i];
1257     }
1258 
1259     /* write last descriptor with RS and EOP bits */
1260     cmd_type |= size | IGC_TXD_DCMD;
1261     tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
1262 
1263     netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
1264 
1265     /* set the timestamp */
1266     first->time_stamp = jiffies;
1267 
1268     skb_tx_timestamp(skb);
1269 
1270     /* Force memory writes to complete before letting h/w know there
1271      * are new descriptors to fetch.  (Only applicable for weak-ordered
1272      * memory model archs, such as IA-64).
1273      *
1274      * We also need this memory barrier to make certain all of the
1275      * status bits have been updated before next_to_watch is written.
1276      */
1277     wmb();
1278 
1279     /* set next_to_watch value indicating a packet is present */
1280     first->next_to_watch = tx_desc;
1281 
1282     i++;
1283     if (i == tx_ring->count)
1284         i = 0;
1285 
1286     tx_ring->next_to_use = i;
1287 
1288     /* Make sure there is space in the ring for the next send. */
1289     igc_maybe_stop_tx(tx_ring, DESC_NEEDED);
1290 
1291     if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more())
1292         writel(i, tx_ring->tail);
1294 
1295     return 0;
1296 dma_error:
1297     netdev_err(tx_ring->netdev, "TX DMA map failed\n");
1298     tx_buffer = &tx_ring->tx_buffer_info[i];
1299 
1300     /* clear dma mappings for failed tx_buffer_info map */
1301     while (tx_buffer != first) {
1302         if (dma_unmap_len(tx_buffer, len))
1303             igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
1304 
1305         if (i-- == 0)
1306             i += tx_ring->count;
1307         tx_buffer = &tx_ring->tx_buffer_info[i];
1308     }
1309 
1310     if (dma_unmap_len(tx_buffer, len))
1311         igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
1312 
1313     dev_kfree_skb_any(tx_buffer->skb);
1314     tx_buffer->skb = NULL;
1315 
1316     tx_ring->next_to_use = i;
1317 
1318     return -1;
1319 }
1320 
1321 static int igc_tso(struct igc_ring *tx_ring,
1322            struct igc_tx_buffer *first,
1323            u8 *hdr_len)
1324 {
1325     u32 vlan_macip_lens, type_tucmd, mss_l4len_idx;
1326     struct sk_buff *skb = first->skb;
1327     union {
1328         struct iphdr *v4;
1329         struct ipv6hdr *v6;
1330         unsigned char *hdr;
1331     } ip;
1332     union {
1333         struct tcphdr *tcp;
1334         struct udphdr *udp;
1335         unsigned char *hdr;
1336     } l4;
1337     u32 paylen, l4_offset;
1338     int err;
1339 
1340     if (skb->ip_summed != CHECKSUM_PARTIAL)
1341         return 0;
1342 
1343     if (!skb_is_gso(skb))
1344         return 0;
1345 
1346     err = skb_cow_head(skb, 0);
1347     if (err < 0)
1348         return err;
1349 
1350     ip.hdr = skb_network_header(skb);
1351     l4.hdr = skb_checksum_start(skb);
1352 
1353     /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
1354     type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP;
1355 
1356     /* initialize outer IP header fields */
1357     if (ip.v4->version == 4) {
1358         unsigned char *csum_start = skb_checksum_start(skb);
1359         unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4);
1360 
1361         /* IP header will have to cancel out any data that
1362          * is not a part of the outer IP header
1363          */
1364         ip.v4->check = csum_fold(csum_partial(trans_start,
1365                               csum_start - trans_start,
1366                               0));
1367         type_tucmd |= IGC_ADVTXD_TUCMD_IPV4;
1368 
1369         ip.v4->tot_len = 0;
1370         first->tx_flags |= IGC_TX_FLAGS_TSO |
1371                    IGC_TX_FLAGS_CSUM |
1372                    IGC_TX_FLAGS_IPV4;
1373     } else {
1374         ip.v6->payload_len = 0;
1375         first->tx_flags |= IGC_TX_FLAGS_TSO |
1376                    IGC_TX_FLAGS_CSUM;
1377     }
1378 
1379     /* determine offset of inner transport header */
1380     l4_offset = l4.hdr - skb->data;
1381 
1382     /* remove payload length from inner checksum */
1383     paylen = skb->len - l4_offset;
1384     if (type_tucmd & IGC_ADVTXD_TUCMD_L4T_TCP) {
1385         /* compute length of segmentation header */
1386         *hdr_len = (l4.tcp->doff * 4) + l4_offset;
1387         csum_replace_by_diff(&l4.tcp->check,
1388                      (__force __wsum)htonl(paylen));
1389     } else {
1390         /* compute length of segmentation header */
1391         *hdr_len = sizeof(*l4.udp) + l4_offset;
1392         csum_replace_by_diff(&l4.udp->check,
1393                      (__force __wsum)htonl(paylen));
1394     }
1395 
1396     /* update gso size and bytecount with header size */
1397     first->gso_segs = skb_shinfo(skb)->gso_segs;
1398     first->bytecount += (first->gso_segs - 1) * *hdr_len;
1399 
1400     /* MSS L4LEN IDX */
1401     mss_l4len_idx = (*hdr_len - l4_offset) << IGC_ADVTXD_L4LEN_SHIFT;
1402     mss_l4len_idx |= skb_shinfo(skb)->gso_size << IGC_ADVTXD_MSS_SHIFT;
1403 
1404     /* VLAN MACLEN IPLEN */
1405     vlan_macip_lens = l4.hdr - ip.hdr;
1406     vlan_macip_lens |= (ip.hdr - skb->data) << IGC_ADVTXD_MACLEN_SHIFT;
1407     vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK;
1408 
1409     igc_tx_ctxtdesc(tx_ring, first, vlan_macip_lens,
1410             type_tucmd, mss_l4len_idx);
1411 
1412     return 1;
1413 }
1414 
1415 static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
1416                        struct igc_ring *tx_ring)
1417 {
1418     u16 count = TXD_USE_COUNT(skb_headlen(skb));
1419     __be16 protocol = vlan_get_protocol(skb);
1420     struct igc_tx_buffer *first;
1421     u32 tx_flags = 0;
1422     unsigned short f;
1423     u8 hdr_len = 0;
1424     int tso = 0;
1425 
1426     /* need: 1 descriptor per page * PAGE_SIZE/IGC_MAX_DATA_PER_TXD,
1427      *  + 1 desc for skb_headlen/IGC_MAX_DATA_PER_TXD,
1428      *  + 2 desc gap to keep tail from touching head,
1429      *  + 1 desc for context descriptor,
1430      * otherwise try next time
1431      */
1432     for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
1433         count += TXD_USE_COUNT(skb_frag_size(
1434                         &skb_shinfo(skb)->frags[f]));
1435 
1436     if (igc_maybe_stop_tx(tx_ring, count + 3)) {
1437         /* this is a hard error */
1438         return NETDEV_TX_BUSY;
1439     }
1440 
1441     /* record the location of the first descriptor for this packet */
1442     first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
1443     first->type = IGC_TX_BUFFER_TYPE_SKB;
1444     first->skb = skb;
1445     first->bytecount = skb->len;
1446     first->gso_segs = 1;
1447 
1448     if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
1449         struct igc_adapter *adapter = netdev_priv(tx_ring->netdev);
1450 
1451         /* FIXME: add support for retrieving timestamps from
1452          * the other timer registers before skipping the
1453          * timestamping request.
1454          */
1455         if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON &&
1456             !test_and_set_bit_lock(__IGC_PTP_TX_IN_PROGRESS,
1457                        &adapter->state)) {
1458             skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
1459             tx_flags |= IGC_TX_FLAGS_TSTAMP;
1460 
1461             adapter->ptp_tx_skb = skb_get(skb);
1462             adapter->ptp_tx_start = jiffies;
1463         } else {
1464             adapter->tx_hwtstamp_skipped++;
1465         }
1466     }
1467 
1468     if (skb_vlan_tag_present(skb)) {
1469         tx_flags |= IGC_TX_FLAGS_VLAN;
1470         tx_flags |= (skb_vlan_tag_get(skb) << IGC_TX_FLAGS_VLAN_SHIFT);
1471     }
1472 
1473     /* record initial flags and protocol */
1474     first->tx_flags = tx_flags;
1475     first->protocol = protocol;
1476 
1477     tso = igc_tso(tx_ring, first, &hdr_len);
1478     if (tso < 0)
1479         goto out_drop;
1480     else if (!tso)
1481         igc_tx_csum(tx_ring, first);
1482 
1483     igc_tx_map(tx_ring, first, hdr_len);
1484 
1485     return NETDEV_TX_OK;
1486 
1487 out_drop:
1488     dev_kfree_skb_any(first->skb);
1489     first->skb = NULL;
1490 
1491     return NETDEV_TX_OK;
1492 }
1493 
1494 static inline struct igc_ring *igc_tx_queue_mapping(struct igc_adapter *adapter,
1495                             struct sk_buff *skb)
1496 {
1497     unsigned int r_idx = skb->queue_mapping;
1498 
1499     if (r_idx >= adapter->num_tx_queues)
1500         r_idx = r_idx % adapter->num_tx_queues;
1501 
1502     return adapter->tx_ring[r_idx];
1503 }
1504 
1505 static netdev_tx_t igc_xmit_frame(struct sk_buff *skb,
1506                   struct net_device *netdev)
1507 {
1508     struct igc_adapter *adapter = netdev_priv(netdev);
1509 
1510     /* The minimum packet size with TCTL.PSP set is 17 so pad the skb
1511      * in order to meet this minimum size requirement.
1512      */
1513     if (skb->len < 17) {
1514         if (skb_padto(skb, 17))
1515             return NETDEV_TX_OK;
1516         skb->len = 17;
1517     }
1518 
1519     return igc_xmit_frame_ring(skb, igc_tx_queue_mapping(adapter, skb));
1520 }
1521 
1522 static void igc_rx_checksum(struct igc_ring *ring,
1523                 union igc_adv_rx_desc *rx_desc,
1524                 struct sk_buff *skb)
1525 {
1526     skb_checksum_none_assert(skb);
1527 
1528     /* Ignore Checksum bit is set */
1529     if (igc_test_staterr(rx_desc, IGC_RXD_STAT_IXSM))
1530         return;
1531 
1532     /* Rx checksum disabled via ethtool */
1533     if (!(ring->netdev->features & NETIF_F_RXCSUM))
1534         return;
1535 
1536     /* TCP/UDP checksum error bit is set */
1537     if (igc_test_staterr(rx_desc,
1538                  IGC_RXDEXT_STATERR_L4E |
1539                  IGC_RXDEXT_STATERR_IPE)) {
1540         /* work around errata with sctp packets where the TCPE aka
1541          * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
1542          * packets (aka let the stack check the crc32c)
1543          */
1544         if (!(skb->len == 60 &&
1545               test_bit(IGC_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
1546             u64_stats_update_begin(&ring->rx_syncp);
1547             ring->rx_stats.csum_err++;
1548             u64_stats_update_end(&ring->rx_syncp);
1549         }
1550         /* let the stack verify checksum errors */
1551         return;
1552     }
1553     /* It must be a TCP or UDP packet with a valid checksum */
1554     if (igc_test_staterr(rx_desc, IGC_RXD_STAT_TCPCS |
1555                       IGC_RXD_STAT_UDPCS))
1556         skb->ip_summed = CHECKSUM_UNNECESSARY;
1557 
1558     netdev_dbg(ring->netdev, "cksum success: bits %08X\n",
1559            le32_to_cpu(rx_desc->wb.upper.status_error));
1560 }
1561 
1562 static inline void igc_rx_hash(struct igc_ring *ring,
1563                    union igc_adv_rx_desc *rx_desc,
1564                    struct sk_buff *skb)
1565 {
1566     if (ring->netdev->features & NETIF_F_RXHASH)
1567         skb_set_hash(skb,
1568                  le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
1569                  PKT_HASH_TYPE_L3);
1570 }
1571 
1572 static void igc_rx_vlan(struct igc_ring *rx_ring,
1573             union igc_adv_rx_desc *rx_desc,
1574             struct sk_buff *skb)
1575 {
1576     struct net_device *dev = rx_ring->netdev;
1577     u16 vid;
1578 
1579     if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
1580         igc_test_staterr(rx_desc, IGC_RXD_STAT_VP)) {
1581         if (igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_LB) &&
1582             test_bit(IGC_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags))
1583             vid = be16_to_cpu((__force __be16)rx_desc->wb.upper.vlan);
1584         else
1585             vid = le16_to_cpu(rx_desc->wb.upper.vlan);
1586 
1587         __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
1588     }
1589 }
1590 
1591 /**
1592  * igc_process_skb_fields - Populate skb header fields from Rx descriptor
1593  * @rx_ring: rx descriptor ring packet is being transacted on
1594  * @rx_desc: pointer to the EOP Rx descriptor
1595  * @skb: pointer to current skb being populated
1596  *
1597  * This function checks the ring, descriptor, and packet information in order
1598  * to populate the hash, checksum, VLAN, protocol, and other fields within the
1599  * skb.
1600  */
1601 static void igc_process_skb_fields(struct igc_ring *rx_ring,
1602                    union igc_adv_rx_desc *rx_desc,
1603                    struct sk_buff *skb)
1604 {
1605     igc_rx_hash(rx_ring, rx_desc, skb);
1606 
1607     igc_rx_checksum(rx_ring, rx_desc, skb);
1608 
1609     igc_rx_vlan(rx_ring, rx_desc, skb);
1610 
1611     skb_record_rx_queue(skb, rx_ring->queue_index);
1612 
1613     skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1614 }
1615 
1616 static void igc_vlan_mode(struct net_device *netdev, netdev_features_t features)
1617 {
1618     bool enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX);
1619     struct igc_adapter *adapter = netdev_priv(netdev);
1620     struct igc_hw *hw = &adapter->hw;
1621     u32 ctrl;
1622 
1623     ctrl = rd32(IGC_CTRL);
1624 
1625     if (enable) {
1626         /* enable VLAN tag insert/strip */
1627         ctrl |= IGC_CTRL_VME;
1628     } else {
1629         /* disable VLAN tag insert/strip */
1630         ctrl &= ~IGC_CTRL_VME;
1631     }
1632     wr32(IGC_CTRL, ctrl);
1633 }
1634 
1635 static void igc_restore_vlan(struct igc_adapter *adapter)
1636 {
1637     igc_vlan_mode(adapter->netdev, adapter->netdev->features);
1638 }
1639 
1640 static struct igc_rx_buffer *igc_get_rx_buffer(struct igc_ring *rx_ring,
1641                            const unsigned int size,
1642                            int *rx_buffer_pgcnt)
1643 {
1644     struct igc_rx_buffer *rx_buffer;
1645 
1646     rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
1647     *rx_buffer_pgcnt =
1648 #if (PAGE_SIZE < 8192)
1649         page_count(rx_buffer->page);
1650 #else
1651         0;
1652 #endif
1653     prefetchw(rx_buffer->page);
1654 
1655     /* we are reusing so sync this buffer for CPU use */
1656     dma_sync_single_range_for_cpu(rx_ring->dev,
1657                       rx_buffer->dma,
1658                       rx_buffer->page_offset,
1659                       size,
1660                       DMA_FROM_DEVICE);
1661 
1662     rx_buffer->pagecnt_bias--;
1663 
1664     return rx_buffer;
1665 }
1666 
1667 static void igc_rx_buffer_flip(struct igc_rx_buffer *buffer,
1668                    unsigned int truesize)
1669 {
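         /* With PAGE_SIZE < 8192 each Rx buffer uses half a page, so toggle
          * between the two halves; with larger pages simply advance past the
          * area that was just consumed.
          */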
1670 #if (PAGE_SIZE < 8192)
1671     buffer->page_offset ^= truesize;
1672 #else
1673     buffer->page_offset += truesize;
1674 #endif
1675 }
1676 
1677 static unsigned int igc_get_rx_frame_truesize(struct igc_ring *ring,
1678                           unsigned int size)
1679 {
1680     unsigned int truesize;
1681 
1682 #if (PAGE_SIZE < 8192)
1683     truesize = igc_rx_pg_size(ring) / 2;
1684 #else
1685     truesize = ring_uses_build_skb(ring) ?
1686            SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
1687            SKB_DATA_ALIGN(IGC_SKB_PAD + size) :
1688            SKB_DATA_ALIGN(size);
1689 #endif
1690     return truesize;
1691 }
1692 
1693 /**
1694  * igc_add_rx_frag - Add contents of Rx buffer to sk_buff
1695  * @rx_ring: rx descriptor ring to transact packets on
1696  * @rx_buffer: buffer containing page to add
1697  * @skb: sk_buff to place the data into
1698  * @size: size of buffer to be added
1699  *
1700  * This function will add the data contained in rx_buffer->page to the skb.
1701  */
1702 static void igc_add_rx_frag(struct igc_ring *rx_ring,
1703                 struct igc_rx_buffer *rx_buffer,
1704                 struct sk_buff *skb,
1705                 unsigned int size)
1706 {
1707     unsigned int truesize;
1708 
1709 #if (PAGE_SIZE < 8192)
1710     truesize = igc_rx_pg_size(rx_ring) / 2;
1711 #else
1712     truesize = ring_uses_build_skb(rx_ring) ?
1713            SKB_DATA_ALIGN(IGC_SKB_PAD + size) :
1714            SKB_DATA_ALIGN(size);
1715 #endif
1716     skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
1717             rx_buffer->page_offset, size, truesize);
1718 
1719     igc_rx_buffer_flip(rx_buffer, truesize);
1720 }
1721 
1722 static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring,
1723                      struct igc_rx_buffer *rx_buffer,
1724                      struct xdp_buff *xdp)
1725 {
1726     unsigned int size = xdp->data_end - xdp->data;
1727     unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size);
1728     unsigned int metasize = xdp->data - xdp->data_meta;
1729     struct sk_buff *skb;
1730 
1731     /* prefetch first cache line of first page */
1732     net_prefetch(xdp->data_meta);
1733 
1734     /* build an skb around the page buffer */
1735     skb = napi_build_skb(xdp->data_hard_start, truesize);
1736     if (unlikely(!skb))
1737         return NULL;
1738 
1739     /* update pointers within the skb to store the data */
1740     skb_reserve(skb, xdp->data - xdp->data_hard_start);
1741     __skb_put(skb, size);
1742     if (metasize)
1743         skb_metadata_set(skb, metasize);
1744 
1745     igc_rx_buffer_flip(rx_buffer, truesize);
1746     return skb;
1747 }
1748 
1749 static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring,
1750                      struct igc_rx_buffer *rx_buffer,
1751                      struct xdp_buff *xdp,
1752                      ktime_t timestamp)
1753 {
1754     unsigned int metasize = xdp->data - xdp->data_meta;
1755     unsigned int size = xdp->data_end - xdp->data;
1756     unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size);
1757     void *va = xdp->data;
1758     unsigned int headlen;
1759     struct sk_buff *skb;
1760 
1761     /* prefetch first cache line of first page */
1762     net_prefetch(xdp->data_meta);
1763 
1764     /* allocate a skb to store the frags */
1765     skb = napi_alloc_skb(&rx_ring->q_vector->napi,
1766                  IGC_RX_HDR_LEN + metasize);
1767     if (unlikely(!skb))
1768         return NULL;
1769 
1770     if (timestamp)
1771         skb_hwtstamps(skb)->hwtstamp = timestamp;
1772 
1773     /* Determine available headroom for copy */
1774     headlen = size;
1775     if (headlen > IGC_RX_HDR_LEN)
1776         headlen = eth_get_headlen(skb->dev, va, IGC_RX_HDR_LEN);
1777 
1778     /* align pull length to size of long to optimize memcpy performance */
1779     memcpy(__skb_put(skb, headlen + metasize), xdp->data_meta,
1780            ALIGN(headlen + metasize, sizeof(long)));
1781 
1782     if (metasize) {
1783         skb_metadata_set(skb, metasize);
1784         __skb_pull(skb, metasize);
1785     }
1786 
1787     /* update all of the pointers */
1788     size -= headlen;
1789     if (size) {
1790         skb_add_rx_frag(skb, 0, rx_buffer->page,
1791                 (va + headlen) - page_address(rx_buffer->page),
1792                 size, truesize);
1793         igc_rx_buffer_flip(rx_buffer, truesize);
1794     } else {
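             /* the entire frame was copied into the skb header, so give our
              * page reference back
              */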
1795         rx_buffer->pagecnt_bias++;
1796     }
1797 
1798     return skb;
1799 }
1800 
1801 /**
1802  * igc_reuse_rx_page - page flip buffer and store it back on the ring
1803  * @rx_ring: rx descriptor ring to store buffers on
1804  * @old_buff: donor buffer to have page reused
1805  *
1806  * Synchronizes page for reuse by the adapter
1807  */
1808 static void igc_reuse_rx_page(struct igc_ring *rx_ring,
1809                   struct igc_rx_buffer *old_buff)
1810 {
1811     u16 nta = rx_ring->next_to_alloc;
1812     struct igc_rx_buffer *new_buff;
1813 
1814     new_buff = &rx_ring->rx_buffer_info[nta];
1815 
1816     /* update, and store next to alloc */
1817     nta++;
1818     rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
1819 
1820     /* Transfer page from old buffer to new buffer.
1821      * Move each member individually to avoid possible store
1822      * forwarding stalls.
1823      */
1824     new_buff->dma       = old_buff->dma;
1825     new_buff->page      = old_buff->page;
1826     new_buff->page_offset   = old_buff->page_offset;
1827     new_buff->pagecnt_bias  = old_buff->pagecnt_bias;
1828 }
1829 
1830 static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer,
1831                   int rx_buffer_pgcnt)
1832 {
1833     unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
1834     struct page *page = rx_buffer->page;
1835 
1836     /* avoid re-using remote and pfmemalloc pages */
1837     if (!dev_page_is_reusable(page))
1838         return false;
1839 
1840 #if (PAGE_SIZE < 8192)
1841     /* if we are only owner of page we can reuse it */
1842     if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1))
1843         return false;
1844 #else
1845 #define IGC_LAST_OFFSET \
1846     (SKB_WITH_OVERHEAD(PAGE_SIZE) - IGC_RXBUFFER_2048)
1847 
1848     if (rx_buffer->page_offset > IGC_LAST_OFFSET)
1849         return false;
1850 #endif
1851 
1852     /* If we have drained the page fragment pool we need to update
1853      * the pagecnt_bias and page count so that we fully restock the
1854      * number of references the driver holds.
1855      */
1856     if (unlikely(pagecnt_bias == 1)) {
1857         page_ref_add(page, USHRT_MAX - 1);
1858         rx_buffer->pagecnt_bias = USHRT_MAX;
1859     }
1860 
1861     return true;
1862 }
1863 
1864 /**
1865  * igc_is_non_eop - process handling of non-EOP buffers
1866  * @rx_ring: Rx ring being processed
1867  * @rx_desc: Rx descriptor for current buffer
1868  *
1869  * This function updates next to clean and prefetches the next descriptor.
1870  * If the buffer just processed carries the EOP bit the function returns
1871  * false, otherwise it returns true to indicate that more buffers belonging
1872  * to the same frame still need to be fetched.
1873  */
1874 static bool igc_is_non_eop(struct igc_ring *rx_ring,
1875                union igc_adv_rx_desc *rx_desc)
1876 {
1877     u32 ntc = rx_ring->next_to_clean + 1;
1878 
1879     /* fetch, update, and store next to clean */
1880     ntc = (ntc < rx_ring->count) ? ntc : 0;
1881     rx_ring->next_to_clean = ntc;
1882 
1883     prefetch(IGC_RX_DESC(rx_ring, ntc));
1884 
1885     if (likely(igc_test_staterr(rx_desc, IGC_RXD_STAT_EOP)))
1886         return false;
1887 
1888     return true;
1889 }
1890 
1891 /**
1892  * igc_cleanup_headers - Correct corrupted or empty headers
1893  * @rx_ring: rx descriptor ring packet is being transacted on
1894  * @rx_desc: pointer to the EOP Rx descriptor
1895  * @skb: pointer to current skb being fixed
1896  *
1897  * Drop the frame if the hardware flagged a receive error on the EOP
1898  * descriptor (unless NETIF_F_RXALL is set) and skip buffers consumed by XDP.
1899  *
1900  * In addition if skb is not at least 60 bytes we need to pad it so that
1901  * it is large enough to qualify as a valid Ethernet frame.
1902  *
1903  * Returns true if an error was encountered and skb was freed.
1904  */
1905 static bool igc_cleanup_headers(struct igc_ring *rx_ring,
1906                 union igc_adv_rx_desc *rx_desc,
1907                 struct sk_buff *skb)
1908 {
1909     /* XDP packets use error pointer so abort at this point */
1910     if (IS_ERR(skb))
1911         return true;
1912 
1913     if (unlikely(igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_RXE))) {
1914         struct net_device *netdev = rx_ring->netdev;
1915 
1916         if (!(netdev->features & NETIF_F_RXALL)) {
1917             dev_kfree_skb_any(skb);
1918             return true;
1919         }
1920     }
1921 
1922     /* if eth_skb_pad returns an error the skb was freed */
1923     if (eth_skb_pad(skb))
1924         return true;
1925 
1926     return false;
1927 }
1928 
1929 static void igc_put_rx_buffer(struct igc_ring *rx_ring,
1930                   struct igc_rx_buffer *rx_buffer,
1931                   int rx_buffer_pgcnt)
1932 {
1933     if (igc_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) {
1934         /* hand second half of page back to the ring */
1935         igc_reuse_rx_page(rx_ring, rx_buffer);
1936     } else {
1937         /* We are not reusing the buffer so unmap it and free
1938          * any references we are holding to it
1939          */
1940         dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
1941                      igc_rx_pg_size(rx_ring), DMA_FROM_DEVICE,
1942                      IGC_RX_DMA_ATTR);
1943         __page_frag_cache_drain(rx_buffer->page,
1944                     rx_buffer->pagecnt_bias);
1945     }
1946 
1947     /* clear contents of rx_buffer */
1948     rx_buffer->page = NULL;
1949 }
1950 
1951 static inline unsigned int igc_rx_offset(struct igc_ring *rx_ring)
1952 {
1953     struct igc_adapter *adapter = rx_ring->q_vector->adapter;
1954 
1955     if (ring_uses_build_skb(rx_ring))
1956         return IGC_SKB_PAD;
1957     if (igc_xdp_is_enabled(adapter))
1958         return XDP_PACKET_HEADROOM;
1959 
1960     return 0;
1961 }
1962 
1963 static bool igc_alloc_mapped_page(struct igc_ring *rx_ring,
1964                   struct igc_rx_buffer *bi)
1965 {
1966     struct page *page = bi->page;
1967     dma_addr_t dma;
1968 
1969     /* since we are recycling buffers we should seldom need to alloc */
1970     if (likely(page))
1971         return true;
1972 
1973     /* alloc new page for storage */
1974     page = dev_alloc_pages(igc_rx_pg_order(rx_ring));
1975     if (unlikely(!page)) {
1976         rx_ring->rx_stats.alloc_failed++;
1977         return false;
1978     }
1979 
1980     /* map page for use */
1981     dma = dma_map_page_attrs(rx_ring->dev, page, 0,
1982                  igc_rx_pg_size(rx_ring),
1983                  DMA_FROM_DEVICE,
1984                  IGC_RX_DMA_ATTR);
1985 
1986     /* if mapping failed free memory back to system since
1987      * there isn't much point in holding memory we can't use
1988      */
1989     if (dma_mapping_error(rx_ring->dev, dma)) {
1990         __free_page(page);
1991 
1992         rx_ring->rx_stats.alloc_failed++;
1993         return false;
1994     }
1995 
1996     bi->dma = dma;
1997     bi->page = page;
1998     bi->page_offset = igc_rx_offset(rx_ring);
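         /* grab a large batch of page references up front so the hot path
          * only has to adjust pagecnt_bias
          */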
1999     page_ref_add(page, USHRT_MAX - 1);
2000     bi->pagecnt_bias = USHRT_MAX;
2001 
2002     return true;
2003 }
2004 
2005 /**
2006  * igc_alloc_rx_buffers - Replace used receive buffers
2007  * @rx_ring: rx descriptor ring
2008  * @cleaned_count: number of descriptors to refill with fresh buffers
2009  */
2010 static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count)
2011 {
2012     union igc_adv_rx_desc *rx_desc;
2013     u16 i = rx_ring->next_to_use;
2014     struct igc_rx_buffer *bi;
2015     u16 bufsz;
2016 
2017     /* nothing to do */
2018     if (!cleaned_count)
2019         return;
2020 
2021     rx_desc = IGC_RX_DESC(rx_ring, i);
2022     bi = &rx_ring->rx_buffer_info[i];
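         /* bias the index by -count so the wrap back to the start of the
          * ring can be detected with the cheap !i test below
          */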
2023     i -= rx_ring->count;
2024 
2025     bufsz = igc_rx_bufsz(rx_ring);
2026 
2027     do {
2028         if (!igc_alloc_mapped_page(rx_ring, bi))
2029             break;
2030 
2031         /* sync the buffer for use by the device */
2032         dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
2033                          bi->page_offset, bufsz,
2034                          DMA_FROM_DEVICE);
2035 
2036         /* Refresh the desc even if buffer_addrs didn't change
2037          * because each write-back erases this info.
2038          */
2039         rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
2040 
2041         rx_desc++;
2042         bi++;
2043         i++;
2044         if (unlikely(!i)) {
2045             rx_desc = IGC_RX_DESC(rx_ring, 0);
2046             bi = rx_ring->rx_buffer_info;
2047             i -= rx_ring->count;
2048         }
2049 
2050         /* clear the length for the next_to_use descriptor */
2051         rx_desc->wb.upper.length = 0;
2052 
2053         cleaned_count--;
2054     } while (cleaned_count);
2055 
2056     i += rx_ring->count;
2057 
2058     if (rx_ring->next_to_use != i) {
2059         /* record the next descriptor to use */
2060         rx_ring->next_to_use = i;
2061 
2062         /* update next to alloc since we have filled the ring */
2063         rx_ring->next_to_alloc = i;
2064 
2065         /* Force memory writes to complete before letting h/w
2066          * know there are new descriptors to fetch.  (Only
2067          * applicable for weak-ordered memory model archs,
2068          * such as IA-64).
2069          */
2070         wmb();
2071         writel(i, rx_ring->tail);
2072     }
2073 }
2074 
2075 static bool igc_alloc_rx_buffers_zc(struct igc_ring *ring, u16 count)
2076 {
2077     union igc_adv_rx_desc *desc;
2078     u16 i = ring->next_to_use;
2079     struct igc_rx_buffer *bi;
2080     dma_addr_t dma;
2081     bool ok = true;
2082 
2083     if (!count)
2084         return ok;
2085 
2086     desc = IGC_RX_DESC(ring, i);
2087     bi = &ring->rx_buffer_info[i];
2088     i -= ring->count;
2089 
2090     do {
2091         bi->xdp = xsk_buff_alloc(ring->xsk_pool);
2092         if (!bi->xdp) {
2093             ok = false;
2094             break;
2095         }
2096 
2097         dma = xsk_buff_xdp_get_dma(bi->xdp);
2098         desc->read.pkt_addr = cpu_to_le64(dma);
2099 
2100         desc++;
2101         bi++;
2102         i++;
2103         if (unlikely(!i)) {
2104             desc = IGC_RX_DESC(ring, 0);
2105             bi = ring->rx_buffer_info;
2106             i -= ring->count;
2107         }
2108 
2109         /* Clear the length for the next_to_use descriptor. */
2110         desc->wb.upper.length = 0;
2111 
2112         count--;
2113     } while (count);
2114 
2115     i += ring->count;
2116 
2117     if (ring->next_to_use != i) {
2118         ring->next_to_use = i;
2119 
2120         /* Force memory writes to complete before letting h/w
2121          * know there are new descriptors to fetch.  (Only
2122          * applicable for weak-ordered memory model archs,
2123          * such as IA-64).
2124          */
2125         wmb();
2126         writel(i, ring->tail);
2127     }
2128 
2129     return ok;
2130 }
2131 
2132 static int igc_xdp_init_tx_buffer(struct igc_tx_buffer *buffer,
2133                   struct xdp_frame *xdpf,
2134                   struct igc_ring *ring)
2135 {
2136     dma_addr_t dma;
2137 
2138     dma = dma_map_single(ring->dev, xdpf->data, xdpf->len, DMA_TO_DEVICE);
2139     if (dma_mapping_error(ring->dev, dma)) {
2140         netdev_err_once(ring->netdev, "Failed to map DMA for TX\n");
2141         return -ENOMEM;
2142     }
2143 
2144     buffer->type = IGC_TX_BUFFER_TYPE_XDP;
2145     buffer->xdpf = xdpf;
2146     buffer->protocol = 0;
2147     buffer->bytecount = xdpf->len;
2148     buffer->gso_segs = 1;
2149     buffer->time_stamp = jiffies;
2150     dma_unmap_len_set(buffer, len, xdpf->len);
2151     dma_unmap_addr_set(buffer, dma, dma);
2152     return 0;
2153 }
2154 
2155 /* This function requires __netif_tx_lock is held by the caller. */
2156 static int igc_xdp_init_tx_descriptor(struct igc_ring *ring,
2157                       struct xdp_frame *xdpf)
2158 {
2159     struct igc_tx_buffer *buffer;
2160     union igc_adv_tx_desc *desc;
2161     u32 cmd_type, olinfo_status;
2162     int err;
2163 
2164     if (!igc_desc_unused(ring))
2165         return -EBUSY;
2166 
2167     buffer = &ring->tx_buffer_info[ring->next_to_use];
2168     err = igc_xdp_init_tx_buffer(buffer, xdpf, ring);
2169     if (err)
2170         return err;
2171 
2172     cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
2173            IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD |
2174            buffer->bytecount;
2175     olinfo_status = buffer->bytecount << IGC_ADVTXD_PAYLEN_SHIFT;
2176 
2177     desc = IGC_TX_DESC(ring, ring->next_to_use);
2178     desc->read.cmd_type_len = cpu_to_le32(cmd_type);
2179     desc->read.olinfo_status = cpu_to_le32(olinfo_status);
2180     desc->read.buffer_addr = cpu_to_le64(dma_unmap_addr(buffer, dma));
2181 
2182     netdev_tx_sent_queue(txring_txq(ring), buffer->bytecount);
2183 
2184     buffer->next_to_watch = desc;
2185 
2186     ring->next_to_use++;
2187     if (ring->next_to_use == ring->count)
2188         ring->next_to_use = 0;
2189 
2190     return 0;
2191 }
2192 
2193 static struct igc_ring *igc_xdp_get_tx_ring(struct igc_adapter *adapter,
2194                         int cpu)
2195 {
2196     int index = cpu;
2197 
2198     if (unlikely(index < 0))
2199         index = 0;
2200 
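         /* fold the CPU id onto the available Tx queues when there are more
          * CPUs than Tx queues
          */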
2201     while (index >= adapter->num_tx_queues)
2202         index -= adapter->num_tx_queues;
2203 
2204     return adapter->tx_ring[index];
2205 }
2206 
2207 static int igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp)
2208 {
2209     struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
2210     int cpu = smp_processor_id();
2211     struct netdev_queue *nq;
2212     struct igc_ring *ring;
2213     int res;
2214 
2215     if (unlikely(!xdpf))
2216         return -EFAULT;
2217 
2218     ring = igc_xdp_get_tx_ring(adapter, cpu);
2219     nq = txring_txq(ring);
2220 
2221     __netif_tx_lock(nq, cpu);
2222     res = igc_xdp_init_tx_descriptor(ring, xdpf);
2223     __netif_tx_unlock(nq);
2224     return res;
2225 }
2226 
2227 /* This function assumes rcu_read_lock() is held by the caller. */
2228 static int __igc_xdp_run_prog(struct igc_adapter *adapter,
2229                   struct bpf_prog *prog,
2230                   struct xdp_buff *xdp)
2231 {
2232     u32 act = bpf_prog_run_xdp(prog, xdp);
2233 
2234     switch (act) {
2235     case XDP_PASS:
2236         return IGC_XDP_PASS;
2237     case XDP_TX:
2238         if (igc_xdp_xmit_back(adapter, xdp) < 0)
2239             goto out_failure;
2240         return IGC_XDP_TX;
2241     case XDP_REDIRECT:
2242         if (xdp_do_redirect(adapter->netdev, xdp, prog) < 0)
2243             goto out_failure;
2244         return IGC_XDP_REDIRECT;
2246     default:
2247         bpf_warn_invalid_xdp_action(adapter->netdev, prog, act);
2248         fallthrough;
2249     case XDP_ABORTED:
2250 out_failure:
2251         trace_xdp_exception(adapter->netdev, prog, act);
2252         fallthrough;
2253     case XDP_DROP:
2254         return IGC_XDP_CONSUMED;
2255     }
2256 }
2257 
2258 static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter,
2259                     struct xdp_buff *xdp)
2260 {
2261     struct bpf_prog *prog;
2262     int res;
2263 
2264     prog = READ_ONCE(adapter->xdp_prog);
2265     if (!prog) {
2266         res = IGC_XDP_PASS;
2267         goto out;
2268     }
2269 
2270     res = __igc_xdp_run_prog(adapter, prog, xdp);
2271 
2272 out:
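         /* A non-PASS verdict is returned as an ERR_PTR so the caller can
          * tell it apart from a real skb (XDP_PASS maps to NULL); the verdict
          * is recovered in igc_clean_rx_irq() via -PTR_ERR().
          */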
2273     return ERR_PTR(-res);
2274 }
2275 
2276 /* This function assumes __netif_tx_lock is held by the caller. */
2277 static void igc_flush_tx_descriptors(struct igc_ring *ring)
2278 {
2279     /* Once tail pointer is updated, hardware can fetch the descriptors
2280      * any time so we issue a write membar here to ensure all memory
2281      * writes are complete before the tail pointer is updated.
2282      */
2283     wmb();
2284     writel(ring->next_to_use, ring->tail);
2285 }
2286 
2287 static void igc_finalize_xdp(struct igc_adapter *adapter, int status)
2288 {
2289     int cpu = smp_processor_id();
2290     struct netdev_queue *nq;
2291     struct igc_ring *ring;
2292 
2293     if (status & IGC_XDP_TX) {
2294         ring = igc_xdp_get_tx_ring(adapter, cpu);
2295         nq = txring_txq(ring);
2296 
2297         __netif_tx_lock(nq, cpu);
2298         igc_flush_tx_descriptors(ring);
2299         __netif_tx_unlock(nq);
2300     }
2301 
2302     if (status & IGC_XDP_REDIRECT)
2303         xdp_do_flush();
2304 }
2305 
2306 static void igc_update_rx_stats(struct igc_q_vector *q_vector,
2307                 unsigned int packets, unsigned int bytes)
2308 {
2309     struct igc_ring *ring = q_vector->rx.ring;
2310 
2311     u64_stats_update_begin(&ring->rx_syncp);
2312     ring->rx_stats.packets += packets;
2313     ring->rx_stats.bytes += bytes;
2314     u64_stats_update_end(&ring->rx_syncp);
2315 
2316     q_vector->rx.total_packets += packets;
2317     q_vector->rx.total_bytes += bytes;
2318 }
2319 
2320 static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
2321 {
2322     unsigned int total_bytes = 0, total_packets = 0;
2323     struct igc_adapter *adapter = q_vector->adapter;
2324     struct igc_ring *rx_ring = q_vector->rx.ring;
2325     struct sk_buff *skb = rx_ring->skb;
2326     u16 cleaned_count = igc_desc_unused(rx_ring);
2327     int xdp_status = 0, rx_buffer_pgcnt;
2328 
2329     while (likely(total_packets < budget)) {
2330         union igc_adv_rx_desc *rx_desc;
2331         struct igc_rx_buffer *rx_buffer;
2332         unsigned int size, truesize;
2333         ktime_t timestamp = 0;
2334         struct xdp_buff xdp;
2335         int pkt_offset = 0;
2336         void *pktbuf;
2337 
2338         /* return some buffers to hardware, one at a time is too slow */
2339         if (cleaned_count >= IGC_RX_BUFFER_WRITE) {
2340             igc_alloc_rx_buffers(rx_ring, cleaned_count);
2341             cleaned_count = 0;
2342         }
2343 
2344         rx_desc = IGC_RX_DESC(rx_ring, rx_ring->next_to_clean);
2345         size = le16_to_cpu(rx_desc->wb.upper.length);
2346         if (!size)
2347             break;
2348 
2349         /* This memory barrier is needed to keep us from reading
2350          * any other fields out of the rx_desc until we know the
2351          * descriptor has been written back
2352          */
2353         dma_rmb();
2354 
2355         rx_buffer = igc_get_rx_buffer(rx_ring, size, &rx_buffer_pgcnt);
2356         truesize = igc_get_rx_frame_truesize(rx_ring, size);
2357 
2358         pktbuf = page_address(rx_buffer->page) + rx_buffer->page_offset;
2359 
2360         if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) {
2361             timestamp = igc_ptp_rx_pktstamp(q_vector->adapter,
2362                             pktbuf);
2363             pkt_offset = IGC_TS_HDR_LEN;
2364             size -= IGC_TS_HDR_LEN;
2365         }
2366 
2367         if (!skb) {
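                 /* first buffer of a new frame: run it through XDP. The data
                  * handed to the program starts after the ring's headroom
                  * and, when present, the inline hardware timestamp.
                  */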
2368             xdp_init_buff(&xdp, truesize, &rx_ring->xdp_rxq);
2369             xdp_prepare_buff(&xdp, pktbuf - igc_rx_offset(rx_ring),
2370                      igc_rx_offset(rx_ring) + pkt_offset,
2371                      size, true);
2372 
2373             skb = igc_xdp_run_prog(adapter, &xdp);
2374         }
2375 
2376         if (IS_ERR(skb)) {
2377             unsigned int xdp_res = -PTR_ERR(skb);
2378 
2379             switch (xdp_res) {
2380             case IGC_XDP_CONSUMED:
2381                 rx_buffer->pagecnt_bias++;
2382                 break;
2383             case IGC_XDP_TX:
2384             case IGC_XDP_REDIRECT:
2385                 igc_rx_buffer_flip(rx_buffer, truesize);
2386                 xdp_status |= xdp_res;
2387                 break;
2388             }
2389 
2390             total_packets++;
2391             total_bytes += size;
2392         } else if (skb)
2393             igc_add_rx_frag(rx_ring, rx_buffer, skb, size);
2394         else if (ring_uses_build_skb(rx_ring))
2395             skb = igc_build_skb(rx_ring, rx_buffer, &xdp);
2396         else
2397             skb = igc_construct_skb(rx_ring, rx_buffer, &xdp,
2398                         timestamp);
2399 
2400         /* exit if we failed to retrieve a buffer */
2401         if (!skb) {
2402             rx_ring->rx_stats.alloc_failed++;
2403             rx_buffer->pagecnt_bias++;
2404             break;
2405         }
2406 
2407         igc_put_rx_buffer(rx_ring, rx_buffer, rx_buffer_pgcnt);
2408         cleaned_count++;
2409 
2410         /* fetch next buffer in frame if non-eop */
2411         if (igc_is_non_eop(rx_ring, rx_desc))
2412             continue;
2413 
2414         /* verify the packet layout is correct */
2415         if (igc_cleanup_headers(rx_ring, rx_desc, skb)) {
2416             skb = NULL;
2417             continue;
2418         }
2419 
2420         /* probably a little skewed due to removing CRC */
2421         total_bytes += skb->len;
2422 
2423         /* populate checksum, VLAN, and protocol */
2424         igc_process_skb_fields(rx_ring, rx_desc, skb);
2425 
2426         napi_gro_receive(&q_vector->napi, skb);
2427 
2428         /* reset skb pointer */
2429         skb = NULL;
2430 
2431         /* update budget accounting */
2432         total_packets++;
2433     }
2434 
2435     if (xdp_status)
2436         igc_finalize_xdp(adapter, xdp_status);
2437 
2438     /* place incomplete frames back on ring for completion */
2439     rx_ring->skb = skb;
2440 
2441     igc_update_rx_stats(q_vector, total_packets, total_bytes);
2442 
2443     if (cleaned_count)
2444         igc_alloc_rx_buffers(rx_ring, cleaned_count);
2445 
2446     return total_packets;
2447 }
2448 
2449 static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring,
2450                         struct xdp_buff *xdp)
2451 {
2452     unsigned int totalsize = xdp->data_end - xdp->data_meta;
2453     unsigned int metasize = xdp->data - xdp->data_meta;
2454     struct sk_buff *skb;
2455 
2456     net_prefetch(xdp->data_meta);
2457 
2458     skb = __napi_alloc_skb(&ring->q_vector->napi, totalsize,
2459                    GFP_ATOMIC | __GFP_NOWARN);
2460     if (unlikely(!skb))
2461         return NULL;
2462 
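         /* frames that XDP passed up are copied out of the XSK buffer so the
          * buffer can be returned to the pool right away
          */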
2463     memcpy(__skb_put(skb, totalsize), xdp->data_meta,
2464            ALIGN(totalsize, sizeof(long)));
2465 
2466     if (metasize) {
2467         skb_metadata_set(skb, metasize);
2468         __skb_pull(skb, metasize);
2469     }
2470 
2471     return skb;
2472 }
2473 
2474 static void igc_dispatch_skb_zc(struct igc_q_vector *q_vector,
2475                 union igc_adv_rx_desc *desc,
2476                 struct xdp_buff *xdp,
2477                 ktime_t timestamp)
2478 {
2479     struct igc_ring *ring = q_vector->rx.ring;
2480     struct sk_buff *skb;
2481 
2482     skb = igc_construct_skb_zc(ring, xdp);
2483     if (!skb) {
2484         ring->rx_stats.alloc_failed++;
2485         return;
2486     }
2487 
2488     if (timestamp)
2489         skb_hwtstamps(skb)->hwtstamp = timestamp;
2490 
2491     if (igc_cleanup_headers(ring, desc, skb))
2492         return;
2493 
2494     igc_process_skb_fields(ring, desc, skb);
2495     napi_gro_receive(&q_vector->napi, skb);
2496 }
2497 
2498 static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget)
2499 {
2500     struct igc_adapter *adapter = q_vector->adapter;
2501     struct igc_ring *ring = q_vector->rx.ring;
2502     u16 cleaned_count = igc_desc_unused(ring);
2503     int total_bytes = 0, total_packets = 0;
2504     u16 ntc = ring->next_to_clean;
2505     struct bpf_prog *prog;
2506     bool failure = false;
2507     int xdp_status = 0;
2508 
2509     rcu_read_lock();
2510 
2511     prog = READ_ONCE(adapter->xdp_prog);
2512 
2513     while (likely(total_packets < budget)) {
2514         union igc_adv_rx_desc *desc;
2515         struct igc_rx_buffer *bi;
2516         ktime_t timestamp = 0;
2517         unsigned int size;
2518         int res;
2519 
2520         desc = IGC_RX_DESC(ring, ntc);
2521         size = le16_to_cpu(desc->wb.upper.length);
2522         if (!size)
2523             break;
2524 
2525         /* This memory barrier is needed to keep us from reading
2526          * any other fields out of the rx_desc until we know the
2527          * descriptor has been written back
2528          */
2529         dma_rmb();
2530 
2531         bi = &ring->rx_buffer_info[ntc];
2532 
2533         if (igc_test_staterr(desc, IGC_RXDADV_STAT_TSIP)) {
2534             timestamp = igc_ptp_rx_pktstamp(q_vector->adapter,
2535                             bi->xdp->data);
2536 
2537             bi->xdp->data += IGC_TS_HDR_LEN;
2538 
2539             /* HW timestamp has been copied into local variable. Metadata
2540              * length when XDP program is called should be 0.
2541              */
2542             bi->xdp->data_meta += IGC_TS_HDR_LEN;
2543             size -= IGC_TS_HDR_LEN;
2544         }
2545 
2546         bi->xdp->data_end = bi->xdp->data + size;
2547         xsk_buff_dma_sync_for_cpu(bi->xdp, ring->xsk_pool);
2548 
2549         res = __igc_xdp_run_prog(adapter, prog, bi->xdp);
2550         switch (res) {
2551         case IGC_XDP_PASS:
2552             igc_dispatch_skb_zc(q_vector, desc, bi->xdp, timestamp);
2553             fallthrough;
2554         case IGC_XDP_CONSUMED:
2555             xsk_buff_free(bi->xdp);
2556             break;
2557         case IGC_XDP_TX:
2558         case IGC_XDP_REDIRECT:
2559             xdp_status |= res;
2560             break;
2561         }
2562 
2563         bi->xdp = NULL;
2564         total_bytes += size;
2565         total_packets++;
2566         cleaned_count++;
2567         ntc++;
2568         if (ntc == ring->count)
2569             ntc = 0;
2570     }
2571 
2572     ring->next_to_clean = ntc;
2573     rcu_read_unlock();
2574 
2575     if (cleaned_count >= IGC_RX_BUFFER_WRITE)
2576         failure = !igc_alloc_rx_buffers_zc(ring, cleaned_count);
2577 
2578     if (xdp_status)
2579         igc_finalize_xdp(adapter, xdp_status);
2580 
2581     igc_update_rx_stats(q_vector, total_packets, total_bytes);
2582 
2583     if (xsk_uses_need_wakeup(ring->xsk_pool)) {
2584         if (failure || ring->next_to_clean == ring->next_to_use)
2585             xsk_set_rx_need_wakeup(ring->xsk_pool);
2586         else
2587             xsk_clear_rx_need_wakeup(ring->xsk_pool);
2588         return total_packets;
2589     }
2590 
2591     return failure ? budget : total_packets;
2592 }
2593 
2594 static void igc_update_tx_stats(struct igc_q_vector *q_vector,
2595                 unsigned int packets, unsigned int bytes)
2596 {
2597     struct igc_ring *ring = q_vector->tx.ring;
2598 
2599     u64_stats_update_begin(&ring->tx_syncp);
2600     ring->tx_stats.bytes += bytes;
2601     ring->tx_stats.packets += packets;
2602     u64_stats_update_end(&ring->tx_syncp);
2603 
2604     q_vector->tx.total_bytes += bytes;
2605     q_vector->tx.total_packets += packets;
2606 }
2607 
2608 static void igc_xdp_xmit_zc(struct igc_ring *ring)
2609 {
2610     struct xsk_buff_pool *pool = ring->xsk_pool;
2611     struct netdev_queue *nq = txring_txq(ring);
2612     union igc_adv_tx_desc *tx_desc = NULL;
2613     int cpu = smp_processor_id();
2614     u16 ntu = ring->next_to_use;
2615     struct xdp_desc xdp_desc;
2616     u16 budget;
2617 
2618     if (!netif_carrier_ok(ring->netdev))
2619         return;
2620 
2621     __netif_tx_lock(nq, cpu);
2622 
2623     budget = igc_desc_unused(ring);
2624 
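         /* pull descriptors off the XSK Tx ring until the pool runs dry or
          * we run out of free hardware descriptors
          */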
2625     while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) {
2626         u32 cmd_type, olinfo_status;
2627         struct igc_tx_buffer *bi;
2628         dma_addr_t dma;
2629 
2630         cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
2631                IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD |
2632                xdp_desc.len;
2633         olinfo_status = xdp_desc.len << IGC_ADVTXD_PAYLEN_SHIFT;
2634 
2635         dma = xsk_buff_raw_get_dma(pool, xdp_desc.addr);
2636         xsk_buff_raw_dma_sync_for_device(pool, dma, xdp_desc.len);
2637 
2638         tx_desc = IGC_TX_DESC(ring, ntu);
2639         tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
2640         tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
2641         tx_desc->read.buffer_addr = cpu_to_le64(dma);
2642 
2643         bi = &ring->tx_buffer_info[ntu];
2644         bi->type = IGC_TX_BUFFER_TYPE_XSK;
2645         bi->protocol = 0;
2646         bi->bytecount = xdp_desc.len;
2647         bi->gso_segs = 1;
2648         bi->time_stamp = jiffies;
2649         bi->next_to_watch = tx_desc;
2650 
2651         netdev_tx_sent_queue(txring_txq(ring), xdp_desc.len);
2652 
2653         ntu++;
2654         if (ntu == ring->count)
2655             ntu = 0;
2656     }
2657 
2658     ring->next_to_use = ntu;
2659     if (tx_desc) {
2660         igc_flush_tx_descriptors(ring);
2661         xsk_tx_release(pool);
2662     }
2663 
2664     __netif_tx_unlock(nq);
2665 }
2666 
2667 /**
2668  * igc_clean_tx_irq - Reclaim resources after transmit completes
2669  * @q_vector: pointer to q_vector containing needed info
2670  * @napi_budget: Used to determine if we are in netpoll
2671  *
2672  * returns true if ring is completely cleaned
2673  */
2674 static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
2675 {
2676     struct igc_adapter *adapter = q_vector->adapter;
2677     unsigned int total_bytes = 0, total_packets = 0;
2678     unsigned int budget = q_vector->tx.work_limit;
2679     struct igc_ring *tx_ring = q_vector->tx.ring;
2680     unsigned int i = tx_ring->next_to_clean;
2681     struct igc_tx_buffer *tx_buffer;
2682     union igc_adv_tx_desc *tx_desc;
2683     u32 xsk_frames = 0;
2684 
2685     if (test_bit(__IGC_DOWN, &adapter->state))
2686         return true;
2687 
2688     tx_buffer = &tx_ring->tx_buffer_info[i];
2689     tx_desc = IGC_TX_DESC(tx_ring, i);
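         /* as in the Rx path, bias the index negative so ring wraparound can
          * be detected with a simple !i test
          */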
2690     i -= tx_ring->count;
2691 
2692     do {
2693         union igc_adv_tx_desc *eop_desc = tx_buffer->next_to_watch;
2694 
2695         /* if next_to_watch is not set then there is no work pending */
2696         if (!eop_desc)
2697             break;
2698 
2699         /* prevent any other reads prior to eop_desc */
2700         smp_rmb();
2701 
2702         /* if DD is not set pending work has not been completed */
2703         if (!(eop_desc->wb.status & cpu_to_le32(IGC_TXD_STAT_DD)))
2704             break;
2705 
2706         /* clear next_to_watch to prevent false hangs */
2707         tx_buffer->next_to_watch = NULL;
2708 
2709         /* update the statistics for this packet */
2710         total_bytes += tx_buffer->bytecount;
2711         total_packets += tx_buffer->gso_segs;
2712 
2713         switch (tx_buffer->type) {
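             /* XSK buffers have no per-buffer DMA mapping to release here;
              * they are completed in one batch via xsk_tx_completed() below
              */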
2714         case IGC_TX_BUFFER_TYPE_XSK:
2715             xsk_frames++;
2716             break;
2717         case IGC_TX_BUFFER_TYPE_XDP:
2718             xdp_return_frame(tx_buffer->xdpf);
2719             igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
2720             break;
2721         case IGC_TX_BUFFER_TYPE_SKB:
2722             napi_consume_skb(tx_buffer->skb, napi_budget);
2723             igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
2724             break;
2725         default:
2726             netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n");
2727             break;
2728         }
2729 
2730         /* clear last DMA location and unmap remaining buffers */
2731         while (tx_desc != eop_desc) {
2732             tx_buffer++;
2733             tx_desc++;
2734             i++;
2735             if (unlikely(!i)) {
2736                 i -= tx_ring->count;
2737                 tx_buffer = tx_ring->tx_buffer_info;
2738                 tx_desc = IGC_TX_DESC(tx_ring, 0);
2739             }
2740 
2741             /* unmap any remaining paged data */
2742             if (dma_unmap_len(tx_buffer, len))
2743                 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
2744         }
2745 
2746         /* move us one more past the eop_desc for start of next pkt */
2747         tx_buffer++;
2748         tx_desc++;
2749         i++;
2750         if (unlikely(!i)) {
2751             i -= tx_ring->count;
2752             tx_buffer = tx_ring->tx_buffer_info;
2753             tx_desc = IGC_TX_DESC(tx_ring, 0);
2754         }
2755 
2756         /* issue prefetch for next Tx descriptor */
2757         prefetch(tx_desc);
2758 
2759         /* update budget accounting */
2760         budget--;
2761     } while (likely(budget));
2762 
2763     netdev_tx_completed_queue(txring_txq(tx_ring),
2764                   total_packets, total_bytes);
2765 
2766     i += tx_ring->count;
2767     tx_ring->next_to_clean = i;
2768 
2769     igc_update_tx_stats(q_vector, total_packets, total_bytes);
2770 
2771     if (tx_ring->xsk_pool) {
2772         if (xsk_frames)
2773             xsk_tx_completed(tx_ring->xsk_pool, xsk_frames);
2774         if (xsk_uses_need_wakeup(tx_ring->xsk_pool))
2775             xsk_set_tx_need_wakeup(tx_ring->xsk_pool);
2776         igc_xdp_xmit_zc(tx_ring);
2777     }
2778 
2779     if (test_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
2780         struct igc_hw *hw = &adapter->hw;
2781 
2782         /* Detect a transmit hang in hardware; this serializes the
2783          * check with the clearing of time_stamp and the movement of i
2784          */
2785         clear_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
2786         if (tx_buffer->next_to_watch &&
2787             time_after(jiffies, tx_buffer->time_stamp +
2788             (adapter->tx_timeout_factor * HZ)) &&
2789             !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF)) {
2790             /* detected Tx unit hang */
2791             netdev_err(tx_ring->netdev,
2792                    "Detected Tx Unit Hang\n"
2793                    "  Tx Queue             <%d>\n"
2794                    "  TDH                  <%x>\n"
2795                    "  TDT                  <%x>\n"
2796                    "  next_to_use          <%x>\n"
2797                    "  next_to_clean        <%x>\n"
2798                    "buffer_info[next_to_clean]\n"
2799                    "  time_stamp           <%lx>\n"
2800                    "  next_to_watch        <%p>\n"
2801                    "  jiffies              <%lx>\n"
2802                    "  desc.status          <%x>\n",
2803                    tx_ring->queue_index,
2804                    rd32(IGC_TDH(tx_ring->reg_idx)),
2805                    readl(tx_ring->tail),
2806                    tx_ring->next_to_use,
2807                    tx_ring->next_to_clean,
2808                    tx_buffer->time_stamp,
2809                    tx_buffer->next_to_watch,
2810                    jiffies,
2811                    tx_buffer->next_to_watch->wb.status);
2812             netif_stop_subqueue(tx_ring->netdev,
2813                         tx_ring->queue_index);
2814 
2815             /* we are about to reset, no point in enabling stuff */
2816             return true;
2817         }
2818     }
2819 
2820 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
2821     if (unlikely(total_packets &&
2822              netif_carrier_ok(tx_ring->netdev) &&
2823              igc_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) {
2824         /* Make sure that anybody stopping the queue after this
2825          * sees the new next_to_clean.
2826          */
2827         smp_mb();
2828         if (__netif_subqueue_stopped(tx_ring->netdev,
2829                          tx_ring->queue_index) &&
2830             !(test_bit(__IGC_DOWN, &adapter->state))) {
2831             netif_wake_subqueue(tx_ring->netdev,
2832                         tx_ring->queue_index);
2833 
2834             u64_stats_update_begin(&tx_ring->tx_syncp);
2835             tx_ring->tx_stats.restart_queue++;
2836             u64_stats_update_end(&tx_ring->tx_syncp);
2837         }
2838     }
2839 
2840     return !!budget;
2841 }
2842 
2843 static int igc_find_mac_filter(struct igc_adapter *adapter,
2844                    enum igc_mac_filter_type type, const u8 *addr)
2845 {
2846     struct igc_hw *hw = &adapter->hw;
2847     int max_entries = hw->mac.rar_entry_count;
2848     u32 ral, rah;
2849     int i;
2850 
2851     for (i = 0; i < max_entries; i++) {
2852         ral = rd32(IGC_RAL(i));
2853         rah = rd32(IGC_RAH(i));
2854 
2855         if (!(rah & IGC_RAH_AV))
2856             continue;
2857         if (!!(rah & IGC_RAH_ASEL_SRC_ADDR) != type)
2858             continue;
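             /* RAL holds the first four bytes of the address, the low 16
              * bits of RAH hold the remaining two
              */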
2859         if ((rah & IGC_RAH_RAH_MASK) !=
2860             le16_to_cpup((__le16 *)(addr + 4)))
2861             continue;
2862         if (ral != le32_to_cpup((__le32 *)(addr)))
2863             continue;
2864 
2865         return i;
2866     }
2867 
2868     return -1;
2869 }
2870 
2871 static int igc_get_avail_mac_filter_slot(struct igc_adapter *adapter)
2872 {
2873     struct igc_hw *hw = &adapter->hw;
2874     int max_entries = hw->mac.rar_entry_count;
2875     u32 rah;
2876     int i;
2877 
2878     for (i = 0; i < max_entries; i++) {
2879         rah = rd32(IGC_RAH(i));
2880 
2881         if (!(rah & IGC_RAH_AV))
2882             return i;
2883     }
2884 
2885     return -1;
2886 }
2887 
2888 /**
2889  * igc_add_mac_filter() - Add MAC address filter
2890  * @adapter: Pointer to adapter where the filter should be added
2891  * @type: MAC address filter type (source or destination)
2892  * @addr: MAC address
2893  * @queue: If non-negative, queue assignment feature is enabled and frames
2894  *         matching the filter are enqueued onto 'queue'. Otherwise, queue
2895  *         assignment is disabled.
2896  *
2897  * Return: 0 in case of success, negative errno code otherwise.
2898  */
2899 static int igc_add_mac_filter(struct igc_adapter *adapter,
2900                   enum igc_mac_filter_type type, const u8 *addr,
2901                   int queue)
2902 {
2903     struct net_device *dev = adapter->netdev;
2904     int index;
2905 
2906     index = igc_find_mac_filter(adapter, type, addr);
2907     if (index >= 0)
2908         goto update_filter;
2909 
2910     index = igc_get_avail_mac_filter_slot(adapter);
2911     if (index < 0)
2912         return -ENOSPC;
2913 
2914     netdev_dbg(dev, "Add MAC address filter: index %d type %s address %pM queue %d\n",
2915            index, type == IGC_MAC_FILTER_TYPE_DST ? "dst" : "src",
2916            addr, queue);
2917 
2918 update_filter:
2919     igc_set_mac_filter_hw(adapter, index, type, addr, queue);
2920     return 0;
2921 }
2922 
2923 /**
2924  * igc_del_mac_filter() - Delete MAC address filter
2925  * @adapter: Pointer to adapter where the filter should be deleted from
2926  * @type: MAC address filter type (source or destination)
2927  * @addr: MAC address
2928  */
2929 static void igc_del_mac_filter(struct igc_adapter *adapter,
2930                    enum igc_mac_filter_type type, const u8 *addr)
2931 {
2932     struct net_device *dev = adapter->netdev;
2933     int index;
2934 
2935     index = igc_find_mac_filter(adapter, type, addr);
2936     if (index < 0)
2937         return;
2938 
2939     if (index == 0) {
2940         /* If this is the default filter, we don't actually delete it.
2941          * We just reset to its default value i.e. disable queue
2942          * assignment.
2943          */
2944         netdev_dbg(dev, "Disable default MAC filter queue assignment\n");
2945 
2946         igc_set_mac_filter_hw(adapter, 0, type, addr, -1);
2947     } else {
2948         netdev_dbg(dev, "Delete MAC address filter: index %d type %s address %pM\n",
2949                index,
2950                type == IGC_MAC_FILTER_TYPE_DST ? "dst" : "src",
2951                addr);
2952 
2953         igc_clear_mac_filter_hw(adapter, index);
2954     }
2955 }
2956 
2957 /**
2958  * igc_add_vlan_prio_filter() - Add VLAN priority filter
2959  * @adapter: Pointer to adapter where the filter should be added
2960  * @prio: VLAN priority value
2961  * @queue: Queue number which matching frames are assigned to
2962  *
2963  * Return: 0 in case of success, negative errno code otherwise.
2964  */
2965 static int igc_add_vlan_prio_filter(struct igc_adapter *adapter, int prio,
2966                     int queue)
2967 {
2968     struct net_device *dev = adapter->netdev;
2969     struct igc_hw *hw = &adapter->hw;
2970     u32 vlanpqf;
2971 
2972     vlanpqf = rd32(IGC_VLANPQF);
2973 
2974     if (vlanpqf & IGC_VLANPQF_VALID(prio)) {
2975         netdev_dbg(dev, "VLAN priority filter already in use\n");
2976         return -EEXIST;
2977     }
2978 
2979     vlanpqf |= IGC_VLANPQF_QSEL(prio, queue);
2980     vlanpqf |= IGC_VLANPQF_VALID(prio);
2981 
2982     wr32(IGC_VLANPQF, vlanpqf);
2983 
2984     netdev_dbg(dev, "Add VLAN priority filter: prio %d queue %d\n",
2985            prio, queue);
2986     return 0;
2987 }
2988 
2989 /**
2990  * igc_del_vlan_prio_filter() - Delete VLAN priority filter
2991  * @adapter: Pointer to adapter where the filter should be deleted from
2992  * @prio: VLAN priority value
2993  */
2994 static void igc_del_vlan_prio_filter(struct igc_adapter *adapter, int prio)
2995 {
2996     struct igc_hw *hw = &adapter->hw;
2997     u32 vlanpqf;
2998 
2999     vlanpqf = rd32(IGC_VLANPQF);
3000 
3001     vlanpqf &= ~IGC_VLANPQF_VALID(prio);
3002     vlanpqf &= ~IGC_VLANPQF_QSEL(prio, IGC_VLANPQF_QUEUE_MASK);
3003 
3004     wr32(IGC_VLANPQF, vlanpqf);
3005 
3006     netdev_dbg(adapter->netdev, "Delete VLAN priority filter: prio %d\n",
3007            prio);
3008 }
3009 
3010 static int igc_get_avail_etype_filter_slot(struct igc_adapter *adapter)
3011 {
3012     struct igc_hw *hw = &adapter->hw;
3013     int i;
3014 
3015     for (i = 0; i < MAX_ETYPE_FILTER; i++) {
3016         u32 etqf = rd32(IGC_ETQF(i));
3017 
3018         if (!(etqf & IGC_ETQF_FILTER_ENABLE))
3019             return i;
3020     }
3021 
3022     return -1;
3023 }
3024 
3025 /**
3026  * igc_add_etype_filter() - Add ethertype filter
3027  * @adapter: Pointer to adapter where the filter should be added
3028  * @etype: Ethertype value
3029  * @queue: If non-negative, queue assignment feature is enabled and frames
3030  *         matching the filter are enqueued onto 'queue'. Otherwise, queue
3031  *         assignment is disabled.
3032  *
3033  * Return: 0 in case of success, negative errno code otherwise.
3034  */
3035 static int igc_add_etype_filter(struct igc_adapter *adapter, u16 etype,
3036                 int queue)
3037 {
3038     struct igc_hw *hw = &adapter->hw;
3039     int index;
3040     u32 etqf;
3041 
3042     index = igc_get_avail_etype_filter_slot(adapter);
3043     if (index < 0)
3044         return -ENOSPC;
3045 
3046     etqf = rd32(IGC_ETQF(index));
3047 
3048     etqf &= ~IGC_ETQF_ETYPE_MASK;
3049     etqf |= etype;
3050 
3051     if (queue >= 0) {
3052         etqf &= ~IGC_ETQF_QUEUE_MASK;
3053         etqf |= (queue << IGC_ETQF_QUEUE_SHIFT);
3054         etqf |= IGC_ETQF_QUEUE_ENABLE;
3055     }
3056 
3057     etqf |= IGC_ETQF_FILTER_ENABLE;
3058 
3059     wr32(IGC_ETQF(index), etqf);
3060 
3061     netdev_dbg(adapter->netdev, "Add ethertype filter: etype %04x queue %d\n",
3062            etype, queue);
3063     return 0;
3064 }
3065 
3066 static int igc_find_etype_filter(struct igc_adapter *adapter, u16 etype)
3067 {
3068     struct igc_hw *hw = &adapter->hw;
3069     int i;
3070 
3071     for (i = 0; i < MAX_ETYPE_FILTER; i++) {
3072         u32 etqf = rd32(IGC_ETQF(i));
3073 
3074         if ((etqf & IGC_ETQF_ETYPE_MASK) == etype)
3075             return i;
3076     }
3077 
3078     return -1;
3079 }
3080 
3081 /**
3082  * igc_del_etype_filter() - Delete ethertype filter
3083  * @adapter: Pointer to adapter where the filter should be deleted from
3084  * @etype: Ethertype value
3085  */
3086 static void igc_del_etype_filter(struct igc_adapter *adapter, u16 etype)
3087 {
3088     struct igc_hw *hw = &adapter->hw;
3089     int index;
3090 
3091     index = igc_find_etype_filter(adapter, etype);
3092     if (index < 0)
3093         return;
3094 
3095     wr32(IGC_ETQF(index), 0);
3096 
3097     netdev_dbg(adapter->netdev, "Delete ethertype filter: etype %04x\n",
3098            etype);
3099 }
3100 
3101 static int igc_flex_filter_select(struct igc_adapter *adapter,
3102                   struct igc_flex_filter *input,
3103                   u32 *fhft)
3104 {
3105     struct igc_hw *hw = &adapter->hw;
3106     u8 fhft_index;
3107     u32 fhftsl;
3108 
3109     if (input->index >= MAX_FLEX_FILTER) {
3110         dev_err(&adapter->pdev->dev, "Wrong Flex Filter index selected!\n");
3111         return -EINVAL;
3112     }
3113 
3114     /* Indirect table select register */
3115     fhftsl = rd32(IGC_FHFTSL);
3116     fhftsl &= ~IGC_FHFTSL_FTSL_MASK;
3117     switch (input->index) {
3118     case 0 ... 7:
3119         fhftsl |= 0x00;
3120         break;
3121     case 8 ... 15:
3122         fhftsl |= 0x01;
3123         break;
3124     case 16 ... 23:
3125         fhftsl |= 0x02;
3126         break;
3127     case 24 ... 31:
3128         fhftsl |= 0x03;
3129         break;
3130     }
3131     wr32(IGC_FHFTSL, fhftsl);
3132 
3133     /* Normalize index down to host table register */
3134     fhft_index = input->index % 8;
3135 
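         /* within the selected bank, filters 0-3 are reached through FHFT
          * and filters 4-7 through FHFT_EXT
          */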
3136     *fhft = (fhft_index < 4) ? IGC_FHFT(fhft_index) :
3137         IGC_FHFT_EXT(fhft_index - 4);
3138 
3139     return 0;
3140 }
3141 
3142 static int igc_write_flex_filter_ll(struct igc_adapter *adapter,
3143                     struct igc_flex_filter *input)
3144 {
3145     struct device *dev = &adapter->pdev->dev;
3146     struct igc_hw *hw = &adapter->hw;
3147     u8 *data = input->data;
3148     u8 *mask = input->mask;
3149     u32 queuing;
3150     u32 fhft;
3151     u32 wufc;
3152     int ret;
3153     int i;
3154 
3155     /* The length has to be a multiple of 8, otherwise the filter will
3156      * fail. Bail out early to avoid surprises later.
3157      */
3158     if (input->length % 8 != 0) {
3159         dev_err(dev, "The length of a flex filter has to be 8 byte aligned!\n");
3160         return -EINVAL;
3161     }
3162 
3163     /* Select corresponding flex filter register and get base for host table. */
3164     ret = igc_flex_filter_select(adapter, input, &fhft);
3165     if (ret)
3166         return ret;
3167 
3168     /* Globally disable the flex filter feature while adding a filter, as
3169      * recommended by the datasheet.
3170      */
3171     wufc = rd32(IGC_WUFC);
3172     wufc &= ~IGC_WUFC_FLEX_HQ;
3173     wr32(IGC_WUFC, wufc);
3174 
3175     /* Configure filter */
3176     queuing = input->length & IGC_FHFT_LENGTH_MASK;
3177     queuing |= (input->rx_queue << IGC_FHFT_QUEUE_SHIFT) & IGC_FHFT_QUEUE_MASK;
3178     queuing |= (input->prio << IGC_FHFT_PRIO_SHIFT) & IGC_FHFT_PRIO_MASK;
3179 
3180     if (input->immediate_irq)
3181         queuing |= IGC_FHFT_IMM_INT;
3182 
3183     if (input->drop)
3184         queuing |= IGC_FHFT_DROP;
3185 
3186     wr32(fhft + 0xFC, queuing);
3187 
3188     /* Write data (128 bytes) and mask (128 bits), one 16-byte register row at a time */
3189     for (i = 0; i < 16; ++i) {
3190         const size_t data_idx = i * 8;
3191         const size_t row_idx = i * 16;
3192         u32 dw0 =
3193             (data[data_idx + 0] << 0) |
3194             (data[data_idx + 1] << 8) |
3195             (data[data_idx + 2] << 16) |
3196             (data[data_idx + 3] << 24);
3197         u32 dw1 =
3198             (data[data_idx + 4] << 0) |
3199             (data[data_idx + 5] << 8) |
3200             (data[data_idx + 6] << 16) |
3201             (data[data_idx + 7] << 24);
3202         u32 tmp;
3203 
3204         /* Write row: dw0, dw1 and mask */
3205         wr32(fhft + row_idx, dw0);
3206         wr32(fhft + row_idx + 4, dw1);
3207 
3208         /* mask is only valid for MASK(7, 0) */
3209         tmp = rd32(fhft + row_idx + 8);
3210         tmp &= ~GENMASK(7, 0);
3211         tmp |= mask[i];
3212         wr32(fhft + row_idx + 8, tmp);
3213     }
3214 
3215     /* Enable filter. */
3216     wufc |= IGC_WUFC_FLEX_HQ;
3217     if (input->index >= 8) {
3218         /* Filters 0-7 are enabled via WUFC; filters 8-31 via WUFC_EXT. */
3219         u32 wufc_ext = rd32(IGC_WUFC_EXT);
3220 
3221         wufc_ext |= (IGC_WUFC_EXT_FLX8 << (input->index - 8));
3222 
3223         wr32(IGC_WUFC_EXT, wufc_ext);
3224     } else {
3225         wufc |= (IGC_WUFC_FLX0 << input->index);
3226     }
3227     wr32(IGC_WUFC, wufc);
3228 
3229     dev_dbg(&adapter->pdev->dev, "Added flex filter %u to HW.\n",
3230         input->index);
3231 
3232     return 0;
3233 }
3234 
3235 static void igc_flex_filter_add_field(struct igc_flex_filter *flex,
3236                       const void *src, unsigned int offset,
3237                       size_t len, const void *mask)
3238 {
3239     int i;
3240 
3241     /* data */
3242     memcpy(&flex->data[offset], src, len);
3243 
3244     /* mask: one bit per pattern byte, set for every byte that must match */
3245     for (i = 0; i < len; ++i) {
3246         const unsigned int idx = i + offset;
3247         const u8 *ptr = mask;
3248 
3249         if (mask) {
3250             if (ptr[i] & 0xff)
3251                 flex->mask[idx / 8] |= BIT(idx % 8);
3252 
3253             continue;
3254         }
3255 
3256         flex->mask[idx / 8] |= BIT(idx % 8);
3257     }
3258 }
3259 
3260 static int igc_find_avail_flex_filter_slot(struct igc_adapter *adapter)
3261 {
3262     struct igc_hw *hw = &adapter->hw;
3263     u32 wufc, wufc_ext;
3264     int i;
3265 
3266     wufc = rd32(IGC_WUFC);
3267     wufc_ext = rd32(IGC_WUFC_EXT);
3268 
3269     for (i = 0; i < MAX_FLEX_FILTER; i++) {
3270         if (i < 8) {
3271             if (!(wufc & (IGC_WUFC_FLX0 << i)))
3272                 return i;
3273         } else {
3274             if (!(wufc_ext & (IGC_WUFC_EXT_FLX8 << (i - 8))))
3275                 return i;
3276         }
3277     }
3278 
3279     return -ENOSPC;
3280 }
3281 
3282 static bool igc_flex_filter_in_use(struct igc_adapter *adapter)
3283 {
3284     struct igc_hw *hw = &adapter->hw;
3285     u32 wufc, wufc_ext;
3286 
3287     wufc = rd32(IGC_WUFC);
3288     wufc_ext = rd32(IGC_WUFC_EXT);
3289 
3290     if (wufc & IGC_WUFC_FILTER_MASK)
3291         return true;
3292 
3293     if (wufc_ext & IGC_WUFC_EXT_FILTER_MASK)
3294         return true;
3295 
3296     return false;
3297 }
3298 
3299 static int igc_add_flex_filter(struct igc_adapter *adapter,
3300                    struct igc_nfc_rule *rule)
3301 {
3302     struct igc_flex_filter flex = { };
3303     struct igc_nfc_filter *filter = &rule->filter;
3304     unsigned int eth_offset, user_offset;
3305     int ret, index;
3306     bool vlan;
3307 
3308     index = igc_find_avail_flex_filter_slot(adapter);
3309     if (index < 0)
3310         return -ENOSPC;
3311 
3312     /* Construct the flex filter:
3313      *  -> dest_mac [6]
3314      *  -> src_mac [6]
3315      *  -> tpid [2]
3316      *  -> vlan tci [2]
3317      *  -> ether type [2]
3318      *  -> user data [8]
3319      *  -> = 26 bytes total => rounded up to 32 byte filter length
3320      */
3321     flex.index    = index;
3322     flex.length   = 32;
3323     flex.rx_queue = rule->action;
3324 
3325     vlan = rule->filter.vlan_tci || rule->filter.vlan_etype;
3326     eth_offset = vlan ? 16 : 12;
3327     user_offset = vlan ? 18 : 14;
3328 
3329     /* Add destination MAC  */
3330     if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR)
3331         igc_flex_filter_add_field(&flex, &filter->dst_addr, 0,
3332                       ETH_ALEN, NULL);
3333 
3334     /* Add source MAC */
3335     if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR)
3336         igc_flex_filter_add_field(&flex, &filter->src_addr, 6,
3337                       ETH_ALEN, NULL);
3338 
3339     /* Add VLAN etype */
3340     if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_ETYPE)
3341         igc_flex_filter_add_field(&flex, &filter->vlan_etype, 12,
3342                       sizeof(filter->vlan_etype),
3343                       NULL);
3344 
3345     /* Add VLAN TCI */
3346     if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI)
3347         igc_flex_filter_add_field(&flex, &filter->vlan_tci, 14,
3348                       sizeof(filter->vlan_tci), NULL);
3349 
3350     /* Add Ether type */
3351     if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
3352         __be16 etype = cpu_to_be16(filter->etype);
3353 
3354         igc_flex_filter_add_field(&flex, &etype, eth_offset,
3355                       sizeof(etype), NULL);
3356     }
3357 
3358     /* Add user data */
3359     if (rule->filter.match_flags & IGC_FILTER_FLAG_USER_DATA)
3360         igc_flex_filter_add_field(&flex, &filter->user_data,
3361                       user_offset,
3362                       sizeof(filter->user_data),
3363                       filter->user_mask);
3364 
3365     /* Add it down to the hardware and enable it. */
3366     ret = igc_write_flex_filter_ll(adapter, &flex);
3367     if (ret)
3368         return ret;
3369 
3370     filter->flex_index = index;
3371 
3372     return 0;
3373 }
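
/* Offset layout sketch for the filter built above (illustration only).
 * With a VLAN match the fields land at:
 *
 *	0: dest_mac[6]  6: src_mac[6]  12: tpid  14: vlan_tci
 *	16: ether_type  18: user_data[8]
 *
 * Without a VLAN match, ether_type moves down to offset 12 and user_data
 * to offset 14, which is what the eth_offset/user_offset selection above
 * encodes.
 */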
3374 
3375 static void igc_del_flex_filter(struct igc_adapter *adapter,
3376                 u16 reg_index)
3377 {
3378     struct igc_hw *hw = &adapter->hw;
3379     u32 wufc;
3380 
3381     /* Just disable the filter. The filter table itself is kept
3382      * intact. A subsequent igc_add_flex_filter() will simply overwrite
3383      * the stale data.
3384      */
3385     if (reg_index >= 8) {
3386         u32 wufc_ext = rd32(IGC_WUFC_EXT);
3387 
3388         wufc_ext &= ~(IGC_WUFC_EXT_FLX8 << (reg_index - 8));
3389         wr32(IGC_WUFC_EXT, wufc_ext);
3390     } else {
3391         wufc = rd32(IGC_WUFC);
3392 
3393         wufc &= ~(IGC_WUFC_FLX0 << reg_index);
3394         wr32(IGC_WUFC, wufc);
3395     }
3396 
3397     if (igc_flex_filter_in_use(adapter))
3398         return;
3399 
3400     /* No filters are in use, we may disable flex filters */
3401     wufc = rd32(IGC_WUFC);
3402     wufc &= ~IGC_WUFC_FLEX_HQ;
3403     wr32(IGC_WUFC, wufc);
3404 }
3405 
3406 static int igc_enable_nfc_rule(struct igc_adapter *adapter,
3407                    struct igc_nfc_rule *rule)
3408 {
3409     int err;
3410 
3411     if (rule->flex)
3412         return igc_add_flex_filter(adapter, rule);
3414 
3415     if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
3416         err = igc_add_etype_filter(adapter, rule->filter.etype,
3417                        rule->action);
3418         if (err)
3419             return err;
3420     }
3421 
3422     if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) {
3423         err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC,
3424                      rule->filter.src_addr, rule->action);
3425         if (err)
3426             return err;
3427     }
3428 
3429     if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) {
3430         err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST,
3431                      rule->filter.dst_addr, rule->action);
3432         if (err)
3433             return err;
3434     }
3435 
3436     if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
3437         int prio = (rule->filter.vlan_tci & VLAN_PRIO_MASK) >>
3438                VLAN_PRIO_SHIFT;
3439 
3440         err = igc_add_vlan_prio_filter(adapter, prio, rule->action);
3441         if (err)
3442             return err;
3443     }
3444 
3445     return 0;
3446 }
3447 
3448 static void igc_disable_nfc_rule(struct igc_adapter *adapter,
3449                  const struct igc_nfc_rule *rule)
3450 {
3451     if (rule->flex) {
3452         igc_del_flex_filter(adapter, rule->filter.flex_index);
3453         return;
3454     }
3455 
3456     if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE)
3457         igc_del_etype_filter(adapter, rule->filter.etype);
3458 
3459     if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
3460         int prio = (rule->filter.vlan_tci & VLAN_PRIO_MASK) >>
3461                VLAN_PRIO_SHIFT;
3462 
3463         igc_del_vlan_prio_filter(adapter, prio);
3464     }
3465 
3466     if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR)
3467         igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC,
3468                    rule->filter.src_addr);
3469 
3470     if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR)
3471         igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST,
3472                    rule->filter.dst_addr);
3473 }
3474 
3475 /**
3476  * igc_get_nfc_rule() - Get NFC rule
3477  * @adapter: Pointer to adapter
3478  * @location: Rule location
3479  *
3480  * Context: Expects adapter->nfc_rule_lock to be held by caller.
3481  *
3482  * Return: Pointer to NFC rule at @location. If not found, NULL.
3483  */
3484 struct igc_nfc_rule *igc_get_nfc_rule(struct igc_adapter *adapter,
3485                       u32 location)
3486 {
3487     struct igc_nfc_rule *rule;
3488 
3489     list_for_each_entry(rule, &adapter->nfc_rule_list, list) {
3490         if (rule->location == location)
3491             return rule;
3492         if (rule->location > location)
3493             break;
3494     }
3495 
3496     return NULL;
3497 }
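
/* Usage sketch (illustration only; "location" is a caller-chosen rule ID):
 *
 *	mutex_lock(&adapter->nfc_rule_lock);
 *	rule = igc_get_nfc_rule(adapter, location);
 *	if (rule)
 *		queue = rule->action;
 *	mutex_unlock(&adapter->nfc_rule_lock);
 *
 * The early break relies on nfc_rule_list being kept sorted by location,
 * which igc_add_nfc_rule() below guarantees.
 */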
3498 
3499 /**
3500  * igc_del_nfc_rule() - Delete NFC rule
3501  * @adapter: Pointer to adapter
3502  * @rule: Pointer to rule to be deleted
3503  *
3504  * Disable NFC rule in hardware and delete it from adapter.
3505  *
3506  * Context: Expects adapter->nfc_rule_lock to be held by caller.
3507  */
3508 void igc_del_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule)
3509 {
3510     igc_disable_nfc_rule(adapter, rule);
3511 
3512     list_del(&rule->list);
3513     adapter->nfc_rule_count--;
3514 
3515     kfree(rule);
3516 }
3517 
3518 static void igc_flush_nfc_rules(struct igc_adapter *adapter)
3519 {
3520     struct igc_nfc_rule *rule, *tmp;
3521 
3522     mutex_lock(&adapter->nfc_rule_lock);
3523 
3524     list_for_each_entry_safe(rule, tmp, &adapter->nfc_rule_list, list)
3525         igc_del_nfc_rule(adapter, rule);
3526 
3527     mutex_unlock(&adapter->nfc_rule_lock);
3528 }
3529 
3530 /**
3531  * igc_add_nfc_rule() - Add NFC rule
3532  * @adapter: Pointer to adapter
3533  * @rule: Pointer to rule to be added
3534  *
3535  * Enable NFC rule in hardware and add it to adapter.
3536  *
3537  * Context: Expects adapter->nfc_rule_lock to be held by caller.
3538  *
3539  * Return: 0 on success, negative errno on failure.
3540  */
3541 int igc_add_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule)
3542 {
3543     struct igc_nfc_rule *pred, *cur;
3544     int err;
3545 
3546     err = igc_enable_nfc_rule(adapter, rule);
3547     if (err)
3548         return err;
3549 
3550     pred = NULL;
3551     list_for_each_entry(cur, &adapter->nfc_rule_list, list) {
3552         if (cur->location >= rule->location)
3553             break;
3554         pred = cur;
3555     }
3556 
3557     list_add(&rule->list, pred ? &pred->list : &adapter->nfc_rule_list);
3558     adapter->nfc_rule_count++;
3559     return 0;
3560 }
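
/* Insertion sketch (illustration only): with existing rules at locations
 * {2, 7}, adding a rule at location 5 stops the scan at 7, leaves pred
 * pointing at rule 2 and list_add() places the new rule between them:
 *
 *	2 -> 5 -> 7
 *
 * so the list stays sorted for igc_get_nfc_rule() above.
 */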
3561 
3562 static void igc_restore_nfc_rules(struct igc_adapter *adapter)
3563 {
3564     struct igc_nfc_rule *rule;
3565 
3566     mutex_lock(&adapter->nfc_rule_lock);
3567 
3568     list_for_each_entry_reverse(rule, &adapter->nfc_rule_list, list)
3569         igc_enable_nfc_rule(adapter, rule);
3570 
3571     mutex_unlock(&adapter->nfc_rule_lock);
3572 }
3573 
3574 static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr)
3575 {
3576     struct igc_adapter *adapter = netdev_priv(netdev);
3577 
3578     return igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr, -1);
3579 }
3580 
3581 static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr)
3582 {
3583     struct igc_adapter *adapter = netdev_priv(netdev);
3584 
3585     igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr);
3586     return 0;
3587 }
3588 
3589 /**
3590  * igc_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3591  * @netdev: network interface device structure
3592  *
3593  * The set_rx_mode entry point is called whenever the unicast or multicast
3594  * address lists or the network interface flags are updated.  This routine is
3595  * responsible for configuring the hardware for proper unicast, multicast,
3596  * promiscuous mode, and all-multi behavior.
3597  */
3598 static void igc_set_rx_mode(struct net_device *netdev)
3599 {
3600     struct igc_adapter *adapter = netdev_priv(netdev);
3601     struct igc_hw *hw = &adapter->hw;
3602     u32 rctl = 0, rlpml = MAX_JUMBO_FRAME_SIZE;
3603     int count;
3604 
3605     /* Check for Promiscuous and All Multicast modes */
3606     if (netdev->flags & IFF_PROMISC) {
3607         rctl |= IGC_RCTL_UPE | IGC_RCTL_MPE;
3608     } else {
3609         if (netdev->flags & IFF_ALLMULTI) {
3610             rctl |= IGC_RCTL_MPE;
3611         } else {
3612             /* Write addresses to the MTA, if the attempt fails
3613              * then we should just turn on promiscuous mode so
3614              * that we can at least receive multicast traffic
3615              */
3616             count = igc_write_mc_addr_list(netdev);
3617             if (count < 0)
3618                 rctl |= IGC_RCTL_MPE;
3619         }
3620     }
3621 
3622     /* Write addresses to available RAR registers, if there is not
3623      * sufficient space to store all the addresses then enable
3624      * unicast promiscuous mode
3625      */
3626     if (__dev_uc_sync(netdev, igc_uc_sync, igc_uc_unsync))
3627         rctl |= IGC_RCTL_UPE;
3628 
3629     /* update state of unicast and multicast */
3630     rctl |= rd32(IGC_RCTL) & ~(IGC_RCTL_UPE | IGC_RCTL_MPE);
3631     wr32(IGC_RCTL, rctl);
3632 
3633 #if (PAGE_SIZE < 8192)
3634     if (adapter->max_frame_size <= IGC_MAX_FRAME_BUILD_SKB)
3635         rlpml = IGC_MAX_FRAME_BUILD_SKB;
3636 #endif
3637     wr32(IGC_RLPML, rlpml);
3638 }
3639 
3640 /**
3641  * igc_configure - configure the hardware for RX and TX
3642  * @adapter: private board structure
3643  */
3644 static void igc_configure(struct igc_adapter *adapter)
3645 {
3646     struct net_device *netdev = adapter->netdev;
3647     int i = 0;
3648 
3649     igc_get_hw_control(adapter);
3650     igc_set_rx_mode(netdev);
3651 
3652     igc_restore_vlan(adapter);
3653 
3654     igc_setup_tctl(adapter);
3655     igc_setup_mrqc(adapter);
3656     igc_setup_rctl(adapter);
3657 
3658     igc_set_default_mac_filter(adapter);
3659     igc_restore_nfc_rules(adapter);
3660 
3661     igc_configure_tx(adapter);
3662     igc_configure_rx(adapter);
3663 
3664     igc_rx_fifo_flush_base(&adapter->hw);
3665 
3666     /* call igc_desc_unused which always leaves
3667      * at least 1 descriptor unused to make sure
3668      * next_to_use != next_to_clean
3669      */
3670     for (i = 0; i < adapter->num_rx_queues; i++) {
3671         struct igc_ring *ring = adapter->rx_ring[i];
3672 
3673         if (ring->xsk_pool)
3674             igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring));
3675         else
3676             igc_alloc_rx_buffers(ring, igc_desc_unused(ring));
3677     }
3678 }
3679 
3680 /**
3681  * igc_write_ivar - configure ivar for given MSI-X vector
3682  * @hw: pointer to the HW structure
3683  * @msix_vector: vector number we are allocating to a given ring
3684  * @index: row index of IVAR register to write within IVAR table
3685  * @offset: column offset in IVAR, should be a multiple of 8
3686  *
3687  * The IVAR table consists of 2 columns,
3688  * each containing a cause allocation for an Rx and Tx ring, and a
3689  * variable number of rows depending on the number of queues supported.
3690  */
3691 static void igc_write_ivar(struct igc_hw *hw, int msix_vector,
3692                int index, int offset)
3693 {
3694     u32 ivar = array_rd32(IGC_IVAR0, index);
3695 
3696     /* clear any bits that are currently set */
3697     ivar &= ~((u32)0xFF << offset);
3698 
3699     /* write vector and valid bit */
3700     ivar |= (msix_vector | IGC_IVAR_VALID) << offset;
3701 
3702     array_wr32(IGC_IVAR0, index, ivar);
3703 }
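
/* Worked example (illustration only): for Rx queue 3, igc_assign_vector()
 * below passes index = 3 >> 1 = 1 and offset = (3 & 1) << 4 = 16, so the
 * vector/valid byte lands in bits 23:16 of IVAR0[1]; Tx queue 3 adds 8 to
 * the offset and lands in bits 31:24 of the same register entry.
 */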
3704 
3705 static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector)
3706 {
3707     struct igc_adapter *adapter = q_vector->adapter;
3708     struct igc_hw *hw = &adapter->hw;
3709     int rx_queue = IGC_N0_QUEUE;
3710     int tx_queue = IGC_N0_QUEUE;
3711 
3712     if (q_vector->rx.ring)
3713         rx_queue = q_vector->rx.ring->reg_idx;
3714     if (q_vector->tx.ring)
3715         tx_queue = q_vector->tx.ring->reg_idx;
3716 
3717     switch (hw->mac.type) {
3718     case igc_i225:
3719         if (rx_queue > IGC_N0_QUEUE)
3720             igc_write_ivar(hw, msix_vector,
3721                        rx_queue >> 1,
3722                        (rx_queue & 0x1) << 4);
3723         if (tx_queue > IGC_N0_QUEUE)
3724             igc_write_ivar(hw, msix_vector,
3725                        tx_queue >> 1,
3726                        ((tx_queue & 0x1) << 4) + 8);
3727         q_vector->eims_value = BIT(msix_vector);
3728         break;
3729     default:
3730         WARN_ONCE(hw->mac.type != igc_i225, "Wrong MAC type\n");
3731         break;
3732     }
3733 
3734     /* add q_vector eims value to global eims_enable_mask */
3735     adapter->eims_enable_mask |= q_vector->eims_value;
3736 
3737     /* configure q_vector to set itr on first interrupt */
3738     q_vector->set_itr = 1;
3739 }
3740 
3741 /**
3742  * igc_configure_msix - Configure MSI-X hardware
3743  * @adapter: Pointer to adapter structure
3744  *
3745  * igc_configure_msix sets up the hardware to properly
3746  * generate MSI-X interrupts.
3747  */
3748 static void igc_configure_msix(struct igc_adapter *adapter)
3749 {
3750     struct igc_hw *hw = &adapter->hw;
3751     int i, vector = 0;
3752     u32 tmp;
3753 
3754     adapter->eims_enable_mask = 0;
3755 
3756     /* set vector for other causes, i.e. link changes */
3757     switch (hw->mac.type) {
3758     case igc_i225:
3759         /* Turn on MSI-X capability first, or our settings
3760          * won't stick.  And it will take days to debug.
3761          */
3762         wr32(IGC_GPIE, IGC_GPIE_MSIX_MODE |
3763              IGC_GPIE_PBA | IGC_GPIE_EIAME |
3764              IGC_GPIE_NSICR);
3765 
3766         /* enable msix_other interrupt */
3767         adapter->eims_other = BIT(vector);
3768         tmp = (vector++ | IGC_IVAR_VALID) << 8;
3769 
3770         wr32(IGC_IVAR_MISC, tmp);
3771         break;
3772     default:
3773         /* do nothing, since nothing else supports MSI-X */
3774         break;
3775     } /* switch (hw->mac.type) */
3776 
3777     adapter->eims_enable_mask |= adapter->eims_other;
3778 
3779     for (i = 0; i < adapter->num_q_vectors; i++)
3780         igc_assign_vector(adapter->q_vector[i], vector++);
3781 
3782     wrfl();
3783 }
3784 
3785 /**
3786  * igc_irq_enable - Enable default interrupt generation settings
3787  * @adapter: board private structure
3788  */
3789 static void igc_irq_enable(struct igc_adapter *adapter)
3790 {
3791     struct igc_hw *hw = &adapter->hw;
3792 
3793     if (adapter->msix_entries) {
3794         u32 ims = IGC_IMS_LSC | IGC_IMS_DOUTSYNC | IGC_IMS_DRSTA;
3795         u32 regval = rd32(IGC_EIAC);
3796 
3797         wr32(IGC_EIAC, regval | adapter->eims_enable_mask);
3798         regval = rd32(IGC_EIAM);
3799         wr32(IGC_EIAM, regval | adapter->eims_enable_mask);
3800         wr32(IGC_EIMS, adapter->eims_enable_mask);
3801         wr32(IGC_IMS, ims);
3802     } else {
3803         wr32(IGC_IMS, IMS_ENABLE_MASK | IGC_IMS_DRSTA);
3804         wr32(IGC_IAM, IMS_ENABLE_MASK | IGC_IMS_DRSTA);
3805     }
3806 }
3807 
3808 /**
3809  * igc_irq_disable - Mask off interrupt generation on the NIC
3810  * @adapter: board private structure
3811  */
3812 static void igc_irq_disable(struct igc_adapter *adapter)
3813 {
3814     struct igc_hw *hw = &adapter->hw;
3815 
3816     if (adapter->msix_entries) {
3817         u32 regval = rd32(IGC_EIAM);
3818 
3819         wr32(IGC_EIAM, regval & ~adapter->eims_enable_mask);
3820         wr32(IGC_EIMC, adapter->eims_enable_mask);
3821         regval = rd32(IGC_EIAC);
3822         wr32(IGC_EIAC, regval & ~adapter->eims_enable_mask);
3823     }
3824 
3825     wr32(IGC_IAM, 0);
3826     wr32(IGC_IMC, ~0);
3827     wrfl();
3828 
3829     if (adapter->msix_entries) {
3830         int vector = 0, i;
3831 
3832         synchronize_irq(adapter->msix_entries[vector++].vector);
3833 
3834         for (i = 0; i < adapter->num_q_vectors; i++)
3835             synchronize_irq(adapter->msix_entries[vector++].vector);
3836     } else {
3837         synchronize_irq(adapter->pdev->irq);
3838     }
3839 }
3840 
3841 void igc_set_flag_queue_pairs(struct igc_adapter *adapter,
3842                   const u32 max_rss_queues)
3843 {
3844     /* Determine if we need to pair queues. */
3845     /* If rss_queues > half of max_rss_queues, pair the queues in
3846      * order to conserve interrupts due to limited supply.
3847      */
3848     if (adapter->rss_queues > (max_rss_queues / 2))
3849         adapter->flags |= IGC_FLAG_QUEUE_PAIRS;
3850     else
3851         adapter->flags &= ~IGC_FLAG_QUEUE_PAIRS;
3852 }
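
/* Example (illustration only): igc_get_max_rss_queues() below returns
 * IGC_MAX_RX_QUEUES (4 on i225), so rss_queues of 3 or 4 sets
 * IGC_FLAG_QUEUE_PAIRS and each Tx/Rx pair shares one interrupt vector,
 * while rss_queues of 1 or 2 leaves every ring on its own vector.
 */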
3853 
3854 unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter)
3855 {
3856     return IGC_MAX_RX_QUEUES;
3857 }
3858 
3859 static void igc_init_queue_configuration(struct igc_adapter *adapter)
3860 {
3861     u32 max_rss_queues;
3862 
3863     max_rss_queues = igc_get_max_rss_queues(adapter);
3864     adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus());
3865 
3866     igc_set_flag_queue_pairs(adapter, max_rss_queues);
3867 }
3868 
3869 /**
3870  * igc_reset_q_vector - Reset config for interrupt vector
3871  * @adapter: board private structure to initialize
3872  * @v_idx: Index of vector to be reset
3873  *
3874  * If NAPI is enabled it will delete any references to the
3875  * NAPI struct. This is preparation for igc_free_q_vector.
3876  */
3877 static void igc_reset_q_vector(struct igc_adapter *adapter, int v_idx)
3878 {
3879     struct igc_q_vector *q_vector = adapter->q_vector[v_idx];
3880 
3881     /* if we're coming from igc_set_interrupt_capability, the vectors are
3882      * not yet allocated
3883      */
3884     if (!q_vector)
3885         return;
3886 
3887     if (q_vector->tx.ring)
3888         adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL;
3889 
3890     if (q_vector->rx.ring)
3891         adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL;
3892 
3893     netif_napi_del(&q_vector->napi);
3894 }
3895 
3896 /**
3897  * igc_free_q_vector - Free memory allocated for specific interrupt vector
3898  * @adapter: board private structure to initialize
3899  * @v_idx: Index of vector to be freed
3900  *
3901  * This function frees the memory allocated to the q_vector.
3902  */
3903 static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx)
3904 {
3905     struct igc_q_vector *q_vector = adapter->q_vector[v_idx];
3906 
3907     adapter->q_vector[v_idx] = NULL;
3908 
3909     /* igc_get_stats64() might access the rings on this vector,
3910      * so we must wait a grace period before freeing it.
3911      */
3912     if (q_vector)
3913         kfree_rcu(q_vector, rcu);
3914 }
3915 
3916 /**
3917  * igc_free_q_vectors - Free memory allocated for interrupt vectors
3918  * @adapter: board private structure to initialize
3919  *
3920  * This function frees the memory allocated to the q_vectors.  In addition if
3921  * NAPI is enabled it will delete any references to the NAPI struct prior
3922  * to freeing the q_vector.
3923  */
3924 static void igc_free_q_vectors(struct igc_adapter *adapter)
3925 {
3926     int v_idx = adapter->num_q_vectors;
3927 
3928     adapter->num_tx_queues = 0;
3929     adapter->num_rx_queues = 0;
3930     adapter->num_q_vectors = 0;
3931 
3932     while (v_idx--) {
3933         igc_reset_q_vector(adapter, v_idx);
3934         igc_free_q_vector(adapter, v_idx);
3935     }
3936 }
3937 
3938 /**
3939  * igc_update_itr - update the dynamic ITR value based on statistics
3940  * @q_vector: pointer to q_vector
3941  * @ring_container: ring info to update the itr for
3942  *
3943  * Stores a new ITR value based on packets and byte
3944  * counts during the last interrupt.  The advantage of per interrupt
3945  * computation is faster updates and more accurate ITR for the current
3946  * traffic pattern.  Constants in this function were computed
3947  * based on theoretical maximum wire speed and thresholds were set based
3948  * on testing data as well as attempting to minimize response time
3949  * while increasing bulk throughput.
3950  * NOTE: These calculations are only valid when operating in a single-
3951  * queue environment.
3952  */
3953 static void igc_update_itr(struct igc_q_vector *q_vector,
3954                struct igc_ring_container *ring_container)
3955 {
3956     unsigned int packets = ring_container->total_packets;
3957     unsigned int bytes = ring_container->total_bytes;
3958     u8 itrval = ring_container->itr;
3959 
3960     /* no packets, exit with status unchanged */
3961     if (packets == 0)
3962         return;
3963 
3964     switch (itrval) {
3965     case lowest_latency:
3966         /* handle TSO and jumbo frames */
3967         if (bytes / packets > 8000)
3968             itrval = bulk_latency;
3969         else if ((packets < 5) && (bytes > 512))
3970             itrval = low_latency;
3971         break;
3972     case low_latency:  /* 50 usec aka 20000 ints/s */
3973         if (bytes > 10000) {
3974             /* this if handles the TSO accounting */
3975             if (bytes / packets > 8000)
3976                 itrval = bulk_latency;
3977             else if ((packets < 10) || ((bytes / packets) > 1200))
3978                 itrval = bulk_latency;
3979             else if ((packets > 35))
3980                 itrval = lowest_latency;
3981         } else if (bytes / packets > 2000) {
3982             itrval = bulk_latency;
3983         } else if (packets <= 2 && bytes < 512) {
3984             itrval = lowest_latency;
3985         }
3986         break;
3987     case bulk_latency: /* 250 usec aka 4000 ints/s */
3988         if (bytes > 25000) {
3989             if (packets > 35)
3990                 itrval = low_latency;
3991         } else if (bytes < 1500) {
3992             itrval = low_latency;
3993         }
3994         break;
3995     }
3996 
3997     /* clear work counters since we have the values we need */
3998     ring_container->total_bytes = 0;
3999     ring_container->total_packets = 0;
4000 
4001     /* write updated itr to ring container */
4002     ring_container->itr = itrval;
4003 }
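
/* Worked examples (illustration only): starting from low_latency, an
 * interval with 2 packets / 200 bytes drops to lowest_latency (latency
 * sensitive chatter), while 4 packets / 36000 bytes (bytes/packets > 8000,
 * typical of TSO) jumps to bulk_latency to favour throughput.
 */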
4004 
4005 static void igc_set_itr(struct igc_q_vector *q_vector)
4006 {
4007     struct igc_adapter *adapter = q_vector->adapter;
4008     u32 new_itr = q_vector->itr_val;
4009     u8 current_itr = 0;
4010 
4011     /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
4012     switch (adapter->link_speed) {
4013     case SPEED_10:
4014     case SPEED_100:
4015         current_itr = 0;
4016         new_itr = IGC_4K_ITR;
4017         goto set_itr_now;
4018     default:
4019         break;
4020     }
4021 
4022     igc_update_itr(q_vector, &q_vector->tx);
4023     igc_update_itr(q_vector, &q_vector->rx);
4024 
4025     current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
4026 
4027     /* conservative mode (itr 3) eliminates the lowest_latency setting */
4028     if (current_itr == lowest_latency &&
4029         ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
4030         (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
4031         current_itr = low_latency;
4032 
4033     switch (current_itr) {
4034     /* counts and packets in update_itr are dependent on these numbers */
4035     case lowest_latency:
4036         new_itr = IGC_70K_ITR; /* 70,000 ints/sec */
4037         break;
4038     case low_latency:
4039         new_itr = IGC_20K_ITR; /* 20,000 ints/sec */
4040         break;
4041     case bulk_latency:
4042         new_itr = IGC_4K_ITR;  /* 4,000 ints/sec */
4043         break;
4044     default:
4045         break;
4046     }
4047 
4048 set_itr_now:
4049     if (new_itr != q_vector->itr_val) {
4050         /* this attempts to bias the interrupt rate towards Bulk
4051          * by adding intermediate steps when interrupt rate is
4052          * increasing
4053          */
4054         new_itr = new_itr > q_vector->itr_val ?
4055               max((new_itr * q_vector->itr_val) /
4056               (new_itr + (q_vector->itr_val >> 2)),
4057               new_itr) : new_itr;
4058         /* Don't write the value here; it resets the adapter's
4059          * internal timer, and causes us to delay far longer than
4060          * we should between interrupts.  Instead, we write the ITR
4061          * value at the beginning of the next interrupt so the timing
4062          * ends up being correct.
4063          */
4064         q_vector->itr_val = new_itr;
4065         q_vector->set_itr = 1;
4066     }
4067 }
4068 
4069 static void igc_reset_interrupt_capability(struct igc_adapter *adapter)
4070 {
4071     int v_idx = adapter->num_q_vectors;
4072 
4073     if (adapter->msix_entries) {
4074         pci_disable_msix(adapter->pdev);
4075         kfree(adapter->msix_entries);
4076         adapter->msix_entries = NULL;
4077     } else if (adapter->flags & IGC_FLAG_HAS_MSI) {
4078         pci_disable_msi(adapter->pdev);
4079     }
4080 
4081     while (v_idx--)
4082         igc_reset_q_vector(adapter, v_idx);
4083 }
4084 
4085 /**
4086  * igc_set_interrupt_capability - set MSI or MSI-X if supported
4087  * @adapter: Pointer to adapter structure
4088  * @msix: boolean value for MSI-X capability
4089  *
4090  * Attempt to configure interrupts using the best available
4091  * capabilities of the hardware and kernel.
4092  */
4093 static void igc_set_interrupt_capability(struct igc_adapter *adapter,
4094                      bool msix)
4095 {
4096     int numvecs, i;
4097     int err;
4098 
4099     if (!msix)
4100         goto msi_only;
4101     adapter->flags |= IGC_FLAG_HAS_MSIX;
4102 
4103     /* Number of supported queues. */
4104     adapter->num_rx_queues = adapter->rss_queues;
4105 
4106     adapter->num_tx_queues = adapter->rss_queues;
4107 
4108     /* start with one vector for every Rx queue */
4109     numvecs = adapter->num_rx_queues;
4110 
4111     /* if Tx handler is separate add 1 for every Tx queue */
4112     if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS))
4113         numvecs += adapter->num_tx_queues;
4114 
4115     /* store the number of vectors reserved for queues */
4116     adapter->num_q_vectors = numvecs;
4117 
4118     /* add 1 vector for link status interrupts */
4119     numvecs++;
4120 
4121     adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
4122                     GFP_KERNEL);
4123 
4124     if (!adapter->msix_entries)
4125         return;
4126 
4127     /* populate entry values */
4128     for (i = 0; i < numvecs; i++)
4129         adapter->msix_entries[i].entry = i;
4130 
4131     err = pci_enable_msix_range(adapter->pdev,
4132                     adapter->msix_entries,
4133                     numvecs,
4134                     numvecs);
4135     if (err > 0)
4136         return;
4137 
4138     kfree(adapter->msix_entries);
4139     adapter->msix_entries = NULL;
4140 
4141     igc_reset_interrupt_capability(adapter);
4142 
4143 msi_only:
4144     adapter->flags &= ~IGC_FLAG_HAS_MSIX;
4145 
4146     adapter->rss_queues = 1;
4147     adapter->flags |= IGC_FLAG_QUEUE_PAIRS;
4148     adapter->num_rx_queues = 1;
4149     adapter->num_tx_queues = 1;
4150     adapter->num_q_vectors = 1;
4151     if (!pci_enable_msi(adapter->pdev))
4152         adapter->flags |= IGC_FLAG_HAS_MSI;
4153 }
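
/* Vector budget sketch (illustration only): with 4 RSS queues and
 * IGC_FLAG_QUEUE_PAIRS set, numvecs = 4 queue vectors + 1 link status
 * vector = 5 MSI-X vectors; with 2 unpaired queues it is 2 Rx + 2 Tx + 1
 * = 5 as well.  If pci_enable_msix_range() cannot grant the full set, the
 * code above falls back to a single MSI vector with one queue pair.
 */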
4154 
4155 /**
4156  * igc_update_ring_itr - update the dynamic ITR value based on packet size
4157  * @q_vector: pointer to q_vector
4158  *
4159  * Stores a new ITR value based strictly on packet size.  This
4160  * algorithm is less sophisticated than that used in igc_update_itr,
4161  * due to the difficulty of synchronizing statistics across multiple
4162  * receive rings.  The divisors and thresholds used by this function
4163  * were determined based on theoretical maximum wire speed and testing
4164  * data, in order to minimize response time while increasing bulk
4165  * throughput.
4166  * NOTE: This function is called only when operating in a multiqueue
4167  * receive environment.
4168  */
4169 static void igc_update_ring_itr(struct igc_q_vector *q_vector)
4170 {
4171     struct igc_adapter *adapter = q_vector->adapter;
4172     int new_val = q_vector->itr_val;
4173     int avg_wire_size = 0;
4174     unsigned int packets;
4175 
4176     /* For non-gigabit speeds, just fix the interrupt rate at 4000
4177      * ints/sec - ITR timer value of 120 ticks.
4178      */
4179     switch (adapter->link_speed) {
4180     case SPEED_10:
4181     case SPEED_100:
4182         new_val = IGC_4K_ITR;
4183         goto set_itr_val;
4184     default:
4185         break;
4186     }
4187 
4188     packets = q_vector->rx.total_packets;
4189     if (packets)
4190         avg_wire_size = q_vector->rx.total_bytes / packets;
4191 
4192     packets = q_vector->tx.total_packets;
4193     if (packets)
4194         avg_wire_size = max_t(u32, avg_wire_size,
4195                       q_vector->tx.total_bytes / packets);
4196 
4197     /* if avg_wire_size isn't set no work was done */
4198     if (!avg_wire_size)
4199         goto clear_counts;
4200 
4201     /* Add 24 bytes to size to account for CRC, preamble, and gap */
4202     avg_wire_size += 24;
4203 
4204     /* Don't starve jumbo frames */
4205     avg_wire_size = min(avg_wire_size, 3000);
4206 
4207     /* Give a little boost to mid-size frames */
4208     if (avg_wire_size > 300 && avg_wire_size < 1200)
4209         new_val = avg_wire_size / 3;
4210     else
4211         new_val = avg_wire_size / 2;
4212 
4213     /* conservative mode (itr 3) eliminates the lowest_latency setting */
4214     if (new_val < IGC_20K_ITR &&
4215         ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
4216         (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
4217         new_val = IGC_20K_ITR;
4218 
4219 set_itr_val:
4220     if (new_val != q_vector->itr_val) {
4221         q_vector->itr_val = new_val;
4222         q_vector->set_itr = 1;
4223     }
4224 clear_counts:
4225     q_vector->rx.total_bytes = 0;
4226     q_vector->rx.total_packets = 0;
4227     q_vector->tx.total_bytes = 0;
4228     q_vector->tx.total_packets = 0;
4229 }
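
/* Worked example (illustration only): with an average Rx frame of 576
 * bytes the estimate becomes 576 + 24 = 600, which is in the mid-size
 * band and yields an ITR value of 600 / 3 = 200; a 1400 byte average
 * gives (1400 + 24) / 2 = 712, slowing the interrupt rate further for
 * bulk traffic.
 */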
4230 
4231 static void igc_ring_irq_enable(struct igc_q_vector *q_vector)
4232 {
4233     struct igc_adapter *adapter = q_vector->adapter;
4234     struct igc_hw *hw = &adapter->hw;
4235 
4236     if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
4237         (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
4238         if (adapter->num_q_vectors == 1)
4239             igc_set_itr(q_vector);
4240         else
4241             igc_update_ring_itr(q_vector);
4242     }
4243 
4244     if (!test_bit(__IGC_DOWN, &adapter->state)) {
4245         if (adapter->msix_entries)
4246             wr32(IGC_EIMS, q_vector->eims_value);
4247         else
4248             igc_irq_enable(adapter);
4249     }
4250 }
4251 
4252 static void igc_add_ring(struct igc_ring *ring,
4253              struct igc_ring_container *head)
4254 {
4255     head->ring = ring;
4256     head->count++;
4257 }
4258 
4259 /**
4260  * igc_cache_ring_register - Descriptor ring to register mapping
4261  * @adapter: board private structure to initialize
4262  *
4263  * Once we know the feature-set enabled for the device, we'll cache
4264  * the register offset the descriptor ring is assigned to.
4265  */
4266 static void igc_cache_ring_register(struct igc_adapter *adapter)
4267 {
4268     int i = 0, j = 0;
4269 
4270     switch (adapter->hw.mac.type) {
4271     case igc_i225:
4272     default:
4273         for (; i < adapter->num_rx_queues; i++)
4274             adapter->rx_ring[i]->reg_idx = i;
4275         for (; j < adapter->num_tx_queues; j++)
4276             adapter->tx_ring[j]->reg_idx = j;
4277         break;
4278     }
4279 }
4280 
4281 /**
4282  * igc_poll - NAPI Rx polling callback
4283  * @napi: napi polling structure
4284  * @budget: count of how many packets we should handle
4285  */
4286 static int igc_poll(struct napi_struct *napi, int budget)
4287 {
4288     struct igc_q_vector *q_vector = container_of(napi,
4289                              struct igc_q_vector,
4290                              napi);
4291     struct igc_ring *rx_ring = q_vector->rx.ring;
4292     bool clean_complete = true;
4293     int work_done = 0;
4294 
4295     if (q_vector->tx.ring)
4296         clean_complete = igc_clean_tx_irq(q_vector, budget);
4297 
4298     if (rx_ring) {
4299         int cleaned = rx_ring->xsk_pool ?
4300                   igc_clean_rx_irq_zc(q_vector, budget) :
4301                   igc_clean_rx_irq(q_vector, budget);
4302 
4303         work_done += cleaned;
4304         if (cleaned >= budget)
4305             clean_complete = false;
4306     }
4307 
4308     /* If all work not completed, return budget and keep polling */
4309     if (!clean_complete)
4310         return budget;
4311 
4312     /* Exit the polling mode, but don't re-enable interrupts if stack might
4313      * poll us due to busy-polling
4314      */
4315     if (likely(napi_complete_done(napi, work_done)))
4316         igc_ring_irq_enable(q_vector);
4317 
4318     return min(work_done, budget - 1);
4319 }
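
/* NAPI contract sketch (illustration only): if the Rx clean consumed the
 * whole budget (cleaned == budget), igc_poll() returns budget and the core
 * keeps polling; otherwise it calls napi_complete_done(), re-arming
 * interrupts only when the stack is not busy-polling, and returns
 * min(work_done, budget - 1) so a full budget is never reported from the
 * completion path.
 */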
4320 
4321 /**
4322  * igc_alloc_q_vector - Allocate memory for a single interrupt vector
4323  * @adapter: board private structure to initialize
4324  * @v_count: q_vectors allocated on adapter, used for ring interleaving
4325  * @v_idx: index of vector in adapter struct
4326  * @txr_count: total number of Tx rings to allocate
4327  * @txr_idx: index of first Tx ring to allocate
4328  * @rxr_count: total number of Rx rings to allocate
4329  * @rxr_idx: index of first Rx ring to allocate
4330  *
4331  * We allocate one q_vector.  If allocation fails we return -ENOMEM.
4332  */
4333 static int igc_alloc_q_vector(struct igc_adapter *adapter,
4334                   unsigned int v_count, unsigned int v_idx,
4335                   unsigned int txr_count, unsigned int txr_idx,
4336                   unsigned int rxr_count, unsigned int rxr_idx)
4337 {
4338     struct igc_q_vector *q_vector;
4339     struct igc_ring *ring;
4340     int ring_count;
4341 
4342     /* igc only supports 1 Tx and/or 1 Rx queue per vector */
4343     if (txr_count > 1 || rxr_count > 1)
4344         return -ENOMEM;
4345 
4346     ring_count = txr_count + rxr_count;
4347 
4348     /* allocate q_vector and rings */
4349     q_vector = adapter->q_vector[v_idx];
4350     if (!q_vector)
4351         q_vector = kzalloc(struct_size(q_vector, ring, ring_count),
4352                    GFP_KERNEL);
4353     else
4354         memset(q_vector, 0, struct_size(q_vector, ring, ring_count));
4355     if (!q_vector)
4356         return -ENOMEM;
4357 
4358     /* initialize NAPI */
4359     netif_napi_add(adapter->netdev, &q_vector->napi,
4360                igc_poll, 64);
4361 
4362     /* tie q_vector and adapter together */
4363     adapter->q_vector[v_idx] = q_vector;
4364     q_vector->adapter = adapter;
4365 
4366     /* initialize work limits */
4367     q_vector->tx.work_limit = adapter->tx_work_limit;
4368 
4369     /* initialize ITR configuration */
4370     q_vector->itr_register = adapter->io_addr + IGC_EITR(0);
4371     q_vector->itr_val = IGC_START_ITR;
4372 
4373     /* initialize pointer to rings */
4374     ring = q_vector->ring;
4375 
4376     /* initialize ITR */
4377     if (rxr_count) {
4378         /* rx or rx/tx vector */
4379         if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3)
4380             q_vector->itr_val = adapter->rx_itr_setting;
4381     } else {
4382         /* tx only vector */
4383         if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3)
4384             q_vector->itr_val = adapter->tx_itr_setting;
4385     }
4386 
4387     if (txr_count) {
4388         /* assign generic ring traits */
4389         ring->dev = &adapter->pdev->dev;
4390         ring->netdev = adapter->netdev;
4391 
4392         /* configure backlink on ring */
4393         ring->q_vector = q_vector;
4394 
4395         /* update q_vector Tx values */
4396         igc_add_ring(ring, &q_vector->tx);
4397 
4398         /* apply Tx specific ring traits */
4399         ring->count = adapter->tx_ring_count;
4400         ring->queue_index = txr_idx;
4401 
4402         /* assign ring to adapter */
4403         adapter->tx_ring[txr_idx] = ring;
4404 
4405         /* push pointer to next ring */
4406         ring++;
4407     }
4408 
4409     if (rxr_count) {
4410         /* assign generic ring traits */
4411         ring->dev = &adapter->pdev->dev;
4412         ring->netdev = adapter->netdev;
4413 
4414         /* configure backlink on ring */
4415         ring->q_vector = q_vector;
4416 
4417         /* update q_vector Rx values */
4418         igc_add_ring(ring, &q_vector->rx);
4419 
4420         /* apply Rx specific ring traits */
4421         ring->count = adapter->rx_ring_count;
4422         ring->queue_index = rxr_idx;
4423 
4424         /* assign ring to adapter */
4425         adapter->rx_ring[rxr_idx] = ring;
4426     }
4427 
4428     return 0;
4429 }
4430 
4431 /**
4432  * igc_alloc_q_vectors - Allocate memory for interrupt vectors
4433  * @adapter: board private structure to initialize
4434  *
4435  * We allocate one q_vector per queue interrupt.  If allocation fails we
4436  * return -ENOMEM.
4437  */
4438 static int igc_alloc_q_vectors(struct igc_adapter *adapter)
4439 {
4440     int rxr_remaining = adapter->num_rx_queues;
4441     int txr_remaining = adapter->num_tx_queues;
4442     int rxr_idx = 0, txr_idx = 0, v_idx = 0;
4443     int q_vectors = adapter->num_q_vectors;
4444     int err;
4445 
4446     if (q_vectors >= (rxr_remaining + txr_remaining)) {
4447         for (; rxr_remaining; v_idx++) {
4448             err = igc_alloc_q_vector(adapter, q_vectors, v_idx,
4449                          0, 0, 1, rxr_idx);
4450 
4451             if (err)
4452                 goto err_out;
4453 
4454             /* update counts and index */
4455             rxr_remaining--;
4456             rxr_idx++;
4457         }
4458     }
4459 
4460     for (; v_idx < q_vectors; v_idx++) {
4461         int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
4462         int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
4463 
4464         err = igc_alloc_q_vector(adapter, q_vectors, v_idx,
4465                      tqpv, txr_idx, rqpv, rxr_idx);
4466 
4467         if (err)
4468             goto err_out;
4469 
4470         /* update counts and index */
4471         rxr_remaining -= rqpv;
4472         txr_remaining -= tqpv;
4473         rxr_idx++;
4474         txr_idx++;
4475     }
4476 
4477     return 0;
4478 
4479 err_out:
4480     adapter->num_tx_queues = 0;
4481     adapter->num_rx_queues = 0;
4482     adapter->num_q_vectors = 0;
4483 
4484     while (v_idx--)
4485         igc_free_q_vector(adapter, v_idx);
4486 
4487     return -ENOMEM;
4488 }
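
/* Distribution sketch (illustration only): with 4 Rx + 4 Tx queues and 4
 * paired vectors the first loop is skipped and every vector receives one
 * Rx and one Tx ring (rqpv = tqpv = 1); with 2 Rx + 2 Tx queues and 4
 * unpaired vectors the first loop gives each Rx ring its own vector and
 * the remaining two vectors end up Tx-only.
 */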
4489 
4490 /**
4491  * igc_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
4492  * @adapter: Pointer to adapter structure
4493  * @msix: boolean for MSI-X capability
4494  *
4495  * This function initializes the interrupts and allocates all of the queues.
4496  */
4497 static int igc_init_interrupt_scheme(struct igc_adapter *adapter, bool msix)
4498 {
4499     struct net_device *dev = adapter->netdev;
4500     int err = 0;
4501 
4502     igc_set_interrupt_capability(adapter, msix);
4503 
4504     err = igc_alloc_q_vectors(adapter);
4505     if (err) {
4506         netdev_err(dev, "Unable to allocate memory for vectors\n");
4507         goto err_alloc_q_vectors;
4508     }
4509 
4510     igc_cache_ring_register(adapter);
4511 
4512     return 0;
4513 
4514 err_alloc_q_vectors:
4515     igc_reset_interrupt_capability(adapter);
4516     return err;
4517 }
4518 
4519 /**
4520  * igc_sw_init - Initialize general software structures (struct igc_adapter)
4521  * @adapter: board private structure to initialize
4522  *
4523  * igc_sw_init initializes the Adapter private data structure.
4524  * Fields are initialized based on PCI device information and
4525  * OS network device settings (MTU size).
4526  */
4527 static int igc_sw_init(struct igc_adapter *adapter)
4528 {
4529     struct net_device *netdev = adapter->netdev;
4530     struct pci_dev *pdev = adapter->pdev;
4531     struct igc_hw *hw = &adapter->hw;
4532 
4533     pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
4534 
4535     /* set default ring sizes */
4536     adapter->tx_ring_count = IGC_DEFAULT_TXD;
4537     adapter->rx_ring_count = IGC_DEFAULT_RXD;
4538 
4539     /* set default ITR values */
4540     adapter->rx_itr_setting = IGC_DEFAULT_ITR;
4541     adapter->tx_itr_setting = IGC_DEFAULT_ITR;
4542 
4543     /* set default work limits */
4544     adapter->tx_work_limit = IGC_DEFAULT_TX_WORK;
4545 
4546     /* adjust max frame to be at least the size of a standard frame */
4547     adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
4548                 VLAN_HLEN;
4549     adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
4550 
4551     mutex_init(&adapter->nfc_rule_lock);
4552     INIT_LIST_HEAD(&adapter->nfc_rule_list);
4553     adapter->nfc_rule_count = 0;
4554 
4555     spin_lock_init(&adapter->stats64_lock);
4556     /* Assume MSI-X interrupts, will be checked during IRQ allocation */
4557     adapter->flags |= IGC_FLAG_HAS_MSIX;
4558 
4559     igc_init_queue_configuration(adapter);
4560 
4561     /* This call may decrease the number of queues */
4562     if (igc_init_interrupt_scheme(adapter, true)) {
4563         netdev_err(netdev, "Unable to allocate memory for queues\n");
4564         return -ENOMEM;
4565     }
4566 
4567     /* Explicitly disable IRQ since the NIC can be in any state. */
4568     igc_irq_disable(adapter);
4569 
4570     set_bit(__IGC_DOWN, &adapter->state);
4571 
4572     return 0;
4573 }
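
/* Sizing example (illustration only): with the default MTU of 1500,
 * max_frame_size = 1500 + ETH_HLEN (14) + ETH_FCS_LEN (4) + VLAN_HLEN (4)
 * = 1522 bytes, and min_frame_size = ETH_ZLEN (60) + ETH_FCS_LEN (4)
 * = 64 bytes.
 */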
4574 
4575 /**
4576  * igc_up - Open the interface and prepare it to handle traffic
4577  * @adapter: board private structure
4578  */
4579 void igc_up(struct igc_adapter *adapter)
4580 {
4581     struct igc_hw *hw = &adapter->hw;
4582     int i = 0;
4583 
4584     /* hardware has been reset, we need to reload some things */
4585     igc_configure(adapter);
4586 
4587     clear_bit(__IGC_DOWN, &adapter->state);
4588 
4589     for (i = 0; i < adapter->num_q_vectors; i++)
4590         napi_enable(&adapter->q_vector[i]->napi);
4591 
4592     if (adapter->msix_entries)
4593         igc_configure_msix(adapter);
4594     else
4595         igc_assign_vector(adapter->q_vector[0], 0);
4596 
4597     /* Clear any pending interrupts. */
4598     rd32(IGC_ICR);
4599     igc_irq_enable(adapter);
4600 
4601     netif_tx_start_all_queues(adapter->netdev);
4602 
4603     /* start the watchdog. */
4604     hw->mac.get_link_status = true;
4605     schedule_work(&adapter->watchdog_task);
4606 }
4607 
4608 /**
4609  * igc_update_stats - Update the board statistics counters
4610  * @adapter: board private structure
4611  */
4612 void igc_update_stats(struct igc_adapter *adapter)
4613 {
4614     struct rtnl_link_stats64 *net_stats = &adapter->stats64;
4615     struct pci_dev *pdev = adapter->pdev;
4616     struct igc_hw *hw = &adapter->hw;
4617     u64 _bytes, _packets;
4618     u64 bytes, packets;
4619     unsigned int start;
4620     u32 mpc;
4621     int i;
4622 
4623     /* Prevent stats update while adapter is being reset, or if the pci
4624      * connection is down.
4625      */
4626     if (adapter->link_speed == 0)
4627         return;
4628     if (pci_channel_offline(pdev))
4629         return;
4630 
4631     packets = 0;
4632     bytes = 0;
4633 
4634     rcu_read_lock();
4635     for (i = 0; i < adapter->num_rx_queues; i++) {
4636         struct igc_ring *ring = adapter->rx_ring[i];
4637         u32 rqdpc = rd32(IGC_RQDPC(i));
4638 
4639         if (hw->mac.type >= igc_i225)
4640             wr32(IGC_RQDPC(i), 0);
4641 
4642         if (rqdpc) {
4643             ring->rx_stats.drops += rqdpc;
4644             net_stats->rx_fifo_errors += rqdpc;
4645         }
4646 
4647         do {
4648             start = u64_stats_fetch_begin_irq(&ring->rx_syncp);
4649             _bytes = ring->rx_stats.bytes;
4650             _packets = ring->rx_stats.packets;
4651         } while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start));
4652         bytes += _bytes;
4653         packets += _packets;
4654     }
4655 
4656     net_stats->rx_bytes = bytes;
4657     net_stats->rx_packets = packets;
4658 
4659     packets = 0;
4660     bytes = 0;
4661     for (i = 0; i < adapter->num_tx_queues; i++) {
4662         struct igc_ring *ring = adapter->tx_ring[i];
4663 
4664         do {
4665             start = u64_stats_fetch_begin_irq(&ring->tx_syncp);
4666             _bytes = ring->tx_stats.bytes;
4667             _packets = ring->tx_stats.packets;
4668         } while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start));
4669         bytes += _bytes;
4670         packets += _packets;
4671     }
4672     net_stats->tx_bytes = bytes;
4673     net_stats->tx_packets = packets;
4674     rcu_read_unlock();
4675 
4676     /* read stats registers */
4677     adapter->stats.crcerrs += rd32(IGC_CRCERRS);
4678     adapter->stats.gprc += rd32(IGC_GPRC);
4679     adapter->stats.gorc += rd32(IGC_GORCL);
4680     rd32(IGC_GORCH); /* clear GORCL */
4681     adapter->stats.bprc += rd32(IGC_BPRC);
4682     adapter->stats.mprc += rd32(IGC_MPRC);
4683     adapter->stats.roc += rd32(IGC_ROC);
4684 
4685     adapter->stats.prc64 += rd32(IGC_PRC64);
4686     adapter->stats.prc127 += rd32(IGC_PRC127);
4687     adapter->stats.prc255 += rd32(IGC_PRC255);
4688     adapter->stats.prc511 += rd32(IGC_PRC511);
4689     adapter->stats.prc1023 += rd32(IGC_PRC1023);
4690     adapter->stats.prc1522 += rd32(IGC_PRC1522);
4691     adapter->stats.tlpic += rd32(IGC_TLPIC);
4692     adapter->stats.rlpic += rd32(IGC_RLPIC);
4693     adapter->stats.hgptc += rd32(IGC_HGPTC);
4694 
4695     mpc = rd32(IGC_MPC);
4696     adapter->stats.mpc += mpc;
4697     net_stats->rx_fifo_errors += mpc;
4698     adapter->stats.scc += rd32(IGC_SCC);
4699     adapter->stats.ecol += rd32(IGC_ECOL);
4700     adapter->stats.mcc += rd32(IGC_MCC);
4701     adapter->stats.latecol += rd32(IGC_LATECOL);
4702     adapter->stats.dc += rd32(IGC_DC);
4703     adapter->stats.rlec += rd32(IGC_RLEC);
4704     adapter->stats.xonrxc += rd32(IGC_XONRXC);
4705     adapter->stats.xontxc += rd32(IGC_XONTXC);
4706     adapter->stats.xoffrxc += rd32(IGC_XOFFRXC);
4707     adapter->stats.xofftxc += rd32(IGC_XOFFTXC);
4708     adapter->stats.fcruc += rd32(IGC_FCRUC);
4709     adapter->stats.gptc += rd32(IGC_GPTC);
4710     adapter->stats.gotc += rd32(IGC_GOTCL);
4711     rd32(IGC_GOTCH); /* clear GOTCL */
4712     adapter->stats.rnbc += rd32(IGC_RNBC);
4713     adapter->stats.ruc += rd32(IGC_RUC);
4714     adapter->stats.rfc += rd32(IGC_RFC);
4715     adapter->stats.rjc += rd32(IGC_RJC);
4716     adapter->stats.tor += rd32(IGC_TORH);
4717     adapter->stats.tot += rd32(IGC_TOTH);
4718     adapter->stats.tpr += rd32(IGC_TPR);
4719 
4720     adapter->stats.ptc64 += rd32(IGC_PTC64);
4721     adapter->stats.ptc127 += rd32(IGC_PTC127);
4722     adapter->stats.ptc255 += rd32(IGC_PTC255);
4723     adapter->stats.ptc511 += rd32(IGC_PTC511);
4724     adapter->stats.ptc1023 += rd32(IGC_PTC1023);
4725     adapter->stats.ptc1522 += rd32(IGC_PTC1522);
4726 
4727     adapter->stats.mptc += rd32(IGC_MPTC);
4728     adapter->stats.bptc += rd32(IGC_BPTC);
4729 
4730     adapter->stats.tpt += rd32(IGC_TPT);
4731     adapter->stats.colc += rd32(IGC_COLC);
4732     adapter->stats.colc += rd32(IGC_RERC);
4733 
4734     adapter->stats.algnerrc += rd32(IGC_ALGNERRC);
4735 
4736     adapter->stats.tsctc += rd32(IGC_TSCTC);
4737 
4738     adapter->stats.iac += rd32(IGC_IAC);
4739 
4740     /* Fill out the OS statistics structure */
4741     net_stats->multicast = adapter->stats.mprc;
4742     net_stats->collisions = adapter->stats.colc;
4743 
4744     /* Rx Errors */
4745 
4746     /* RLEC on some newer hardware can be incorrect so build
4747      * our own version based on RUC and ROC
4748      */
4749     net_stats->rx_errors = adapter->stats.rxerrc +
4750         adapter->stats.crcerrs + adapter->stats.algnerrc +
4751         adapter->stats.ruc + adapter->stats.roc +
4752         adapter->stats.cexterr;
4753     net_stats->rx_length_errors = adapter->stats.ruc +
4754                       adapter->stats.roc;
4755     net_stats->rx_crc_errors = adapter->stats.crcerrs;
4756     net_stats->rx_frame_errors = adapter->stats.algnerrc;
4757     net_stats->rx_missed_errors = adapter->stats.mpc;
4758 
4759     /* Tx Errors */
4760     net_stats->tx_errors = adapter->stats.ecol +
4761                    adapter->stats.latecol;
4762     net_stats->tx_aborted_errors = adapter->stats.ecol;
4763     net_stats->tx_window_errors = adapter->stats.latecol;
4764     net_stats->tx_carrier_errors = adapter->stats.tncrs;
4765 
4766     /* Tx Dropped needs to be maintained elsewhere */
4767 
4768     /* Management Stats */
4769     adapter->stats.mgptc += rd32(IGC_MGTPTC);
4770     adapter->stats.mgprc += rd32(IGC_MGTPRC);
4771     adapter->stats.mgpdc += rd32(IGC_MGTPDC);
4772 }
4773 
4774 /**
4775  * igc_down - Close the interface
4776  * @adapter: board private structure
4777  */
4778 void igc_down(struct igc_adapter *adapter)
4779 {
4780     struct net_device *netdev = adapter->netdev;
4781     struct igc_hw *hw = &adapter->hw;
4782     u32 tctl, rctl;
4783     int i = 0;
4784 
4785     set_bit(__IGC_DOWN, &adapter->state);
4786 
4787     igc_ptp_suspend(adapter);
4788 
4789     if (pci_device_is_present(adapter->pdev)) {
4790         /* disable receives in the hardware */
4791         rctl = rd32(IGC_RCTL);
4792         wr32(IGC_RCTL, rctl & ~IGC_RCTL_EN);
4793         /* flush and sleep below */
4794     }
4795     /* set trans_start so we don't get spurious watchdogs during reset */
4796     netif_trans_update(netdev);
4797 
4798     netif_carrier_off(netdev);
4799     netif_tx_stop_all_queues(netdev);
4800 
4801     if (pci_device_is_present(adapter->pdev)) {
4802         /* disable transmits in the hardware */
4803         tctl = rd32(IGC_TCTL);
4804         tctl &= ~IGC_TCTL_EN;
4805         wr32(IGC_TCTL, tctl);
4806         /* flush both disables and wait for them to finish */
4807         wrfl();
4808         usleep_range(10000, 20000);
4809 
4810         igc_irq_disable(adapter);
4811     }
4812 
4813     adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
4814 
4815     for (i = 0; i < adapter->num_q_vectors; i++) {
4816         if (adapter->q_vector[i]) {
4817             napi_synchronize(&adapter->q_vector[i]->napi);
4818             napi_disable(&adapter->q_vector[i]->napi);
4819         }
4820     }
4821 
4822     del_timer_sync(&adapter->watchdog_timer);
4823     del_timer_sync(&adapter->phy_info_timer);
4824 
4825     /* record the stats before reset */
4826     spin_lock(&adapter->stats64_lock);
4827     igc_update_stats(adapter);
4828     spin_unlock(&adapter->stats64_lock);
4829 
4830     adapter->link_speed = 0;
4831     adapter->link_duplex = 0;
4832 
4833     if (!pci_channel_offline(adapter->pdev))
4834         igc_reset(adapter);
4835 
4836     /* clear VLAN promisc flag so VFTA will be updated if necessary */
4837     adapter->flags &= ~IGC_FLAG_VLAN_PROMISC;
4838 
4839     igc_clean_all_tx_rings(adapter);
4840     igc_clean_all_rx_rings(adapter);
4841 }
4842 
4843 void igc_reinit_locked(struct igc_adapter *adapter)
4844 {
4845     while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
4846         usleep_range(1000, 2000);
4847     igc_down(adapter);
4848     igc_up(adapter);
4849     clear_bit(__IGC_RESETTING, &adapter->state);
4850 }
4851 
4852 static void igc_reset_task(struct work_struct *work)
4853 {
4854     struct igc_adapter *adapter;
4855 
4856     adapter = container_of(work, struct igc_adapter, reset_task);
4857 
4858     rtnl_lock();
4859     /* If we're already down or resetting, just bail */
4860     if (test_bit(__IGC_DOWN, &adapter->state) ||
4861         test_bit(__IGC_RESETTING, &adapter->state)) {
4862         rtnl_unlock();
4863         return;
4864     }
4865 
4866     igc_rings_dump(adapter);
4867     igc_regs_dump(adapter);
4868     netdev_err(adapter->netdev, "Reset adapter\n");
4869     igc_reinit_locked(adapter);
4870     rtnl_unlock();
4871 }
4872 
4873 /**
4874  * igc_change_mtu - Change the Maximum Transfer Unit
4875  * @netdev: network interface device structure
4876  * @new_mtu: new value for maximum frame size
4877  * @new_mtu: new MTU value
4878  * Returns 0 on success, negative on failure
4879  */
4880 static int igc_change_mtu(struct net_device *netdev, int new_mtu)
4881 {
4882     int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4883     struct igc_adapter *adapter = netdev_priv(netdev);
4884 
4885     if (igc_xdp_is_enabled(adapter) && new_mtu > ETH_DATA_LEN) {
4886         netdev_dbg(netdev, "Jumbo frames not supported with XDP");
4887         return -EINVAL;
4888     }
4889 
4890     /* adjust max frame to be at least the size of a standard frame */
4891     if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN))
4892         max_frame = ETH_FRAME_LEN + ETH_FCS_LEN;
4893 
4894     while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
4895         usleep_range(1000, 2000);
4896 
4897     /* igc_down has a dependency on max_frame_size */
4898     adapter->max_frame_size = max_frame;
4899 
4900     if (netif_running(netdev))
4901         igc_down(adapter);
4902 
4903     netdev_dbg(netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu);
4904     netdev->mtu = new_mtu;
4905 
4906     if (netif_running(netdev))
4907         igc_up(adapter);
4908     else
4909         igc_reset(adapter);
4910 
4911     clear_bit(__IGC_RESETTING, &adapter->state);
4912 
4913     return 0;
4914 }
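
/* Example (illustration only): requesting an MTU of 9000 gives
 * max_frame = 9000 + 14 + 4 + 4 = 9022 bytes, while a small MTU is
 * clamped so max_frame never drops below ETH_FRAME_LEN + ETH_FCS_LEN
 * = 1518 bytes; with an XDP program attached, any MTU above ETH_DATA_LEN
 * (1500) is rejected with -EINVAL.
 */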
4915 
4916 /**
4917  * igc_get_stats64 - Get System Network Statistics
4918  * @netdev: network interface device structure
4919  * @stats: rtnl_link_stats64 pointer
4920  *
4921  * Fills @stats with the aggregated device statistics.
4922  * The statistics are updated here and also from the timer callback.
4923  */
4924 static void igc_get_stats64(struct net_device *netdev,
4925                 struct rtnl_link_stats64 *stats)
4926 {
4927     struct igc_adapter *adapter = netdev_priv(netdev);
4928 
4929     spin_lock(&adapter->stats64_lock);
4930     if (!test_bit(__IGC_RESETTING, &adapter->state))
4931         igc_update_stats(adapter);
4932     memcpy(stats, &adapter->stats64, sizeof(*stats));
4933     spin_unlock(&adapter->stats64_lock);
4934 }
4935 
4936 static netdev_features_t igc_fix_features(struct net_device *netdev,
4937                       netdev_features_t features)
4938 {
4939     /* Since there is no support for separate Rx/Tx vlan accel
4940      * enable/disable make sure Tx flag is always in same state as Rx.
4941      */
4942     if (features & NETIF_F_HW_VLAN_CTAG_RX)
4943         features |= NETIF_F_HW_VLAN_CTAG_TX;
4944     else
4945         features &= ~NETIF_F_HW_VLAN_CTAG_TX;
4946 
4947     return features;
4948 }
4949 
4950 static int igc_set_features(struct net_device *netdev,
4951                 netdev_features_t features)
4952 {
4953     netdev_features_t changed = netdev->features ^ features;
4954     struct igc_adapter *adapter = netdev_priv(netdev);
4955 
4956     if (changed & NETIF_F_HW_VLAN_CTAG_RX)
4957         igc_vlan_mode(netdev, features);
4958 
4959     /* Only RXALL and NTUPLE changes require further reconfiguration */
4960     if (!(changed & (NETIF_F_RXALL | NETIF_F_NTUPLE)))
4961         return 0;
4962 
4963     if (!(features & NETIF_F_NTUPLE))
4964         igc_flush_nfc_rules(adapter);
4965 
4966     netdev->features = features;
4967 
4968     if (netif_running(netdev))
4969         igc_reinit_locked(adapter);
4970     else
4971         igc_reset(adapter);
4972 
4973     return 1;
4974 }
4975 
4976 static netdev_features_t
4977 igc_features_check(struct sk_buff *skb, struct net_device *dev,
4978            netdev_features_t features)
4979 {
4980     unsigned int network_hdr_len, mac_hdr_len;
4981 
4982     /* Make certain the headers can be described by a context descriptor */
4983     mac_hdr_len = skb_network_header(skb) - skb->data;
4984     if (unlikely(mac_hdr_len > IGC_MAX_MAC_HDR_LEN))
4985         return features & ~(NETIF_F_HW_CSUM |
4986                     NETIF_F_SCTP_CRC |
4987                     NETIF_F_HW_VLAN_CTAG_TX |
4988                     NETIF_F_TSO |
4989                     NETIF_F_TSO6);
4990 
4991     network_hdr_len = skb_checksum_start(skb) - skb_network_header(skb);
4992     if (unlikely(network_hdr_len > IGC_MAX_NETWORK_HDR_LEN))
4993         return features & ~(NETIF_F_HW_CSUM |
4994                     NETIF_F_SCTP_CRC |
4995                     NETIF_F_TSO |
4996                     NETIF_F_TSO6);
4997 
4998     /* We can only support IPv4 TSO in tunnels if we can mangle the
4999      * inner IP ID field, so strip TSO if MANGLEID is not supported.
5000      */
5001     if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID))
5002         features &= ~NETIF_F_TSO;
5003 
5004     return features;
5005 }
5006 
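/* igc_tsync_interrupt - handle the timesync (TSICR) interrupt causes:
 * deliver a PPS event on SYSTIM wrap, kick the Tx timestamp work, re-arm
 * the target-time (perout) outputs for their next period, and report
 * auxiliary (external) timestamp events to the PTP clock.
 */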
5007 static void igc_tsync_interrupt(struct igc_adapter *adapter)
5008 {
5009     u32 ack, tsauxc, sec, nsec, tsicr;
5010     struct igc_hw *hw = &adapter->hw;
5011     struct ptp_clock_event event;
5012     struct timespec64 ts;
5013 
5014     tsicr = rd32(IGC_TSICR);
5015     ack = 0;
5016 
5017     if (tsicr & IGC_TSICR_SYS_WRAP) {
5018         event.type = PTP_CLOCK_PPS;
5019         if (adapter->ptp_caps.pps)
5020             ptp_clock_event(adapter->ptp_clock, &event);
5021         ack |= IGC_TSICR_SYS_WRAP;
5022     }
5023 
5024     if (tsicr & IGC_TSICR_TXTS) {
5025         /* retrieve hardware timestamp */
5026         schedule_work(&adapter->ptp_tx_work);
5027         ack |= IGC_TSICR_TXTS;
5028     }
5029 
5030     if (tsicr & IGC_TSICR_TT0) {
5031         spin_lock(&adapter->tmreg_lock);
5032         ts = timespec64_add(adapter->perout[0].start,
5033                     adapter->perout[0].period);
5034         wr32(IGC_TRGTTIML0, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0);
5035         wr32(IGC_TRGTTIMH0, (u32)ts.tv_sec);
5036         tsauxc = rd32(IGC_TSAUXC);
5037         tsauxc |= IGC_TSAUXC_EN_TT0;
5038         wr32(IGC_TSAUXC, tsauxc);
5039         adapter->perout[0].start = ts;
5040         spin_unlock(&adapter->tmreg_lock);
5041         ack |= IGC_TSICR_TT0;
5042     }
5043 
5044     if (tsicr & IGC_TSICR_TT1) {
5045         spin_lock(&adapter->tmreg_lock);
5046         ts = timespec64_add(adapter->perout[1].start,
5047                     adapter->perout[1].period);
5048         wr32(IGC_TRGTTIML1, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0);
5049         wr32(IGC_TRGTTIMH1, (u32)ts.tv_sec);
5050         tsauxc = rd32(IGC_TSAUXC);
5051         tsauxc |= IGC_TSAUXC_EN_TT1;
5052         wr32(IGC_TSAUXC, tsauxc);
5053         adapter->perout[1].start = ts;
5054         spin_unlock(&adapter->tmreg_lock);
5055         ack |= IGC_TSICR_TT1;
5056     }
5057 
5058     if (tsicr & IGC_TSICR_AUTT0) {
5059         nsec = rd32(IGC_AUXSTMPL0);
5060         sec  = rd32(IGC_AUXSTMPH0);
5061         event.type = PTP_CLOCK_EXTTS;
5062         event.index = 0;
5063         event.timestamp = sec * NSEC_PER_SEC + nsec;
5064         ptp_clock_event(adapter->ptp_clock, &event);
5065         ack |= IGC_TSICR_AUTT0;
5066     }
5067 
5068     if (tsicr & IGC_TSICR_AUTT1) {
5069         nsec = rd32(IGC_AUXSTMPL1);
5070         sec  = rd32(IGC_AUXSTMPH1);
5071         event.type = PTP_CLOCK_EXTTS;
5072         event.index = 1;
5073         event.timestamp = sec * NSEC_PER_SEC + nsec;
5074         ptp_clock_event(adapter->ptp_clock, &event);
5075         ack |= IGC_TSICR_AUTT1;
5076     }
5077 
5078     /* acknowledge the interrupts */
5079     wr32(IGC_TSICR, ack);
5080 }
5081 
5082 /**
5083  * igc_msix_other - msix other interrupt handler
5084  * @irq: interrupt number
5085  * @data: pointer to the adapter structure
5086  */
5087 static irqreturn_t igc_msix_other(int irq, void *data)
5088 {
5089     struct igc_adapter *adapter = data;
5090     struct igc_hw *hw = &adapter->hw;
5091     u32 icr = rd32(IGC_ICR);
5092 
5093     /* reading ICR causes bit 31 of EICR to be cleared */
5094     if (icr & IGC_ICR_DRSTA)
5095         schedule_work(&adapter->reset_task);
5096 
5097     if (icr & IGC_ICR_DOUTSYNC) {
5098         /* HW is reporting DMA is out of sync */
5099         adapter->stats.doosync++;
5100     }
5101 
5102     if (icr & IGC_ICR_LSC) {
5103         hw->mac.get_link_status = true;
5104         /* guard against interrupt when we're going down */
5105         if (!test_bit(__IGC_DOWN, &adapter->state))
5106             mod_timer(&adapter->watchdog_timer, jiffies + 1);
5107     }
5108 
5109     if (icr & IGC_ICR_TS)
5110         igc_tsync_interrupt(adapter);
5111 
5112     wr32(IGC_EIMS, adapter->eims_other);
5113 
5114     return IRQ_HANDLED;
5115 }
5116 
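/* igc_write_itr - program the interrupt throttle rate.  Writes the ITR
 * value computed by the previous interrupt into the vector's EITR
 * register, but only when an update is pending.
 */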
5117 static void igc_write_itr(struct igc_q_vector *q_vector)
5118 {
5119     u32 itr_val = q_vector->itr_val & IGC_QVECTOR_MASK;
5120 
5121     if (!q_vector->set_itr)
5122         return;
5123 
5124     if (!itr_val)
5125         itr_val = IGC_ITR_VAL_MASK;
5126 
5127     itr_val |= IGC_EITR_CNT_IGNR;
5128 
5129     writel(itr_val, q_vector->itr_register);
5130     q_vector->set_itr = 0;
5131 }
5132 
5133 static irqreturn_t igc_msix_ring(int irq, void *data)
5134 {
5135     struct igc_q_vector *q_vector = data;
5136 
5137     /* Write the ITR value calculated from the previous interrupt. */
5138     igc_write_itr(q_vector);
5139 
5140     napi_schedule(&q_vector->napi);
5141 
5142     return IRQ_HANDLED;
5143 }
5144 
5145 /**
5146  * igc_request_msix - Initialize MSI-X interrupts
5147  * @adapter: Pointer to adapter structure
5148  *
5149  * igc_request_msix allocates MSI-X vectors and requests interrupts from the
5150  * kernel.
5151  */
5152 static int igc_request_msix(struct igc_adapter *adapter)
5153 {
5154     unsigned int num_q_vectors = adapter->num_q_vectors;
5155     int i = 0, err = 0, vector = 0, free_vector = 0;
5156     struct net_device *netdev = adapter->netdev;
5157 
5158     err = request_irq(adapter->msix_entries[vector].vector,
5159               &igc_msix_other, 0, netdev->name, adapter);
5160     if (err)
5161         goto err_out;
5162 
5163     if (num_q_vectors > MAX_Q_VECTORS) {
5164         num_q_vectors = MAX_Q_VECTORS;
5165         dev_warn(&adapter->pdev->dev,
5166              "The number of queue vectors (%d) is higher than max allowed (%d)\n",
5167              adapter->num_q_vectors, MAX_Q_VECTORS);
5168     }
5169     for (i = 0; i < num_q_vectors; i++) {
5170         struct igc_q_vector *q_vector = adapter->q_vector[i];
5171 
5172         vector++;
5173 
5174         q_vector->itr_register = adapter->io_addr + IGC_EITR(vector);
5175 
5176         if (q_vector->rx.ring && q_vector->tx.ring)
5177             sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
5178                 q_vector->rx.ring->queue_index);
5179         else if (q_vector->tx.ring)
5180             sprintf(q_vector->name, "%s-tx-%u", netdev->name,
5181                 q_vector->tx.ring->queue_index);
5182         else if (q_vector->rx.ring)
5183             sprintf(q_vector->name, "%s-rx-%u", netdev->name,
5184                 q_vector->rx.ring->queue_index);
5185         else
5186             sprintf(q_vector->name, "%s-unused", netdev->name);
5187 
5188         err = request_irq(adapter->msix_entries[vector].vector,
5189                   igc_msix_ring, 0, q_vector->name,
5190                   q_vector);
5191         if (err)
5192             goto err_free;
5193     }
5194 
5195     igc_configure_msix(adapter);
5196     return 0;
5197 
5198 err_free:
5199     /* free already assigned IRQs */
5200     free_irq(adapter->msix_entries[free_vector++].vector, adapter);
5201 
5202     vector--;
5203     for (i = 0; i < vector; i++) {
5204         free_irq(adapter->msix_entries[free_vector++].vector,
5205              adapter->q_vector[i]);
5206     }
5207 err_out:
5208     return err;
5209 }
5210 
5211 /**
5212  * igc_clear_interrupt_scheme - reset the device to a state of no interrupts
5213  * @adapter: Pointer to adapter structure
5214  *
5215  * This function resets the device so that it has 0 rx queues, tx queues, and
5216  * MSI-X interrupts allocated.
5217  */
5218 static void igc_clear_interrupt_scheme(struct igc_adapter *adapter)
5219 {
5220     igc_free_q_vectors(adapter);
5221     igc_reset_interrupt_capability(adapter);
5222 }
5223 
5224 /* Need to wait a few seconds after link up to get diagnostic information from
5225  * the phy
5226  */
5227 static void igc_update_phy_info(struct timer_list *t)
5228 {
5229     struct igc_adapter *adapter = from_timer(adapter, t, phy_info_timer);
5230 
5231     igc_get_phy_info(&adapter->hw);
5232 }
5233 
5234 /**
5235  * igc_has_link - check shared code for link and determine up/down
5236  * @adapter: pointer to driver private info
5237  */
5238 bool igc_has_link(struct igc_adapter *adapter)
5239 {
5240     struct igc_hw *hw = &adapter->hw;
5241     bool link_active = false;
5242 
5243     /* get_link_status is set on LSC (link status) interrupt or
5244      * rx sequence error interrupt.  It stays true until
5245      * igc_check_for_link() establishes link; this applies to
5246      * copper adapters ONLY
5247      */
5248     if (!hw->mac.get_link_status)
5249         return true;
5250     hw->mac.ops.check_for_link(hw);
5251     link_active = !hw->mac.get_link_status;
5252 
5253     if (hw->mac.type == igc_i225) {
5254         if (!netif_carrier_ok(adapter->netdev)) {
5255             adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
5256         } else if (!(adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)) {
5257             adapter->flags |= IGC_FLAG_NEED_LINK_UPDATE;
5258             adapter->link_check_timeout = jiffies;
5259         }
5260     }
5261 
5262     return link_active;
5263 }
5264 
5265 /**
5266  * igc_watchdog - Timer Call-back
5267  * @t: timer for the watchdog
5268  */
5269 static void igc_watchdog(struct timer_list *t)
5270 {
5271     struct igc_adapter *adapter = from_timer(adapter, t, watchdog_timer);
5272     /* Do the rest outside of interrupt context */
5273     schedule_work(&adapter->watchdog_task);
5274 }
5275 
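/* igc_watchdog_task - periodic link and ring supervision run from the
 * watchdog timer: reports link transitions, scales the Tx timeout factor
 * with the negotiated speed, updates statistics, schedules a reset when
 * Tx work is stranded without link, arms Tx hang detection, triggers a
 * software interrupt so rings are cleaned, and re-arms the timer.
 */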
5276 static void igc_watchdog_task(struct work_struct *work)
5277 {
5278     struct igc_adapter *adapter = container_of(work,
5279                            struct igc_adapter,
5280                            watchdog_task);
5281     struct net_device *netdev = adapter->netdev;
5282     struct igc_hw *hw = &adapter->hw;
5283     struct igc_phy_info *phy = &hw->phy;
5284     u16 phy_data, retry_count = 20;
5285     u32 link;
5286     int i;
5287 
5288     link = igc_has_link(adapter);
5289 
5290     if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE) {
5291         if (time_after(jiffies, (adapter->link_check_timeout + HZ)))
5292             adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
5293         else
5294             link = false;
5295     }
5296 
5297     if (link) {
5298         /* Cancel scheduled suspend requests. */
5299         pm_runtime_resume(netdev->dev.parent);
5300 
5301         if (!netif_carrier_ok(netdev)) {
5302             u32 ctrl;
5303 
5304             hw->mac.ops.get_speed_and_duplex(hw,
5305                              &adapter->link_speed,
5306                              &adapter->link_duplex);
5307 
5308             ctrl = rd32(IGC_CTRL);
5309             /* Link status message must follow this format */
5310             netdev_info(netdev,
5311                     "NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n",
5312                     adapter->link_speed,
5313                     adapter->link_duplex == FULL_DUPLEX ?
5314                     "Full" : "Half",
5315                     (ctrl & IGC_CTRL_TFCE) &&
5316                     (ctrl & IGC_CTRL_RFCE) ? "RX/TX" :
5317                     (ctrl & IGC_CTRL_RFCE) ?  "RX" :
5318                     (ctrl & IGC_CTRL_TFCE) ?  "TX" : "None");
5319 
5320             /* disable EEE if enabled */
5321             if ((adapter->flags & IGC_FLAG_EEE) &&
5322                 adapter->link_duplex == HALF_DUPLEX) {
5323                 netdev_info(netdev,
5324                         "EEE Disabled: unsupported at half duplex. Re-enable using ethtool when at full duplex\n");
5325                 adapter->hw.dev_spec._base.eee_enable = false;
5326                 adapter->flags &= ~IGC_FLAG_EEE;
5327             }
5328 
5329             /* check if SmartSpeed worked */
5330             igc_check_downshift(hw);
5331             if (phy->speed_downgraded)
5332                 netdev_warn(netdev, "Link Speed was downgraded by SmartSpeed\n");
5333 
5334             /* adjust timeout factor according to speed/duplex */
5335             adapter->tx_timeout_factor = 1;
5336             switch (adapter->link_speed) {
5337             case SPEED_10:
5338                 adapter->tx_timeout_factor = 14;
5339                 break;
5340             case SPEED_100:
5341             case SPEED_1000:
5342             case SPEED_2500:
5343                 adapter->tx_timeout_factor = 7;
5344                 break;
5345             }
5346 
5347             if (adapter->link_speed != SPEED_1000)
5348                 goto no_wait;
5349 
5350             /* wait for Remote receiver status OK */
5351 retry_read_status:
5352             if (!igc_read_phy_reg(hw, PHY_1000T_STATUS,
5353                           &phy_data)) {
5354                 if (!(phy_data & SR_1000T_REMOTE_RX_STATUS) &&
5355                     retry_count) {
5356                     msleep(100);
5357                     retry_count--;
5358                     goto retry_read_status;
5359                 } else if (!retry_count) {
5360                     netdev_err(netdev, "exceeded max 2 second wait for Remote receiver status\n");
5361                 }
5362             } else {
5363                 netdev_err(netdev, "Failed to read 1000Base-T Status Reg\n");
5364             }
5365 no_wait:
5366             netif_carrier_on(netdev);
5367 
5368             /* link state has changed, schedule phy info update */
5369             if (!test_bit(__IGC_DOWN, &adapter->state))
5370                 mod_timer(&adapter->phy_info_timer,
5371                       round_jiffies(jiffies + 2 * HZ));
5372         }
5373     } else {
5374         if (netif_carrier_ok(netdev)) {
5375             adapter->link_speed = 0;
5376             adapter->link_duplex = 0;
5377 
5378             /* Link status message must follow this format */
5379             netdev_info(netdev, "NIC Link is Down\n");
5380             netif_carrier_off(netdev);
5381 
5382             /* link state has changed, schedule phy info update */
5383             if (!test_bit(__IGC_DOWN, &adapter->state))
5384                 mod_timer(&adapter->phy_info_timer,
5385                       round_jiffies(jiffies + 2 * HZ));
5386 
5387             /* link is down, time to check for alternate media */
5388             if (adapter->flags & IGC_FLAG_MAS_ENABLE) {
5389                 if (adapter->flags & IGC_FLAG_MEDIA_RESET) {
5390                     schedule_work(&adapter->reset_task);
5391                     /* return immediately */
5392                     return;
5393                 }
5394             }
5395             pm_schedule_suspend(netdev->dev.parent,
5396                         MSEC_PER_SEC * 5);
5397 
5398         /* also check for alternate media here */
5399         } else if (!netif_carrier_ok(netdev) &&
5400                (adapter->flags & IGC_FLAG_MAS_ENABLE)) {
5401             if (adapter->flags & IGC_FLAG_MEDIA_RESET) {
5402                 schedule_work(&adapter->reset_task);
5403                 /* return immediately */
5404                 return;
5405             }
5406         }
5407     }
5408 
5409     spin_lock(&adapter->stats64_lock);
5410     igc_update_stats(adapter);
5411     spin_unlock(&adapter->stats64_lock);
5412 
5413     for (i = 0; i < adapter->num_tx_queues; i++) {
5414         struct igc_ring *tx_ring = adapter->tx_ring[i];
5415 
5416         if (!netif_carrier_ok(netdev)) {
5417             /* We've lost link, so the controller stops DMA,
5418              * but we've got queued Tx work that's never going
5419              * to get done, so reset controller to flush Tx.
5420              * (Do the reset outside of interrupt context).
5421              */
5422             if (igc_desc_unused(tx_ring) + 1 < tx_ring->count) {
5423                 adapter->tx_timeout_count++;
5424                 schedule_work(&adapter->reset_task);
5425                 /* return immediately since reset is imminent */
5426                 return;
5427             }
5428         }
5429 
5430         /* Force detection of hung controller every watchdog period */
5431         set_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5432     }
5433 
5434     /* Cause software interrupt to ensure Rx ring is cleaned */
5435     if (adapter->flags & IGC_FLAG_HAS_MSIX) {
5436         u32 eics = 0;
5437 
5438         for (i = 0; i < adapter->num_q_vectors; i++)
5439             eics |= adapter->q_vector[i]->eims_value;
5440         wr32(IGC_EICS, eics);
5441     } else {
5442         wr32(IGC_ICS, IGC_ICS_RXDMT0);
5443     }
5444 
5445     igc_ptp_tx_hang(adapter);
5446 
5447     /* Reset the timer */
5448     if (!test_bit(__IGC_DOWN, &adapter->state)) {
5449         if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)
5450             mod_timer(&adapter->watchdog_timer,
5451                   round_jiffies(jiffies +  HZ));
5452         else
5453             mod_timer(&adapter->watchdog_timer,
5454                   round_jiffies(jiffies + 2 * HZ));
5455     }
5456 }
5457 
5458 /**
5459  * igc_intr_msi - Interrupt Handler
5460  * @irq: interrupt number
5461  * @data: pointer to a network interface device structure
5462  */
5463 static irqreturn_t igc_intr_msi(int irq, void *data)
5464 {
5465     struct igc_adapter *adapter = data;
5466     struct igc_q_vector *q_vector = adapter->q_vector[0];
5467     struct igc_hw *hw = &adapter->hw;
5468     /* read ICR disables interrupts using IAM */
5469     u32 icr = rd32(IGC_ICR);
5470 
5471     igc_write_itr(q_vector);
5472 
5473     if (icr & IGC_ICR_DRSTA)
5474         schedule_work(&adapter->reset_task);
5475 
5476     if (icr & IGC_ICR_DOUTSYNC) {
5477         /* HW is reporting DMA is out of sync */
5478         adapter->stats.doosync++;
5479     }
5480 
5481     if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) {
5482         hw->mac.get_link_status = true;
5483         if (!test_bit(__IGC_DOWN, &adapter->state))
5484             mod_timer(&adapter->watchdog_timer, jiffies + 1);
5485     }
5486 
5487     if (icr & IGC_ICR_TS)
5488         igc_tsync_interrupt(adapter);
5489 
5490     napi_schedule(&q_vector->napi);
5491 
5492     return IRQ_HANDLED;
5493 }
5494 
5495 /**
5496  * igc_intr - Legacy Interrupt Handler
5497  * @irq: interrupt number
5498  * @data: pointer to a network interface device structure
5499  */
5500 static irqreturn_t igc_intr(int irq, void *data)
5501 {
5502     struct igc_adapter *adapter = data;
5503     struct igc_q_vector *q_vector = adapter->q_vector[0];
5504     struct igc_hw *hw = &adapter->hw;
5505     /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5506      * need for the IMC write
5507      */
5508     u32 icr = rd32(IGC_ICR);
5509 
5510     /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5511      * not set, then the adapter didn't send an interrupt
5512      */
5513     if (!(icr & IGC_ICR_INT_ASSERTED))
5514         return IRQ_NONE;
5515 
5516     igc_write_itr(q_vector);
5517 
5518     if (icr & IGC_ICR_DRSTA)
5519         schedule_work(&adapter->reset_task);
5520 
5521     if (icr & IGC_ICR_DOUTSYNC) {
5522         /* HW is reporting DMA is out of sync */
5523         adapter->stats.doosync++;
5524     }
5525 
5526     if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) {
5527         hw->mac.get_link_status = true;
5528         /* guard against interrupt when we're going down */
5529         if (!test_bit(__IGC_DOWN, &adapter->state))
5530             mod_timer(&adapter->watchdog_timer, jiffies + 1);
5531     }
5532 
5533     if (icr & IGC_ICR_TS)
5534         igc_tsync_interrupt(adapter);
5535 
5536     napi_schedule(&q_vector->napi);
5537 
5538     return IRQ_HANDLED;
5539 }
5540 
5541 static void igc_free_irq(struct igc_adapter *adapter)
5542 {
5543     if (adapter->msix_entries) {
5544         int vector = 0, i;
5545 
5546         free_irq(adapter->msix_entries[vector++].vector, adapter);
5547 
5548         for (i = 0; i < adapter->num_q_vectors; i++)
5549             free_irq(adapter->msix_entries[vector++].vector,
5550                  adapter->q_vector[i]);
5551     } else {
5552         free_irq(adapter->pdev->irq, adapter);
5553     }
5554 }
5555 
5556 /**
5557  * igc_request_irq - initialize interrupts
5558  * @adapter: Pointer to adapter structure
5559  *
5560  * Attempts to configure interrupts using the best available
5561  * capabilities of the hardware and kernel.
5562  */
5563 static int igc_request_irq(struct igc_adapter *adapter)
5564 {
5565     struct net_device *netdev = adapter->netdev;
5566     struct pci_dev *pdev = adapter->pdev;
5567     int err = 0;
5568 
5569     if (adapter->flags & IGC_FLAG_HAS_MSIX) {
5570         err = igc_request_msix(adapter);
5571         if (!err)
5572             goto request_done;
5573         /* fall back to MSI */
5574         igc_free_all_tx_resources(adapter);
5575         igc_free_all_rx_resources(adapter);
5576 
5577         igc_clear_interrupt_scheme(adapter);
5578         err = igc_init_interrupt_scheme(adapter, false);
5579         if (err)
5580             goto request_done;
5581         igc_setup_all_tx_resources(adapter);
5582         igc_setup_all_rx_resources(adapter);
5583         igc_configure(adapter);
5584     }
5585 
5586     igc_assign_vector(adapter->q_vector[0], 0);
5587 
5588     if (adapter->flags & IGC_FLAG_HAS_MSI) {
5589         err = request_irq(pdev->irq, &igc_intr_msi, 0,
5590                   netdev->name, adapter);
5591         if (!err)
5592             goto request_done;
5593 
5594         /* fall back to legacy interrupts */
5595         igc_reset_interrupt_capability(adapter);
5596         adapter->flags &= ~IGC_FLAG_HAS_MSI;
5597     }
5598 
5599     err = request_irq(pdev->irq, &igc_intr, IRQF_SHARED,
5600               netdev->name, adapter);
5601 
5602     if (err)
5603         netdev_err(netdev, "Error %d getting interrupt\n", err);
5604 
5605 request_done:
5606     return err;
5607 }
5608 
5609 /**
5610  * __igc_open - Called when a network interface is made active
5611  * @netdev: network interface device structure
5612  * @resuming: boolean indicating if the device is resuming
5613  *
5614  * Returns 0 on success, negative value on failure
5615  *
5616  * The open entry point is called when a network interface is made
5617  * active by the system (IFF_UP).  At this point all resources needed
5618  * for transmit and receive operations are allocated, the interrupt
5619  * handler is registered with the OS, the watchdog timer is started,
5620  * and the stack is notified that the interface is ready.
5621  */
5622 static int __igc_open(struct net_device *netdev, bool resuming)
5623 {
5624     struct igc_adapter *adapter = netdev_priv(netdev);
5625     struct pci_dev *pdev = adapter->pdev;
5626     struct igc_hw *hw = &adapter->hw;
5627     int err = 0;
5628     int i = 0;
5629 
5630     /* disallow open during test */
5631 
5632     if (test_bit(__IGC_TESTING, &adapter->state)) {
5633         WARN_ON(resuming);
5634         return -EBUSY;
5635     }
5636 
5637     if (!resuming)
5638         pm_runtime_get_sync(&pdev->dev);
5639 
5640     netif_carrier_off(netdev);
5641 
5642     /* allocate transmit descriptors */
5643     err = igc_setup_all_tx_resources(adapter);
5644     if (err)
5645         goto err_setup_tx;
5646 
5647     /* allocate receive descriptors */
5648     err = igc_setup_all_rx_resources(adapter);
5649     if (err)
5650         goto err_setup_rx;
5651 
5652     igc_power_up_link(adapter);
5653 
5654     igc_configure(adapter);
5655 
5656     err = igc_request_irq(adapter);
5657     if (err)
5658         goto err_req_irq;
5659 
5660     /* Notify the stack of the actual queue counts. */
5661     err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues);
5662     if (err)
5663         goto err_set_queues;
5664 
5665     err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
5666     if (err)
5667         goto err_set_queues;
5668 
5669     clear_bit(__IGC_DOWN, &adapter->state);
5670 
5671     for (i = 0; i < adapter->num_q_vectors; i++)
5672         napi_enable(&adapter->q_vector[i]->napi);
5673 
5674     /* Clear any pending interrupts. */
5675     rd32(IGC_ICR);
5676     igc_irq_enable(adapter);
5677 
5678     if (!resuming)
5679         pm_runtime_put(&pdev->dev);
5680 
5681     netif_tx_start_all_queues(netdev);
5682 
5683     /* start the watchdog. */
5684     hw->mac.get_link_status = true;
5685     schedule_work(&adapter->watchdog_task);
5686 
5687     return IGC_SUCCESS;
5688 
5689 err_set_queues:
5690     igc_free_irq(adapter);
5691 err_req_irq:
5692     igc_release_hw_control(adapter);
5693     igc_power_down_phy_copper_base(&adapter->hw);
5694     igc_free_all_rx_resources(adapter);
5695 err_setup_rx:
5696     igc_free_all_tx_resources(adapter);
5697 err_setup_tx:
5698     igc_reset(adapter);
5699     if (!resuming)
5700         pm_runtime_put(&pdev->dev);
5701 
5702     return err;
5703 }
5704 
5705 int igc_open(struct net_device *netdev)
5706 {
5707     return __igc_open(netdev, false);
5708 }
5709 
5710 /**
5711  * __igc_close - Disables a network interface
5712  * @netdev: network interface device structure
5713  * @suspending: boolean indicating the device is suspending
5714  *
5715  * Returns 0; this is not allowed to fail
5716  *
5717  * The close entry point is called when an interface is de-activated
5718  * by the OS.  The hardware is still under the driver's control, but
5719  * needs to be disabled.  A global MAC reset is issued to stop the
5720  * hardware, and all transmit and receive resources are freed.
5721  */
5722 static int __igc_close(struct net_device *netdev, bool suspending)
5723 {
5724     struct igc_adapter *adapter = netdev_priv(netdev);
5725     struct pci_dev *pdev = adapter->pdev;
5726 
5727     WARN_ON(test_bit(__IGC_RESETTING, &adapter->state));
5728 
5729     if (!suspending)
5730         pm_runtime_get_sync(&pdev->dev);
5731 
5732     igc_down(adapter);
5733 
5734     igc_release_hw_control(adapter);
5735 
5736     igc_free_irq(adapter);
5737 
5738     igc_free_all_tx_resources(adapter);
5739     igc_free_all_rx_resources(adapter);
5740 
5741     if (!suspending)
5742         pm_runtime_put_sync(&pdev->dev);
5743 
5744     return 0;
5745 }
5746 
5747 int igc_close(struct net_device *netdev)
5748 {
5749     if (netif_device_present(netdev) || netdev->dismantle)
5750         return __igc_close(netdev, false);
5751     return 0;
5752 }
5753 
5754 /**
5755  * igc_ioctl - Access the hwtstamp interface
5756  * @netdev: network interface device structure
5757  * @ifr: interface request data
5758  * @cmd: ioctl command
5759  **/
5760 static int igc_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5761 {
5762     switch (cmd) {
5763     case SIOCGHWTSTAMP:
5764         return igc_ptp_get_ts_config(netdev, ifr);
5765     case SIOCSHWTSTAMP:
5766         return igc_ptp_set_ts_config(netdev, ifr);
5767     default:
5768         return -EOPNOTSUPP;
5769     }
5770 }
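
/* Illustrative userspace sketch of driving the hwtstamp ioctls handled
 * above; SIOCSHWTSTAMP enables hardware timestamping and SIOCGHWTSTAMP
 * reads back the active configuration.  "eth0" and sock_fd (an
 * already-open socket) are placeholders and error handling is omitted:
 *
 *	#include <linux/net_tstamp.h>
 *	#include <linux/sockios.h>
 *	#include <net/if.h>
 *	#include <string.h>
 *	#include <sys/ioctl.h>
 *
 *	struct hwtstamp_config cfg = {
 *		.tx_type   = HWTSTAMP_TX_ON,
 *		.rx_filter = HWTSTAMP_FILTER_ALL,
 *	};
 *	struct ifreq ifr;
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	ifr.ifr_data = (void *)&cfg;
 *	ioctl(sock_fd, SIOCSHWTSTAMP, &ifr);
 *	ioctl(sock_fd, SIOCGHWTSTAMP, &ifr);
 */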
5771 
5772 static int igc_save_launchtime_params(struct igc_adapter *adapter, int queue,
5773                       bool enable)
5774 {
5775     struct igc_ring *ring;
5776 
5777     if (queue < 0 || queue >= adapter->num_tx_queues)
5778         return -EINVAL;
5779 
5780     ring = adapter->tx_ring[queue];
5781     ring->launchtime_enable = enable;
5782 
5783     return 0;
5784 }
5785 
5786 static bool is_base_time_past(ktime_t base_time, const struct timespec64 *now)
5787 {
5788     struct timespec64 b;
5789 
5790     b = ktime_to_timespec64(base_time);
5791 
5792     return timespec64_compare(now, &b) > 0;
5793 }
5794 
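/* validate_schedule - sanity check a taprio offload request.  Requests
 * are rejected if they use cycle time extension, if the base time lies in
 * the future (the controller would hold all packets until then), if any
 * entry uses a command other than "set gates", or if a queue's gate would
 * open more than once per cycle without staying open across consecutive
 * entries.
 */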
5795 static bool validate_schedule(struct igc_adapter *adapter,
5796                   const struct tc_taprio_qopt_offload *qopt)
5797 {
5798     int queue_uses[IGC_MAX_TX_QUEUES] = { };
5799     struct timespec64 now;
5800     size_t n;
5801 
5802     if (qopt->cycle_time_extension)
5803         return false;
5804 
5805     igc_ptp_read(adapter, &now);
5806 
5807     /* If we program the controller's BASET registers with a time
5808      * in the future, it will hold all the packets until that
5809      * time, causing a lot of TX Hangs, so to avoid that, we
5810      * reject schedules that would start in the future.
5811      */
5812     if (!is_base_time_past(qopt->base_time, &now))
5813         return false;
5814 
5815     for (n = 0; n < qopt->num_entries; n++) {
5816         const struct tc_taprio_sched_entry *e, *prev;
5817         int i;
5818 
5819         prev = n ? &qopt->entries[n - 1] : NULL;
5820         e = &qopt->entries[n];
5821 
5822         /* i225 only supports "global" frame preemption
5823          * settings.
5824          */
5825         if (e->command != TC_TAPRIO_CMD_SET_GATES)
5826             return false;
5827 
5828         for (i = 0; i < adapter->num_tx_queues; i++) {
5829             if (e->gate_mask & BIT(i))
5830                 queue_uses[i]++;
5831 
5832             /* There are limitations: A single queue cannot be
5833              * opened and closed multiple times per cycle unless the
5834              * gate stays open. Check for it.
5835              */
5836             if (queue_uses[i] > 1 &&
5837                 !(prev->gate_mask & BIT(i)))
5838                 return false;
5839         }
5840     }
5841 
5842     return true;
5843 }
5844 
5845 static int igc_tsn_enable_launchtime(struct igc_adapter *adapter,
5846                      struct tc_etf_qopt_offload *qopt)
5847 {
5848     struct igc_hw *hw = &adapter->hw;
5849     int err;
5850 
5851     if (hw->mac.type != igc_i225)
5852         return -EOPNOTSUPP;
5853 
5854     err = igc_save_launchtime_params(adapter, qopt->queue, qopt->enable);
5855     if (err)
5856         return err;
5857 
5858     return igc_tsn_offload_apply(adapter);
5859 }
5860 
5861 static int igc_tsn_clear_schedule(struct igc_adapter *adapter)
5862 {
5863     int i;
5864 
5865     adapter->base_time = 0;
5866     adapter->cycle_time = NSEC_PER_SEC;
5867 
5868     for (i = 0; i < adapter->num_tx_queues; i++) {
5869         struct igc_ring *ring = adapter->tx_ring[i];
5870 
5871         ring->start_time = 0;
5872         ring->end_time = NSEC_PER_SEC;
5873     }
5874 
5875     return 0;
5876 }
5877 
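/* igc_save_qbv_schedule - translate a taprio schedule into per-ring
 * start/end times.  Disabling the offload clears the stored schedule; a
 * new schedule is rejected with -EALREADY while one is installed and must
 * pass validate_schedule().  Each gate-list entry extends the transmit
 * window of every queue it keeps open; e.g. with entries of 300us (queues
 * 0 and 1 open) and 700us (only queue 1 open), ring 0 ends up with window
 * 0-300us and ring 1 with 0-1000us within each cycle.
 */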
5878 static int igc_save_qbv_schedule(struct igc_adapter *adapter,
5879                  struct tc_taprio_qopt_offload *qopt)
5880 {
5881     bool queue_configured[IGC_MAX_TX_QUEUES] = { };
5882     u32 start_time = 0, end_time = 0;
5883     size_t n;
5884 
5885     if (!qopt->enable)
5886         return igc_tsn_clear_schedule(adapter);
5887 
5888     if (adapter->base_time)
5889         return -EALREADY;
5890 
5891     if (!validate_schedule(adapter, qopt))
5892         return -EINVAL;
5893 
5894     adapter->cycle_time = qopt->cycle_time;
5895     adapter->base_time = qopt->base_time;
5896 
5897     for (n = 0; n < qopt->num_entries; n++) {
5898         struct tc_taprio_sched_entry *e = &qopt->entries[n];
5899         int i;
5900 
5901         end_time += e->interval;
5902 
5903         for (i = 0; i < adapter->num_tx_queues; i++) {
5904             struct igc_ring *ring = adapter->tx_ring[i];
5905 
5906             if (!(e->gate_mask & BIT(i)))
5907                 continue;
5908 
5909             /* Check whether a queue stays open for more than one
5910              * entry. If so, keep the start and advance the end
5911              * time.
5912              */
5913             if (!queue_configured[i])
5914                 ring->start_time = start_time;
5915             ring->end_time = end_time;
5916 
5917             queue_configured[i] = true;
5918         }
5919 
5920         start_time += e->interval;
5921     }
5922 
5923     return 0;
5924 }
5925 
5926 static int igc_tsn_enable_qbv_scheduling(struct igc_adapter *adapter,
5927                      struct tc_taprio_qopt_offload *qopt)
5928 {
5929     struct igc_hw *hw = &adapter->hw;
5930     int err;
5931 
5932     if (hw->mac.type != igc_i225)
5933         return -EOPNOTSUPP;
5934 
5935     err = igc_save_qbv_schedule(adapter, qopt);
5936     if (err)
5937         return err;
5938 
5939     return igc_tsn_offload_apply(adapter);
5940 }
5941 
5942 static int igc_save_cbs_params(struct igc_adapter *adapter, int queue,
5943                    bool enable, int idleslope, int sendslope,
5944                    int hicredit, int locredit)
5945 {
5946     bool cbs_status[IGC_MAX_SR_QUEUES] = { false };
5947     struct net_device *netdev = adapter->netdev;
5948     struct igc_ring *ring;
5949     int i;
5950 
5951     /* i225 has two sets of credit-based shaper logic, so CBS is
5952      * supported only on the two highest priority queues.
5953      */
5954     if (queue < 0 || queue > 1)
5955         return -EINVAL;
5956 
5957     ring = adapter->tx_ring[queue];
5958 
5959     for (i = 0; i < IGC_MAX_SR_QUEUES; i++)
5960         if (adapter->tx_ring[i])
5961             cbs_status[i] = adapter->tx_ring[i]->cbs_enable;
5962 
5963     /* CBS should be enabled on the highest priority queue first in order
5964      * for the CBS algorithm to operate as intended.
5965      */
5966     if (enable) {
5967         if (queue == 1 && !cbs_status[0]) {
5968             netdev_err(netdev,
5969                    "Enabling CBS on queue1 before queue0\n");
5970             return -EINVAL;
5971         }
5972     } else {
5973         if (queue == 0 && cbs_status[1]) {
5974             netdev_err(netdev,
5975                    "Disabling CBS on queue0 before queue1\n");
5976             return -EINVAL;
5977         }
5978     }
5979 
5980     ring->cbs_enable = enable;
5981     ring->idleslope = idleslope;
5982     ring->sendslope = sendslope;
5983     ring->hicredit = hicredit;
5984     ring->locredit = locredit;
5985 
5986     return 0;
5987 }
5988 
5989 static int igc_tsn_enable_cbs(struct igc_adapter *adapter,
5990                   struct tc_cbs_qopt_offload *qopt)
5991 {
5992     struct igc_hw *hw = &adapter->hw;
5993     int err;
5994 
5995     if (hw->mac.type != igc_i225)
5996         return -EOPNOTSUPP;
5997 
5998     if (qopt->queue < 0 || qopt->queue > 1)
5999         return -EINVAL;
6000 
6001     err = igc_save_cbs_params(adapter, qopt->queue, qopt->enable,
6002                   qopt->idleslope, qopt->sendslope,
6003                   qopt->hicredit, qopt->locredit);
6004     if (err)
6005         return err;
6006 
6007     return igc_tsn_offload_apply(adapter);
6008 }
6009 
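/* igc_setup_tc - ndo_setup_tc entry point.  Dispatches qdisc offload
 * requests to the TSN helpers above: taprio (802.1Qbv gate scheduling),
 * ETF (per-packet launch time) and CBS (802.1Qav credit-based shaping),
 * which are normally configured from user space through the matching tc
 * qdiscs.
 */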
6010 static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type,
6011             void *type_data)
6012 {
6013     struct igc_adapter *adapter = netdev_priv(dev);
6014 
6015     switch (type) {
6016     case TC_SETUP_QDISC_TAPRIO:
6017         return igc_tsn_enable_qbv_scheduling(adapter, type_data);
6018 
6019     case TC_SETUP_QDISC_ETF:
6020         return igc_tsn_enable_launchtime(adapter, type_data);
6021 
6022     case TC_SETUP_QDISC_CBS:
6023         return igc_tsn_enable_cbs(adapter, type_data);
6024 
6025     default:
6026         return -EOPNOTSUPP;
6027     }
6028 }
6029 
6030 static int igc_bpf(struct net_device *dev, struct netdev_bpf *bpf)
6031 {
6032     struct igc_adapter *adapter = netdev_priv(dev);
6033 
6034     switch (bpf->command) {
6035     case XDP_SETUP_PROG:
6036         return igc_xdp_set_prog(adapter, bpf->prog, bpf->extack);
6037     case XDP_SETUP_XSK_POOL:
6038         return igc_xdp_setup_pool(adapter, bpf->xsk.pool,
6039                       bpf->xsk.queue_id);
6040     default:
6041         return -EOPNOTSUPP;
6042     }
6043 }
6044 
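/* igc_xdp_xmit - ndo_xdp_xmit handler for frames redirected to this
 * device.  Frames are queued on the Tx ring associated with the current
 * CPU while holding the netdev queue lock; frames that cannot be set up
 * for transmission are returned to the XDP memory model, and the number
 * of successfully queued frames is returned.
 */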
6045 static int igc_xdp_xmit(struct net_device *dev, int num_frames,
6046             struct xdp_frame **frames, u32 flags)
6047 {
6048     struct igc_adapter *adapter = netdev_priv(dev);
6049     int cpu = smp_processor_id();
6050     struct netdev_queue *nq;
6051     struct igc_ring *ring;
6052     int i, drops;
6053 
6054     if (unlikely(test_bit(__IGC_DOWN, &adapter->state)))
6055         return -ENETDOWN;
6056 
6057     if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
6058         return -EINVAL;
6059 
6060     ring = igc_xdp_get_tx_ring(adapter, cpu);
6061     nq = txring_txq(ring);
6062 
6063     __netif_tx_lock(nq, cpu);
6064 
6065     drops = 0;
6066     for (i = 0; i < num_frames; i++) {
6067         int err;
6068         struct xdp_frame *xdpf = frames[i];
6069 
6070         err = igc_xdp_init_tx_descriptor(ring, xdpf);
6071         if (err) {
6072             xdp_return_frame_rx_napi(xdpf);
6073             drops++;
6074         }
6075     }
6076 
6077     if (flags & XDP_XMIT_FLUSH)
6078         igc_flush_tx_descriptors(ring);
6079 
6080     __netif_tx_unlock(nq);
6081 
6082     return num_frames - drops;
6083 }
6084 
6085 static void igc_trigger_rxtxq_interrupt(struct igc_adapter *adapter,
6086                     struct igc_q_vector *q_vector)
6087 {
6088     struct igc_hw *hw = &adapter->hw;
6089     u32 eics = 0;
6090 
6091     eics |= q_vector->eims_value;
6092     wr32(IGC_EICS, eics);
6093 }
6094 
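/* igc_xsk_wakeup - ndo_xsk_wakeup handler for AF_XDP zero-copy queues.
 * Unless NAPI is already scheduled (in which case the missed flag is
 * set), a software interrupt is triggered on the queue's vector so the
 * ring gets serviced promptly.
 */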
6095 int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
6096 {
6097     struct igc_adapter *adapter = netdev_priv(dev);
6098     struct igc_q_vector *q_vector;
6099     struct igc_ring *ring;
6100 
6101     if (test_bit(__IGC_DOWN, &adapter->state))
6102         return -ENETDOWN;
6103 
6104     if (!igc_xdp_is_enabled(adapter))
6105         return -ENXIO;
6106 
6107     if (queue_id >= adapter->num_rx_queues)
6108         return -EINVAL;
6109 
6110     ring = adapter->rx_ring[queue_id];
6111 
6112     if (!ring->xsk_pool)
6113         return -ENXIO;
6114 
6115     q_vector = adapter->q_vector[queue_id];
6116     if (!napi_if_scheduled_mark_missed(&q_vector->napi))
6117         igc_trigger_rxtxq_interrupt(adapter, q_vector);
6118 
6119     return 0;
6120 }
6121 
6122 static const struct net_device_ops igc_netdev_ops = {
6123     .ndo_open       = igc_open,
6124     .ndo_stop       = igc_close,
6125     .ndo_start_xmit     = igc_xmit_frame,
6126     .ndo_set_rx_mode    = igc_set_rx_mode,
6127     .ndo_set_mac_address    = igc_set_mac,
6128     .ndo_change_mtu     = igc_change_mtu,
6129     .ndo_get_stats64    = igc_get_stats64,
6130     .ndo_fix_features   = igc_fix_features,
6131     .ndo_set_features   = igc_set_features,
6132     .ndo_features_check = igc_features_check,
6133     .ndo_eth_ioctl      = igc_ioctl,
6134     .ndo_setup_tc       = igc_setup_tc,
6135     .ndo_bpf        = igc_bpf,
6136     .ndo_xdp_xmit       = igc_xdp_xmit,
6137     .ndo_xsk_wakeup     = igc_xsk_wakeup,
6138 };
6139 
6140 /* PCIe configuration access */
6141 void igc_read_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value)
6142 {
6143     struct igc_adapter *adapter = hw->back;
6144 
6145     pci_read_config_word(adapter->pdev, reg, value);
6146 }
6147 
6148 void igc_write_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value)
6149 {
6150     struct igc_adapter *adapter = hw->back;
6151 
6152     pci_write_config_word(adapter->pdev, reg, *value);
6153 }
6154 
6155 s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value)
6156 {
6157     struct igc_adapter *adapter = hw->back;
6158 
6159     if (!pci_is_pcie(adapter->pdev))
6160         return -IGC_ERR_CONFIG;
6161 
6162     pcie_capability_read_word(adapter->pdev, reg, value);
6163 
6164     return IGC_SUCCESS;
6165 }
6166 
6167 s32 igc_write_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value)
6168 {
6169     struct igc_adapter *adapter = hw->back;
6170 
6171     if (!pci_is_pcie(adapter->pdev))
6172         return -IGC_ERR_CONFIG;
6173 
6174     pcie_capability_write_word(adapter->pdev, reg, *value);
6175 
6176     return IGC_SUCCESS;
6177 }
6178 
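/* igc_rd32 - MMIO register read with surprise-removal handling.  A read
 * that returns all ones is re-checked against register 0; if that also
 * reads as all ones the PCIe link is considered lost, the mapping is
 * dropped and the netdev is detached so no further MMIO is attempted.
 */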
6179 u32 igc_rd32(struct igc_hw *hw, u32 reg)
6180 {
6181     struct igc_adapter *igc = container_of(hw, struct igc_adapter, hw);
6182     u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr);
6183     u32 value = 0;
6184 
6185     if (IGC_REMOVED(hw_addr))
6186         return ~value;
6187 
6188     value = readl(&hw_addr[reg]);
6189 
6190     /* reads should not return all F's */
6191     if (!(~value) && (!reg || !(~readl(hw_addr)))) {
6192         struct net_device *netdev = igc->netdev;
6193 
6194         hw->hw_addr = NULL;
6195         netif_device_detach(netdev);
6196         netdev_err(netdev, "PCIe link lost, device now detached\n");
6197         WARN(pci_device_is_present(igc->pdev),
6198              "igc: Failed to read reg 0x%x!\n", reg);
6199     }
6200 
6201     return value;
6202 }
6203 
6204 /**
6205  * igc_probe - Device Initialization Routine
6206  * @pdev: PCI device information struct
6207  * @ent: entry in igc_pci_tbl
6208  *
6209  * Returns 0 on success, negative on failure
6210  *
6211  * igc_probe initializes an adapter identified by a pci_dev structure.
6212  * The OS initialization, configuring the adapter private structure,
6213  * and a hardware reset occur.
6214  */
6215 static int igc_probe(struct pci_dev *pdev,
6216              const struct pci_device_id *ent)
6217 {
6218     struct igc_adapter *adapter;
6219     struct net_device *netdev;
6220     struct igc_hw *hw;
6221     const struct igc_info *ei = igc_info_tbl[ent->driver_data];
6222     int err;
6223 
6224     err = pci_enable_device_mem(pdev);
6225     if (err)
6226         return err;
6227 
6228     err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
6229     if (err) {
6230         dev_err(&pdev->dev,
6231             "No usable DMA configuration, aborting\n");
6232         goto err_dma;
6233     }
6234 
6235     err = pci_request_mem_regions(pdev, igc_driver_name);
6236     if (err)
6237         goto err_pci_reg;
6238 
6239     pci_enable_pcie_error_reporting(pdev);
6240 
6241     err = pci_enable_ptm(pdev, NULL);
6242     if (err < 0)
6243         dev_info(&pdev->dev, "PCIe PTM not supported by PCIe bus/controller\n");
6244 
6245     pci_set_master(pdev);
6246 
6247     err = -ENOMEM;
6248     netdev = alloc_etherdev_mq(sizeof(struct igc_adapter),
6249                    IGC_MAX_TX_QUEUES);
6250 
6251     if (!netdev)
6252         goto err_alloc_etherdev;
6253 
6254     SET_NETDEV_DEV(netdev, &pdev->dev);
6255 
6256     pci_set_drvdata(pdev, netdev);
6257     adapter = netdev_priv(netdev);
6258     adapter->netdev = netdev;
6259     adapter->pdev = pdev;
6260     hw = &adapter->hw;
6261     hw->back = adapter;
6262     adapter->port_num = hw->bus.func;
6263     adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
6264 
6265     err = pci_save_state(pdev);
6266     if (err)
6267         goto err_ioremap;
6268 
6269     err = -EIO;
6270     adapter->io_addr = ioremap(pci_resource_start(pdev, 0),
6271                    pci_resource_len(pdev, 0));
6272     if (!adapter->io_addr)
6273         goto err_ioremap;
6274 
6275     /* hw->hw_addr can be zeroed, so use adapter->io_addr for unmap */
6276     hw->hw_addr = adapter->io_addr;
6277 
6278     netdev->netdev_ops = &igc_netdev_ops;
6279     igc_ethtool_set_ops(netdev);
6280     netdev->watchdog_timeo = 5 * HZ;
6281 
6282     netdev->mem_start = pci_resource_start(pdev, 0);
6283     netdev->mem_end = pci_resource_end(pdev, 0);
6284 
6285     /* PCI config space info */
6286     hw->vendor_id = pdev->vendor;
6287     hw->device_id = pdev->device;
6288     hw->revision_id = pdev->revision;
6289     hw->subsystem_vendor_id = pdev->subsystem_vendor;
6290     hw->subsystem_device_id = pdev->subsystem_device;
6291 
6292     /* Copy the default MAC and PHY function pointers */
6293     memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
6294     memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
6295 
6296     /* Initialize skew-specific constants */
6297     err = ei->get_invariants(hw);
6298     if (err)
6299         goto err_sw_init;
6300 
6301     /* Add supported features to the features list */
6302     netdev->features |= NETIF_F_SG;
6303     netdev->features |= NETIF_F_TSO;
6304     netdev->features |= NETIF_F_TSO6;
6305     netdev->features |= NETIF_F_TSO_ECN;
6306     netdev->features |= NETIF_F_RXCSUM;
6307     netdev->features |= NETIF_F_HW_CSUM;
6308     netdev->features |= NETIF_F_SCTP_CRC;
6309     netdev->features |= NETIF_F_HW_TC;
6310 
6311 #define IGC_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \
6312                   NETIF_F_GSO_GRE_CSUM | \
6313                   NETIF_F_GSO_IPXIP4 | \
6314                   NETIF_F_GSO_IPXIP6 | \
6315                   NETIF_F_GSO_UDP_TUNNEL | \
6316                   NETIF_F_GSO_UDP_TUNNEL_CSUM)
6317 
6318     netdev->gso_partial_features = IGC_GSO_PARTIAL_FEATURES;
6319     netdev->features |= NETIF_F_GSO_PARTIAL | IGC_GSO_PARTIAL_FEATURES;
6320 
6321     /* setup the private structure */
6322     err = igc_sw_init(adapter);
6323     if (err)
6324         goto err_sw_init;
6325 
6326     /* copy netdev features into list of user selectable features */
6327     netdev->hw_features |= NETIF_F_NTUPLE;
6328     netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
6329     netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
6330     netdev->hw_features |= netdev->features;
6331 
6332     netdev->features |= NETIF_F_HIGHDMA;
6333 
6334     netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID;
6335     netdev->mpls_features |= NETIF_F_HW_CSUM;
6336     netdev->hw_enc_features |= netdev->vlan_features;
6337 
6338     /* MTU range: 68 - 9216 */
6339     netdev->min_mtu = ETH_MIN_MTU;
6340     netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE;
6341 
6342     /* before reading the NVM, reset the controller to put the device in a
6343      * known good starting state
6344      */
6345     hw->mac.ops.reset_hw(hw);
6346 
6347     if (igc_get_flash_presence_i225(hw)) {
6348         if (hw->nvm.ops.validate(hw) < 0) {
6349             dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
6350             err = -EIO;
6351             goto err_eeprom;
6352         }
6353     }
6354 
6355     if (eth_platform_get_mac_address(&pdev->dev, hw->mac.addr)) {
6356         /* copy the MAC address out of the NVM */
6357         if (hw->mac.ops.read_mac_addr(hw))
6358             dev_err(&pdev->dev, "NVM Read Error\n");
6359     }
6360 
6361     eth_hw_addr_set(netdev, hw->mac.addr);
6362 
6363     if (!is_valid_ether_addr(netdev->dev_addr)) {
6364         dev_err(&pdev->dev, "Invalid MAC Address\n");
6365         err = -EIO;
6366         goto err_eeprom;
6367     }
6368 
6369     /* configure RXPBSIZE and TXPBSIZE */
6370     wr32(IGC_RXPBS, I225_RXPBSIZE_DEFAULT);
6371     wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT);
6372 
6373     timer_setup(&adapter->watchdog_timer, igc_watchdog, 0);
6374     timer_setup(&adapter->phy_info_timer, igc_update_phy_info, 0);
6375 
6376     INIT_WORK(&adapter->reset_task, igc_reset_task);
6377     INIT_WORK(&adapter->watchdog_task, igc_watchdog_task);
6378 
6379     /* Initialize link properties that are user-changeable */
6380     adapter->fc_autoneg = true;
6381     hw->mac.autoneg = true;
6382     hw->phy.autoneg_advertised = 0xaf;
6383 
6384     hw->fc.requested_mode = igc_fc_default;
6385     hw->fc.current_mode = igc_fc_default;
6386 
6387     /* By default, support wake on port A */
6388     adapter->flags |= IGC_FLAG_WOL_SUPPORTED;
6389 
6390     /* initialize the wol settings based on the eeprom settings */
6391     if (adapter->flags & IGC_FLAG_WOL_SUPPORTED)
6392         adapter->wol |= IGC_WUFC_MAG;
6393 
6394     device_set_wakeup_enable(&adapter->pdev->dev,
6395                  adapter->flags & IGC_FLAG_WOL_SUPPORTED);
6396 
6397     igc_ptp_init(adapter);
6398 
6399     igc_tsn_clear_schedule(adapter);
6400 
6401     /* reset the hardware with the new settings */
6402     igc_reset(adapter);
6403 
6404     /* let the f/w know that the h/w is now under the control of the
6405      * driver.
6406      */
6407     igc_get_hw_control(adapter);
6408 
6409     strncpy(netdev->name, "eth%d", IFNAMSIZ);
6410     err = register_netdev(netdev);
6411     if (err)
6412         goto err_register;
6413 
6414     /* carrier off reporting is important to ethtool even BEFORE open */
6415     netif_carrier_off(netdev);
6416 
6417     /* keep a copy of the board specific info */
6418     adapter->ei = *ei;
6419 
6420     /* print pcie link status and MAC address */
6421     pcie_print_link_status(pdev);
6422     netdev_info(netdev, "MAC: %pM\n", netdev->dev_addr);
6423 
6424     dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
6425     /* Disable EEE for internal PHY devices */
6426     hw->dev_spec._base.eee_enable = false;
6427     adapter->flags &= ~IGC_FLAG_EEE;
6428     igc_set_eee_i225(hw, false, false, false);
6429 
6430     pm_runtime_put_noidle(&pdev->dev);
6431 
6432     return 0;
6433 
6434 err_register:
6435     igc_release_hw_control(adapter);
6436 err_eeprom:
6437     if (!igc_check_reset_block(hw))
6438         igc_reset_phy(hw);
6439 err_sw_init:
6440     igc_clear_interrupt_scheme(adapter);
6441     iounmap(adapter->io_addr);
6442 err_ioremap:
6443     free_netdev(netdev);
6444 err_alloc_etherdev:
6445     pci_disable_pcie_error_reporting(pdev);
6446     pci_release_mem_regions(pdev);
6447 err_pci_reg:
6448 err_dma:
6449     pci_disable_device(pdev);
6450     return err;
6451 }
6452 
6453 /**
6454  * igc_remove - Device Removal Routine
6455  * @pdev: PCI device information struct
6456  *
6457  * igc_remove is called by the PCI subsystem to alert the driver
6458  * that it should release a PCI device.  This could be caused by a
6459  * Hot-Plug event, or because the driver is going to be removed from
6460  * memory.
6461  */
6462 static void igc_remove(struct pci_dev *pdev)
6463 {
6464     struct net_device *netdev = pci_get_drvdata(pdev);
6465     struct igc_adapter *adapter = netdev_priv(netdev);
6466 
6467     pm_runtime_get_noresume(&pdev->dev);
6468 
6469     igc_flush_nfc_rules(adapter);
6470 
6471     igc_ptp_stop(adapter);
6472 
6473     set_bit(__IGC_DOWN, &adapter->state);
6474 
6475     del_timer_sync(&adapter->watchdog_timer);
6476     del_timer_sync(&adapter->phy_info_timer);
6477 
6478     cancel_work_sync(&adapter->reset_task);
6479     cancel_work_sync(&adapter->watchdog_task);
6480 
6481     /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6482      * would have already happened in close and is redundant.
6483      */
6484     igc_release_hw_control(adapter);
6485     unregister_netdev(netdev);
6486 
6487     igc_clear_interrupt_scheme(adapter);
6488     pci_iounmap(pdev, adapter->io_addr);
6489     pci_release_mem_regions(pdev);
6490 
6491     free_netdev(netdev);
6492 
6493     pci_disable_pcie_error_reporting(pdev);
6494 
6495     pci_disable_device(pdev);
6496 }
6497 
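/* __igc_shutdown - common suspend/shutdown path.  Detaches and closes the
 * interface, then either programs the wake-up filter registers (WUFC/WUC)
 * and keeps the link powered when Wake-on-LAN or manageability is in use,
 * or powers down the PHY; finally hands the hardware back to firmware and
 * disables the PCI device.
 */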
6498 static int __igc_shutdown(struct pci_dev *pdev, bool *enable_wake,
6499               bool runtime)
6500 {
6501     struct net_device *netdev = pci_get_drvdata(pdev);
6502     struct igc_adapter *adapter = netdev_priv(netdev);
6503     u32 wufc = runtime ? IGC_WUFC_LNKC : adapter->wol;
6504     struct igc_hw *hw = &adapter->hw;
6505     u32 ctrl, rctl, status;
6506     bool wake;
6507 
6508     rtnl_lock();
6509     netif_device_detach(netdev);
6510 
6511     if (netif_running(netdev))
6512         __igc_close(netdev, true);
6513 
6514     igc_ptp_suspend(adapter);
6515 
6516     igc_clear_interrupt_scheme(adapter);
6517     rtnl_unlock();
6518 
6519     status = rd32(IGC_STATUS);
6520     if (status & IGC_STATUS_LU)
6521         wufc &= ~IGC_WUFC_LNKC;
6522 
6523     if (wufc) {
6524         igc_setup_rctl(adapter);
6525         igc_set_rx_mode(netdev);
6526 
6527         /* turn on all-multi mode if wake on multicast is enabled */
6528         if (wufc & IGC_WUFC_MC) {
6529             rctl = rd32(IGC_RCTL);
6530             rctl |= IGC_RCTL_MPE;
6531             wr32(IGC_RCTL, rctl);
6532         }
6533 
6534         ctrl = rd32(IGC_CTRL);
6535         ctrl |= IGC_CTRL_ADVD3WUC;
6536         wr32(IGC_CTRL, ctrl);
6537 
6538         /* Allow time for pending master requests to run */
6539         igc_disable_pcie_master(hw);
6540 
6541         wr32(IGC_WUC, IGC_WUC_PME_EN);
6542         wr32(IGC_WUFC, wufc);
6543     } else {
6544         wr32(IGC_WUC, 0);
6545         wr32(IGC_WUFC, 0);
6546     }
6547 
6548     wake = wufc || adapter->en_mng_pt;
6549     if (!wake)
6550         igc_power_down_phy_copper_base(&adapter->hw);
6551     else
6552         igc_power_up_link(adapter);
6553 
6554     if (enable_wake)
6555         *enable_wake = wake;
6556 
6557     /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6558      * would have already happened in close and is redundant.
6559      */
6560     igc_release_hw_control(adapter);
6561 
6562     pci_disable_device(pdev);
6563 
6564     return 0;
6565 }
6566 
6567 #ifdef CONFIG_PM
6568 static int __maybe_unused igc_runtime_suspend(struct device *dev)
6569 {
6570     return __igc_shutdown(to_pci_dev(dev), NULL, 1);
6571 }
6572 
6573 static void igc_deliver_wake_packet(struct net_device *netdev)
6574 {
6575     struct igc_adapter *adapter = netdev_priv(netdev);
6576     struct igc_hw *hw = &adapter->hw;
6577     struct sk_buff *skb;
6578     u32 wupl;
6579 
6580     wupl = rd32(IGC_WUPL) & IGC_WUPL_MASK;
6581 
6582     /* WUPM stores only the first 128 bytes of the wake packet.
6583      * Read the packet only if we have the whole thing.
6584      */
6585     if (wupl == 0 || wupl > IGC_WUPM_BYTES)
6586         return;
6587 
6588     skb = netdev_alloc_skb_ip_align(netdev, IGC_WUPM_BYTES);
6589     if (!skb)
6590         return;
6591 
6592     skb_put(skb, wupl);
6593 
6594     /* Ensure reads are 32-bit aligned */
6595     wupl = roundup(wupl, 4);
6596 
6597     memcpy_fromio(skb->data, hw->hw_addr + IGC_WUPM_REG(0), wupl);
6598 
6599     skb->protocol = eth_type_trans(skb, netdev);
6600     netif_rx(skb);
6601 }
6602 
6603 static int __maybe_unused igc_resume(struct device *dev)
6604 {
6605     struct pci_dev *pdev = to_pci_dev(dev);
6606     struct net_device *netdev = pci_get_drvdata(pdev);
6607     struct igc_adapter *adapter = netdev_priv(netdev);
6608     struct igc_hw *hw = &adapter->hw;
6609     u32 err, val;
6610 
6611     pci_set_power_state(pdev, PCI_D0);
6612     pci_restore_state(pdev);
6613     pci_save_state(pdev);
6614 
6615     if (!pci_device_is_present(pdev))
6616         return -ENODEV;
6617     err = pci_enable_device_mem(pdev);
6618     if (err) {
6619         netdev_err(netdev, "Cannot enable PCI device from suspend\n");
6620         return err;
6621     }
6622     pci_set_master(pdev);
6623 
6624     pci_enable_wake(pdev, PCI_D3hot, 0);
6625     pci_enable_wake(pdev, PCI_D3cold, 0);
6626 
6627     if (igc_init_interrupt_scheme(adapter, true)) {
6628         netdev_err(netdev, "Unable to allocate memory for queues\n");
6629         return -ENOMEM;
6630     }
6631 
6632     igc_reset(adapter);
6633 
6634     /* let the f/w know that the h/w is now under the control of the
6635      * driver.
6636      */
6637     igc_get_hw_control(adapter);
6638 
6639     val = rd32(IGC_WUS);
6640     if (val & WAKE_PKT_WUS)
6641         igc_deliver_wake_packet(netdev);
6642 
6643     wr32(IGC_WUS, ~0);
6644 
6645     rtnl_lock();
6646     if (!err && netif_running(netdev))
6647         err = __igc_open(netdev, true);
6648 
6649     if (!err)
6650         netif_device_attach(netdev);
6651     rtnl_unlock();
6652 
6653     return err;
6654 }
6655 
6656 static int __maybe_unused igc_runtime_resume(struct device *dev)
6657 {
6658     return igc_resume(dev);
6659 }
6660 
6661 static int __maybe_unused igc_suspend(struct device *dev)
6662 {
6663     return __igc_shutdown(to_pci_dev(dev), NULL, 0);
6664 }
6665 
6666 static int __maybe_unused igc_runtime_idle(struct device *dev)
6667 {
6668     struct net_device *netdev = dev_get_drvdata(dev);
6669     struct igc_adapter *adapter = netdev_priv(netdev);
6670 
6671     if (!igc_has_link(adapter))
6672         pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
6673 
6674     return -EBUSY;
6675 }
6676 #endif /* CONFIG_PM */
6677 
6678 static void igc_shutdown(struct pci_dev *pdev)
6679 {
6680     bool wake;
6681 
6682     __igc_shutdown(pdev, &wake, 0);
6683 
6684     if (system_state == SYSTEM_POWER_OFF) {
6685         pci_wake_from_d3(pdev, wake);
6686         pci_set_power_state(pdev, PCI_D3hot);
6687     }
6688 }
6689 
6690 /**
6691  *  igc_io_error_detected - called when PCI error is detected
6692  *  @pdev: Pointer to PCI device
6693  *  @state: The current PCI connection state
6694  *
6695  *  This function is called after a PCI bus error affecting
6696  *  this device has been detected.
6697  **/
6698 static pci_ers_result_t igc_io_error_detected(struct pci_dev *pdev,
6699                           pci_channel_state_t state)
6700 {
6701     struct net_device *netdev = pci_get_drvdata(pdev);
6702     struct igc_adapter *adapter = netdev_priv(netdev);
6703 
6704     netif_device_detach(netdev);
6705 
6706     if (state == pci_channel_io_perm_failure)
6707         return PCI_ERS_RESULT_DISCONNECT;
6708 
6709     if (netif_running(netdev))
6710         igc_down(adapter);
6711     pci_disable_device(pdev);
6712 
6713     /* Request a slot reset. */
6714     return PCI_ERS_RESULT_NEED_RESET;
6715 }
6716 
6717 /**
6718  *  igc_io_slot_reset - called after the PCI bus has been reset.
6719  *  @pdev: Pointer to PCI device
6720  *
6721  *  Restart the card from scratch, as if from a cold boot. Implementation
6722  *  resembles the first half of the igc_resume routine.
6723  **/
6724 static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev)
6725 {
6726     struct net_device *netdev = pci_get_drvdata(pdev);
6727     struct igc_adapter *adapter = netdev_priv(netdev);
6728     struct igc_hw *hw = &adapter->hw;
6729     pci_ers_result_t result;
6730 
6731     if (pci_enable_device_mem(pdev)) {
6732         netdev_err(netdev, "Could not re-enable PCI device after reset\n");
6733         result = PCI_ERS_RESULT_DISCONNECT;
6734     } else {
6735         pci_set_master(pdev);
6736         pci_restore_state(pdev);
6737         pci_save_state(pdev);
6738 
6739         pci_enable_wake(pdev, PCI_D3hot, 0);
6740         pci_enable_wake(pdev, PCI_D3cold, 0);
6741 
6742         /* In case of PCI error, adapter loses its HW address
6743          * so we should re-assign it here.
6744          */
6745         hw->hw_addr = adapter->io_addr;
6746 
6747         igc_reset(adapter);
6748         wr32(IGC_WUS, ~0);
6749         result = PCI_ERS_RESULT_RECOVERED;
6750     }
6751 
6752     return result;
6753 }
6754 
6755 /**
6756  *  igc_io_resume - called when traffic can start to flow again.
6757  *  @pdev: Pointer to PCI device
6758  *
6759  *  This callback is called when the error recovery driver tells us that
6760  *  it's OK to resume normal operation. Implementation resembles the
6761  *  second half of the igc_resume routine.
6762  */
6763 static void igc_io_resume(struct pci_dev *pdev)
6764 {
6765     struct net_device *netdev = pci_get_drvdata(pdev);
6766     struct igc_adapter *adapter = netdev_priv(netdev);
6767 
6768     rtnl_lock();
6769     if (netif_running(netdev)) {
6770         if (igc_open(netdev)) {
6771             netdev_err(netdev, "igc_open failed after reset\n");
6772             return;
6773         }
6774     }
6775 
6776     netif_device_attach(netdev);
6777 
6778     /* let the f/w know that the h/w is now under the control of the
6779      * driver.
6780      */
6781     igc_get_hw_control(adapter);
6782     rtnl_unlock();
6783 }
6784 
6785 static const struct pci_error_handlers igc_err_handler = {
6786     .error_detected = igc_io_error_detected,
6787     .slot_reset = igc_io_slot_reset,
6788     .resume = igc_io_resume,
6789 };
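
This table is what the PCI error-recovery core walks when a bus error is reported: ->error_detected() quiesces the device and asks for a reset, ->slot_reset() brings the device back after the bus has been reset, and ->resume() restarts traffic. A stripped-down skeleton of the same contract for a hypothetical driver, reduced to the return-value handshake:

#include <linux/pci.h>
#include <linux/netdevice.h>

/* Hypothetical handlers reduced to the recovery handshake; a real
 * driver would also tear down and rebuild its rings as igc does above.
 */
static pci_ers_result_t foo_io_error_detected(struct pci_dev *pdev,
                                              pci_channel_state_t state)
{
        netif_device_detach(pci_get_drvdata(pdev));

        return state == pci_channel_io_perm_failure ?
               PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
}

static pci_ers_result_t foo_io_slot_reset(struct pci_dev *pdev)
{
        if (pci_enable_device_mem(pdev))
                return PCI_ERS_RESULT_DISCONNECT;

        pci_set_master(pdev);
        pci_restore_state(pdev);

        return PCI_ERS_RESULT_RECOVERED;
}

static void foo_io_resume(struct pci_dev *pdev)
{
        netif_device_attach(pci_get_drvdata(pdev));
}

static const struct pci_error_handlers foo_err_handler = {
        .error_detected = foo_io_error_detected,
        .slot_reset     = foo_io_slot_reset,
        .resume         = foo_io_resume,
};
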
6790 
6791 #ifdef CONFIG_PM
6792 static const struct dev_pm_ops igc_pm_ops = {
6793     SET_SYSTEM_SLEEP_PM_OPS(igc_suspend, igc_resume)
6794     SET_RUNTIME_PM_OPS(igc_runtime_suspend, igc_runtime_resume,
6795                igc_runtime_idle)
6796 };
6797 #endif
6798 
6799 static struct pci_driver igc_driver = {
6800     .name     = igc_driver_name,
6801     .id_table = igc_pci_tbl,
6802     .probe    = igc_probe,
6803     .remove   = igc_remove,
6804 #ifdef CONFIG_PM
6805     .driver.pm = &igc_pm_ops,
6806 #endif
6807     .shutdown = igc_shutdown,
6808     .err_handler = &igc_err_handler,
6809 };
6810 
6811 /**
6812  * igc_reinit_queues - rebuild the interrupt scheme and reopen the queues
6813  * @adapter: pointer to adapter structure
6814  */
6815 int igc_reinit_queues(struct igc_adapter *adapter)
6816 {
6817     struct net_device *netdev = adapter->netdev;
6818     int err = 0;
6819 
6820     if (netif_running(netdev))
6821         igc_close(netdev);
6822 
6823     igc_reset_interrupt_capability(adapter);
6824 
6825     if (igc_init_interrupt_scheme(adapter, true)) {
6826         netdev_err(netdev, "Unable to allocate memory for queues\n");
6827         return -ENOMEM;
6828     }
6829 
6830     if (netif_running(netdev))
6831         err = igc_open(netdev);
6832 
6833     return err;
6834 }
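
igc_reinit_queues() is the close/re-plan/reopen helper used when the queue layout has to change at runtime. A hedged sketch of a caller (in the real driver this kind of sequence sits in the ethtool channel-configuration path; everything here other than igc_reinit_queues() itself is assumed for illustration):

#include "igc.h"        /* driver-internal header, assumed available */

/* Hypothetical caller: record a new queue count and rebuild the
 * interrupt/queue layout. The rss_queues field name is an assumption
 * made for this sketch.
 */
static int foo_set_queue_count(struct igc_adapter *adapter, unsigned int count)
{
        adapter->rss_queues = count;    /* assumed field name */

        return igc_reinit_queues(adapter);
}
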
6835 
6836 /**
6837  * igc_get_hw_dev - return device
6838  * @hw: pointer to hardware structure
6839  *
6840  * Used by the hardware layer to print debugging information.
6841  */
6842 struct net_device *igc_get_hw_dev(struct igc_hw *hw)
6843 {
6844     struct igc_adapter *adapter = hw->back;
6845 
6846     return adapter->netdev;
6847 }
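
igc_get_hw_dev() exists so the low-level hardware code, which only has a struct igc_hw, can still log against the right net_device. A small sketch of that usage (the wrapper name is illustrative; the real driver provides a similar macro in its headers):

#include <linux/netdevice.h>
#include "igc.h"        /* driver-internal header, assumed available */

/* Hypothetical logging helper for the hardware layer: resolve the
 * net_device behind the hw struct and use the netdev_* printers.
 */
static inline void foo_hw_log(struct igc_hw *hw, const char *msg)
{
        netdev_dbg(igc_get_hw_dev(hw), "%s\n", msg);
}
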
6848 
6849 static void igc_disable_rx_ring_hw(struct igc_ring *ring)
6850 {
6851     struct igc_hw *hw = &ring->q_vector->adapter->hw;
6852     u8 idx = ring->reg_idx;
6853     u32 rxdctl;
6854 
6855     rxdctl = rd32(IGC_RXDCTL(idx));
6856     rxdctl &= ~IGC_RXDCTL_QUEUE_ENABLE;
6857     rxdctl |= IGC_RXDCTL_SWFLUSH;
6858     wr32(IGC_RXDCTL(idx), rxdctl);
6859 }
6860 
6861 void igc_disable_rx_ring(struct igc_ring *ring)
6862 {
6863     igc_disable_rx_ring_hw(ring);
6864     igc_clean_rx_ring(ring);
6865 }
6866 
6867 void igc_enable_rx_ring(struct igc_ring *ring)
6868 {
6869     struct igc_adapter *adapter = ring->q_vector->adapter;
6870 
6871     igc_configure_rx_ring(adapter, ring);
6872 
6873     if (ring->xsk_pool)
6874         igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring));
6875     else
6876         igc_alloc_rx_buffers(ring, igc_desc_unused(ring));
6877 }
6878 
6879 static void igc_disable_tx_ring_hw(struct igc_ring *ring)
6880 {
6881     struct igc_hw *hw = &ring->q_vector->adapter->hw;
6882     u8 idx = ring->reg_idx;
6883     u32 txdctl;
6884 
6885     txdctl = rd32(IGC_TXDCTL(idx));
6886     txdctl &= ~IGC_TXDCTL_QUEUE_ENABLE;
6887     txdctl |= IGC_TXDCTL_SWFLUSH;
6888     wr32(IGC_TXDCTL(idx), txdctl);
6889 }
6890 
6891 void igc_disable_tx_ring(struct igc_ring *ring)
6892 {
6893     igc_disable_tx_ring_hw(ring);
6894     igc_clean_tx_ring(ring);
6895 }
6896 
6897 void igc_enable_tx_ring(struct igc_ring *ring)
6898 {
6899     struct igc_adapter *adapter = ring->q_vector->adapter;
6900 
6901     igc_configure_tx_ring(adapter, ring);
6902 }
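
The paired disable/enable helpers above quiesce a ring, flush it, and bring it back with freshly posted buffers; this is the sequence a caller uses when a ring's buffer source changes, for example when an AF_XDP pool is attached or detached. A hedged sketch of such a caller, with NAPI handling and locking omitted:

#include "igc.h"        /* driver-internal header, assumed available */

/* Hypothetical caller: quiesce a queue pair, swap its buffer source,
 * then restart it. The real XDP/XSK setup path also stops NAPI and
 * synchronizes interrupts, which is omitted here.
 */
static void foo_restart_queue_pair(struct igc_ring *rx_ring,
                                   struct igc_ring *tx_ring)
{
        igc_disable_rx_ring(rx_ring);
        igc_disable_tx_ring(tx_ring);

        /* ... attach or detach rx_ring->xsk_pool / tx_ring->xsk_pool ... */

        igc_enable_rx_ring(rx_ring);
        igc_enable_tx_ring(tx_ring);
}
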
6903 
6904 /**
6905  * igc_init_module - Driver Registration Routine
6906  *
6907  * igc_init_module is the first routine called when the driver is
6908  * loaded. It prints the driver banner and registers with the PCI subsystem.
6909  */
6910 static int __init igc_init_module(void)
6911 {
6912     int ret;
6913 
6914     pr_info("%s\n", igc_driver_string);
6915     pr_info("%s\n", igc_copyright);
6916 
6917     ret = pci_register_driver(&igc_driver);
6918     return ret;
6919 }
6920 
6921 module_init(igc_init_module);
6922 
6923 /**
6924  * igc_exit_module - Driver Exit Cleanup Routine
6925  *
6926  * igc_exit_module is called just before the driver is removed
6927  * from memory.
6928  */
6929 static void __exit igc_exit_module(void)
6930 {
6931     pci_unregister_driver(&igc_driver);
6932 }
6933 
6934 module_exit(igc_exit_module);
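
Since igc_init_module()/igc_exit_module() only print a banner and register/unregister the PCI driver, the same registration could be expressed with the standard helper macro at the cost of the two pr_info() lines. Shown purely as an illustration of the idiom, not as a change to this driver:

/* Illustrative only: this would replace the module_init/module_exit
 * pair above, dropping the banner output.
 */
module_pci_driver(igc_driver);
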
6935 /* igc_main.c */