Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /****************************************************************************
0003  * Driver for Solarflare network controllers and boards
0004  * Copyright 2018 Solarflare Communications Inc.
0005  *
0006  * This program is free software; you can redistribute it and/or modify it
0007  * under the terms of the GNU General Public License version 2 as published
0008  * by the Free Software Foundation, incorporated herein by reference.
0009  */
0010 
0011 #include "net_driver.h"
0012 #include <linux/filter.h>
0013 #include <linux/module.h>
0014 #include <linux/netdevice.h>
0015 #include <net/gre.h>
0016 #include "efx_common.h"
0017 #include "efx_channels.h"
0018 #include "efx.h"
0019 #include "mcdi.h"
0020 #include "selftest.h"
0021 #include "rx_common.h"
0022 #include "tx_common.h"
0023 #include "nic.h"
0024 #include "mcdi_port_common.h"
0025 #include "io.h"
0026 #include "mcdi_pcol.h"
0027 #include "ef100_rep.h"
0028 
/* Bitmap of NETIF_MSG_* message classes that are enabled by default.
 * Exposed as the "debug" module parameter so it can be set at load time.
 */
static unsigned int debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
                 NETIF_MSG_LINK | NETIF_MSG_IFDOWN |
                 NETIF_MSG_IFUP | NETIF_MSG_RX_ERR |
                 NETIF_MSG_TX_ERR | NETIF_MSG_HW);
/* Permission mask 0: no sysfs entry is created for this parameter */
module_param(debug, uint, 0);
MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value");
0035 
/* This is the time (in jiffies) between invocations of the hardware
 * monitor; 1 * HZ is one second.  It is the delay passed to
 * queue_delayed_work() by efx_start_monitor().
 * On Falcon-based NICs, this will:
 * - Check the on-board hardware monitor;
 * - Poll the link state and reconfigure the hardware as necessary.
 * On Siena-based NICs for power systems with EEH support, this will give EEH a
 * chance to start.
 */
static unsigned int efx_monitor_interval = 1 * HZ;
0045 
/* How often and how many times to poll for a reset while waiting for a
 * BIST that another function started to complete.  efx_wait_for_bist_end()
 * sleeps BIST_WAIT_DELAY_MS between polls and gives up after
 * BIST_WAIT_DELAY_COUNT polls (100 x 100ms, i.e. about ten seconds).
 */
#define BIST_WAIT_DELAY_MS  100
#define BIST_WAIT_DELAY_COUNT   100

/* Default stats update time, in milliseconds */
#define STATS_PERIOD_MS_DEFAULT 1000
0054 
/* Printable names for reset types, indexed by enum reset_type value.
 * efx_reset_type_max is initialised from RESET_TYPE_MAX.
 */
static const unsigned int efx_reset_type_max = RESET_TYPE_MAX;
static const char *const efx_reset_type_names[] = {
    [RESET_TYPE_INVISIBLE]          = "INVISIBLE",
    [RESET_TYPE_ALL]                = "ALL",
    [RESET_TYPE_RECOVER_OR_ALL]     = "RECOVER_OR_ALL",
    [RESET_TYPE_WORLD]              = "WORLD",
    [RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE",
    [RESET_TYPE_DATAPATH]           = "DATAPATH",
    [RESET_TYPE_MC_BIST]        = "MC_BIST",
    [RESET_TYPE_DISABLE]            = "DISABLE",
    [RESET_TYPE_TX_WATCHDOG]        = "TX_WATCHDOG",
    [RESET_TYPE_INT_ERROR]          = "INT_ERROR",
    [RESET_TYPE_DMA_ERROR]          = "DMA_ERROR",
    [RESET_TYPE_TX_SKIP]            = "TX_SKIP",
    [RESET_TYPE_MC_FAILURE]         = "MC_FAILURE",
    [RESET_TYPE_MCDI_TIMEOUT]   = "MCDI_TIMEOUT (FLR)",
};

/* Map a reset type to its printable name for log messages.
 * NOTE(review): STRING_TABLE_LOOKUP is defined elsewhere; it presumably
 * derives efx_reset_type_max/efx_reset_type_names from the prefix - confirm.
 */
#define RESET_TYPE(type) \
    STRING_TABLE_LOOKUP(type, efx_reset_type)
0075 
/* Loopback mode names (see LOOPBACK_MODE()), indexed by LOOPBACK_* value.
 * Unlike the reset-type table these are not static, so they are visible to
 * other translation units.
 */
const unsigned int efx_loopback_mode_max = LOOPBACK_MAX;
const char *const efx_loopback_mode_names[] = {
    [LOOPBACK_NONE]     = "NONE",
    [LOOPBACK_DATA]     = "DATAPATH",
    [LOOPBACK_GMAC]     = "GMAC",
    [LOOPBACK_XGMII]    = "XGMII",
    [LOOPBACK_XGXS]     = "XGXS",
    [LOOPBACK_XAUI]     = "XAUI",
    [LOOPBACK_GMII]     = "GMII",
    [LOOPBACK_SGMII]    = "SGMII",
    [LOOPBACK_XGBR]     = "XGBR",
    [LOOPBACK_XFI]      = "XFI",
    [LOOPBACK_XAUI_FAR] = "XAUI_FAR",
    [LOOPBACK_GMII_FAR] = "GMII_FAR",
    [LOOPBACK_SGMII_FAR]    = "SGMII_FAR",
    [LOOPBACK_XFI_FAR]  = "XFI_FAR",
    [LOOPBACK_GPHY]     = "GPHY",
    [LOOPBACK_PHYXS]    = "PHYXS",
    [LOOPBACK_PCS]      = "PCS",
    [LOOPBACK_PMAPMD]   = "PMA/PMD",
    [LOOPBACK_XPORT]    = "XPORT",
    [LOOPBACK_XGMII_WS] = "XGMII_WS",
    [LOOPBACK_XAUI_WS]  = "XAUI_WS",
    [LOOPBACK_XAUI_WS_FAR]  = "XAUI_WS_FAR",
    [LOOPBACK_XAUI_WS_NEAR] = "XAUI_WS_NEAR",
    [LOOPBACK_GMII_WS]  = "GMII_WS",
    [LOOPBACK_XFI_WS]   = "XFI_WS",
    [LOOPBACK_XFI_WS_FAR]   = "XFI_WS_FAR",
    [LOOPBACK_PHYXS_WS] = "PHYXS_WS",
};
0107 
/* Reset workqueue. If any NIC has a hardware failure then a reset will be
 * queued onto this work queue. This is not a per-NIC work queue, because
 * efx_reset_work() acquires the rtnl lock, so resets are naturally serialised.
 * Created by efx_create_reset_workqueue() and torn down (and set back to
 * NULL) by efx_destroy_reset_workqueue().
 */
static struct workqueue_struct *reset_workqueue;
0113 
0114 int efx_create_reset_workqueue(void)
0115 {
0116     reset_workqueue = create_singlethread_workqueue("sfc_reset");
0117     if (!reset_workqueue) {
0118         printk(KERN_ERR "Failed to create reset workqueue\n");
0119         return -ENOMEM;
0120     }
0121 
0122     return 0;
0123 }
0124 
0125 void efx_queue_reset_work(struct efx_nic *efx)
0126 {
0127     queue_work(reset_workqueue, &efx->reset_work);
0128 }
0129 
0130 void efx_flush_reset_workqueue(struct efx_nic *efx)
0131 {
0132     cancel_work_sync(&efx->reset_work);
0133 }
0134 
0135 void efx_destroy_reset_workqueue(void)
0136 {
0137     if (reset_workqueue) {
0138         destroy_workqueue(reset_workqueue);
0139         reset_workqueue = NULL;
0140     }
0141 }
0142 
0143 /* We assume that efx->type->reconfigure_mac will always try to sync RX
0144  * filters and therefore needs to read-lock the filter table against freeing
0145  */
0146 void efx_mac_reconfigure(struct efx_nic *efx, bool mtu_only)
0147 {
0148     if (efx->type->reconfigure_mac) {
0149         down_read(&efx->filter_sem);
0150         efx->type->reconfigure_mac(efx, mtu_only);
0151         up_read(&efx->filter_sem);
0152     }
0153 }
0154 
0155 /* Asynchronous work item for changing MAC promiscuity and multicast
0156  * hash.  Avoid a drain/rx_ingress enable by reconfiguring the current
0157  * MAC directly.
0158  */
0159 static void efx_mac_work(struct work_struct *data)
0160 {
0161     struct efx_nic *efx = container_of(data, struct efx_nic, mac_work);
0162 
0163     mutex_lock(&efx->mac_lock);
0164     if (efx->port_enabled)
0165         efx_mac_reconfigure(efx, false);
0166     mutex_unlock(&efx->mac_lock);
0167 }
0168 
0169 int efx_set_mac_address(struct net_device *net_dev, void *data)
0170 {
0171     struct efx_nic *efx = efx_netdev_priv(net_dev);
0172     struct sockaddr *addr = data;
0173     u8 *new_addr = addr->sa_data;
0174     u8 old_addr[6];
0175     int rc;
0176 
0177     if (!is_valid_ether_addr(new_addr)) {
0178         netif_err(efx, drv, efx->net_dev,
0179               "invalid ethernet MAC address requested: %pM\n",
0180               new_addr);
0181         return -EADDRNOTAVAIL;
0182     }
0183 
0184     /* save old address */
0185     ether_addr_copy(old_addr, net_dev->dev_addr);
0186     eth_hw_addr_set(net_dev, new_addr);
0187     if (efx->type->set_mac_address) {
0188         rc = efx->type->set_mac_address(efx);
0189         if (rc) {
0190             eth_hw_addr_set(net_dev, old_addr);
0191             return rc;
0192         }
0193     }
0194 
0195     /* Reconfigure the MAC */
0196     mutex_lock(&efx->mac_lock);
0197     efx_mac_reconfigure(efx, false);
0198     mutex_unlock(&efx->mac_lock);
0199 
0200     return 0;
0201 }
0202 
0203 /* Context: netif_addr_lock held, BHs disabled. */
0204 void efx_set_rx_mode(struct net_device *net_dev)
0205 {
0206     struct efx_nic *efx = efx_netdev_priv(net_dev);
0207 
0208     if (efx->port_enabled)
0209         queue_work(efx->workqueue, &efx->mac_work);
0210     /* Otherwise efx_start_port() will do this */
0211 }
0212 
0213 int efx_set_features(struct net_device *net_dev, netdev_features_t data)
0214 {
0215     struct efx_nic *efx = efx_netdev_priv(net_dev);
0216     int rc;
0217 
0218     /* If disabling RX n-tuple filtering, clear existing filters */
0219     if (net_dev->features & ~data & NETIF_F_NTUPLE) {
0220         rc = efx->type->filter_clear_rx(efx, EFX_FILTER_PRI_MANUAL);
0221         if (rc)
0222             return rc;
0223     }
0224 
0225     /* If Rx VLAN filter is changed, update filters via mac_reconfigure.
0226      * If rx-fcs is changed, mac_reconfigure updates that too.
0227      */
0228     if ((net_dev->features ^ data) & (NETIF_F_HW_VLAN_CTAG_FILTER |
0229                       NETIF_F_RXFCS)) {
0230         /* efx_set_rx_mode() will schedule MAC work to update filters
0231          * when a new features are finally set in net_dev.
0232          */
0233         efx_set_rx_mode(net_dev);
0234     }
0235 
0236     return 0;
0237 }
0238 
0239 /* This ensures that the kernel is kept informed (via
0240  * netif_carrier_on/off) of the link status, and also maintains the
0241  * link status's stop on the port's TX queue.
0242  */
0243 void efx_link_status_changed(struct efx_nic *efx)
0244 {
0245     struct efx_link_state *link_state = &efx->link_state;
0246 
0247     /* SFC Bug 5356: A net_dev notifier is registered, so we must ensure
0248      * that no events are triggered between unregister_netdev() and the
0249      * driver unloading. A more general condition is that NETDEV_CHANGE
0250      * can only be generated between NETDEV_UP and NETDEV_DOWN
0251      */
0252     if (!netif_running(efx->net_dev))
0253         return;
0254 
0255     if (link_state->up != netif_carrier_ok(efx->net_dev)) {
0256         efx->n_link_state_changes++;
0257 
0258         if (link_state->up)
0259             netif_carrier_on(efx->net_dev);
0260         else
0261             netif_carrier_off(efx->net_dev);
0262     }
0263 
0264     /* Status message for kernel log */
0265     if (link_state->up)
0266         netif_info(efx, link, efx->net_dev,
0267                "link up at %uMbps %s-duplex (MTU %d)\n",
0268                link_state->speed, link_state->fd ? "full" : "half",
0269                efx->net_dev->mtu);
0270     else
0271         netif_info(efx, link, efx->net_dev, "link down\n");
0272 }
0273 
0274 unsigned int efx_xdp_max_mtu(struct efx_nic *efx)
0275 {
0276     /* The maximum MTU that we can fit in a single page, allowing for
0277      * framing, overhead and XDP headroom + tailroom.
0278      */
0279     int overhead = EFX_MAX_FRAME_LEN(0) + sizeof(struct efx_rx_page_state) +
0280                efx->rx_prefix_size + efx->type->rx_buffer_padding +
0281                efx->rx_ip_align + EFX_XDP_HEADROOM + EFX_XDP_TAILROOM;
0282 
0283     return PAGE_SIZE - overhead;
0284 }
0285 
0286 /* Context: process, rtnl_lock() held. */
0287 int efx_change_mtu(struct net_device *net_dev, int new_mtu)
0288 {
0289     struct efx_nic *efx = efx_netdev_priv(net_dev);
0290     int rc;
0291 
0292     rc = efx_check_disabled(efx);
0293     if (rc)
0294         return rc;
0295 
0296     if (rtnl_dereference(efx->xdp_prog) &&
0297         new_mtu > efx_xdp_max_mtu(efx)) {
0298         netif_err(efx, drv, efx->net_dev,
0299               "Requested MTU of %d too big for XDP (max: %d)\n",
0300               new_mtu, efx_xdp_max_mtu(efx));
0301         return -EINVAL;
0302     }
0303 
0304     netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu);
0305 
0306     efx_device_detach_sync(efx);
0307     efx_stop_all(efx);
0308 
0309     mutex_lock(&efx->mac_lock);
0310     net_dev->mtu = new_mtu;
0311     efx_mac_reconfigure(efx, true);
0312     mutex_unlock(&efx->mac_lock);
0313 
0314     efx_start_all(efx);
0315     efx_device_attach_if_not_resetting(efx);
0316     return 0;
0317 }
0318 
0319 /**************************************************************************
0320  *
0321  * Hardware monitor
0322  *
0323  **************************************************************************/
0324 
0325 /* Run periodically off the general workqueue */
0326 static void efx_monitor(struct work_struct *data)
0327 {
0328     struct efx_nic *efx = container_of(data, struct efx_nic,
0329                        monitor_work.work);
0330 
0331     netif_vdbg(efx, timer, efx->net_dev,
0332            "hardware monitor executing on CPU %d\n",
0333            raw_smp_processor_id());
0334     BUG_ON(efx->type->monitor == NULL);
0335 
0336     /* If the mac_lock is already held then it is likely a port
0337      * reconfiguration is already in place, which will likely do
0338      * most of the work of monitor() anyway.
0339      */
0340     if (mutex_trylock(&efx->mac_lock)) {
0341         if (efx->port_enabled && efx->type->monitor)
0342             efx->type->monitor(efx);
0343         mutex_unlock(&efx->mac_lock);
0344     }
0345 
0346     efx_start_monitor(efx);
0347 }
0348 
0349 void efx_start_monitor(struct efx_nic *efx)
0350 {
0351     if (efx->type->monitor)
0352         queue_delayed_work(efx->workqueue, &efx->monitor_work,
0353                    efx_monitor_interval);
0354 }
0355 
0356 /**************************************************************************
0357  *
0358  * Event queue processing
0359  *
0360  *************************************************************************/
0361 
/* Channels are shutdown and reinitialised whilst the NIC is running
 * to propagate configuration changes (mtu, checksum offload), or
 * to clear hardware error conditions.
 *
 * This function sizes the RX buffers for the current MTU, decides whether
 * RX scatter is used, refreshes the netdev feature masks, sets the TX
 * queue stop/wake thresholds, starts the channels and PTP, and finally
 * wakes the TX queues if the net device is present.
 */
static void efx_start_datapath(struct efx_nic *efx)
{
    netdev_features_t old_features = efx->net_dev->features;
    bool old_rx_scatter = efx->rx_scatter;
    size_t rx_buf_len;

    /* Calculate the rx buffer allocation parameters required to
     * support the current MTU, including padding for header
     * alignment and overruns.
     */
    efx->rx_dma_len = (efx->rx_prefix_size +
               EFX_MAX_FRAME_LEN(efx->net_dev->mtu) +
               efx->type->rx_buffer_padding);
    rx_buf_len = (sizeof(struct efx_rx_page_state)   + EFX_XDP_HEADROOM +
              efx->rx_ip_align + efx->rx_dma_len + EFX_XDP_TAILROOM);

    if (rx_buf_len <= PAGE_SIZE) {
        /* Whole frame fits in one page: scatter only if always on */
        efx->rx_scatter = efx->type->always_rx_scatter;
        efx->rx_buffer_order = 0;
    } else if (efx->type->can_rx_scatter) {
        /* Too big for one page: split frames across fixed-size
         * buffers.  The compile-time checks require the buffer size
         * to be cache-line aligned and two such buffers plus the
         * page state to fit in one page.
         */
        BUILD_BUG_ON(EFX_RX_USR_BUF_SIZE % L1_CACHE_BYTES);
        BUILD_BUG_ON(sizeof(struct efx_rx_page_state) +
                 2 * ALIGN(NET_IP_ALIGN + EFX_RX_USR_BUF_SIZE,
                       EFX_RX_BUF_ALIGNMENT) >
                 PAGE_SIZE);
        efx->rx_scatter = true;
        efx->rx_dma_len = EFX_RX_USR_BUF_SIZE;
        efx->rx_buffer_order = 0;
    } else {
        /* No scatter support: fall back to higher-order pages */
        efx->rx_scatter = false;
        efx->rx_buffer_order = get_order(rx_buf_len);
    }

    efx_rx_config_page_split(efx);
    if (efx->rx_buffer_order)
        netif_dbg(efx, drv, efx->net_dev,
              "RX buf len=%u; page order=%u batch=%u\n",
              efx->rx_dma_len, efx->rx_buffer_order,
              efx->rx_pages_per_batch);
    else
        netif_dbg(efx, drv, efx->net_dev,
              "RX buf len=%u step=%u bpp=%u; page batch=%u\n",
              efx->rx_dma_len, efx->rx_page_buf_step,
              efx->rx_bufs_per_page, efx->rx_pages_per_batch);

    /* Restore previously fixed features in hw_features and remove
     * features which are fixed now
     */
    efx->net_dev->hw_features |= efx->net_dev->features;
    efx->net_dev->hw_features &= ~efx->fixed_features;
    efx->net_dev->features |= efx->fixed_features;
    /* Only notify if the effective feature set actually changed */
    if (efx->net_dev->features != old_features)
        netdev_features_change(efx->net_dev);

    /* RX filters may also have scatter-enabled flags */
    if ((efx->rx_scatter != old_rx_scatter) &&
        efx->type->filter_update_rx_scatter)
        efx->type->filter_update_rx_scatter(efx);

    /* We must keep at least one descriptor in a TX ring empty.
     * We could avoid this when the queue size does not exactly
     * match the hardware ring size, but it's not that important.
     * Therefore we stop the queue when one more skb might fill
     * the ring completely.  We wake it when half way back to
     * empty.
     */
    efx->txq_stop_thresh = efx->txq_entries - efx_tx_max_skb_descs(efx);
    efx->txq_wake_thresh = efx->txq_stop_thresh / 2;

    /* Initialise the channels */
    efx_start_channels(efx);

    efx_ptp_start_datapath(efx);

    /* Leave the TX queues stopped while the device is detached */
    if (netif_device_present(efx->net_dev))
        netif_tx_wake_all_queues(efx->net_dev);
}
0443 
0444 static void efx_stop_datapath(struct efx_nic *efx)
0445 {
0446     EFX_ASSERT_RESET_SERIALISED(efx);
0447     BUG_ON(efx->port_enabled);
0448 
0449     efx_ptp_stop_datapath(efx);
0450 
0451     efx_stop_channels(efx);
0452 }
0453 
0454 /**************************************************************************
0455  *
0456  * Port handling
0457  *
0458  **************************************************************************/
0459 
0460 /* Equivalent to efx_link_set_advertising with all-zeroes, except does not
0461  * force the Autoneg bit on.
0462  */
0463 void efx_link_clear_advertising(struct efx_nic *efx)
0464 {
0465     bitmap_zero(efx->link_advertising, __ETHTOOL_LINK_MODE_MASK_NBITS);
0466     efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX);
0467 }
0468 
0469 void efx_link_set_wanted_fc(struct efx_nic *efx, u8 wanted_fc)
0470 {
0471     efx->wanted_fc = wanted_fc;
0472     if (efx->link_advertising[0]) {
0473         if (wanted_fc & EFX_FC_RX)
0474             efx->link_advertising[0] |= (ADVERTISED_Pause |
0475                              ADVERTISED_Asym_Pause);
0476         else
0477             efx->link_advertising[0] &= ~(ADVERTISED_Pause |
0478                               ADVERTISED_Asym_Pause);
0479         if (wanted_fc & EFX_FC_TX)
0480             efx->link_advertising[0] ^= ADVERTISED_Asym_Pause;
0481     }
0482 }
0483 
0484 static void efx_start_port(struct efx_nic *efx)
0485 {
0486     netif_dbg(efx, ifup, efx->net_dev, "start port\n");
0487     BUG_ON(efx->port_enabled);
0488 
0489     mutex_lock(&efx->mac_lock);
0490     efx->port_enabled = true;
0491 
0492     /* Ensure MAC ingress/egress is enabled */
0493     efx_mac_reconfigure(efx, false);
0494 
0495     mutex_unlock(&efx->mac_lock);
0496 }
0497 
0498 /* Cancel work for MAC reconfiguration, periodic hardware monitoring
0499  * and the async self-test, wait for them to finish and prevent them
0500  * being scheduled again.  This doesn't cover online resets, which
0501  * should only be cancelled when removing the device.
0502  */
0503 static void efx_stop_port(struct efx_nic *efx)
0504 {
0505     netif_dbg(efx, ifdown, efx->net_dev, "stop port\n");
0506 
0507     EFX_ASSERT_RESET_SERIALISED(efx);
0508 
0509     mutex_lock(&efx->mac_lock);
0510     efx->port_enabled = false;
0511     mutex_unlock(&efx->mac_lock);
0512 
0513     /* Serialise against efx_set_multicast_list() */
0514     netif_addr_lock_bh(efx->net_dev);
0515     netif_addr_unlock_bh(efx->net_dev);
0516 
0517     cancel_delayed_work_sync(&efx->monitor_work);
0518     efx_selftest_async_cancel(efx);
0519     cancel_work_sync(&efx->mac_work);
0520 }
0521 
0522 /* If the interface is supposed to be running but is not, start
0523  * the hardware and software data path, regular activity for the port
0524  * (MAC statistics, link polling, etc.) and schedule the port to be
0525  * reconfigured.  Interrupts must already be enabled.  This function
0526  * is safe to call multiple times, so long as the NIC is not disabled.
0527  * Requires the RTNL lock.
0528  */
0529 void efx_start_all(struct efx_nic *efx)
0530 {
0531     EFX_ASSERT_RESET_SERIALISED(efx);
0532     BUG_ON(efx->state == STATE_DISABLED);
0533 
0534     /* Check that it is appropriate to restart the interface. All
0535      * of these flags are safe to read under just the rtnl lock
0536      */
0537     if (efx->port_enabled || !netif_running(efx->net_dev) ||
0538         efx->reset_pending)
0539         return;
0540 
0541     efx_start_port(efx);
0542     efx_start_datapath(efx);
0543 
0544     /* Start the hardware monitor if there is one */
0545     efx_start_monitor(efx);
0546 
0547     /* Link state detection is normally event-driven; we have
0548      * to poll now because we could have missed a change
0549      */
0550     mutex_lock(&efx->mac_lock);
0551     if (efx_mcdi_phy_poll(efx))
0552         efx_link_status_changed(efx);
0553     mutex_unlock(&efx->mac_lock);
0554 
0555     if (efx->type->start_stats) {
0556         efx->type->start_stats(efx);
0557         efx->type->pull_stats(efx);
0558         spin_lock_bh(&efx->stats_lock);
0559         efx->type->update_stats(efx, NULL, NULL);
0560         spin_unlock_bh(&efx->stats_lock);
0561     }
0562 }
0563 
0564 /* Quiesce the hardware and software data path, and regular activity
0565  * for the port without bringing the link down.  Safe to call multiple
0566  * times with the NIC in almost any state, but interrupts should be
0567  * enabled.  Requires the RTNL lock.
0568  */
0569 void efx_stop_all(struct efx_nic *efx)
0570 {
0571     EFX_ASSERT_RESET_SERIALISED(efx);
0572 
0573     /* port_enabled can be read safely under the rtnl lock */
0574     if (!efx->port_enabled)
0575         return;
0576 
0577     if (efx->type->update_stats) {
0578         /* update stats before we go down so we can accurately count
0579          * rx_nodesc_drops
0580          */
0581         efx->type->pull_stats(efx);
0582         spin_lock_bh(&efx->stats_lock);
0583         efx->type->update_stats(efx, NULL, NULL);
0584         spin_unlock_bh(&efx->stats_lock);
0585         efx->type->stop_stats(efx);
0586     }
0587 
0588     efx_stop_port(efx);
0589 
0590     /* Stop the kernel transmit interface.  This is only valid if
0591      * the device is stopped or detached; otherwise the watchdog
0592      * may fire immediately.
0593      */
0594     WARN_ON(netif_running(efx->net_dev) &&
0595         netif_device_present(efx->net_dev));
0596     netif_tx_disable(efx->net_dev);
0597 
0598     efx_stop_datapath(efx);
0599 }
0600 
0601 /* Context: process, dev_base_lock or RTNL held, non-blocking. */
0602 void efx_net_stats(struct net_device *net_dev, struct rtnl_link_stats64 *stats)
0603 {
0604     struct efx_nic *efx = efx_netdev_priv(net_dev);
0605 
0606     spin_lock_bh(&efx->stats_lock);
0607     efx_nic_update_stats_atomic(efx, NULL, stats);
0608     spin_unlock_bh(&efx->stats_lock);
0609 }
0610 
0611 /* Push loopback/power/transmit disable settings to the PHY, and reconfigure
0612  * the MAC appropriately. All other PHY configuration changes are pushed
0613  * through phy_op->set_settings(), and pushed asynchronously to the MAC
0614  * through efx_monitor().
0615  *
0616  * Callers must hold the mac_lock
0617  */
0618 int __efx_reconfigure_port(struct efx_nic *efx)
0619 {
0620     enum efx_phy_mode phy_mode;
0621     int rc = 0;
0622 
0623     WARN_ON(!mutex_is_locked(&efx->mac_lock));
0624 
0625     /* Disable PHY transmit in mac level loopbacks */
0626     phy_mode = efx->phy_mode;
0627     if (LOOPBACK_INTERNAL(efx))
0628         efx->phy_mode |= PHY_MODE_TX_DISABLED;
0629     else
0630         efx->phy_mode &= ~PHY_MODE_TX_DISABLED;
0631 
0632     if (efx->type->reconfigure_port)
0633         rc = efx->type->reconfigure_port(efx);
0634 
0635     if (rc)
0636         efx->phy_mode = phy_mode;
0637 
0638     return rc;
0639 }
0640 
0641 /* Reinitialise the MAC to pick up new PHY settings, even if the port is
0642  * disabled.
0643  */
0644 int efx_reconfigure_port(struct efx_nic *efx)
0645 {
0646     int rc;
0647 
0648     EFX_ASSERT_RESET_SERIALISED(efx);
0649 
0650     mutex_lock(&efx->mac_lock);
0651     rc = __efx_reconfigure_port(efx);
0652     mutex_unlock(&efx->mac_lock);
0653 
0654     return rc;
0655 }
0656 
0657 /**************************************************************************
0658  *
0659  * Device reset and suspend
0660  *
0661  **************************************************************************/
0662 
0663 static void efx_wait_for_bist_end(struct efx_nic *efx)
0664 {
0665     int i;
0666 
0667     for (i = 0; i < BIST_WAIT_DELAY_COUNT; ++i) {
0668         if (efx_mcdi_poll_reboot(efx))
0669             goto out;
0670         msleep(BIST_WAIT_DELAY_MS);
0671     }
0672 
0673     netif_err(efx, drv, efx->net_dev, "Warning: No MC reboot after BIST mode\n");
0674 out:
0675     /* Either way unset the BIST flag. If we found no reboot we probably
0676      * won't recover, but we should try.
0677      */
0678     efx->mc_bist_for_other_fn = false;
0679 }
0680 
/* Try recovery mechanisms.
 * For now only EEH is supported.
 * Returns 0 if the recovery mechanisms are unsuccessful (always the case
 * when CONFIG_EEH is not set).
 * Returns a non-zero value otherwise.
 */
int efx_try_recovery(struct efx_nic *efx)
{
    int recovering = 0;

#ifdef CONFIG_EEH
    /* A PCI error can occur and not be seen by EEH because nothing
     * happens on the PCI bus. In this case the driver may fail and
     * schedule a 'recover or reset', leading to this recovery handler.
     * Manually call the eeh failure check function.  If it reports a
     * failure, the EEH mechanisms will handle the error and reset the
     * device if necessary.
     */
    if (eeh_dev_check_failure(pci_dev_to_eeh_dev(efx->pci_dev)))
        recovering = 1;
#endif

    return recovering;
}
0704 
/* Tears down the entire software state and most of the hardware state
 * before reset.
 *
 * On return mac_lock, filter_sem (for writing) and rss_lock are all held;
 * efx_reset_up() is responsible for releasing them.
 */
void efx_reset_down(struct efx_nic *efx, enum reset_type method)
{
    EFX_ASSERT_RESET_SERIALISED(efx);

    /* An MCDI-timeout reset is labelled "(FLR)" in the reset-type table;
     * let the NIC type prepare for it before anything is stopped.
     */
    if (method == RESET_TYPE_MCDI_TIMEOUT)
        efx->type->prepare_flr(efx);

    efx_stop_all(efx);
    efx_disable_interrupts(efx);

    /* Taken in this order; the success path of efx_reset_up() drops
     * rss_lock, then filter_sem, then mac_lock.
     */
    mutex_lock(&efx->mac_lock);
    down_write(&efx->filter_sem);
    mutex_lock(&efx->rss_lock);
    efx->type->fini(efx);
}
0723 
0724 /* Context: netif_tx_lock held, BHs disabled. */
0725 void efx_watchdog(struct net_device *net_dev, unsigned int txqueue)
0726 {
0727     struct efx_nic *efx = efx_netdev_priv(net_dev);
0728 
0729     netif_err(efx, tx_err, efx->net_dev,
0730           "TX stuck with port_enabled=%d: resetting channels\n",
0731           efx->port_enabled);
0732 
0733     efx_schedule_reset(efx, RESET_TYPE_TX_WATCHDOG);
0734 }
0735 
/* This function will always ensure that the locks acquired in
 * efx_reset_down() are released. A failure return code indicates
 * that we were unable to reinitialise the hardware, and the
 * driver should be disabled. If ok is false, then the rx and tx
 * engines are not restarted, pending a RESET_DISABLE.
 *
 * Returns 0 on success.  On the failure path it returns the error from
 * the init or interrupt-enable step; note that when @ok is false and init
 * succeeded the return value is 0 even though the fail path was taken.
 */
int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
{
    int rc;

    EFX_ASSERT_RESET_SERIALISED(efx);

    /* Counterpart of the prepare_flr() call in efx_reset_down() */
    if (method == RESET_TYPE_MCDI_TIMEOUT)
        efx->type->finish_flr(efx);

    /* Ensure that SRAM is initialised even if we're disabling the device */
    rc = efx->type->init(efx);
    if (rc) {
        netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n");
        goto fail;
    }

    /* Caller wants the device left down: just release the locks */
    if (!ok)
        goto fail;

    /* INVISIBLE and DATAPATH resets skip the PHY reconfiguration */
    if (efx->port_initialized && method != RESET_TYPE_INVISIBLE &&
        method != RESET_TYPE_DATAPATH) {
        rc = efx_mcdi_port_reconfigure(efx);
        /* Logged but not fatal; -EPERM is silently tolerated */
        if (rc && rc != -EPERM)
            netif_err(efx, drv, efx->net_dev,
                  "could not restore PHY settings\n");
    }

    rc = efx_enable_interrupts(efx);
    if (rc)
        goto fail;

#ifdef CONFIG_SFC_SRIOV
    rc = efx->type->vswitching_restore(efx);
    if (rc) /* not fatal; the PF will still work fine */
        netif_warn(efx, probe, efx->net_dev,
               "failed to restore vswitching rc=%d;"
               " VFs may not function\n", rc);
#endif

    /* Release the locks taken by efx_reset_down(), restoring the state
     * each one protects just before it is dropped.
     */
    if (efx->type->rx_restore_rss_contexts)
        efx->type->rx_restore_rss_contexts(efx);
    mutex_unlock(&efx->rss_lock);
    efx->type->filter_table_restore(efx);
    up_write(&efx->filter_sem);
    if (efx->type->sriov_reset)
        efx->type->sriov_reset(efx);

    mutex_unlock(&efx->mac_lock);

    efx_start_all(efx);

    if (efx->type->udp_tnl_push_ports)
        efx->type->udp_tnl_push_ports(efx);

    return 0;

fail:
    efx->port_initialized = false;

    /* Same release order as the success path */
    mutex_unlock(&efx->rss_lock);
    up_write(&efx->filter_sem);
    mutex_unlock(&efx->mac_lock);

    return rc;
}
0807 
/* Reset the NIC using the specified method.  Note that the reset may
 * fail, in which case the card will be left in an unusable state.
 *
 * Returns 0 on success, otherwise the error from the NIC type's reset
 * hook or, if that succeeded, from efx_reset_up().  On failure, or for the
 * DISABLE and RECOVER_OR_DISABLE methods, the net device is closed and the
 * NIC is put into STATE_DISABLED.
 *
 * Caller must hold the rtnl_lock.
 */
int efx_reset(struct efx_nic *efx, enum reset_type method)
{
    int rc, rc2 = 0;
    bool disabled;

    netif_info(efx, drv, efx->net_dev, "resetting (%s)\n",
           RESET_TYPE(method));

    efx_device_detach_sync(efx);
    /* efx_reset_down() grabs locks that prevent recovery on EF100.
     * EF100 reset is handled in the efx_nic_type callback below.
     */
    if (efx_nic_rev(efx) != EFX_REV_EF100)
        efx_reset_down(efx, method);

    rc = efx->type->reset(efx, method);
    if (rc) {
        netif_err(efx, drv, efx->net_dev, "failed to reset hardware\n");
        goto out;
    }

    /* Clear flags for the scopes we covered.  We assume the NIC and
     * driver are now quiescent so that there is no race here.
     * -(1 << (method + 1)) has every bit from (method + 1) upwards set,
     * so the AND clears this method's bit and all lower-numbered ones.
     */
    if (method < RESET_TYPE_MAX_METHOD)
        efx->reset_pending &= -(1 << (method + 1));
    else /* it doesn't fit into the well-ordered scope hierarchy */
        __clear_bit(method, &efx->reset_pending);

    /* Reinitialise bus-mastering, which may have been turned off before
     * the reset was scheduled. This is still appropriate, even in the
     * RESET_TYPE_DISABLE since this driver generally assumes the hardware
     * can respond to requests.
     */
    pci_set_master(efx->pci_dev);

out:
    /* Leave device stopped if necessary */
    disabled = rc ||
        method == RESET_TYPE_DISABLE ||
        method == RESET_TYPE_RECOVER_OR_DISABLE;
    /* Must mirror the efx_reset_down() condition above so that the locks
     * it took are always released, even when the hardware reset failed.
     */
    if (efx_nic_rev(efx) != EFX_REV_EF100)
        rc2 = efx_reset_up(efx, method, !disabled);
    if (rc2) {
        disabled = true;
        /* Report the first error encountered */
        if (!rc)
            rc = rc2;
    }

    if (disabled) {
        dev_close(efx->net_dev);
        netif_err(efx, drv, efx->net_dev, "has been disabled\n");
        efx->state = STATE_DISABLED;
    } else {
        netif_dbg(efx, drv, efx->net_dev, "reset complete\n");
        efx_device_attach_if_not_resetting(efx);
    }
    return rc;
}
0872 
0873 /* The worker thread exists so that code that cannot sleep can
0874  * schedule a reset for later.
0875  */
0876 static void efx_reset_work(struct work_struct *data)
0877 {
0878     struct efx_nic *efx = container_of(data, struct efx_nic, reset_work);
0879     unsigned long pending;
0880     enum reset_type method;
0881 
0882     pending = READ_ONCE(efx->reset_pending);
0883     method = fls(pending) - 1;
0884 
0885     if (method == RESET_TYPE_MC_BIST)
0886         efx_wait_for_bist_end(efx);
0887 
0888     if ((method == RESET_TYPE_RECOVER_OR_DISABLE ||
0889          method == RESET_TYPE_RECOVER_OR_ALL) &&
0890         efx_try_recovery(efx))
0891         return;
0892 
0893     if (!pending)
0894         return;
0895 
0896     rtnl_lock();
0897 
0898     /* We checked the state in efx_schedule_reset() but it may
0899      * have changed by now.  Now that we have the RTNL lock,
0900      * it cannot change again.
0901      */
0902     if (efx_net_active(efx->state))
0903         (void)efx_reset(efx, method);
0904 
0905     rtnl_unlock();
0906 }
0907 
0908 void efx_schedule_reset(struct efx_nic *efx, enum reset_type type)
0909 {
0910     enum reset_type method;
0911 
0912     if (efx_recovering(efx->state)) {
0913         netif_dbg(efx, drv, efx->net_dev,
0914               "recovering: skip scheduling %s reset\n",
0915               RESET_TYPE(type));
0916         return;
0917     }
0918 
0919     switch (type) {
0920     case RESET_TYPE_INVISIBLE:
0921     case RESET_TYPE_ALL:
0922     case RESET_TYPE_RECOVER_OR_ALL:
0923     case RESET_TYPE_WORLD:
0924     case RESET_TYPE_DISABLE:
0925     case RESET_TYPE_RECOVER_OR_DISABLE:
0926     case RESET_TYPE_DATAPATH:
0927     case RESET_TYPE_MC_BIST:
0928     case RESET_TYPE_MCDI_TIMEOUT:
0929         method = type;
0930         netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n",
0931               RESET_TYPE(method));
0932         break;
0933     default:
0934         method = efx->type->map_reset_reason(type);
0935         netif_dbg(efx, drv, efx->net_dev,
0936               "scheduling %s reset for %s\n",
0937               RESET_TYPE(method), RESET_TYPE(type));
0938         break;
0939     }
0940 
0941     set_bit(method, &efx->reset_pending);
0942     smp_mb(); /* ensure we change reset_pending before checking state */
0943 
0944     /* If we're not READY then just leave the flags set as the cue
0945      * to abort probing or reschedule the reset later.
0946      */
0947     if (!efx_net_active(READ_ONCE(efx->state)))
0948         return;
0949 
0950     /* efx_process_channel() will no longer read events once a
0951      * reset is scheduled. So switch back to poll'd MCDI completions.
0952      */
0953     efx_mcdi_mode_poll(efx);
0954 
0955     efx_queue_reset_work(efx);
0956 }
0957 
0958 /**************************************************************************
0959  *
0960  * Dummy NIC operations
0961  *
0962  * Can be used for some unimplemented operations
0963  * Needed so all function pointers are valid and do not have to be tested
0964  * before use
0965  *
0966  **************************************************************************/
/* No-op for operations that report a status: always succeeds. */
int efx_port_dummy_op_int(struct efx_nic *efx)
{
    /* The NIC is deliberately left untouched */
    return 0;
}
/* No-op for operations that return nothing. */
void efx_port_dummy_op_void(struct efx_nic *efx)
{
}
0972 
0973 /**************************************************************************
0974  *
0975  * Data housekeeping
0976  *
0977  **************************************************************************/
0978 
0979 /* This zeroes out and then fills in the invariants in a struct
0980  * efx_nic (including all sub-structures).
0981  */
0982 int efx_init_struct(struct efx_nic *efx, struct pci_dev *pci_dev)
0983 {
0984     int rc = -ENOMEM;
0985 
0986     /* Initialise common structures */
0987     INIT_LIST_HEAD(&efx->node);
0988     INIT_LIST_HEAD(&efx->secondary_list);
0989     spin_lock_init(&efx->biu_lock);
0990 #ifdef CONFIG_SFC_MTD
0991     INIT_LIST_HEAD(&efx->mtd_list);
0992 #endif
0993     INIT_WORK(&efx->reset_work, efx_reset_work);
0994     INIT_DELAYED_WORK(&efx->monitor_work, efx_monitor);
0995     efx_selftest_async_init(efx);
0996     efx->pci_dev = pci_dev;
0997     efx->msg_enable = debug;
0998     efx->state = STATE_UNINIT;
0999     strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name));
1000 
1001     efx->rx_prefix_size = efx->type->rx_prefix_size;
1002     efx->rx_ip_align =
1003         NET_IP_ALIGN ? (efx->rx_prefix_size + NET_IP_ALIGN) % 4 : 0;
1004     efx->rx_packet_hash_offset =
1005         efx->type->rx_hash_offset - efx->type->rx_prefix_size;
1006     efx->rx_packet_ts_offset =
1007         efx->type->rx_ts_offset - efx->type->rx_prefix_size;
1008     INIT_LIST_HEAD(&efx->rss_context.list);
1009     efx->rss_context.context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
1010     mutex_init(&efx->rss_lock);
1011     efx->vport_id = EVB_PORT_ID_ASSIGNED;
1012     spin_lock_init(&efx->stats_lock);
1013     efx->vi_stride = EFX_DEFAULT_VI_STRIDE;
1014     efx->num_mac_stats = MC_CMD_MAC_NSTATS;
1015     BUILD_BUG_ON(MC_CMD_MAC_NSTATS - 1 != MC_CMD_MAC_GENERATION_END);
1016     mutex_init(&efx->mac_lock);
1017     init_rwsem(&efx->filter_sem);
1018 #ifdef CONFIG_RFS_ACCEL
1019     mutex_init(&efx->rps_mutex);
1020     spin_lock_init(&efx->rps_hash_lock);
1021     /* Failure to allocate is not fatal, but may degrade ARFS performance */
1022     efx->rps_hash_table = kcalloc(EFX_ARFS_HASH_TABLE_SIZE,
1023                       sizeof(*efx->rps_hash_table), GFP_KERNEL);
1024 #endif
1025     spin_lock_init(&efx->vf_reps_lock);
1026     INIT_LIST_HEAD(&efx->vf_reps);
1027     INIT_WORK(&efx->mac_work, efx_mac_work);
1028     init_waitqueue_head(&efx->flush_wq);
1029 
1030     efx->tx_queues_per_channel = 1;
1031     efx->rxq_entries = EFX_DEFAULT_DMAQ_SIZE;
1032     efx->txq_entries = EFX_DEFAULT_DMAQ_SIZE;
1033 
1034     efx->mem_bar = UINT_MAX;
1035 
1036     rc = efx_init_channels(efx);
1037     if (rc)
1038         goto fail;
1039 
1040     /* Would be good to use the net_dev name, but we're too early */
1041     snprintf(efx->workqueue_name, sizeof(efx->workqueue_name), "sfc%s",
1042          pci_name(pci_dev));
1043     efx->workqueue = create_singlethread_workqueue(efx->workqueue_name);
1044     if (!efx->workqueue) {
1045         rc = -ENOMEM;
1046         goto fail;
1047     }
1048 
1049     return 0;
1050 
1051 fail:
1052     efx_fini_struct(efx);
1053     return rc;
1054 }
1055 
1056 void efx_fini_struct(struct efx_nic *efx)
1057 {
1058 #ifdef CONFIG_RFS_ACCEL
1059     kfree(efx->rps_hash_table);
1060 #endif
1061 
1062     efx_fini_channels(efx);
1063 
1064     kfree(efx->vpd_sn);
1065 
1066     if (efx->workqueue) {
1067         destroy_workqueue(efx->workqueue);
1068         efx->workqueue = NULL;
1069     }
1070 }
1071 
1072 /* This configures the PCI device to enable I/O and DMA. */
1073 int efx_init_io(struct efx_nic *efx, int bar, dma_addr_t dma_mask,
1074         unsigned int mem_map_size)
1075 {
1076     struct pci_dev *pci_dev = efx->pci_dev;
1077     int rc;
1078 
1079     efx->mem_bar = UINT_MAX;
1080     pci_dbg(pci_dev, "initialising I/O bar=%d\n", bar);
1081 
1082     rc = pci_enable_device(pci_dev);
1083     if (rc) {
1084         pci_err(pci_dev, "failed to enable PCI device\n");
1085         goto fail1;
1086     }
1087 
1088     pci_set_master(pci_dev);
1089 
1090     rc = dma_set_mask_and_coherent(&pci_dev->dev, dma_mask);
1091     if (rc) {
1092         pci_err(efx->pci_dev, "could not find a suitable DMA mask\n");
1093         goto fail2;
1094     }
1095     pci_dbg(efx->pci_dev, "using DMA mask %llx\n", (unsigned long long)dma_mask);
1096 
1097     efx->membase_phys = pci_resource_start(efx->pci_dev, bar);
1098     if (!efx->membase_phys) {
1099         pci_err(efx->pci_dev,
1100             "ERROR: No BAR%d mapping from the BIOS. Try pci=realloc on the kernel command line\n",
1101             bar);
1102         rc = -ENODEV;
1103         goto fail3;
1104     }
1105 
1106     rc = pci_request_region(pci_dev, bar, "sfc");
1107     if (rc) {
1108         pci_err(efx->pci_dev,
1109             "request for memory BAR[%d] failed\n", bar);
1110         rc = -EIO;
1111         goto fail3;
1112     }
1113     efx->mem_bar = bar;
1114     efx->membase = ioremap(efx->membase_phys, mem_map_size);
1115     if (!efx->membase) {
1116         pci_err(efx->pci_dev,
1117             "could not map memory BAR[%d] at %llx+%x\n", bar,
1118             (unsigned long long)efx->membase_phys, mem_map_size);
1119         rc = -ENOMEM;
1120         goto fail4;
1121     }
1122     pci_dbg(efx->pci_dev,
1123         "memory BAR[%d] at %llx+%x (virtual %p)\n", bar,
1124         (unsigned long long)efx->membase_phys, mem_map_size,
1125         efx->membase);
1126 
1127     return 0;
1128 
1129 fail4:
1130     pci_release_region(efx->pci_dev, bar);
1131 fail3:
1132     efx->membase_phys = 0;
1133 fail2:
1134     pci_disable_device(efx->pci_dev);
1135 fail1:
1136     return rc;
1137 }
1138 
1139 void efx_fini_io(struct efx_nic *efx)
1140 {
1141     pci_dbg(efx->pci_dev, "shutting down I/O\n");
1142 
1143     if (efx->membase) {
1144         iounmap(efx->membase);
1145         efx->membase = NULL;
1146     }
1147 
1148     if (efx->membase_phys) {
1149         pci_release_region(efx->pci_dev, efx->mem_bar);
1150         efx->membase_phys = 0;
1151         efx->mem_bar = UINT_MAX;
1152     }
1153 
1154     /* Don't disable bus-mastering if VFs are assigned */
1155     if (!pci_vfs_assigned(efx->pci_dev))
1156         pci_disable_device(efx->pci_dev);
1157 }
1158 
1159 #ifdef CONFIG_SFC_MCDI_LOGGING
1160 static ssize_t mcdi_logging_show(struct device *dev,
1161                  struct device_attribute *attr,
1162                  char *buf)
1163 {
1164     struct efx_nic *efx = dev_get_drvdata(dev);
1165     struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
1166 
1167     return scnprintf(buf, PAGE_SIZE, "%d\n", mcdi->logging_enabled);
1168 }
1169 
1170 static ssize_t mcdi_logging_store(struct device *dev,
1171                   struct device_attribute *attr,
1172                   const char *buf, size_t count)
1173 {
1174     struct efx_nic *efx = dev_get_drvdata(dev);
1175     struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
1176     bool enable = count > 0 && *buf != '0';
1177 
1178     mcdi->logging_enabled = enable;
1179     return count;
1180 }
1181 
1182 static DEVICE_ATTR_RW(mcdi_logging);
1183 
1184 void efx_init_mcdi_logging(struct efx_nic *efx)
1185 {
1186     int rc = device_create_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging);
1187 
1188     if (rc) {
1189         netif_warn(efx, drv, efx->net_dev,
1190                "failed to init net dev attributes\n");
1191     }
1192 }
1193 
1194 void efx_fini_mcdi_logging(struct efx_nic *efx)
1195 {
1196     device_remove_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging);
1197 }
1198 #endif
1199 
1200 /* A PCI error affecting this device was detected.
1201  * At this point MMIO and DMA may be disabled.
1202  * Stop the software path and request a slot reset.
1203  */
1204 static pci_ers_result_t efx_io_error_detected(struct pci_dev *pdev,
1205                           pci_channel_state_t state)
1206 {
1207     pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
1208     struct efx_nic *efx = pci_get_drvdata(pdev);
1209 
1210     if (state == pci_channel_io_perm_failure)
1211         return PCI_ERS_RESULT_DISCONNECT;
1212 
1213     rtnl_lock();
1214 
1215     if (efx->state != STATE_DISABLED) {
1216         efx->state = efx_recover(efx->state);
1217         efx->reset_pending = 0;
1218 
1219         efx_device_detach_sync(efx);
1220 
1221         if (efx_net_active(efx->state)) {
1222             efx_stop_all(efx);
1223             efx_disable_interrupts(efx);
1224         }
1225 
1226         status = PCI_ERS_RESULT_NEED_RESET;
1227     } else {
1228         /* If the interface is disabled we don't want to do anything
1229          * with it.
1230          */
1231         status = PCI_ERS_RESULT_RECOVERED;
1232     }
1233 
1234     rtnl_unlock();
1235 
1236     pci_disable_device(pdev);
1237 
1238     return status;
1239 }
1240 
1241 /* Fake a successful reset, which will be performed later in efx_io_resume. */
1242 static pci_ers_result_t efx_io_slot_reset(struct pci_dev *pdev)
1243 {
1244     struct efx_nic *efx = pci_get_drvdata(pdev);
1245     pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
1246 
1247     if (pci_enable_device(pdev)) {
1248         netif_err(efx, hw, efx->net_dev,
1249               "Cannot re-enable PCI device after reset.\n");
1250         status =  PCI_ERS_RESULT_DISCONNECT;
1251     }
1252 
1253     return status;
1254 }
1255 
1256 /* Perform the actual reset and resume I/O operations. */
1257 static void efx_io_resume(struct pci_dev *pdev)
1258 {
1259     struct efx_nic *efx = pci_get_drvdata(pdev);
1260     int rc;
1261 
1262     rtnl_lock();
1263 
1264     if (efx->state == STATE_DISABLED)
1265         goto out;
1266 
1267     rc = efx_reset(efx, RESET_TYPE_ALL);
1268     if (rc) {
1269         netif_err(efx, hw, efx->net_dev,
1270               "efx_reset failed after PCI error (%d)\n", rc);
1271     } else {
1272         efx->state = efx_recovered(efx->state);
1273         netif_dbg(efx, hw, efx->net_dev,
1274               "Done resetting and resuming IO after PCI error.\n");
1275     }
1276 
1277 out:
1278     rtnl_unlock();
1279 }
1280 
1281 /* For simplicity and reliability, we always require a slot reset and try to
1282  * reset the hardware when a pci error affecting the device is detected.
1283  * We leave both the link_reset and mmio_enabled callback unimplemented:
1284  * with our request for slot reset the mmio_enabled callback will never be
1285  * called, and the link_reset callback is not used by AER or EEH mechanisms.
1286  */
1287 const struct pci_error_handlers efx_err_handlers = {
1288     .error_detected = efx_io_error_detected,
1289     .slot_reset = efx_io_slot_reset,
1290     .resume     = efx_io_resume,
1291 };
1292 
1293 /* Determine whether the NIC will be able to handle TX offloads for a given
1294  * encapsulated packet.
1295  */
1296 static bool efx_can_encap_offloads(struct efx_nic *efx, struct sk_buff *skb)
1297 {
1298     struct gre_base_hdr *greh;
1299     __be16 dst_port;
1300     u8 ipproto;
1301 
1302     /* Does the NIC support encap offloads?
1303      * If not, we should never get here, because we shouldn't have
1304      * advertised encap offload feature flags in the first place.
1305      */
1306     if (WARN_ON_ONCE(!efx->type->udp_tnl_has_port))
1307         return false;
1308 
1309     /* Determine encapsulation protocol in use */
1310     switch (skb->protocol) {
1311     case htons(ETH_P_IP):
1312         ipproto = ip_hdr(skb)->protocol;
1313         break;
1314     case htons(ETH_P_IPV6):
1315         /* If there are extension headers, this will cause us to
1316          * think we can't offload something that we maybe could have.
1317          */
1318         ipproto = ipv6_hdr(skb)->nexthdr;
1319         break;
1320     default:
1321         /* Not IP, so can't offload it */
1322         return false;
1323     }
1324     switch (ipproto) {
1325     case IPPROTO_GRE:
1326         /* We support NVGRE but not IP over GRE or random gretaps.
1327          * Specifically, the NIC will accept GRE as encapsulated if
1328          * the inner protocol is Ethernet, but only handle it
1329          * correctly if the GRE header is 8 bytes long.  Moreover,
1330          * it will not update the Checksum or Sequence Number fields
1331          * if they are present.  (The Routing Present flag,
1332          * GRE_ROUTING, cannot be set else the header would be more
1333          * than 8 bytes long; so we don't have to worry about it.)
1334          */
1335         if (skb->inner_protocol_type != ENCAP_TYPE_ETHER)
1336             return false;
1337         if (ntohs(skb->inner_protocol) != ETH_P_TEB)
1338             return false;
1339         if (skb_inner_mac_header(skb) - skb_transport_header(skb) != 8)
1340             return false;
1341         greh = (struct gre_base_hdr *)skb_transport_header(skb);
1342         return !(greh->flags & (GRE_CSUM | GRE_SEQ));
1343     case IPPROTO_UDP:
1344         /* If the port is registered for a UDP tunnel, we assume the
1345          * packet is for that tunnel, and the NIC will handle it as
1346          * such.  If not, the NIC won't know what to do with it.
1347          */
1348         dst_port = udp_hdr(skb)->dest;
1349         return efx->type->udp_tnl_has_port(efx, dst_port);
1350     default:
1351         return false;
1352     }
1353 }
1354 
1355 netdev_features_t efx_features_check(struct sk_buff *skb, struct net_device *dev,
1356                      netdev_features_t features)
1357 {
1358     struct efx_nic *efx = efx_netdev_priv(dev);
1359 
1360     if (skb->encapsulation) {
1361         if (features & NETIF_F_GSO_MASK)
1362             /* Hardware can only do TSO with at most 208 bytes
1363              * of headers.
1364              */
1365             if (skb_inner_transport_offset(skb) >
1366                 EFX_TSO2_MAX_HDRLEN)
1367                 features &= ~(NETIF_F_GSO_MASK);
1368         if (features & (NETIF_F_GSO_MASK | NETIF_F_CSUM_MASK))
1369             if (!efx_can_encap_offloads(efx, skb))
1370                 features &= ~(NETIF_F_GSO_MASK |
1371                           NETIF_F_CSUM_MASK);
1372     }
1373     return features;
1374 }
1375 
1376 int efx_get_phys_port_id(struct net_device *net_dev,
1377              struct netdev_phys_item_id *ppid)
1378 {
1379     struct efx_nic *efx = efx_netdev_priv(net_dev);
1380 
1381     if (efx->type->get_phys_port_id)
1382         return efx->type->get_phys_port_id(efx, ppid);
1383     else
1384         return -EOPNOTSUPP;
1385 }
1386 
1387 int efx_get_phys_port_name(struct net_device *net_dev, char *name, size_t len)
1388 {
1389     struct efx_nic *efx = efx_netdev_priv(net_dev);
1390 
1391     if (snprintf(name, len, "p%u", efx->port_num) >= len)
1392         return -EINVAL;
1393     return 0;
1394 }
1395 
1396 void efx_detach_reps(struct efx_nic *efx)
1397 {
1398     struct net_device *rep_dev;
1399     struct efx_rep *efv;
1400 
1401     ASSERT_RTNL();
1402     netif_dbg(efx, drv, efx->net_dev, "Detaching VF representors\n");
1403     list_for_each_entry(efv, &efx->vf_reps, list) {
1404         rep_dev = efv->net_dev;
1405         if (!rep_dev)
1406             continue;
1407         netif_carrier_off(rep_dev);
1408         /* See efx_device_detach_sync() */
1409         netif_tx_lock_bh(rep_dev);
1410         netif_tx_stop_all_queues(rep_dev);
1411         netif_tx_unlock_bh(rep_dev);
1412     }
1413 }
1414 
1415 void efx_attach_reps(struct efx_nic *efx)
1416 {
1417     struct net_device *rep_dev;
1418     struct efx_rep *efv;
1419 
1420     ASSERT_RTNL();
1421     netif_dbg(efx, drv, efx->net_dev, "Attaching VF representors\n");
1422     list_for_each_entry(efv, &efx->vf_reps, list) {
1423         rep_dev = efv->net_dev;
1424         if (!rep_dev)
1425             continue;
1426         netif_tx_wake_all_queues(rep_dev);
1427         netif_carrier_on(rep_dev);
1428     }
1429 }