0001 /*
0002  * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet
0003  * driver for Linux.
0004  *
0005  * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved.
0006  *
0007  * This software is available to you under a choice of one of two
0008  * licenses.  You may choose to be licensed under the terms of the GNU
0009  * General Public License (GPL) Version 2, available from the file
0010  * COPYING in the main directory of this source tree, or the
0011  * OpenIB.org BSD license below:
0012  *
0013  *     Redistribution and use in source and binary forms, with or
0014  *     without modification, are permitted provided that the following
0015  *     conditions are met:
0016  *
0017  *      - Redistributions of source code must retain the above
0018  *        copyright notice, this list of conditions and the following
0019  *        disclaimer.
0020  *
0021  *      - Redistributions in binary form must reproduce the above
0022  *        copyright notice, this list of conditions and the following
0023  *        disclaimer in the documentation and/or other materials
0024  *        provided with the distribution.
0025  *
0026  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
0027  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
0028  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
0029  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
0030  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
0031  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
0032  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
0033  * SOFTWARE.
0034  */
0035 
0036 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
0037 
0038 #include <linux/module.h>
0039 #include <linux/moduleparam.h>
0040 #include <linux/init.h>
0041 #include <linux/pci.h>
0042 #include <linux/dma-mapping.h>
0043 #include <linux/netdevice.h>
0044 #include <linux/etherdevice.h>
0045 #include <linux/debugfs.h>
0046 #include <linux/ethtool.h>
0047 #include <linux/mdio.h>
0048 
0049 #include "t4vf_common.h"
0050 #include "t4vf_defs.h"
0051 
0052 #include "../cxgb4/t4_regs.h"
0053 #include "../cxgb4/t4_msg.h"
0054 
0055 /*
0056  * Generic information about the driver.
0057  */
0058 #define DRV_DESC "Chelsio T4/T5/T6 Virtual Function (VF) Network Driver"
0059 
0060 /*
0061  * Module Parameters.
0062  * ==================
0063  */
0064 
0065 /*
0066  * Default ethtool "message level" for adapters.
0067  */
0068 #define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
0069              NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
0070              NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)
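/*
 * Illustrative sketch (editor's example, not part of the driver): the
 * msg_enable bitmask initialized from DFLT_MSG_ENABLE gates log verbosity
 * in the usual netif_msg_*() style; a message class is only emitted when
 * its bit is set, e.g.:
 */
static inline bool example_msg_enabled(const struct adapter *adapter,
				       u32 msg_class)
{
	/* e.g. msg_class == NETIF_MSG_LINK for link up/down messages */
	return (adapter->msg_enable & msg_class) != 0;
}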
0071 
0072 /*
0073  * The driver uses the best interrupt scheme available on a platform in the
0074  * order MSI-X then MSI.  This parameter determines which of these schemes the
0075  * driver may consider as follows:
0076  *
0077  *     msi = 2: choose from among MSI-X and MSI
0078  *     msi = 1: only consider MSI interrupts
0079  *
0080  * Note that unlike the Physical Function driver, this Virtual Function driver
0081  * does _not_ support legacy INTx interrupts (this limitation is mandated by
0082  * the PCI-E SR-IOV standard).
0083  */
0084 #define MSI_MSIX    2
0085 #define MSI_MSI     1
0086 #define MSI_DEFAULT MSI_MSIX
0087 
0088 static int msi = MSI_DEFAULT;
0089 
0090 module_param(msi, int, 0644);
0091 MODULE_PARM_DESC(msi, "whether to use MSI-X or MSI");
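/*
 * Sketch (editor's illustration) of how the "msi" parameter is typically
 * honoured at probe time.  enable_msix() here is a hypothetical stand-in
 * for the driver's real MSI-X setup path, which lies outside this excerpt;
 * pci_enable_msi() is the standard kernel API.
 */
static int example_select_intr_scheme(struct adapter *adapter)
{
	if (msi == MSI_MSIX && enable_msix(adapter) == 0)
		return 0;		/* using MSI-X */
	if (pci_enable_msi(adapter->pdev) == 0)
		return 0;		/* fell back to (or chose) MSI */
	return -EINVAL;			/* no INTx fallback on a VF */
}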
0092 
0093 /*
0094  * Fundamental constants.
0095  * ======================
0096  */
0097 
0098 enum {
0099     MAX_TXQ_ENTRIES     = 16384,
0100     MAX_RSPQ_ENTRIES    = 16384,
0101     MAX_RX_BUFFERS      = 16384,
0102 
0103     MIN_TXQ_ENTRIES     = 32,
0104     MIN_RSPQ_ENTRIES    = 128,
0105     MIN_FL_ENTRIES      = 16,
0106 
0107     /*
0108      * For purposes of manipulating the Free List size we need to
0109      * recognize that Free Lists are actually Egress Queues (the host
0110      * produces free buffers which the hardware consumes), that Egress
0111      * Queue indices are all in units of Egress Context Units (bytes),
0112      * and that Free List entries are 64-bit PCI DMA addresses.  And
0113      * since Producer Index == Consumer Index implies an EMPTY list, we
0114      * always have at least one Egress Unit's worth of Free List entries
0115      * unused.  See sge.c for more details ...
0116      */
0117     EQ_UNIT = SGE_EQ_IDXSIZE,
0118     FL_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
0119     MIN_FL_RESID = FL_PER_EQ_UNIT,
0120 };
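/*
 * Worked example (editor's note): SGE_EQ_IDXSIZE is 64 bytes on these
 * adapters, so with 8-byte Free List entries FL_PER_EQ_UNIT is 8 and a
 * Free List always carries MIN_FL_RESID == 8 permanently unused entries.
 * The ringparam code later in this file applies exactly this adjustment:
 */
static inline unsigned int example_usable_fl_entries(unsigned int fl_size)
{
	return fl_size - MIN_FL_RESID;	/* entries actually available */
}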
0121 
0122 /*
0123  * Global driver state.
0124  * ====================
0125  */
0126 
0127 static struct dentry *cxgb4vf_debugfs_root;
0128 
0129 /*
0130  * OS "Callback" functions.
0131  * ========================
0132  */
0133 
0134 /*
0135  * The link status has changed on the indicated "port" (Virtual Interface).
0136  */
0137 void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
0138 {
0139     struct net_device *dev = adapter->port[pidx];
0140 
0141     /*
0142      * If the port is disabled or the current recorded "link up"
0143      * status matches the new status, just return.
0144      */
0145     if (!netif_running(dev) || link_ok == netif_carrier_ok(dev))
0146         return;
0147 
0148     /*
0149      * Tell the OS that the link status has changed and print a short
0150      * informative message on the console about the event.
0151      */
0152     if (link_ok) {
0153         const char *s;
0154         const char *fc;
0155         const struct port_info *pi = netdev_priv(dev);
0156 
0157         netif_carrier_on(dev);
0158 
0159         switch (pi->link_cfg.speed) {
0160         case 100:
0161             s = "100Mbps";
0162             break;
0163         case 1000:
0164             s = "1Gbps";
0165             break;
0166         case 10000:
0167             s = "10Gbps";
0168             break;
0169         case 25000:
0170             s = "25Gbps";
0171             break;
0172         case 40000:
0173             s = "40Gbps";
0174             break;
0175         case 100000:
0176             s = "100Gbps";
0177             break;
0178 
0179         default:
0180             s = "unknown";
0181             break;
0182         }
0183 
0184         switch ((int)pi->link_cfg.fc) {
0185         case PAUSE_RX:
0186             fc = "RX";
0187             break;
0188 
0189         case PAUSE_TX:
0190             fc = "TX";
0191             break;
0192 
0193         case PAUSE_RX | PAUSE_TX:
0194             fc = "RX/TX";
0195             break;
0196 
0197         default:
0198             fc = "no";
0199             break;
0200         }
0201 
0202         netdev_info(dev, "link up, %s, full-duplex, %s PAUSE\n", s, fc);
0203     } else {
0204         netif_carrier_off(dev);
0205         netdev_info(dev, "link down\n");
0206     }
0207 }
0208 
0209 /*
0210  * The port module type has changed on the indicated "port" (Virtual
0211  * Interface).
0212  */
0213 void t4vf_os_portmod_changed(struct adapter *adapter, int pidx)
0214 {
0215     static const char * const mod_str[] = {
0216         NULL, "LR", "SR", "ER", "passive DA", "active DA", "LRM"
0217     };
0218     const struct net_device *dev = adapter->port[pidx];
0219     const struct port_info *pi = netdev_priv(dev);
0220 
0221     if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
0222         dev_info(adapter->pdev_dev, "%s: port module unplugged\n",
0223              dev->name);
0224     else if (pi->mod_type < ARRAY_SIZE(mod_str))
0225         dev_info(adapter->pdev_dev, "%s: %s port module inserted\n",
0226              dev->name, mod_str[pi->mod_type]);
0227     else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED)
0228         dev_info(adapter->pdev_dev, "%s: unsupported optical port "
0229              "module inserted\n", dev->name);
0230     else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN)
0231         dev_info(adapter->pdev_dev, "%s: unknown port module inserted, "
0232              "forcing TWINAX\n", dev->name);
0233     else if (pi->mod_type == FW_PORT_MOD_TYPE_ERROR)
0234         dev_info(adapter->pdev_dev, "%s: transceiver module error\n",
0235              dev->name);
0236     else
0237         dev_info(adapter->pdev_dev, "%s: unknown module type %d "
0238              "inserted\n", dev->name, pi->mod_type);
0239 }
0240 
0241 static int cxgb4vf_set_addr_hash(struct port_info *pi)
0242 {
0243     struct adapter *adapter = pi->adapter;
0244     u64 vec = 0;
0245     bool ucast = false;
0246     struct hash_mac_addr *entry;
0247 
0248     /* Calculate the hash vector for the updated list and program it */
0249     list_for_each_entry(entry, &adapter->mac_hlist, list) {
0250         ucast |= is_unicast_ether_addr(entry->addr);
0251         vec |= (1ULL << hash_mac_addr(entry->addr));
0252     }
0253     return t4vf_set_addr_hash(adapter, pi->viid, ucast, vec, false);
0254 }
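/*
 * For reference (editor's sketch): hash_mac_addr() lives in the shared T4
 * common code rather than in this file.  A fold of the 48-bit MAC address
 * down to a 6-bit hash-table bucket, consistent with the common code,
 * looks like:
 */
static inline unsigned int example_mac_hash(const u8 *addr)
{
	u32 a = ((u32)addr[0] << 16) | ((u32)addr[1] << 8) | addr[2];
	u32 b = ((u32)addr[3] << 16) | ((u32)addr[4] << 8) | addr[5];

	a ^= b;
	a ^= (a >> 12);
	a ^= (a >> 6);
	return a & 0x3f;		/* one of 64 hash buckets */
}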
0255 
0256 /**
0257  *  cxgb4vf_change_mac - Update match filter for a MAC address.
0258  *  @pi: the port_info
0259  *  @viid: the VI id
0260  *  @tcam_idx: TCAM index of existing filter for old value of MAC address,
0261  *         or -1
0262  *  @addr: the new MAC address value
0263  *  @persistent: whether a new MAC allocation should be persistent
0264  *
0265  *  Modifies an MPS filter and sets it to the new MAC address if
0266  *  @tcam_idx >= 0, or adds the MAC address to a new filter if
0267  *  @tcam_idx < 0. In the latter case the address is added persistently
0268  *  if @persistent is %true.  If the TCAM runs out of entries, the
0269  *  address is programmed into the hash region instead.
0270  *
0271  */
0272 static int cxgb4vf_change_mac(struct port_info *pi, unsigned int viid,
0273                   int *tcam_idx, const u8 *addr, bool persistent)
0274 {
0275     struct hash_mac_addr *new_entry, *entry;
0276     struct adapter *adapter = pi->adapter;
0277     int ret;
0278 
0279     ret = t4vf_change_mac(adapter, viid, *tcam_idx, addr, persistent);
0280     /* We ran out of TCAM entries.  Try programming the hash region. */
0281     if (ret == -ENOMEM) {
0282         /* If the MAC address to be updated is already in the hash
0283          * address list, update it in place
0284          */
0285         list_for_each_entry(entry, &adapter->mac_hlist, list) {
0286             if (entry->iface_mac) {
0287                 ether_addr_copy(entry->addr, addr);
0288                 goto set_hash;
0289             }
0290         }
0291         new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL);
0292         if (!new_entry)
0293             return -ENOMEM;
0294         ether_addr_copy(new_entry->addr, addr);
0295         new_entry->iface_mac = true;
0296         list_add_tail(&new_entry->list, &adapter->mac_hlist);
0297 set_hash:
0298         ret = cxgb4vf_set_addr_hash(pi);
0299     } else if (ret >= 0) {
0300         *tcam_idx = ret;
0301         ret = 0;
0302     }
0303 
0304     return ret;
0305 }
0306 
0307 /*
0308  * Net device operations.
0309  * ======================
0310  */
0311 
0312 
0313 
0314 
0315 /*
0316  * Perform the MAC and PHY actions needed to enable a "port" (Virtual
0317  * Interface).
0318  */
0319 static int link_start(struct net_device *dev)
0320 {
0321     int ret;
0322     struct port_info *pi = netdev_priv(dev);
0323 
0324     /*
0325      * We do not set address filters and promiscuity here; the stack does
0326      * that step explicitly.  Enable VLAN acceleration.
0327      */
0328     ret = t4vf_set_rxmode(pi->adapter, pi->viid, dev->mtu, -1, -1, -1, 1,
0329                   true);
0330     if (ret == 0)
0331         ret = cxgb4vf_change_mac(pi, pi->viid,
0332                      &pi->xact_addr_filt,
0333                      dev->dev_addr, true);
0334 
0335     /*
0336      * We don't need to actually "start the link" itself since the
0337      * firmware will do that for us when the first Virtual Interface
0338      * is enabled on a port.
0339      */
0340     if (ret == 0)
0341         ret = t4vf_enable_pi(pi->adapter, pi, true, true);
0342 
0343     return ret;
0344 }
0345 
0346 /*
0347  * Name the MSI-X interrupts.
0348  */
0349 static void name_msix_vecs(struct adapter *adapter)
0350 {
0351     int namelen = sizeof(adapter->msix_info[0].desc) - 1;
0352     int pidx;
0353 
0354     /*
0355      * Firmware events.
0356      */
0357     snprintf(adapter->msix_info[MSIX_FW].desc, namelen,
0358          "%s-FWeventq", adapter->name);
0359     adapter->msix_info[MSIX_FW].desc[namelen] = 0;
0360 
0361     /*
0362      * Ethernet queues.
0363      */
0364     for_each_port(adapter, pidx) {
0365         struct net_device *dev = adapter->port[pidx];
0366         const struct port_info *pi = netdev_priv(dev);
0367         int qs, msi;
0368 
0369         for (qs = 0, msi = MSIX_IQFLINT; qs < pi->nqsets; qs++, msi++) {
0370             snprintf(adapter->msix_info[msi].desc, namelen,
0371                  "%s-%d", dev->name, qs);
0372             adapter->msix_info[msi].desc[namelen] = 0;
0373         }
0374     }
0375 }
0376 
0377 /*
0378  * Request all of our MSI-X resources.
0379  */
0380 static int request_msix_queue_irqs(struct adapter *adapter)
0381 {
0382     struct sge *s = &adapter->sge;
0383     int rxq, msi, err;
0384 
0385     /*
0386      * Firmware events.
0387      */
0388     err = request_irq(adapter->msix_info[MSIX_FW].vec, t4vf_sge_intr_msix,
0389               0, adapter->msix_info[MSIX_FW].desc, &s->fw_evtq);
0390     if (err)
0391         return err;
0392 
0393     /*
0394      * Ethernet queues.
0395      */
0396     msi = MSIX_IQFLINT;
0397     for_each_ethrxq(s, rxq) {
0398         err = request_irq(adapter->msix_info[msi].vec,
0399                   t4vf_sge_intr_msix, 0,
0400                   adapter->msix_info[msi].desc,
0401                   &s->ethrxq[rxq].rspq);
0402         if (err)
0403             goto err_free_irqs;
0404         msi++;
0405     }
0406     return 0;
0407 
0408 err_free_irqs:
0409     while (--rxq >= 0)
0410         free_irq(adapter->msix_info[--msi].vec, &s->ethrxq[rxq].rspq);
0411     free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
0412     return err;
0413 }
0414 
0415 /*
0416  * Free our MSI-X resources.
0417  */
0418 static void free_msix_queue_irqs(struct adapter *adapter)
0419 {
0420     struct sge *s = &adapter->sge;
0421     int rxq, msi;
0422 
0423     free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
0424     msi = MSIX_IQFLINT;
0425     for_each_ethrxq(s, rxq)
0426         free_irq(adapter->msix_info[msi++].vec,
0427              &s->ethrxq[rxq].rspq);
0428 }
0429 
0430 /*
0431  * Turn on NAPI and start up interrupts on a response queue.
0432  */
0433 static void qenable(struct sge_rspq *rspq)
0434 {
0435     napi_enable(&rspq->napi);
0436 
0437     /*
0438      * 0-increment the Going To Sleep register to start the timer and
0439      * enable interrupts.
0440      */
0441     t4_write_reg(rspq->adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
0442              CIDXINC_V(0) |
0443              SEINTARM_V(rspq->intr_params) |
0444              INGRESSQID_V(rspq->cntxt_id));
0445 }
0446 
0447 /*
0448  * Enable NAPI scheduling and interrupt generation for all Receive Queues.
0449  */
0450 static void enable_rx(struct adapter *adapter)
0451 {
0452     int rxq;
0453     struct sge *s = &adapter->sge;
0454 
0455     for_each_ethrxq(s, rxq)
0456         qenable(&s->ethrxq[rxq].rspq);
0457     qenable(&s->fw_evtq);
0458 
0459     /*
0460      * The interrupt queue doesn't use NAPI so we do the 0-increment of
0461      * its Going To Sleep register here to get it started.
0462      */
0463     if (adapter->flags & CXGB4VF_USING_MSI)
0464         t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
0465                  CIDXINC_V(0) |
0466                  SEINTARM_V(s->intrq.intr_params) |
0467                  INGRESSQID_V(s->intrq.cntxt_id));
0468 
0469 }
0470 
0471 /*
0472  * Wait until all NAPI handlers are descheduled.
0473  */
0474 static void quiesce_rx(struct adapter *adapter)
0475 {
0476     struct sge *s = &adapter->sge;
0477     int rxq;
0478 
0479     for_each_ethrxq(s, rxq)
0480         napi_disable(&s->ethrxq[rxq].rspq.napi);
0481     napi_disable(&s->fw_evtq.napi);
0482 }
0483 
0484 /*
0485  * Response queue handler for the firmware event queue.
0486  */
0487 static int fwevtq_handler(struct sge_rspq *rspq, const __be64 *rsp,
0488               const struct pkt_gl *gl)
0489 {
0490     /*
0491      * Extract response opcode and get pointer to CPL message body.
0492      */
0493     struct adapter *adapter = rspq->adapter;
0494     u8 opcode = ((const struct rss_header *)rsp)->opcode;
0495     void *cpl = (void *)(rsp + 1);
0496 
0497     switch (opcode) {
0498     case CPL_FW6_MSG: {
0499         /*
0500          * We've received an asynchronous message from the firmware.
0501          */
0502         const struct cpl_fw6_msg *fw_msg = cpl;
0503         if (fw_msg->type == FW6_TYPE_CMD_RPL)
0504             t4vf_handle_fw_rpl(adapter, fw_msg->data);
0505         break;
0506     }
0507 
0508     case CPL_FW4_MSG: {
0509         /* FW can send EGR_UPDATEs encapsulated in a CPL_FW4_MSG.
0510          */
0511         const struct cpl_sge_egr_update *p = (void *)(rsp + 3);
0512         opcode = CPL_OPCODE_G(ntohl(p->opcode_qid));
0513         if (opcode != CPL_SGE_EGR_UPDATE) {
0514             dev_err(adapter->pdev_dev,
0515                 "unexpected FW4/CPL %#x on FW event queue\n", opcode);
0516             break;
0517         }
0518         cpl = (void *)p;
0519     }
0520         fallthrough;
0521 
0522     case CPL_SGE_EGR_UPDATE: {
0523         /*
0524          * We've received an Egress Queue Status Update message.  We
0525          * get these, if the SGE is configured to send these when the
0526          * firmware passes certain points in processing our TX
0527          * Ethernet Queue or if we make an explicit request for one.
0528          * We use these updates to determine when we may need to
0529          * restart a TX Ethernet Queue which was stopped for lack of
0530          * free TX Queue Descriptors ...
0531          */
0532         const struct cpl_sge_egr_update *p = cpl;
0533         unsigned int qid = EGR_QID_G(be32_to_cpu(p->opcode_qid));
0534         struct sge *s = &adapter->sge;
0535         struct sge_txq *tq;
0536         struct sge_eth_txq *txq;
0537         unsigned int eq_idx;
0538 
0539         /*
0540          * Perform sanity checking on the Queue ID to make sure it
0541          * really refers to one of our TX Ethernet Egress Queues which
0542          * is active and matches the queue's ID.  None of these error
0543          * conditions should ever happen so we may want to either make
0544          * them fatal and/or conditionalized under DEBUG.
0545          */
0546         eq_idx = EQ_IDX(s, qid);
0547         if (unlikely(eq_idx >= MAX_EGRQ)) {
0548             dev_err(adapter->pdev_dev,
0549                 "Egress Update QID %d out of range\n", qid);
0550             break;
0551         }
0552         tq = s->egr_map[eq_idx];
0553         if (unlikely(tq == NULL)) {
0554             dev_err(adapter->pdev_dev,
0555                 "Egress Update QID %d TXQ=NULL\n", qid);
0556             break;
0557         }
0558         txq = container_of(tq, struct sge_eth_txq, q);
0559         if (unlikely(tq->abs_id != qid)) {
0560             dev_err(adapter->pdev_dev,
0561                 "Egress Update QID %d refers to TXQ %d\n",
0562                 qid, tq->abs_id);
0563             break;
0564         }
0565 
0566         /*
0567          * Restart a stopped TX Queue which has less than half of its
0568          * TX ring in use ...
0569          */
0570         txq->q.restarts++;
0571         netif_tx_wake_queue(txq->txq);
0572         break;
0573     }
0574 
0575     default:
0576         dev_err(adapter->pdev_dev,
0577             "unexpected CPL %#x on FW event queue\n", opcode);
0578     }
0579 
0580     return 0;
0581 }
0582 
0583 /*
0584  * Allocate SGE TX/RX response queues.  Determine how many sets of SGE queues
0585  * to use and initializes them.  We support multiple "Queue Sets" per port if
0586  * we have MSI-X, otherwise just one queue set per port.
0587  */
0588 static int setup_sge_queues(struct adapter *adapter)
0589 {
0590     struct sge *s = &adapter->sge;
0591     int err, pidx, msix;
0592 
0593     /*
0594      * Clear "Queue Set" Free List Starving and TX Queue Mapping Error
0595      * state.
0596      */
0597     bitmap_zero(s->starving_fl, MAX_EGRQ);
0598 
0599     /*
0600      * If we're using MSI interrupt mode we need to set up a "forwarded
0601      * interrupt" queue which we'll set up with our MSI vector.  The rest
0602      * of the ingress queues will be set up to forward their interrupts to
0603      * this queue ...  This must be first since t4vf_sge_alloc_rxq() uses
0604      * the intrq's queue ID as the interrupt forwarding queue for the
0605      * subsequent calls ...
0606      */
0607     if (adapter->flags & CXGB4VF_USING_MSI) {
0608         err = t4vf_sge_alloc_rxq(adapter, &s->intrq, false,
0609                      adapter->port[0], 0, NULL, NULL);
0610         if (err)
0611             goto err_free_queues;
0612     }
0613 
0614     /*
0615      * Allocate our ingress queue for asynchronous firmware messages.
0616      */
0617     err = t4vf_sge_alloc_rxq(adapter, &s->fw_evtq, true, adapter->port[0],
0618                  MSIX_FW, NULL, fwevtq_handler);
0619     if (err)
0620         goto err_free_queues;
0621 
0622     /*
0623      * Allocate each "port"'s initial Queue Sets.  These can be changed
0624      * later on ... up to the point where any interface on the adapter is
0625      * brought up at which point lots of things get nailed down
0626      * permanently ...
0627      */
0628     msix = MSIX_IQFLINT;
0629     for_each_port(adapter, pidx) {
0630         struct net_device *dev = adapter->port[pidx];
0631         struct port_info *pi = netdev_priv(dev);
0632         struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
0633         struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
0634         int qs;
0635 
0636         for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
0637             err = t4vf_sge_alloc_rxq(adapter, &rxq->rspq, false,
0638                          dev, msix++,
0639                          &rxq->fl, t4vf_ethrx_handler);
0640             if (err)
0641                 goto err_free_queues;
0642 
0643             err = t4vf_sge_alloc_eth_txq(adapter, txq, dev,
0644                          netdev_get_tx_queue(dev, qs),
0645                          s->fw_evtq.cntxt_id);
0646             if (err)
0647                 goto err_free_queues;
0648 
0649             rxq->rspq.idx = qs;
0650             memset(&rxq->stats, 0, sizeof(rxq->stats));
0651         }
0652     }
0653 
0654     /*
0655      * Create the reverse mappings for the queues.
0656      */
0657     s->egr_base = s->ethtxq[0].q.abs_id - s->ethtxq[0].q.cntxt_id;
0658     s->ingr_base = s->ethrxq[0].rspq.abs_id - s->ethrxq[0].rspq.cntxt_id;
0659     IQ_MAP(s, s->fw_evtq.abs_id) = &s->fw_evtq;
0660     for_each_port(adapter, pidx) {
0661         struct net_device *dev = adapter->port[pidx];
0662         struct port_info *pi = netdev_priv(dev);
0663         struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
0664         struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
0665         int qs;
0666 
0667         for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
0668             IQ_MAP(s, rxq->rspq.abs_id) = &rxq->rspq;
0669             EQ_MAP(s, txq->q.abs_id) = &txq->q;
0670 
0671             /*
0672              * The FW_IQ_CMD doesn't return the Absolute Queue IDs
0673              * for Free Lists but since all of the Egress Queues
0674              * (including Free Lists) have Relative Queue IDs
0675              * which are computed as Absolute - Base Queue ID, we
0676              * can synthesize the Absolute Queue IDs for the Free
0677              * Lists.  This is useful for debugging purposes when
0678              * we want to dump Queue Contexts via the PF Driver.
0679              */
0680             rxq->fl.abs_id = rxq->fl.cntxt_id + s->egr_base;
0681             EQ_MAP(s, rxq->fl.abs_id) = &rxq->fl;
0682         }
0683     }
0684     return 0;
0685 
0686 err_free_queues:
0687     t4vf_free_sge_resources(adapter);
0688     return err;
0689 }
0690 
0691 /*
0692  * Set up Receive Side Scaling (RSS) to distribute packets to multiple receive
0693  * queues.  We configure the RSS CPU lookup table to distribute to the number
0694  * of HW receive queues, and the response queue lookup table to narrow that
0695  * down to the response queues actually configured for each "port" (Virtual
0696  * Interface).  We always configure the RSS mapping for all ports since the
0697  * mapping table has plenty of entries.
0698  */
0699 static int setup_rss(struct adapter *adapter)
0700 {
0701     int pidx;
0702 
0703     for_each_port(adapter, pidx) {
0704         struct port_info *pi = adap2pinfo(adapter, pidx);
0705         struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
0706         u16 rss[MAX_PORT_QSETS];
0707         int qs, err;
0708 
0709         for (qs = 0; qs < pi->nqsets; qs++)
0710             rss[qs] = rxq[qs].rspq.abs_id;
0711 
0712         err = t4vf_config_rss_range(adapter, pi->viid,
0713                         0, pi->rss_size, rss, pi->nqsets);
0714         if (err)
0715             return err;
0716 
0717         /*
0718          * Perform Global RSS Mode-specific initialization.
0719          */
0720         switch (adapter->params.rss.mode) {
0721         case FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL:
0722             /*
0723              * If Tunnel All Lookup isn't specified in the global
0724              * RSS Configuration, then we need to specify a
0725              * default Ingress Queue for any ingress packets which
0726              * aren't hashed.  We'll use our first ingress queue
0727              * ...
0728              */
0729             if (!adapter->params.rss.u.basicvirtual.tnlalllookup) {
0730                 union rss_vi_config config;
0731                 err = t4vf_read_rss_vi_config(adapter,
0732                                   pi->viid,
0733                                   &config);
0734                 if (err)
0735                     return err;
0736                 config.basicvirtual.defaultq =
0737                     rxq[0].rspq.abs_id;
0738                 err = t4vf_write_rss_vi_config(adapter,
0739                                    pi->viid,
0740                                    &config);
0741                 if (err)
0742                     return err;
0743             }
0744             break;
0745         }
0746     }
0747 
0748     return 0;
0749 }
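/*
 * Editor's sketch: t4vf_config_rss_range() (in the common code, outside
 * this excerpt) is handed pi->rss_size indirection-table slots but only
 * pi->nqsets queue IDs, so the table is filled by cycling through the
 * supplied IDs, conceptually:
 */
static void example_fill_rss_table(u16 *table, unsigned int table_size,
				   const u16 *qids, unsigned int nqids)
{
	unsigned int i;

	for (i = 0; i < table_size; i++)
		table[i] = qids[i % nqids];	/* round-robin over queues */
}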
0750 
0751 /*
0752  * Bring the adapter up.  Called whenever we go from no "ports" open to having
0753  * one open.  This function performs the actions necessary to make an adapter
0754  * operational, such as completing the initialization of HW modules, and
0755  * enabling interrupts.  Must be called with the rtnl lock held.  (Note that
0756  * this is called "cxgb_up" in the PF Driver.)
0757  */
0758 static int adapter_up(struct adapter *adapter)
0759 {
0760     int err;
0761 
0762     /*
0763      * If this is the first time we've been called, perform basic
0764      * adapter setup.  Once we've done this, many of our adapter
0765      * parameters can no longer be changed ...
0766      */
0767     if ((adapter->flags & CXGB4VF_FULL_INIT_DONE) == 0) {
0768         err = setup_sge_queues(adapter);
0769         if (err)
0770             return err;
0771         err = setup_rss(adapter);
0772         if (err) {
0773             t4vf_free_sge_resources(adapter);
0774             return err;
0775         }
0776 
0777         if (adapter->flags & CXGB4VF_USING_MSIX)
0778             name_msix_vecs(adapter);
0779 
0780         adapter->flags |= CXGB4VF_FULL_INIT_DONE;
0781     }
0782 
0783     /*
0784      * Acquire our interrupt resources.  We only support MSI-X and MSI.
0785      */
0786     BUG_ON((adapter->flags &
0787            (CXGB4VF_USING_MSIX | CXGB4VF_USING_MSI)) == 0);
0788     if (adapter->flags & CXGB4VF_USING_MSIX)
0789         err = request_msix_queue_irqs(adapter);
0790     else
0791         err = request_irq(adapter->pdev->irq,
0792                   t4vf_intr_handler(adapter), 0,
0793                   adapter->name, adapter);
0794     if (err) {
0795         dev_err(adapter->pdev_dev, "request_irq failed, err %d\n",
0796             err);
0797         return err;
0798     }
0799 
0800     /*
0801      * Enable NAPI ingress processing and return success.
0802      */
0803     enable_rx(adapter);
0804     t4vf_sge_start(adapter);
0805 
0806     return 0;
0807 }
0808 
0809 /*
0810  * Bring the adapter down.  Called whenever the last "port" (Virtual
0811  * Interface) closed.  (Note that this routine is called "cxgb_down" in the PF
0812  * Driver.)
0813  */
0814 static void adapter_down(struct adapter *adapter)
0815 {
0816     /*
0817      * Free interrupt resources.
0818      */
0819     if (adapter->flags & CXGB4VF_USING_MSIX)
0820         free_msix_queue_irqs(adapter);
0821     else
0822         free_irq(adapter->pdev->irq, adapter);
0823 
0824     /*
0825      * Wait for NAPI handlers to finish.
0826      */
0827     quiesce_rx(adapter);
0828 }
0829 
0830 /*
0831  * Start up a net device.
0832  */
0833 static int cxgb4vf_open(struct net_device *dev)
0834 {
0835     int err;
0836     struct port_info *pi = netdev_priv(dev);
0837     struct adapter *adapter = pi->adapter;
0838 
0839     /*
0840      * If we don't have a connection to the firmware there's nothing we
0841      * can do.
0842      */
0843     if (!(adapter->flags & CXGB4VF_FW_OK))
0844         return -ENXIO;
0845 
0846     /*
0847      * If this is the first interface that we're opening on the "adapter",
0848      * bring the "adapter" up now.
0849      */
0850     if (adapter->open_device_map == 0) {
0851         err = adapter_up(adapter);
0852         if (err)
0853             return err;
0854     }
0855 
0856     /* It's possible that the basic port information could have
0857      * changed since we first read it.
0858      */
0859     err = t4vf_update_port_info(pi);
0860     if (err < 0)
0861         return err;
0862 
0863     /*
0864      * Note that this interface is up and start everything up ...
0865      */
0866     err = link_start(dev);
0867     if (err)
0868         goto err_unwind;
0869 
0870     pi->vlan_id = t4vf_get_vf_vlan_acl(adapter);
0871 
0872     netif_tx_start_all_queues(dev);
0873     set_bit(pi->port_id, &adapter->open_device_map);
0874     return 0;
0875 
0876 err_unwind:
0877     if (adapter->open_device_map == 0)
0878         adapter_down(adapter);
0879     return err;
0880 }
0881 
0882 /*
0883  * Shut down a net device.  This routine is called "cxgb_close" in the PF
0884  * Driver ...
0885  */
0886 static int cxgb4vf_stop(struct net_device *dev)
0887 {
0888     struct port_info *pi = netdev_priv(dev);
0889     struct adapter *adapter = pi->adapter;
0890 
0891     netif_tx_stop_all_queues(dev);
0892     netif_carrier_off(dev);
0893     t4vf_enable_pi(adapter, pi, false, false);
0894 
0895     clear_bit(pi->port_id, &adapter->open_device_map);
0896     if (adapter->open_device_map == 0)
0897         adapter_down(adapter);
0898     return 0;
0899 }
0900 
0901 /*
0902  * Translate our basic statistics into the standard "ifconfig" statistics.
0903  */
0904 static struct net_device_stats *cxgb4vf_get_stats(struct net_device *dev)
0905 {
0906     struct t4vf_port_stats stats;
0907     struct port_info *pi = netdev2pinfo(dev);
0908     struct adapter *adapter = pi->adapter;
0909     struct net_device_stats *ns = &dev->stats;
0910     int err;
0911 
0912     spin_lock(&adapter->stats_lock);
0913     err = t4vf_get_port_stats(adapter, pi->pidx, &stats);
0914     spin_unlock(&adapter->stats_lock);
0915 
0916     memset(ns, 0, sizeof(*ns));
0917     if (err)
0918         return ns;
0919 
0920     ns->tx_bytes = (stats.tx_bcast_bytes + stats.tx_mcast_bytes +
0921             stats.tx_ucast_bytes + stats.tx_offload_bytes);
0922     ns->tx_packets = (stats.tx_bcast_frames + stats.tx_mcast_frames +
0923               stats.tx_ucast_frames + stats.tx_offload_frames);
0924     ns->rx_bytes = (stats.rx_bcast_bytes + stats.rx_mcast_bytes +
0925             stats.rx_ucast_bytes);
0926     ns->rx_packets = (stats.rx_bcast_frames + stats.rx_mcast_frames +
0927               stats.rx_ucast_frames);
0928     ns->multicast = stats.rx_mcast_frames;
0929     ns->tx_errors = stats.tx_drop_frames;
0930     ns->rx_errors = stats.rx_err_frames;
0931 
0932     return ns;
0933 }
0934 
0935 static int cxgb4vf_mac_sync(struct net_device *netdev, const u8 *mac_addr)
0936 {
0937     struct port_info *pi = netdev_priv(netdev);
0938     struct adapter *adapter = pi->adapter;
0939     int ret;
0940     u64 mhash = 0;
0941     u64 uhash = 0;
0942     bool free = false;
0943     bool ucast = is_unicast_ether_addr(mac_addr);
0944     const u8 *maclist[1] = {mac_addr};
0945     struct hash_mac_addr *new_entry;
0946 
0947     ret = t4vf_alloc_mac_filt(adapter, pi->viid, free, 1, maclist,
0948                   NULL, ucast ? &uhash : &mhash, false);
0949     if (ret < 0)
0950         goto out;
0951     /* If the address was hashed (uhash/mhash != 0), add it to the hash
0952      * address list so that, in the end, we calculate the hash vector
0953      * for the whole list and program it
0954      */
0955     if (uhash || mhash) {
0956         new_entry = kzalloc(sizeof(*new_entry), GFP_ATOMIC);
0957         if (!new_entry)
0958             return -ENOMEM;
0959         ether_addr_copy(new_entry->addr, mac_addr);
0960         list_add_tail(&new_entry->list, &adapter->mac_hlist);
0961         ret = cxgb4vf_set_addr_hash(pi);
0962     }
0963 out:
0964     return ret < 0 ? ret : 0;
0965 }
0966 
0967 static int cxgb4vf_mac_unsync(struct net_device *netdev, const u8 *mac_addr)
0968 {
0969     struct port_info *pi = netdev_priv(netdev);
0970     struct adapter *adapter = pi->adapter;
0971     int ret;
0972     const u8 *maclist[1] = {mac_addr};
0973     struct hash_mac_addr *entry, *tmp;
0974 
0975     /* If the MAC address to be removed is in the hash addr
0976      * list, delete it from the list and update hash vector
0977      */
0978     list_for_each_entry_safe(entry, tmp, &adapter->mac_hlist, list) {
0979         if (ether_addr_equal(entry->addr, mac_addr)) {
0980             list_del(&entry->list);
0981             kfree(entry);
0982             return cxgb4vf_set_addr_hash(pi);
0983         }
0984     }
0985 
0986     ret = t4vf_free_mac_filt(adapter, pi->viid, 1, maclist, false);
0987     return ret < 0 ? -EINVAL : 0;
0988 }
0989 
0990 /*
0991  * Set RX properties of a port, such as promiscuity, address filters, and MTU.
0992  * If @mtu is -1 it is left unchanged.
0993  */
0994 static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
0995 {
0996     struct port_info *pi = netdev_priv(dev);
0997 
0998     __dev_uc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync);
0999     __dev_mc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync);
1000     return t4vf_set_rxmode(pi->adapter, pi->viid, -1,
1001                    (dev->flags & IFF_PROMISC) != 0,
1002                    (dev->flags & IFF_ALLMULTI) != 0,
1003                    1, -1, sleep_ok);
1004 }
1005 
1006 /*
1007  * Set the current receive modes on the device.
1008  */
1009 static void cxgb4vf_set_rxmode(struct net_device *dev)
1010 {
1011     /* unfortunately we can't return errors to the stack */
1012     set_rxmode(dev, -1, false);
1013 }
1014 
1015 /*
1016  * Find the entry in the interrupt holdoff timer value array which comes
1017  * closest to the specified interrupt holdoff value.
1018  */
1019 static int closest_timer(const struct sge *s, int us)
1020 {
1021     int i, timer_idx = 0, min_delta = INT_MAX;
1022 
1023     for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) {
1024         int delta = us - s->timer_val[i];
1025         if (delta < 0)
1026             delta = -delta;
1027         if (delta < min_delta) {
1028             min_delta = delta;
1029             timer_idx = i;
1030         }
1031     }
1032     return timer_idx;
1033 }
1034 
1035 static int closest_thres(const struct sge *s, int thres)
1036 {
1037     int i, delta, pktcnt_idx = 0, min_delta = INT_MAX;
1038 
1039     for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
1040         delta = thres - s->counter_val[i];
1041         if (delta < 0)
1042             delta = -delta;
1043         if (delta < min_delta) {
1044             min_delta = delta;
1045             pktcnt_idx = i;
1046         }
1047     }
1048     return pktcnt_idx;
1049 }
1050 
1051 /*
1052  * Return a queue's interrupt hold-off time in us.  0 means no timer.
1053  */
1054 static unsigned int qtimer_val(const struct adapter *adapter,
1055                    const struct sge_rspq *rspq)
1056 {
1057     unsigned int timer_idx = QINTR_TIMER_IDX_G(rspq->intr_params);
1058 
1059     return timer_idx < SGE_NTIMERS
1060         ? adapter->sge.timer_val[timer_idx]
1061         : 0;
1062 }
1063 
1064 /**
1065  *  set_rxq_intr_params - set a queue's interrupt holdoff parameters
1066  *  @adapter: the adapter
1067  *  @rspq: the RX response queue
1068  *  @us: the hold-off time in us, or 0 to disable timer
1069  *  @cnt: the hold-off packet count, or 0 to disable counter
1070  *
1071  *  Sets an RX response queue's interrupt hold-off time and packet count.
1072  *  At least one of the two needs to be enabled for the queue to generate
1073  *  interrupts.
1074  */
1075 static int set_rxq_intr_params(struct adapter *adapter, struct sge_rspq *rspq,
1076                    unsigned int us, unsigned int cnt)
1077 {
1078     unsigned int timer_idx;
1079 
1080     /*
1081      * If both the interrupt holdoff timer and count are specified as
1082      * zero, default to a holdoff count of 1 ...
1083      */
1084     if ((us | cnt) == 0)
1085         cnt = 1;
1086 
1087     /*
1088      * If an interrupt holdoff count has been specified, then find the
1089      * closest configured holdoff count and use that.  If the response
1090      * queue has already been created, then update its queue context
1091      * parameters ...
1092      */
1093     if (cnt) {
1094         int err;
1095         u32 v, pktcnt_idx;
1096 
1097         pktcnt_idx = closest_thres(&adapter->sge, cnt);
1098         if (rspq->desc && rspq->pktcnt_idx != pktcnt_idx) {
1099             v = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
1100                 FW_PARAMS_PARAM_X_V(
1101                     FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) |
1102                 FW_PARAMS_PARAM_YZ_V(rspq->cntxt_id);
1103             err = t4vf_set_params(adapter, 1, &v, &pktcnt_idx);
1104             if (err)
1105                 return err;
1106         }
1107         rspq->pktcnt_idx = pktcnt_idx;
1108     }
1109 
1110     /*
1111      * Compute the closest holdoff timer index from the supplied holdoff
1112      * timer value.
1113      */
1114     timer_idx = (us == 0
1115              ? SGE_TIMER_RSTRT_CNTR
1116              : closest_timer(&adapter->sge, us));
1117 
1118     /*
1119      * Update the response queue's interrupt coalescing parameters and
1120      * return success.
1121      */
1122     rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) |
1123                  QINTR_CNT_EN_V(cnt > 0));
1124     return 0;
1125 }
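/*
 * Usage example (editor's illustration): request an interrupt after at
 * most 5us or 8 packets, whichever comes first.  Passing us == 0 and
 * cnt == 0 instead would fall back to a holdoff count of 1, i.e. an
 * interrupt for every packet.
 */
static inline int example_set_moderation(struct adapter *adapter,
					 struct sge_rspq *rspq)
{
	return set_rxq_intr_params(adapter, rspq, 5, 8);
}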
1126 
1127 /*
1128  * Return a version number to identify the type of adapter.  The scheme is:
1129  * - bits 0..9: chip version
1130  * - bits 10..15: chip revision
1131  */
1132 static inline unsigned int mk_adap_vers(const struct adapter *adapter)
1133 {
1134     /*
1135      * Chip version 4, revision 0x3f (cxgb4vf).
1136      */
1137     return CHELSIO_CHIP_VERSION(adapter->params.chip) | (0x3f << 10);
1138 }
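/*
 * Editor's sketch: decoding the value mk_adap_vers() builds.  A T4 part
 * (chip version 4) yields 0xfc04: version 4 in bits 0..9, revision 0x3f
 * in bits 10..15.
 */
static inline void example_decode_adap_vers(unsigned int vers,
					    unsigned int *version,
					    unsigned int *revision)
{
	*version = vers & 0x3ff;		/* bits 0..9 */
	*revision = (vers >> 10) & 0x3f;	/* bits 10..15 */
}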
1139 
1140 /*
1141  * Execute the specified ioctl command.
1142  */
1143 static int cxgb4vf_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1144 {
1145     int ret = 0;
1146 
1147     switch (cmd) {
1148         /*
1149          * The VF Driver doesn't have access to any of the other
1150          * common Ethernet device ioctl()'s (like reading/writing
1151          * PHY registers, etc.
1152          */
1153 
1154     default:
1155         ret = -EOPNOTSUPP;
1156         break;
1157     }
1158     return ret;
1159 }
1160 
1161 /*
1162  * Change the device's MTU.
1163  */
1164 static int cxgb4vf_change_mtu(struct net_device *dev, int new_mtu)
1165 {
1166     int ret;
1167     struct port_info *pi = netdev_priv(dev);
1168 
1169     ret = t4vf_set_rxmode(pi->adapter, pi->viid, new_mtu,
1170                   -1, -1, -1, -1, true);
1171     if (!ret)
1172         dev->mtu = new_mtu;
1173     return ret;
1174 }
1175 
1176 static netdev_features_t cxgb4vf_fix_features(struct net_device *dev,
1177     netdev_features_t features)
1178 {
1179     /*
1180      * Since there is no support for separate rx/tx vlan accel
1181      * enable/disable make sure tx flag is always in same state as rx.
1182      */
1183     if (features & NETIF_F_HW_VLAN_CTAG_RX)
1184         features |= NETIF_F_HW_VLAN_CTAG_TX;
1185     else
1186         features &= ~NETIF_F_HW_VLAN_CTAG_TX;
1187 
1188     return features;
1189 }
1190 
1191 static int cxgb4vf_set_features(struct net_device *dev,
1192     netdev_features_t features)
1193 {
1194     struct port_info *pi = netdev_priv(dev);
1195     netdev_features_t changed = dev->features ^ features;
1196 
1197     if (changed & NETIF_F_HW_VLAN_CTAG_RX)
1198         t4vf_set_rxmode(pi->adapter, pi->viid, -1, -1, -1, -1,
1199                 features & NETIF_F_HW_VLAN_CTAG_TX, 0);
1200 
1201     return 0;
1202 }
1203 
1204 /*
1205  * Change the device's MAC address.
1206  */
1207 static int cxgb4vf_set_mac_addr(struct net_device *dev, void *_addr)
1208 {
1209     int ret;
1210     struct sockaddr *addr = _addr;
1211     struct port_info *pi = netdev_priv(dev);
1212 
1213     if (!is_valid_ether_addr(addr->sa_data))
1214         return -EADDRNOTAVAIL;
1215 
1216     ret = cxgb4vf_change_mac(pi, pi->viid, &pi->xact_addr_filt,
1217                  addr->sa_data, true);
1218     if (ret < 0)
1219         return ret;
1220 
1221     eth_hw_addr_set(dev, addr->sa_data);
1222     return 0;
1223 }
1224 
1225 #ifdef CONFIG_NET_POLL_CONTROLLER
1226 /*
1227  * Poll all of our receive queues.  This is called outside of normal interrupt
1228  * context.
1229  */
1230 static void cxgb4vf_poll_controller(struct net_device *dev)
1231 {
1232     struct port_info *pi = netdev_priv(dev);
1233     struct adapter *adapter = pi->adapter;
1234 
1235     if (adapter->flags & CXGB4VF_USING_MSIX) {
1236         struct sge_eth_rxq *rxq;
1237         int nqsets;
1238 
1239         rxq = &adapter->sge.ethrxq[pi->first_qset];
1240         for (nqsets = pi->nqsets; nqsets; nqsets--) {
1241             t4vf_sge_intr_msix(0, &rxq->rspq);
1242             rxq++;
1243         }
1244     } else
1245         t4vf_intr_handler(adapter)(0, adapter);
1246 }
1247 #endif
1248 
1249 /*
1250  * Ethtool operations.
1251  * ===================
1252  *
1253  * Note that we don't support any ethtool operations which change the physical
1254  * state of the port to which we're linked.
1255  */
1256 
1257 /**
1258  *  from_fw_port_mod_type - translate Firmware Port/Module type to Ethtool
1259  *  @port_type: Firmware Port Type
1260  *  @mod_type: Firmware Module Type
1261  *
1262  *  Translate Firmware Port/Module type to Ethtool Port Type.
1263  */
1264 static int from_fw_port_mod_type(enum fw_port_type port_type,
1265                  enum fw_port_module_type mod_type)
1266 {
1267     if (port_type == FW_PORT_TYPE_BT_SGMII ||
1268         port_type == FW_PORT_TYPE_BT_XFI ||
1269         port_type == FW_PORT_TYPE_BT_XAUI) {
1270         return PORT_TP;
1271     } else if (port_type == FW_PORT_TYPE_FIBER_XFI ||
1272            port_type == FW_PORT_TYPE_FIBER_XAUI) {
1273         return PORT_FIBRE;
1274     } else if (port_type == FW_PORT_TYPE_SFP ||
1275            port_type == FW_PORT_TYPE_QSFP_10G ||
1276            port_type == FW_PORT_TYPE_QSA ||
1277            port_type == FW_PORT_TYPE_QSFP ||
1278            port_type == FW_PORT_TYPE_CR4_QSFP ||
1279            port_type == FW_PORT_TYPE_CR_QSFP ||
1280            port_type == FW_PORT_TYPE_CR2_QSFP ||
1281            port_type == FW_PORT_TYPE_SFP28) {
1282         if (mod_type == FW_PORT_MOD_TYPE_LR ||
1283             mod_type == FW_PORT_MOD_TYPE_SR ||
1284             mod_type == FW_PORT_MOD_TYPE_ER ||
1285             mod_type == FW_PORT_MOD_TYPE_LRM)
1286             return PORT_FIBRE;
1287         else if (mod_type == FW_PORT_MOD_TYPE_TWINAX_PASSIVE ||
1288              mod_type == FW_PORT_MOD_TYPE_TWINAX_ACTIVE)
1289             return PORT_DA;
1290         else
1291             return PORT_OTHER;
1292     } else if (port_type == FW_PORT_TYPE_KR4_100G ||
1293            port_type == FW_PORT_TYPE_KR_SFP28 ||
1294            port_type == FW_PORT_TYPE_KR_XLAUI) {
1295         return PORT_NONE;
1296     }
1297 
1298     return PORT_OTHER;
1299 }
1300 
1301 /**
1302  *  fw_caps_to_lmm - translate Firmware to ethtool Link Mode Mask
1303  *  @port_type: Firmware Port Type
1304  *  @fw_caps: Firmware Port Capabilities
1305  *  @link_mode_mask: ethtool Link Mode Mask
1306  *
1307  *  Translate a Firmware Port Capabilities specification to an ethtool
1308  *  Link Mode Mask.
1309  */
1310 static void fw_caps_to_lmm(enum fw_port_type port_type,
1311                unsigned int fw_caps,
1312                unsigned long *link_mode_mask)
1313 {
1314     #define SET_LMM(__lmm_name) \
1315         __set_bit(ETHTOOL_LINK_MODE_ ## __lmm_name ## _BIT, \
1316               link_mode_mask)
1317 
1318     #define FW_CAPS_TO_LMM(__fw_name, __lmm_name) \
1319         do { \
1320             if (fw_caps & FW_PORT_CAP32_ ## __fw_name) \
1321                 SET_LMM(__lmm_name); \
1322         } while (0)
1323 
1324     switch (port_type) {
1325     case FW_PORT_TYPE_BT_SGMII:
1326     case FW_PORT_TYPE_BT_XFI:
1327     case FW_PORT_TYPE_BT_XAUI:
1328         SET_LMM(TP);
1329         FW_CAPS_TO_LMM(SPEED_100M, 100baseT_Full);
1330         FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
1331         FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
1332         break;
1333 
1334     case FW_PORT_TYPE_KX4:
1335     case FW_PORT_TYPE_KX:
1336         SET_LMM(Backplane);
1337         FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
1338         FW_CAPS_TO_LMM(SPEED_10G, 10000baseKX4_Full);
1339         break;
1340 
1341     case FW_PORT_TYPE_KR:
1342         SET_LMM(Backplane);
1343         FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
1344         break;
1345 
1346     case FW_PORT_TYPE_BP_AP:
1347         SET_LMM(Backplane);
1348         FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
1349         FW_CAPS_TO_LMM(SPEED_10G, 10000baseR_FEC);
1350         FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
1351         break;
1352 
1353     case FW_PORT_TYPE_BP4_AP:
1354         SET_LMM(Backplane);
1355         FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
1356         FW_CAPS_TO_LMM(SPEED_10G, 10000baseR_FEC);
1357         FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
1358         FW_CAPS_TO_LMM(SPEED_10G, 10000baseKX4_Full);
1359         break;
1360 
1361     case FW_PORT_TYPE_FIBER_XFI:
1362     case FW_PORT_TYPE_FIBER_XAUI:
1363     case FW_PORT_TYPE_SFP:
1364     case FW_PORT_TYPE_QSFP_10G:
1365     case FW_PORT_TYPE_QSA:
1366         SET_LMM(FIBRE);
1367         FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
1368         FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
1369         break;
1370 
1371     case FW_PORT_TYPE_BP40_BA:
1372     case FW_PORT_TYPE_QSFP:
1373         SET_LMM(FIBRE);
1374         FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
1375         FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
1376         FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full);
1377         break;
1378 
1379     case FW_PORT_TYPE_CR_QSFP:
1380     case FW_PORT_TYPE_SFP28:
1381         SET_LMM(FIBRE);
1382         FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
1383         FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
1384         FW_CAPS_TO_LMM(SPEED_25G, 25000baseCR_Full);
1385         break;
1386 
1387     case FW_PORT_TYPE_KR_SFP28:
1388         SET_LMM(Backplane);
1389         FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
1390         FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
1391         FW_CAPS_TO_LMM(SPEED_25G, 25000baseKR_Full);
1392         break;
1393 
1394     case FW_PORT_TYPE_KR_XLAUI:
1395         SET_LMM(Backplane);
1396         FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
1397         FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
1398         FW_CAPS_TO_LMM(SPEED_40G, 40000baseKR4_Full);
1399         break;
1400 
1401     case FW_PORT_TYPE_CR2_QSFP:
1402         SET_LMM(FIBRE);
1403         FW_CAPS_TO_LMM(SPEED_50G, 50000baseSR2_Full);
1404         break;
1405 
1406     case FW_PORT_TYPE_KR4_100G:
1407     case FW_PORT_TYPE_CR4_QSFP:
1408         SET_LMM(FIBRE);
1409         FW_CAPS_TO_LMM(SPEED_1G,  1000baseT_Full);
1410         FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
1411         FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full);
1412         FW_CAPS_TO_LMM(SPEED_25G, 25000baseCR_Full);
1413         FW_CAPS_TO_LMM(SPEED_50G, 50000baseCR2_Full);
1414         FW_CAPS_TO_LMM(SPEED_100G, 100000baseCR4_Full);
1415         break;
1416 
1417     default:
1418         break;
1419     }
1420 
1421     if (fw_caps & FW_PORT_CAP32_FEC_V(FW_PORT_CAP32_FEC_M)) {
1422         FW_CAPS_TO_LMM(FEC_RS, FEC_RS);
1423         FW_CAPS_TO_LMM(FEC_BASER_RS, FEC_BASER);
1424     } else {
1425         SET_LMM(FEC_NONE);
1426     }
1427 
1428     FW_CAPS_TO_LMM(ANEG, Autoneg);
1429     FW_CAPS_TO_LMM(802_3_PAUSE, Pause);
1430     FW_CAPS_TO_LMM(802_3_ASM_DIR, Asym_Pause);
1431 
1432     #undef FW_CAPS_TO_LMM
1433     #undef SET_LMM
1434 }
1435 
1436 static int cxgb4vf_get_link_ksettings(struct net_device *dev,
1437                   struct ethtool_link_ksettings *link_ksettings)
1438 {
1439     struct port_info *pi = netdev_priv(dev);
1440     struct ethtool_link_settings *base = &link_ksettings->base;
1441 
1442     /* For the time being, the Firmware doesn't send up Port State changes
1443      * when the Virtual Interface attached to the Port is down.  So
1444      * if it's down, let's grab any changes.
1445      */
1446     if (!netif_running(dev))
1447         (void)t4vf_update_port_info(pi);
1448 
1449     ethtool_link_ksettings_zero_link_mode(link_ksettings, supported);
1450     ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
1451     ethtool_link_ksettings_zero_link_mode(link_ksettings, lp_advertising);
1452 
1453     base->port = from_fw_port_mod_type(pi->port_type, pi->mod_type);
1454 
1455     if (pi->mdio_addr >= 0) {
1456         base->phy_address = pi->mdio_addr;
1457         base->mdio_support = (pi->port_type == FW_PORT_TYPE_BT_SGMII
1458                       ? ETH_MDIO_SUPPORTS_C22
1459                       : ETH_MDIO_SUPPORTS_C45);
1460     } else {
1461         base->phy_address = 255;
1462         base->mdio_support = 0;
1463     }
1464 
1465     fw_caps_to_lmm(pi->port_type, pi->link_cfg.pcaps,
1466                link_ksettings->link_modes.supported);
1467     fw_caps_to_lmm(pi->port_type, pi->link_cfg.acaps,
1468                link_ksettings->link_modes.advertising);
1469     fw_caps_to_lmm(pi->port_type, pi->link_cfg.lpacaps,
1470                link_ksettings->link_modes.lp_advertising);
1471 
1472     if (netif_carrier_ok(dev)) {
1473         base->speed = pi->link_cfg.speed;
1474         base->duplex = DUPLEX_FULL;
1475     } else {
1476         base->speed = SPEED_UNKNOWN;
1477         base->duplex = DUPLEX_UNKNOWN;
1478     }
1479 
1480     base->autoneg = pi->link_cfg.autoneg;
1481     if (pi->link_cfg.pcaps & FW_PORT_CAP32_ANEG)
1482         ethtool_link_ksettings_add_link_mode(link_ksettings,
1483                              supported, Autoneg);
1484     if (pi->link_cfg.autoneg)
1485         ethtool_link_ksettings_add_link_mode(link_ksettings,
1486                              advertising, Autoneg);
1487 
1488     return 0;
1489 }
1490 
1491 /* Translate the Firmware FEC value into the ethtool value. */
1492 static inline unsigned int fwcap_to_eth_fec(unsigned int fw_fec)
1493 {
1494     unsigned int eth_fec = 0;
1495 
1496     if (fw_fec & FW_PORT_CAP32_FEC_RS)
1497         eth_fec |= ETHTOOL_FEC_RS;
1498     if (fw_fec & FW_PORT_CAP32_FEC_BASER_RS)
1499         eth_fec |= ETHTOOL_FEC_BASER;
1500 
1501     /* if nothing is set, then FEC is off */
1502     if (!eth_fec)
1503         eth_fec = ETHTOOL_FEC_OFF;
1504 
1505     return eth_fec;
1506 }
1507 
1508 /* Translate Common Code FEC value into ethtool value. */
1509 static inline unsigned int cc_to_eth_fec(unsigned int cc_fec)
1510 {
1511     unsigned int eth_fec = 0;
1512 
1513     if (cc_fec & FEC_AUTO)
1514         eth_fec |= ETHTOOL_FEC_AUTO;
1515     if (cc_fec & FEC_RS)
1516         eth_fec |= ETHTOOL_FEC_RS;
1517     if (cc_fec & FEC_BASER_RS)
1518         eth_fec |= ETHTOOL_FEC_BASER;
1519 
1520     /* if nothing is set, then FEC is off */
1521     if (!eth_fec)
1522         eth_fec = ETHTOOL_FEC_OFF;
1523 
1524     return eth_fec;
1525 }
1526 
1527 static int cxgb4vf_get_fecparam(struct net_device *dev,
1528                 struct ethtool_fecparam *fec)
1529 {
1530     const struct port_info *pi = netdev_priv(dev);
1531     const struct link_config *lc = &pi->link_cfg;
1532 
1533     /* Translate the Firmware FEC Support into the ethtool value.  We
1534      * always support IEEE 802.3 "automatic" selection of Link FEC type if
1535      * any FEC is supported.
1536      */
1537     fec->fec = fwcap_to_eth_fec(lc->pcaps);
1538     if (fec->fec != ETHTOOL_FEC_OFF)
1539         fec->fec |= ETHTOOL_FEC_AUTO;
1540 
1541     /* Translate the current internal FEC parameters into the
1542      * ethtool values.
1543      */
1544     fec->active_fec = cc_to_eth_fec(lc->fec);
1545     return 0;
1546 }
1547 
1548 /*
1549  * Return our driver information.
1550  */
1551 static void cxgb4vf_get_drvinfo(struct net_device *dev,
1552                 struct ethtool_drvinfo *drvinfo)
1553 {
1554     struct adapter *adapter = netdev2adap(dev);
1555 
1556     strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
1557     strlcpy(drvinfo->bus_info, pci_name(to_pci_dev(dev->dev.parent)),
1558         sizeof(drvinfo->bus_info));
1559     snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
1560          "%u.%u.%u.%u, TP %u.%u.%u.%u",
1561          FW_HDR_FW_VER_MAJOR_G(adapter->params.dev.fwrev),
1562          FW_HDR_FW_VER_MINOR_G(adapter->params.dev.fwrev),
1563          FW_HDR_FW_VER_MICRO_G(adapter->params.dev.fwrev),
1564          FW_HDR_FW_VER_BUILD_G(adapter->params.dev.fwrev),
1565          FW_HDR_FW_VER_MAJOR_G(adapter->params.dev.tprev),
1566          FW_HDR_FW_VER_MINOR_G(adapter->params.dev.tprev),
1567          FW_HDR_FW_VER_MICRO_G(adapter->params.dev.tprev),
1568          FW_HDR_FW_VER_BUILD_G(adapter->params.dev.tprev));
1569 }
1570 
1571 /*
1572  * Return current adapter message level.
1573  */
1574 static u32 cxgb4vf_get_msglevel(struct net_device *dev)
1575 {
1576     return netdev2adap(dev)->msg_enable;
1577 }
1578 
1579 /*
1580  * Set current adapter message level.
1581  */
1582 static void cxgb4vf_set_msglevel(struct net_device *dev, u32 msglevel)
1583 {
1584     netdev2adap(dev)->msg_enable = msglevel;
1585 }
1586 
1587 /*
1588  * Return the device's current Queue Set ring size parameters along with the
1589  * allowed maximum values.  Since ethtool doesn't understand the concept of
1590  * multi-queue devices, we just return the current values associated with the
1591  * first Queue Set.
1592  */
1593 static void cxgb4vf_get_ringparam(struct net_device *dev,
1594                   struct ethtool_ringparam *rp,
1595                   struct kernel_ethtool_ringparam *kernel_rp,
1596                   struct netlink_ext_ack *extack)
1597 {
1598     const struct port_info *pi = netdev_priv(dev);
1599     const struct sge *s = &pi->adapter->sge;
1600 
1601     rp->rx_max_pending = MAX_RX_BUFFERS;
1602     rp->rx_mini_max_pending = MAX_RSPQ_ENTRIES;
1603     rp->rx_jumbo_max_pending = 0;
1604     rp->tx_max_pending = MAX_TXQ_ENTRIES;
1605 
1606     rp->rx_pending = s->ethrxq[pi->first_qset].fl.size - MIN_FL_RESID;
1607     rp->rx_mini_pending = s->ethrxq[pi->first_qset].rspq.size;
1608     rp->rx_jumbo_pending = 0;
1609     rp->tx_pending = s->ethtxq[pi->first_qset].q.size;
1610 }
1611 
1612 /*
1613  * Set the Queue Set ring size parameters for the device.  Again, since
1614  * ethtool doesn't allow for the concept of multiple queues per device, we'll
1615  * apply these new values across all of the Queue Sets associated with the
1616  * device -- after vetting them of course!
1617  */
1618 static int cxgb4vf_set_ringparam(struct net_device *dev,
1619                  struct ethtool_ringparam *rp,
1620                  struct kernel_ethtool_ringparam *kernel_rp,
1621                  struct netlink_ext_ack *extack)
1622 {
1623     const struct port_info *pi = netdev_priv(dev);
1624     struct adapter *adapter = pi->adapter;
1625     struct sge *s = &adapter->sge;
1626     int qs;
1627 
1628     if (rp->rx_pending > MAX_RX_BUFFERS ||
1629         rp->rx_jumbo_pending ||
1630         rp->tx_pending > MAX_TXQ_ENTRIES ||
1631         rp->rx_mini_pending > MAX_RSPQ_ENTRIES ||
1632         rp->rx_mini_pending < MIN_RSPQ_ENTRIES ||
1633         rp->rx_pending < MIN_FL_ENTRIES ||
1634         rp->tx_pending < MIN_TXQ_ENTRIES)
1635         return -EINVAL;
1636 
1637     if (adapter->flags & CXGB4VF_FULL_INIT_DONE)
1638         return -EBUSY;
1639 
1640     for (qs = pi->first_qset; qs < pi->first_qset + pi->nqsets; qs++) {
1641         s->ethrxq[qs].fl.size = rp->rx_pending + MIN_FL_RESID;
1642         s->ethrxq[qs].rspq.size = rp->rx_mini_pending;
1643         s->ethtxq[qs].q.size = rp->tx_pending;
1644     }
1645     return 0;
1646 }
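/*
 * Usage sketch (values illustrative): while the interface is still down
 * (otherwise we return -EBUSY above), an administrator can resize every
 * Queue Set of the port in one shot with something like
 *
 *     ethtool -G <dev> rx 1024 rx-mini 512 tx 2048
 *
 * where "rx" sets the Free List size (we add back MIN_FL_RESID), "rx-mini"
 * the Response Queue size and "tx" the TX Queue size.
 */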
1647 
1648 /*
1649  * Return the interrupt holdoff timer and count for the first Queue Set on the
1650  * device.  Our extension ioctl() (the cxgbtool interface) allows the
1651  * interrupt holdoff timer to be read on all of the device's Queue Sets.
1652  */
1653 static int cxgb4vf_get_coalesce(struct net_device *dev,
1654                 struct ethtool_coalesce *coalesce,
1655                 struct kernel_ethtool_coalesce *kernel_coal,
1656                 struct netlink_ext_ack *extack)
1657 {
1658     const struct port_info *pi = netdev_priv(dev);
1659     const struct adapter *adapter = pi->adapter;
1660     const struct sge_rspq *rspq = &adapter->sge.ethrxq[pi->first_qset].rspq;
1661 
1662     coalesce->rx_coalesce_usecs = qtimer_val(adapter, rspq);
1663     coalesce->rx_max_coalesced_frames =
1664         ((rspq->intr_params & QINTR_CNT_EN_F)
1665          ? adapter->sge.counter_val[rspq->pktcnt_idx]
1666          : 0);
1667     return 0;
1668 }
1669 
1670 /*
1671  * Set the RX interrupt holdoff timer and count for the first Queue Set on the
1672  * interface.  Our extension ioctl() (the cxgbtool interface) allows us to set
1673  * the interrupt holdoff timer on any of the device's Queue Sets.
1674  */
1675 static int cxgb4vf_set_coalesce(struct net_device *dev,
1676                 struct ethtool_coalesce *coalesce,
1677                 struct kernel_ethtool_coalesce *kernel_coal,
1678                 struct netlink_ext_ack *extack)
1679 {
1680     const struct port_info *pi = netdev_priv(dev);
1681     struct adapter *adapter = pi->adapter;
1682 
1683     return set_rxq_intr_params(adapter,
1684                    &adapter->sge.ethrxq[pi->first_qset].rspq,
1685                    coalesce->rx_coalesce_usecs,
1686                    coalesce->rx_max_coalesced_frames);
1687 }
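/*
 * Usage sketch (values illustrative): "ethtool -C <dev> rx-usecs 5
 * rx-frames 8" maps to rx_coalesce_usecs = 5 and rx_max_coalesced_frames
 * = 8 above, i.e. roughly a 5us holdoff timer (quantized to the nearest
 * supported timer value) with an 8-packet counter threshold.
 */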
1688 
1689 /*
1690  * Report current port link pause parameter settings.
1691  */
1692 static void cxgb4vf_get_pauseparam(struct net_device *dev,
1693                    struct ethtool_pauseparam *pauseparam)
1694 {
1695     struct port_info *pi = netdev_priv(dev);
1696 
1697     pauseparam->autoneg = (pi->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
1698     pauseparam->rx_pause = (pi->link_cfg.advertised_fc & PAUSE_RX) != 0;
1699     pauseparam->tx_pause = (pi->link_cfg.advertised_fc & PAUSE_TX) != 0;
1700 }
1701 
1702 /*
1703  * Identify the port by blinking the port's LED.
1704  */
1705 static int cxgb4vf_phys_id(struct net_device *dev,
1706                enum ethtool_phys_id_state state)
1707 {
1708     unsigned int val;
1709     struct port_info *pi = netdev_priv(dev);
1710 
1711     if (state == ETHTOOL_ID_ACTIVE)
1712         val = 0xffff;
1713     else if (state == ETHTOOL_ID_INACTIVE)
1714         val = 0;
1715     else
1716         return -EINVAL;
1717 
1718     return t4vf_identify_port(pi->adapter, pi->viid, val);
1719 }
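/*
 * This backs "ethtool -p <dev>": the ethtool core calls us with
 * ETHTOOL_ID_ACTIVE when identification starts and ETHTOOL_ID_INACTIVE
 * when it ends, and we simply forward the request to the firmware which
 * does the actual LED blinking.
 */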
1720 
1721 /*
1722  * Port stats maintained per queue of the port.
1723  */
1724 struct queue_port_stats {
1725     u64 tso;
1726     u64 tx_csum;
1727     u64 rx_csum;
1728     u64 vlan_ex;
1729     u64 vlan_ins;
1730     u64 lro_pkts;
1731     u64 lro_merged;
1732 };
1733 
1734 /*
1735  * Strings for the ETH_SS_STATS statistics set ("ethtool -S").  Note that
1736  * these need to match the order of statistics returned by
1737  * t4vf_get_port_stats().
1738  */
1739 static const char stats_strings[][ETH_GSTRING_LEN] = {
1740     /*
1741      * These must match the layout of the t4vf_port_stats structure.
1742      */
1743     "TxBroadcastBytes  ",
1744     "TxBroadcastFrames ",
1745     "TxMulticastBytes  ",
1746     "TxMulticastFrames ",
1747     "TxUnicastBytes    ",
1748     "TxUnicastFrames   ",
1749     "TxDroppedFrames   ",
1750     "TxOffloadBytes    ",
1751     "TxOffloadFrames   ",
1752     "RxBroadcastBytes  ",
1753     "RxBroadcastFrames ",
1754     "RxMulticastBytes  ",
1755     "RxMulticastFrames ",
1756     "RxUnicastBytes    ",
1757     "RxUnicastFrames   ",
1758     "RxErrorFrames     ",
1759 
1760     /*
1761      * These are accumulated per-queue statistics and must match the
1762      * order of the fields in the queue_port_stats structure.
1763      */
1764     "TSO               ",
1765     "TxCsumOffload     ",
1766     "RxCsumGood        ",
1767     "VLANextractions   ",
1768     "VLANinsertions    ",
1769     "GROPackets        ",
1770     "GROMerged         ",
1771 };
1772 
1773 /*
1774  * Return the number of statistics in the specified statistics set.
1775  */
1776 static int cxgb4vf_get_sset_count(struct net_device *dev, int sset)
1777 {
1778     switch (sset) {
1779     case ETH_SS_STATS:
1780         return ARRAY_SIZE(stats_strings);
1781     default:
1782         return -EOPNOTSUPP;
1783     }
1784     /*NOTREACHED*/
1785 }
1786 
1787 /*
1788  * Return the strings for the specified statistics set.
1789  */
1790 static void cxgb4vf_get_strings(struct net_device *dev,
1791                 u32 sset,
1792                 u8 *data)
1793 {
1794     switch (sset) {
1795     case ETH_SS_STATS:
1796         memcpy(data, stats_strings, sizeof(stats_strings));
1797         break;
1798     }
1799 }
1800 
1801 /*
1802  * Small utility routine to accumulate queue statistics across the queues of
1803  * a "port".
1804  */
1805 static void collect_sge_port_stats(const struct adapter *adapter,
1806                    const struct port_info *pi,
1807                    struct queue_port_stats *stats)
1808 {
1809     const struct sge_eth_txq *txq = &adapter->sge.ethtxq[pi->first_qset];
1810     const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
1811     int qs;
1812 
1813     memset(stats, 0, sizeof(*stats));
1814     for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
1815         stats->tso += txq->tso;
1816         stats->tx_csum += txq->tx_cso;
1817         stats->rx_csum += rxq->stats.rx_cso;
1818         stats->vlan_ex += rxq->stats.vlan_ex;
1819         stats->vlan_ins += txq->vlan_ins;
1820         stats->lro_pkts += rxq->stats.lro_pkts;
1821         stats->lro_merged += rxq->stats.lro_merged;
1822     }
1823 }
1824 
1825 /*
1826  * Return the ETH_SS_STATS statistics set.
1827  */
1828 static void cxgb4vf_get_ethtool_stats(struct net_device *dev,
1829                       struct ethtool_stats *stats,
1830                       u64 *data)
1831 {
1832     struct port_info *pi = netdev2pinfo(dev);
1833     struct adapter *adapter = pi->adapter;
1834     int err = t4vf_get_port_stats(adapter, pi->pidx,
1835                       (struct t4vf_port_stats *)data);
1836     if (err)
1837         memset(data, 0, sizeof(struct t4vf_port_stats));
1838 
1839     data += sizeof(struct t4vf_port_stats) / sizeof(u64);
1840     collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data);
1841 }
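/*
 * A compile-time cross-check along these lines (a sketch only, not part
 * of the driver) would catch stats_strings drifting out of sync with the
 * two structures whose layouts it mirrors:
 *
 *     BUILD_BUG_ON(ARRAY_SIZE(stats_strings) !=
 *                  sizeof(struct t4vf_port_stats) / sizeof(u64) +
 *                  sizeof(struct queue_port_stats) / sizeof(u64));
 */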
1842 
1843 /*
1844  * Return the size of our register map.
1845  */
1846 static int cxgb4vf_get_regs_len(struct net_device *dev)
1847 {
1848     return T4VF_REGMAP_SIZE;
1849 }
1850 
1851 /*
1852  * Dump a block of registers, start to end inclusive, into a buffer.
1853  */
1854 static void reg_block_dump(struct adapter *adapter, void *regbuf,
1855                unsigned int start, unsigned int end)
1856 {
1857     u32 *bp = regbuf + start - T4VF_REGMAP_START;
1858 
1859     for ( ; start <= end; start += sizeof(u32)) {
1860         /*
1861          * Avoid reading the Mailbox Control register since that
1862          * can trigger a Mailbox Ownership Arbitration cycle and
1863          * interfere with communication with the firmware.
1864          */
1865         if (start == T4VF_CIM_BASE_ADDR + CIM_VF_EXT_MAILBOX_CTRL)
1866             *bp++ = 0xffff;
1867         else
1868             *bp++ = t4_read_reg(adapter, start);
1869     }
1870 }
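/*
 * Note that the arithmetic on the void *regbuf above relies on the GCC
 * extension (sizeof(void) == 1) that the kernel is built with: bp starts
 * at byte offset (start - T4VF_REGMAP_START) into the buffer, so each
 * register lands at its natural offset within the register map.
 */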
1871 
1872 /*
1873  * Copy our entire register map into the provided buffer.
1874  */
1875 static void cxgb4vf_get_regs(struct net_device *dev,
1876                  struct ethtool_regs *regs,
1877                  void *regbuf)
1878 {
1879     struct adapter *adapter = netdev2adap(dev);
1880 
1881     regs->version = mk_adap_vers(adapter);
1882 
1883     /*
1884      * Fill in register buffer with our register map.
1885      */
1886     memset(regbuf, 0, T4VF_REGMAP_SIZE);
1887 
1888     reg_block_dump(adapter, regbuf,
1889                T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_FIRST,
1890                T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_LAST);
1891     reg_block_dump(adapter, regbuf,
1892                T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_FIRST,
1893                T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_LAST);
1894 
1895     /* T5 adds new registers in the PL Register map.
1896      */
1897     reg_block_dump(adapter, regbuf,
1898                T4VF_PL_BASE_ADDR + T4VF_MOD_MAP_PL_FIRST,
1899                T4VF_PL_BASE_ADDR + (is_t4(adapter->params.chip)
1900                ? PL_VF_WHOAMI_A : PL_VF_REVISION_A));
1901     reg_block_dump(adapter, regbuf,
1902                T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_FIRST,
1903                T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_LAST);
1904 
1905     reg_block_dump(adapter, regbuf,
1906                T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_FIRST,
1907                T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_LAST);
1908 }
1909 
1910 /*
1911  * Report current Wake On LAN settings.
1912  */
1913 static void cxgb4vf_get_wol(struct net_device *dev,
1914                 struct ethtool_wolinfo *wol)
1915 {
1916     wol->supported = 0;
1917     wol->wolopts = 0;
1918     memset(&wol->sopass, 0, sizeof(wol->sopass));
1919 }
1920 
1921 /*
1922  * TCP Segmentation Offload flags which we support.
1923  */
1924 #define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
1925 #define VLAN_FEAT (NETIF_F_SG | NETIF_F_IP_CSUM | TSO_FLAGS | \
1926            NETIF_F_GRO | NETIF_F_IPV6_CSUM | NETIF_F_HIGHDMA)
1927 
1928 static const struct ethtool_ops cxgb4vf_ethtool_ops = {
1929     .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS |
1930                      ETHTOOL_COALESCE_RX_MAX_FRAMES,
1931     .get_link_ksettings = cxgb4vf_get_link_ksettings,
1932     .get_fecparam       = cxgb4vf_get_fecparam,
1933     .get_drvinfo        = cxgb4vf_get_drvinfo,
1934     .get_msglevel       = cxgb4vf_get_msglevel,
1935     .set_msglevel       = cxgb4vf_set_msglevel,
1936     .get_ringparam      = cxgb4vf_get_ringparam,
1937     .set_ringparam      = cxgb4vf_set_ringparam,
1938     .get_coalesce       = cxgb4vf_get_coalesce,
1939     .set_coalesce       = cxgb4vf_set_coalesce,
1940     .get_pauseparam     = cxgb4vf_get_pauseparam,
1941     .get_link       = ethtool_op_get_link,
1942     .get_strings        = cxgb4vf_get_strings,
1943     .set_phys_id        = cxgb4vf_phys_id,
1944     .get_sset_count     = cxgb4vf_get_sset_count,
1945     .get_ethtool_stats  = cxgb4vf_get_ethtool_stats,
1946     .get_regs_len       = cxgb4vf_get_regs_len,
1947     .get_regs       = cxgb4vf_get_regs,
1948     .get_wol        = cxgb4vf_get_wol,
1949 };
1950 
1951 /*
1952  * /sys/kernel/debug/cxgb4vf support code and data.
1953  * ================================================
1954  */
1955 
1956 /*
1957  * Show Firmware Mailbox Command/Reply Log
1958  *
1959  * Note that we don't do any locking when dumping the Firmware Mailbox Log so
1960  * it's possible that we can catch things during a log update and therefore
1961  * see partially corrupted log entries.  But it's probably Good Enough(tm).
1962  * If we ever decide that we want to make sure that we're dumping a coherent
1963  * log, we'd need to perform locking in the mailbox logging and in
1964  * mboxlog_open() where we'd need to grab the entire mailbox log in one go
1965  * like we do for the Firmware Device Log.  But as stated above, meh ...
1966  */
1967 static int mboxlog_show(struct seq_file *seq, void *v)
1968 {
1969     struct adapter *adapter = seq->private;
1970     struct mbox_cmd_log *log = adapter->mbox_log;
1971     struct mbox_cmd *entry;
1972     int entry_idx, i;
1973 
1974     if (v == SEQ_START_TOKEN) {
1975         seq_printf(seq,
1976                "%10s  %15s  %5s  %5s  %s\n",
1977                "Seq#", "Tstamp", "Atime", "Etime",
1978                "Command/Reply");
1979         return 0;
1980     }
1981 
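    /* The log is a ring buffer: log->cursor points at the oldest entry,
     * and v is the 1-based seq_file position offset by the header token,
     * so wrap the sum back into [0, log->size).
     */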
1982     entry_idx = log->cursor + ((uintptr_t)v - 2);
1983     if (entry_idx >= log->size)
1984         entry_idx -= log->size;
1985     entry = mbox_cmd_log_entry(log, entry_idx);
1986 
1987     /* skip over unused entries */
1988     if (entry->timestamp == 0)
1989         return 0;
1990 
1991     seq_printf(seq, "%10u  %15llu  %5d  %5d",
1992            entry->seqno, entry->timestamp,
1993            entry->access, entry->execute);
1994     for (i = 0; i < MBOX_LEN / 8; i++) {
1995         u64 flit = entry->cmd[i];
1996         u32 hi = (u32)(flit >> 32);
1997         u32 lo = (u32)flit;
1998 
1999         seq_printf(seq, "  %08x %08x", hi, lo);
2000     }
2001     seq_puts(seq, "\n");
2002     return 0;
2003 }
2004 
2005 static inline void *mboxlog_get_idx(struct seq_file *seq, loff_t pos)
2006 {
2007     struct adapter *adapter = seq->private;
2008     struct mbox_cmd_log *log = adapter->mbox_log;
2009 
2010     return ((pos <= log->size) ? (void *)(uintptr_t)(pos + 1) : NULL);
2011 }
2012 
2013 static void *mboxlog_start(struct seq_file *seq, loff_t *pos)
2014 {
2015     return *pos ? mboxlog_get_idx(seq, *pos) : SEQ_START_TOKEN;
2016 }
2017 
2018 static void *mboxlog_next(struct seq_file *seq, void *v, loff_t *pos)
2019 {
2020     ++*pos;
2021     return mboxlog_get_idx(seq, *pos);
2022 }
2023 
2024 static void mboxlog_stop(struct seq_file *seq, void *v)
2025 {
2026 }
2027 
2028 static const struct seq_operations mboxlog_sops = {
2029     .start = mboxlog_start,
2030     .next  = mboxlog_next,
2031     .stop  = mboxlog_stop,
2032     .show  = mboxlog_show
2033 };
2034 
2035 DEFINE_SEQ_ATTRIBUTE(mboxlog);
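/*
 * DEFINE_SEQ_ATTRIBUTE(mboxlog) generates the mboxlog_fops used in
 * debugfs_files[] below.  Roughly (see include/linux/seq_file.h for the
 * authoritative definition), it expands to:
 *
 *     static int mboxlog_open(struct inode *inode, struct file *file)
 *     {
 *         int ret = seq_open(file, &mboxlog_sops);
 *
 *         if (!ret && inode->i_private) {
 *             struct seq_file *seq_f = file->private_data;
 *
 *             seq_f->private = inode->i_private;
 *         }
 *         return ret;
 *     }
 *
 *     static const struct file_operations mboxlog_fops = {
 *         .owner   = THIS_MODULE,
 *         .open    = mboxlog_open,
 *         .read    = seq_read,
 *         .llseek  = seq_lseek,
 *         .release = seq_release,
 *     };
 *
 * which is how the adapter pointer handed to debugfs_create_file() in
 * setup_debugfs() (stored in i_private) becomes seq->private here.
 */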
2036 /*
2037  * Show SGE Queue Set information.  We display QPL Queue Sets per line.
2038  */
2039 #define QPL 4
2040 
2041 static int sge_qinfo_show(struct seq_file *seq, void *v)
2042 {
2043     struct adapter *adapter = seq->private;
2044     int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
2045     int qs, r = (uintptr_t)v - 1;
2046 
2047     if (r)
2048         seq_putc(seq, '\n');
2049 
2050     #define S3(fmt_spec, s, v) \
2051         do {\
2052             seq_printf(seq, "%-12s", s); \
2053             for (qs = 0; qs < n; ++qs) \
2054                 seq_printf(seq, " %16" fmt_spec, v); \
2055             seq_putc(seq, '\n'); \
2056         } while (0)
2057     #define S(s, v)     S3("s", s, v)
2058     #define T(s, v)     S3("u", s, txq[qs].v)
2059     #define R(s, v)     S3("u", s, rxq[qs].v)
2060 
2061     if (r < eth_entries) {
2062         const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
2063         const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
2064         int n = min(QPL, adapter->sge.ethqsets - QPL * r);
2065 
2066         S("QType:", "Ethernet");
2067         S("Interface:",
2068           (rxq[qs].rspq.netdev
2069            ? rxq[qs].rspq.netdev->name
2070            : "N/A"));
2071         S3("d", "Port:",
2072            (rxq[qs].rspq.netdev
2073             ? ((struct port_info *)
2074                netdev_priv(rxq[qs].rspq.netdev))->port_id
2075             : -1));
2076         T("TxQ ID:", q.abs_id);
2077         T("TxQ size:", q.size);
2078         T("TxQ inuse:", q.in_use);
2079         T("TxQ PIdx:", q.pidx);
2080         T("TxQ CIdx:", q.cidx);
2081         R("RspQ ID:", rspq.abs_id);
2082         R("RspQ size:", rspq.size);
2083         R("RspQE size:", rspq.iqe_len);
2084         S3("u", "Intr delay:", qtimer_val(adapter, &rxq[qs].rspq));
2085         S3("u", "Intr pktcnt:",
2086            adapter->sge.counter_val[rxq[qs].rspq.pktcnt_idx]);
2087         R("RspQ CIdx:", rspq.cidx);
2088         R("RspQ Gen:", rspq.gen);
2089         R("FL ID:", fl.abs_id);
2090         R("FL size:", fl.size - MIN_FL_RESID);
2091         R("FL avail:", fl.avail);
2092         R("FL PIdx:", fl.pidx);
2093         R("FL CIdx:", fl.cidx);
2094         return 0;
2095     }
2096 
2097     r -= eth_entries;
2098     if (r == 0) {
2099         const struct sge_rspq *evtq = &adapter->sge.fw_evtq;
2100 
2101         seq_printf(seq, "%-12s %16s\n", "QType:", "FW event queue");
2102         seq_printf(seq, "%-12s %16u\n", "RspQ ID:", evtq->abs_id);
2103         seq_printf(seq, "%-12s %16u\n", "Intr delay:",
2104                qtimer_val(adapter, evtq));
2105         seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
2106                adapter->sge.counter_val[evtq->pktcnt_idx]);
2107         seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", evtq->cidx);
2108         seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", evtq->gen);
2109     } else if (r == 1) {
2110         const struct sge_rspq *intrq = &adapter->sge.intrq;
2111 
2112         seq_printf(seq, "%-12s %16s\n", "QType:", "Interrupt Queue");
2113         seq_printf(seq, "%-12s %16u\n", "RspQ ID:", intrq->abs_id);
2114         seq_printf(seq, "%-12s %16u\n", "Intr delay:",
2115                qtimer_val(adapter, intrq));
2116         seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
2117                adapter->sge.counter_val[intrq->pktcnt_idx]);
2118         seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", intrq->cidx);
2119         seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", intrq->gen);
2120     }
2121 
2122     #undef R
2123     #undef T
2124     #undef S
2125     #undef S3
2126 
2127     return 0;
2128 }
2129 
2130 /*
2131  * Return the number of "entries" in our "file".  We group the multi-Queue
2132  * sections with QPL Queue Sets per "entry".  The sections of the output are:
2133  *
2134  *     Ethernet RX/TX Queue Sets
2135  *     Firmware Event Queue
2136  *     Forwarded Interrupt Queue (if in MSI mode)
2137  */
2138 static int sge_queue_entries(const struct adapter *adapter)
2139 {
2140     return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
2141         ((adapter->flags & CXGB4VF_USING_MSI) != 0);
2142 }
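/*
 * Worked example (assumed numbers): with 8 Ethernet Queue Sets, QPL = 4
 * and MSI interrupt mode, this is DIV_ROUND_UP(8, 4) + 1 + 1 = 4
 * entries: two lines' worth of Ethernet Queue Sets, the Firmware Event
 * Queue and the Forwarded Interrupt Queue.
 */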
2143 
2144 static void *sge_queue_start(struct seq_file *seq, loff_t *pos)
2145 {
2146     int entries = sge_queue_entries(seq->private);
2147 
2148     return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2149 }
2150 
2151 static void sge_queue_stop(struct seq_file *seq, void *v)
2152 {
2153 }
2154 
2155 static void *sge_queue_next(struct seq_file *seq, void *v, loff_t *pos)
2156 {
2157     int entries = sge_queue_entries(seq->private);
2158 
2159     ++*pos;
2160     return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2161 }
2162 
2163 static const struct seq_operations sge_qinfo_sops = {
2164     .start = sge_queue_start,
2165     .next  = sge_queue_next,
2166     .stop  = sge_queue_stop,
2167     .show  = sge_qinfo_show
2168 };
2169 
2170 DEFINE_SEQ_ATTRIBUTE(sge_qinfo);
2171 
2172 /*
2173  * Show SGE Queue Set statistics.  We display QPL Queue Sets per line.
2174  */
2175 #define QPL 4
2176 
2177 static int sge_qstats_show(struct seq_file *seq, void *v)
2178 {
2179     struct adapter *adapter = seq->private;
2180     int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
2181     int qs, r = (uintptr_t)v - 1;
2182 
2183     if (r)
2184         seq_putc(seq, '\n');
2185 
2186     #define S3(fmt, s, v) \
2187         do { \
2188             seq_printf(seq, "%-16s", s); \
2189             for (qs = 0; qs < n; ++qs) \
2190                 seq_printf(seq, " %8" fmt, v); \
2191             seq_putc(seq, '\n'); \
2192         } while (0)
2193     #define S(s, v)     S3("s", s, v)
2194 
2195     #define T3(fmt, s, v)   S3(fmt, s, txq[qs].v)
2196     #define T(s, v)     T3("lu", s, v)
2197 
2198     #define R3(fmt, s, v)   S3(fmt, s, rxq[qs].v)
2199     #define R(s, v)     R3("lu", s, v)
2200 
2201     if (r < eth_entries) {
2202         const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
2203         const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
2204         int n = min(QPL, adapter->sge.ethqsets - QPL * r);
2205 
2206         S("QType:", "Ethernet");
2207         S("Interface:",
2208           (rxq[qs].rspq.netdev
2209            ? rxq[qs].rspq.netdev->name
2210            : "N/A"));
2211         R3("u", "RspQNullInts:", rspq.unhandled_irqs);
2212         R("RxPackets:", stats.pkts);
2213         R("RxCSO:", stats.rx_cso);
2214         R("VLANxtract:", stats.vlan_ex);
2215         R("LROmerged:", stats.lro_merged);
2216         R("LROpackets:", stats.lro_pkts);
2217         R("RxDrops:", stats.rx_drops);
2218         T("TSO:", tso);
2219         T("TxCSO:", tx_cso);
2220         T("VLANins:", vlan_ins);
2221         T("TxQFull:", q.stops);
2222         T("TxQRestarts:", q.restarts);
2223         T("TxMapErr:", mapping_err);
2224         R("FLAllocErr:", fl.alloc_failed);
2225         R("FLLrgAlcErr:", fl.large_alloc_failed);
2226         R("FLStarving:", fl.starving);
2227         return 0;
2228     }
2229 
2230     r -= eth_entries;
2231     if (r == 0) {
2232         const struct sge_rspq *evtq = &adapter->sge.fw_evtq;
2233 
2234         seq_printf(seq, "%-8s %16s\n", "QType:", "FW event queue");
2235         seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
2236                evtq->unhandled_irqs);
2237         seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", evtq->cidx);
2238         seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", evtq->gen);
2239     } else if (r == 1) {
2240         const struct sge_rspq *intrq = &adapter->sge.intrq;
2241 
2242         seq_printf(seq, "%-8s %16s\n", "QType:", "Interrupt Queue");
2243         seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
2244                intrq->unhandled_irqs);
2245         seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", intrq->cidx);
2246         seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", intrq->gen);
2247     }
2248 
2249     #undef R
2250     #undef T
2251     #undef S
2252     #undef R3
2253     #undef T3
2254     #undef S3
2255 
2256     return 0;
2257 }
2258 
2259 /*
2260  * Return the number of "entries" in our "file".  We group the multi-Queue
2261  * sections with QPL Queue Sets per "entry".  The sections of the output are:
2262  *
2263  *     Ethernet RX/TX Queue Sets
2264  *     Firmware Event Queue
2265  *     Forwarded Interrupt Queue (if in MSI mode)
2266  */
2267 static int sge_qstats_entries(const struct adapter *adapter)
2268 {
2269     return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
2270         ((adapter->flags & CXGB4VF_USING_MSI) != 0);
2271 }
2272 
2273 static void *sge_qstats_start(struct seq_file *seq, loff_t *pos)
2274 {
2275     int entries = sge_qstats_entries(seq->private);
2276 
2277     return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2278 }
2279 
2280 static void sge_qstats_stop(struct seq_file *seq, void *v)
2281 {
2282 }
2283 
2284 static void *sge_qstats_next(struct seq_file *seq, void *v, loff_t *pos)
2285 {
2286     int entries = sge_qstats_entries(seq->private);
2287 
2288     (*pos)++;
2289     return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2290 }
2291 
2292 static const struct seq_operations sge_qstats_sops = {
2293     .start = sge_qstats_start,
2294     .next  = sge_qstats_next,
2295     .stop  = sge_qstats_stop,
2296     .show  = sge_qstats_show
2297 };
2298 
2299 DEFINE_SEQ_ATTRIBUTE(sge_qstats);
2300 
2301 /*
2302  * Show PCI-E SR-IOV Virtual Function Resource Limits.
2303  */
2304 static int resources_show(struct seq_file *seq, void *v)
2305 {
2306     struct adapter *adapter = seq->private;
2307     struct vf_resources *vfres = &adapter->params.vfres;
2308 
2309     #define S(desc, fmt, var) \
2310         seq_printf(seq, "%-60s " fmt "\n", \
2311                desc " (" #var "):", vfres->var)
2312 
2313     S("Virtual Interfaces", "%d", nvi);
2314     S("Egress Queues", "%d", neq);
2315     S("Ethernet Control", "%d", nethctrl);
2316     S("Ingress Queues/w Free Lists/Interrupts", "%d", niqflint);
2317     S("Ingress Queues", "%d", niq);
2318     S("Traffic Class", "%d", tc);
2319     S("Port Access Rights Mask", "%#x", pmask);
2320     S("MAC Address Filters", "%d", nexactf);
2321     S("Firmware Command Read Capabilities", "%#x", r_caps);
2322     S("Firmware Command Write/Execute Capabilities", "%#x", wx_caps);
2323 
2324     #undef S
2325 
2326     return 0;
2327 }
2328 DEFINE_SHOW_ATTRIBUTE(resources);
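/*
 * DEFINE_SHOW_ATTRIBUTE(resources) similarly generates resources_fops,
 * but wraps resources_show() in single_open() (with i_private as the
 * seq->private data), which suits this single-shot dump -- there's no
 * iterator to drive.
 */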
2329 
2330 /*
2331  * Show Virtual Interfaces.
2332  */
2333 static int interfaces_show(struct seq_file *seq, void *v)
2334 {
2335     if (v == SEQ_START_TOKEN) {
2336         seq_puts(seq, "Interface  Port   VIID\n");
2337     } else {
2338         struct adapter *adapter = seq->private;
2339         int pidx = (uintptr_t)v - 2;
2340         struct net_device *dev = adapter->port[pidx];
2341         struct port_info *pi = netdev_priv(dev);
2342 
2343         seq_printf(seq, "%9s  %4d  %#5x\n",
2344                dev->name, pi->port_id, pi->viid);
2345     }
2346     return 0;
2347 }
2348 
2349 static inline void *interfaces_get_idx(struct adapter *adapter, loff_t pos)
2350 {
2351     return pos <= adapter->params.nports
2352         ? (void *)(uintptr_t)(pos + 1)
2353         : NULL;
2354 }
2355 
2356 static void *interfaces_start(struct seq_file *seq, loff_t *pos)
2357 {
2358     return *pos
2359         ? interfaces_get_idx(seq->private, *pos)
2360         : SEQ_START_TOKEN;
2361 }
2362 
2363 static void *interfaces_next(struct seq_file *seq, void *v, loff_t *pos)
2364 {
2365     (*pos)++;
2366     return interfaces_get_idx(seq->private, *pos);
2367 }
2368 
2369 static void interfaces_stop(struct seq_file *seq, void *v)
2370 {
2371 }
2372 
2373 static const struct seq_operations interfaces_sops = {
2374     .start = interfaces_start,
2375     .next  = interfaces_next,
2376     .stop  = interfaces_stop,
2377     .show  = interfaces_show
2378 };
2379 
2380 DEFINE_SEQ_ATTRIBUTE(interfaces);
2381 
2382 /*
2383  * /sys/kernel/debug/cxgb4vf/ files list.
2384  */
2385 struct cxgb4vf_debugfs_entry {
2386     const char *name;       /* name of debugfs node */
2387     umode_t mode;           /* file system mode */
2388     const struct file_operations *fops;
2389 };
2390 
2391 static struct cxgb4vf_debugfs_entry debugfs_files[] = {
2392     { "mboxlog",    0444, &mboxlog_fops },
2393     { "sge_qinfo",  0444, &sge_qinfo_fops },
2394     { "sge_qstats", 0444, &sge_qstats_fops },
2395     { "resources",  0444, &resources_fops },
2396     { "interfaces", 0444, &interfaces_fops },
2397 };
2398 
2399 /*
2400  * Module and device initialization and cleanup code.
2401  * ==================================================
2402  */
2403 
2404 /*
2405  * Set up our /sys/kernel/debug/cxgb4vf sub-nodes.  We assume that the
2406  * directory (debugfs_root) has already been set up.
2407  */
2408 static int setup_debugfs(struct adapter *adapter)
2409 {
2410     int i;
2411 
2412     BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2413 
2414     /*
2415      * Debugfs support is best effort.
2416      */
2417     for (i = 0; i < ARRAY_SIZE(debugfs_files); i++)
2418         debugfs_create_file(debugfs_files[i].name,
2419                     debugfs_files[i].mode,
2420                     adapter->debugfs_root, adapter,
2421                     debugfs_files[i].fops);
2422 
2423     return 0;
2424 }
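/*
 * Once the probe code below creates the per-device directory (named
 * after the PCI device), these show up as, e.g. (path illustrative),
 *
 *     /sys/kernel/debug/cxgb4vf/0000:04:00.1/resources
 *
 * and can simply be cat'ed to dump the corresponding state.
 */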
2425 
2426 /*
2427  * Tear down the /sys/kernel/debug/cxgb4vf sub-nodes created above.  We leave
2428  * it to our caller to tear down the directory (debugfs_root).
2429  */
2430 static void cleanup_debugfs(struct adapter *adapter)
2431 {
2432     BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2433 
2434     /*
2435      * Unlike our sister routine cleanup_proc(), we don't need to remove
2436      * individual entries because a call will be made to
2437      * debugfs_remove_recursive().  We just need to clean up any ancillary
2438      * persistent state.
2439      */
2440     /* nothing to do */
2441 }
2442 
2443 /* Figure out how many Ports and Queue Sets we can support.  This depends on
2444  * knowing our Virtual Function Resources and may be called a second time if
2445  * we fall back from MSI-X to MSI Interrupt Mode.
2446  */
2447 static void size_nports_qsets(struct adapter *adapter)
2448 {
2449     struct vf_resources *vfres = &adapter->params.vfres;
2450     unsigned int ethqsets, pmask_nports;
2451 
2452     /* The number of "ports" which we support is equal to the number of
2453      * Virtual Interfaces with which we've been provisioned.
2454      */
2455     adapter->params.nports = vfres->nvi;
2456     if (adapter->params.nports > MAX_NPORTS) {
2457         dev_warn(adapter->pdev_dev, "only using %d of %d maximum"
2458              " allowed virtual interfaces\n", MAX_NPORTS,
2459              adapter->params.nports);
2460         adapter->params.nports = MAX_NPORTS;
2461     }
2462 
2463     /* We may have been provisioned with more VIs than the number of
2464      * ports we're allowed to access (our Port Access Rights Mask).
2465      * This is obviously a configuration conflict but we don't want to
2466      * crash the kernel or anything silly just because of that.
2467      */
2468     pmask_nports = hweight32(adapter->params.vfres.pmask);
2469     if (pmask_nports < adapter->params.nports) {
2470         dev_warn(adapter->pdev_dev, "only using %d of %d provisioned"
2471              " virtual interfaces; limited by Port Access Rights"
2472              " mask %#x\n", pmask_nports, adapter->params.nports,
2473              adapter->params.vfres.pmask);
2474         adapter->params.nports = pmask_nports;
2475     }
2476 
2477     /* We need to reserve an Ingress Queue for the Asynchronous Firmware
2478      * Event Queue.  And if we're using MSI Interrupts, we'll also need to
2479      * reserve an Ingress Queue for Forwarded Interrupts.
2480      *
2481      * The rest of the FL/Intr-capable ingress queues will be matched up
2482      * one-for-one with Ethernet/Control egress queues in order to form
2483      * "Queue Sets" which will be apportioned between the "ports".  For
2484      * each Queue Set, we'll need the ability to allocate two Egress
2485      * Contexts -- one for the Ingress Queue Free List and one for the TX
2486      * Ethernet Queue.
2487      *
2488      * Note that even if we're currently configured to use MSI-X
2489      * Interrupts (module variable msi == MSI_MSIX) we may get downgraded
2490      * to MSI Interrupts if we can't get enough MSI-X Interrupts.  If that
2491      * happens we'll need to adjust things later.
2492      */
2493     ethqsets = vfres->niqflint - 1 - (msi == MSI_MSI);
2494     if (vfres->nethctrl != ethqsets)
2495         ethqsets = min(vfres->nethctrl, ethqsets);
2496     if (vfres->neq < ethqsets*2)
2497         ethqsets = vfres->neq/2;
2498     if (ethqsets > MAX_ETH_QSETS)
2499         ethqsets = MAX_ETH_QSETS;
2500     adapter->sge.max_ethqsets = ethqsets;
2501 
2502     if (adapter->sge.max_ethqsets < adapter->params.nports) {
2503         dev_warn(adapter->pdev_dev, "only using %d of %d available"
2504              " virtual interfaces (too few Queue Sets)\n",
2505              adapter->sge.max_ethqsets, adapter->params.nports);
2506         adapter->params.nports = adapter->sge.max_ethqsets;
2507     }
2508 }
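/*
 * Worked example (assumed provisioning): with nvi = 2, pmask = 0x3,
 * niqflint = 34, nethctrl = 32 and neq = 64 under MSI-X (so no Forwarded
 * Interrupt Queue is reserved), we get
 * ethqsets = min(34 - 1, 32, 64 / 2) = 32 (subject to the MAX_ETH_QSETS
 * cap), leaving both ports usable.
 */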
2509 
2510 /*
2511  * Perform early "adapter" initialization.  This is where we discover what
2512  * adapter parameters we're going to be using and initialize basic adapter
2513  * hardware support.
2514  */
2515 static int adap_init0(struct adapter *adapter)
2516 {
2517     struct sge_params *sge_params = &adapter->params.sge;
2518     struct sge *s = &adapter->sge;
2519     int err;
2520     u32 param, val = 0;
2521 
2522     /*
2523      * Some environments do not properly handle PCIe FLRs -- e.g. in Linux
2524      * 2.6.31 and later we can't call pci_reset_function() in order to
2525      * issue an FLR because of a self-deadlock on the device semaphore.
2526      * Meanwhile, the OS infrastructure doesn't issue FLRs in all the
2527      * cases where they're needed -- for instance, some versions of KVM
2528      * fail to reset "Assigned Devices" when the VM reboots.  Therefore we
2529      * use the firmware based reset in order to reset any per function
2530      * state.
2531      */
2532     err = t4vf_fw_reset(adapter);
2533     if (err < 0) {
2534         dev_err(adapter->pdev_dev, "FW reset failed: err=%d\n", err);
2535         return err;
2536     }
2537 
2538     /*
2539      * Grab basic operational parameters.  These will predominantly have
2540      * been set up by the Physical Function Driver or will be hard coded
2541      * into the adapter.  We just have to live with them ...  Note that
2542      * we _must_ get our VPD parameters before our SGE parameters because
2543      * we need to know the adapter's core clock from the VPD in order to
2544      * properly decode the SGE Timer Values.
2545      */
2546     err = t4vf_get_dev_params(adapter);
2547     if (err) {
2548         dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2549             " device parameters: err=%d\n", err);
2550         return err;
2551     }
2552     err = t4vf_get_vpd_params(adapter);
2553     if (err) {
2554         dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2555             " VPD parameters: err=%d\n", err);
2556         return err;
2557     }
2558     err = t4vf_get_sge_params(adapter);
2559     if (err) {
2560         dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2561             " SGE parameters: err=%d\n", err);
2562         return err;
2563     }
2564     err = t4vf_get_rss_glb_config(adapter);
2565     if (err) {
2566         dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2567             " RSS parameters: err=%d\n", err);
2568         return err;
2569     }
2570     if (adapter->params.rss.mode !=
2571         FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL) {
2572         dev_err(adapter->pdev_dev, "unable to operate with global RSS"
2573             " mode %d\n", adapter->params.rss.mode);
2574         return -EINVAL;
2575     }
2576     err = t4vf_sge_init(adapter);
2577     if (err) {
2578         dev_err(adapter->pdev_dev, "unable to use adapter parameters:"
2579             " err=%d\n", err);
2580         return err;
2581     }
2582 
2583     /* If we're running on newer firmware, let it know that we're
2584      * prepared to deal with encapsulated CPL messages.  Older
2585      * firmware won't understand this and we'll just get
2586      * unencapsulated messages ...
2587      */
2588     param = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) |
2589         FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_CPLFW4MSG_ENCAP);
2590     val = 1;
2591     (void) t4vf_set_params(adapter, 1, &param, &val);
2592 
2593     /*
2594      * Retrieve our RX interrupt holdoff timer values and counter
2595      * threshold values from the SGE parameters.
2596      */
2597     s->timer_val[0] = core_ticks_to_us(adapter,
2598         TIMERVALUE0_G(sge_params->sge_timer_value_0_and_1));
2599     s->timer_val[1] = core_ticks_to_us(adapter,
2600         TIMERVALUE1_G(sge_params->sge_timer_value_0_and_1));
2601     s->timer_val[2] = core_ticks_to_us(adapter,
2602         TIMERVALUE0_G(sge_params->sge_timer_value_2_and_3));
2603     s->timer_val[3] = core_ticks_to_us(adapter,
2604         TIMERVALUE1_G(sge_params->sge_timer_value_2_and_3));
2605     s->timer_val[4] = core_ticks_to_us(adapter,
2606         TIMERVALUE0_G(sge_params->sge_timer_value_4_and_5));
2607     s->timer_val[5] = core_ticks_to_us(adapter,
2608         TIMERVALUE1_G(sge_params->sge_timer_value_4_and_5));
2609 
2610     s->counter_val[0] = THRESHOLD_0_G(sge_params->sge_ingress_rx_threshold);
2611     s->counter_val[1] = THRESHOLD_1_G(sge_params->sge_ingress_rx_threshold);
2612     s->counter_val[2] = THRESHOLD_2_G(sge_params->sge_ingress_rx_threshold);
2613     s->counter_val[3] = THRESHOLD_3_G(sge_params->sge_ingress_rx_threshold);
2614 
2615     /*
2616      * Grab our Virtual Interface resource allocation, extract the
2617      * features that we're interested in and do a bit of sanity testing on
2618      * what we discover.
2619      */
2620     err = t4vf_get_vfres(adapter);
2621     if (err) {
2622         dev_err(adapter->pdev_dev, "unable to get virtual interface"
2623             " resources: err=%d\n", err);
2624         return err;
2625     }
2626 
2627     /* Check for various parameter sanity issues */
2628     if (adapter->params.vfres.pmask == 0) {
2629         dev_err(adapter->pdev_dev, "no port access configured/"
2630             "usable!\n");
2631         return -EINVAL;
2632     }
2633     if (adapter->params.vfres.nvi == 0) {
2634         dev_err(adapter->pdev_dev, "no virtual interfaces configured/"
2635             "usable!\n");
2636         return -EINVAL;
2637     }
2638 
2639     /* Initialize nports and max_ethqsets now that we have our Virtual
2640      * Function Resources.
2641      */
2642     size_nports_qsets(adapter);
2643 
2644     adapter->flags |= CXGB4VF_FW_OK;
2645     return 0;
2646 }
2647 
2648 static inline void init_rspq(struct sge_rspq *rspq, u8 timer_idx,
2649                  u8 pkt_cnt_idx, unsigned int size,
2650                  unsigned int iqe_size)
2651 {
2652     rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) |
2653                  (pkt_cnt_idx < SGE_NCOUNTERS ?
2654                   QINTR_CNT_EN_F : 0));
2655     rspq->pktcnt_idx = (pkt_cnt_idx < SGE_NCOUNTERS
2656                 ? pkt_cnt_idx
2657                 : 0);
2658     rspq->iqe_len = iqe_size;
2659     rspq->size = size;
2660 }
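/*
 * For example, the firmware event queue setup below passes
 * (SGE_TIMER_RSTRT_CNTR, 0, 512, iqe_size): the timer index selects
 * "restart the packet counter" behavior and, since 0 < SGE_NCOUNTERS,
 * QINTR_CNT_EN_F is set with interrupt counter threshold index 0.
 */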
2661 
2662 /*
2663  * Perform default configuration of DMA queues depending on the number and
2664  * type of ports we found and the number of available CPUs.  Most settings can
2665  * be modified by the admin via ethtool and cxgbtool prior to the adapter
2666  * being brought up for the first time.
2667  */
2668 static void cfg_queues(struct adapter *adapter)
2669 {
2670     struct sge *s = &adapter->sge;
2671     int q10g, n10g, qidx, pidx, qs;
2672     size_t iqe_size;
2673 
2674     /*
2675      * We should not be called till we know how many Queue Sets we can
2676      * support.  In particular, this means that we need to know what kind
2677      * of interrupts we'll be using ...
2678      */
2679     BUG_ON((adapter->flags &
2680            (CXGB4VF_USING_MSIX | CXGB4VF_USING_MSI)) == 0);
2681 
2682     /*
2683      * Count the number of 10GbE Virtual Interfaces that we have.
2684      */
2685     n10g = 0;
2686     for_each_port(adapter, pidx)
2687         n10g += is_x_10g_port(&adap2pinfo(adapter, pidx)->link_cfg);
2688 
2689     /*
2690      * We default to 1 queue per non-10G port and up to # of cores queues
2691      * per 10G port.
2692      */
2693     if (n10g == 0)
2694         q10g = 0;
2695     else {
2696         int n1g = (adapter->params.nports - n10g);
2697         q10g = (adapter->sge.max_ethqsets - n1g) / n10g;
2698         if (q10g > num_online_cpus())
2699             q10g = num_online_cpus();
2700     }
2701 
2702     /*
2703      * Allocate the "Queue Sets" to the various Virtual Interfaces.
2704      * The layout will be established in setup_sge_queues() when the
2705      * adapter is brought up for the first time.
2706      */
2707     qidx = 0;
2708     for_each_port(adapter, pidx) {
2709         struct port_info *pi = adap2pinfo(adapter, pidx);
2710 
2711         pi->first_qset = qidx;
2712         pi->nqsets = is_x_10g_port(&pi->link_cfg) ? q10g : 1;
2713         qidx += pi->nqsets;
2714     }
2715     s->ethqsets = qidx;
2716 
2717     /*
2718      * The Ingress Queue Entry Size for our various Response Queues needs
2719      * to be big enough to accommodate the largest message we can receive
2720      * from the chip/firmware; which is 64 bytes ...
2721      */
2722     iqe_size = 64;
2723 
2724     /*
2725      * Set up default Queue Set parameters ...  Start off with the
2726      * shortest interrupt holdoff timer.
2727      */
2728     for (qs = 0; qs < s->max_ethqsets; qs++) {
2729         struct sge_eth_rxq *rxq = &s->ethrxq[qs];
2730         struct sge_eth_txq *txq = &s->ethtxq[qs];
2731 
2732         init_rspq(&rxq->rspq, 0, 0, 1024, iqe_size);
2733         rxq->fl.size = 72;
2734         txq->q.size = 1024;
2735     }
2736 
2737     /*
2738      * The firmware event queue is used for link state changes and
2739      * notifications of TX DMA completions.
2740      */
2741     init_rspq(&s->fw_evtq, SGE_TIMER_RSTRT_CNTR, 0, 512, iqe_size);
2742 
2743     /*
2744      * The forwarded interrupt queue is used when we're in MSI interrupt
2745      * mode.  In this mode all interrupts associated with RX queues will
2746      * be forwarded to a single queue which we'll associate with our MSI
2747      * interrupt vector.  The messages dropped in the forwarded interrupt
2748      * queue will indicate which ingress queue needs servicing ...  This
2749      * queue needs to be large enough to accommodate all of the ingress
2750      * queues which are forwarding their interrupt (+1 to prevent the PIDX
2751      * from equalling the CIDX if every ingress queue has an outstanding
2752      * interrupt).  The queue doesn't need to be any larger because no
2753      * ingress queue will ever have more than one outstanding interrupt at
2754      * any time ...
2755      */
2756     init_rspq(&s->intrq, SGE_TIMER_RSTRT_CNTR, 0, MSIX_ENTRIES + 1,
2757           iqe_size);
2758 }
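/*
 * Worked example (assumed topology): with two 10G ports, no 1G ports,
 * max_ethqsets = 16 and 8 online CPUs, q10g = min((16 - 0) / 2, 8) = 8,
 * so the ports get first_qset = 0 and 8 respectively and s->ethqsets
 * ends up as 16.
 */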
2759 
2760 /*
2761  * Reduce the number of Ethernet queues across all ports to at most n.
2762  * n provides at least one queue per port.
2763  */
2764 static void reduce_ethqs(struct adapter *adapter, int n)
2765 {
2766     int i;
2767     struct port_info *pi;
2768 
2769     /*
2770      * While we have too many active Ethernet Queue Sets, iterate across the
2771      * "ports" and reduce their individual Queue Set allocations.
2772      */
2773     BUG_ON(n < adapter->params.nports);
2774     while (n < adapter->sge.ethqsets)
2775         for_each_port(adapter, i) {
2776             pi = adap2pinfo(adapter, i);
2777             if (pi->nqsets > 1) {
2778                 pi->nqsets--;
2779                 adapter->sge.ethqsets--;
2780                 if (adapter->sge.ethqsets <= n)
2781                     break;
2782             }
2783         }
2784 
2785     /*
2786      * Reassign the starting Queue Sets for each of the "ports" ...
2787      */
2788     n = 0;
2789     for_each_port(adapter, i) {
2790         pi = adap2pinfo(adapter, i);
2791         pi->first_qset = n;
2792         n += pi->nqsets;
2793     }
2794 }
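/*
 * Worked example: two ports at 8 Queue Sets each (16 total) reduced to
 * n = 10 proceeds 8/8 -> 7/8 -> 7/7 -> 6/7 -> 6/6 -> 5/6 -> 5/5, at
 * which point ethqsets == 10 and the loop stops; first_qset is then
 * reassigned to 0 and 5.
 */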
2795 
2796 /*
2797  * We need to grab enough MSI-X vectors to cover our interrupt needs.  Ideally
2798  * we get a separate MSI-X vector for every "Queue Set" plus any extras we
2799  * need.  Minimally we need one for every Virtual Interface plus those needed
2800  * for our "extras".  Note that this process may lower the maximum number of
2801  * allowed Queue Sets ...
2802  */
2803 static int enable_msix(struct adapter *adapter)
2804 {
2805     int i, want, need, nqsets;
2806     struct msix_entry entries[MSIX_ENTRIES];
2807     struct sge *s = &adapter->sge;
2808 
2809     for (i = 0; i < MSIX_ENTRIES; ++i)
2810         entries[i].entry = i;
2811 
2812     /*
2813      * We _want_ enough MSI-X interrupts to cover all of our "Queue Sets"
2814      * plus those needed for our "extras" (for example, the firmware
2815      * message queue).  We _need_ at least one "Queue Set" per Virtual
2816      * Interface plus those needed for our "extras".  So now we get to see
2817      * if the song is right ...
2818      */
2819     want = s->max_ethqsets + MSIX_EXTRAS;
2820     need = adapter->params.nports + MSIX_EXTRAS;
2821 
2822     want = pci_enable_msix_range(adapter->pdev, entries, need, want);
2823     if (want < 0)
2824         return want;
2825 
2826     nqsets = want - MSIX_EXTRAS;
2827     if (nqsets < s->max_ethqsets) {
2828         dev_warn(adapter->pdev_dev, "only enough MSI-X vectors"
2829              " for %d Queue Sets\n", nqsets);
2830         s->max_ethqsets = nqsets;
2831         if (nqsets < s->ethqsets)
2832             reduce_ethqs(adapter, nqsets);
2833     }
2834     for (i = 0; i < want; ++i)
2835         adapter->msix_info[i].vec = entries[i].vector;
2836 
2837     return 0;
2838 }
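/*
 * pci_enable_msix_range() returns the number of vectors it actually
 * allocated when it can satisfy at least "need", or a negative errno.
 * As an assumed example with max_ethqsets = 16 and two ports: if only
 * 8 + MSIX_EXTRAS vectors come back, max_ethqsets drops to 8 and
 * reduce_ethqs() redistributes the Queue Sets across the ports.
 */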
2839 
2840 static const struct net_device_ops cxgb4vf_netdev_ops   = {
2841     .ndo_open       = cxgb4vf_open,
2842     .ndo_stop       = cxgb4vf_stop,
2843     .ndo_start_xmit     = t4vf_eth_xmit,
2844     .ndo_get_stats      = cxgb4vf_get_stats,
2845     .ndo_set_rx_mode    = cxgb4vf_set_rxmode,
2846     .ndo_set_mac_address    = cxgb4vf_set_mac_addr,
2847     .ndo_validate_addr  = eth_validate_addr,
2848     .ndo_eth_ioctl      = cxgb4vf_do_ioctl,
2849     .ndo_change_mtu     = cxgb4vf_change_mtu,
2850     .ndo_fix_features   = cxgb4vf_fix_features,
2851     .ndo_set_features   = cxgb4vf_set_features,
2852 #ifdef CONFIG_NET_POLL_CONTROLLER
2853     .ndo_poll_controller    = cxgb4vf_poll_controller,
2854 #endif
2855 };
2856 
2857 /**
2858  *  cxgb4vf_get_port_mask - Get the port mask for the VF based on the MAC
2859  *              addresses stored on the adapter
2860  *  @adapter: The adapter
2861  *
2862  *  Find the port mask for the VF based on the MAC addresses
2863  *  stored on the adapter.  If no MAC address is stored on the
2864  *  adapter for the VF, use the port mask received from the
2865  *  firmware.
2866  */
2867 static unsigned int cxgb4vf_get_port_mask(struct adapter *adapter)
2868 {
2869     unsigned int naddr = 1, pidx = 0;
2870     unsigned int pmask, rmask = 0;
2871     u8 mac[ETH_ALEN];
2872     int err;
2873 
2874     pmask = adapter->params.vfres.pmask;
2875     while (pmask) {
2876         if (pmask & 1) {
2877             err = t4vf_get_vf_mac_acl(adapter, pidx, &naddr, mac);
2878             if (!err && !is_zero_ether_addr(mac))
2879                 rmask |= (1 << pidx);
2880         }
2881         pmask >>= 1;
2882         pidx++;
2883     }
2884     if (!rmask)
2885         rmask = adapter->params.vfres.pmask;
2886 
2887     return rmask;
2888 }
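/*
 * Worked example: with pmask = 0x5 (ports 0 and 2 accessible), we query
 * the firmware MAC ACL for port indices 0 and 2.  If only port 2 has a
 * provisioned MAC address, rmask is 0x4; if neither does, we fall back
 * to the full 0x5 from the firmware.
 */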
2889 
2890 /*
2891  * "Probe" a device: initialize a device and construct all kernel and driver
2892  * state needed to manage the device.  This routine is called "init_one" in
2893  * the PF Driver ...
2894  */
2895 static int cxgb4vf_pci_probe(struct pci_dev *pdev,
2896                  const struct pci_device_id *ent)
2897 {
2898     struct adapter *adapter;
2899     struct net_device *netdev;
2900     struct port_info *pi;
2901     unsigned int pmask;
2902     int err, pidx;
2903 
2904     /*
2905      * Initialize generic PCI device state.
2906      */
2907     err = pci_enable_device(pdev);
2908     if (err)
2909         return dev_err_probe(&pdev->dev, err, "cannot enable PCI device\n");
2910 
2911     /*
2912      * Reserve PCI resources for the device.  If we can't get them some
2913      * other driver may have already claimed the device ...
2914      */
2915     err = pci_request_regions(pdev, KBUILD_MODNAME);
2916     if (err) {
2917         dev_err(&pdev->dev, "cannot obtain PCI resources\n");
2918         goto err_disable_device;
2919     }
2920 
2921     /*
2922      * Set up our DMA mask
2923      */
2924     err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
2925     if (err) {
2926         dev_err(&pdev->dev, "no usable DMA configuration\n");
2927         goto err_release_regions;
2928     }
2929 
2930     /*
2931      * Enable bus mastering for the device ...
2932      */
2933     pci_set_master(pdev);
2934 
2935     /*
2936      * Allocate our adapter data structure and attach it to the device.
2937      */
2938     adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
2939     if (!adapter) {
2940         err = -ENOMEM;
2941         goto err_release_regions;
2942     }
2943     pci_set_drvdata(pdev, adapter);
2944     adapter->pdev = pdev;
2945     adapter->pdev_dev = &pdev->dev;
2946 
2947     adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) +
2948                     (sizeof(struct mbox_cmd) *
2949                      T4VF_OS_LOG_MBOX_CMDS),
2950                     GFP_KERNEL);
2951     if (!adapter->mbox_log) {
2952         err = -ENOMEM;
2953         goto err_free_adapter;
2954     }
2955     adapter->mbox_log->size = T4VF_OS_LOG_MBOX_CMDS;
2956 
2957     /*
2958      * Initialize SMP data synchronization resources.
2959      */
2960     spin_lock_init(&adapter->stats_lock);
2961     spin_lock_init(&adapter->mbox_lock);
2962     INIT_LIST_HEAD(&adapter->mlist.list);
2963 
2964     /*
2965      * Map our I/O registers in BAR0.
2966      */
2967     adapter->regs = pci_ioremap_bar(pdev, 0);
2968     if (!adapter->regs) {
2969         dev_err(&pdev->dev, "cannot map device registers\n");
2970         err = -ENOMEM;
2971         goto err_free_adapter;
2972     }
2973 
2974     /* Wait for the device to become ready before proceeding ...
2975      */
2976     err = t4vf_prep_adapter(adapter);
2977     if (err) {
2978         dev_err(adapter->pdev_dev, "device didn't become ready:"
2979             " err=%d\n", err);
2980         goto err_unmap_bar0;
2981     }
2982 
2983     /* For T5 and later we want to use the new BAR-based User Doorbells,
2984      * so we need to map BAR2 here ...
2985      */
2986     if (!is_t4(adapter->params.chip)) {
2987         adapter->bar2 = ioremap_wc(pci_resource_start(pdev, 2),
2988                        pci_resource_len(pdev, 2));
2989         if (!adapter->bar2) {
2990             dev_err(adapter->pdev_dev, "cannot map BAR2 doorbells\n");
2991             err = -ENOMEM;
2992             goto err_unmap_bar0;
2993         }
2994     }
2995     /*
2996      * Initialize adapter level features.
2997      */
2998     adapter->name = pci_name(pdev);
2999     adapter->msg_enable = DFLT_MSG_ENABLE;
3000 
3001     /* If possible, we use the PCIe Relaxed Ordering Attribute to deliver
3002      * Ingress Packet Data to Free List Buffers in order to allow for
3003      * chipset performance optimizations between the Root Complex and
3004      * Memory Controllers.  (Messages to the associated Ingress Queue
3005      * notifying new Packet Placement in the Free List Buffers will be
3006      * sent without the Relaxed Ordering Attribute, thus guaranteeing that
3007      * all preceding PCIe Transaction Layer Packets will be processed
3008      * first.)  But some Root Complexes have various issues with Upstream
3009      * Transaction Layer Packets with the Relaxed Ordering Attribute set.
3010      * PCIe devices sitting under such Root Complexes have the Relaxed
3011      * Ordering bit cleared in their configuration space, so we check our
3012      * PCIe configuration space to see if it's flagged with advice against
3013      * using Relaxed Ordering.
3014      */
3015     if (!pcie_relaxed_ordering_enabled(pdev))
3016         adapter->flags |= CXGB4VF_ROOT_NO_RELAXED_ORDERING;
3017 
3018     err = adap_init0(adapter);
3019     if (err)
3020         dev_err(&pdev->dev,
3021             "Adapter initialization failed, error %d. Continuing in debug mode\n",
3022             err);
3023 
3024     /* Initialize hash mac addr list */
3025     INIT_LIST_HEAD(&adapter->mac_hlist);
3026 
3027     /*
3028      * Allocate our "adapter ports" and stitch everything together.
3029      */
3030     pmask = cxgb4vf_get_port_mask(adapter);
3031     for_each_port(adapter, pidx) {
3032         int port_id, viid;
3033         u8 mac[ETH_ALEN];
3034         unsigned int naddr = 1;
3035 
3036         /*
3037          * We simplistically allocate our virtual interfaces
3038          * sequentially across the port numbers to which we have
3039          * access rights.  This should be configurable in some manner
3040          * ...
3041          */
3042         if (pmask == 0)
3043             break;
3044         port_id = ffs(pmask) - 1;
3045         pmask &= ~(1 << port_id);
3046 
3047         /*
3048          * Allocate our network device and stitch things together.
3049          */
3050         netdev = alloc_etherdev_mq(sizeof(struct port_info),
3051                        MAX_PORT_QSETS);
3052         if (netdev == NULL) {
3053             err = -ENOMEM;
3054             goto err_free_dev;
3055         }
3056         adapter->port[pidx] = netdev;
3057         SET_NETDEV_DEV(netdev, &pdev->dev);
3058         pi = netdev_priv(netdev);
3059         pi->adapter = adapter;
3060         pi->pidx = pidx;
3061         pi->port_id = port_id;
3062 
3063         /*
3064          * Initialize the starting state of our "port" and register
3065          * it.
3066          */
3067         pi->xact_addr_filt = -1;
3068         netdev->irq = pdev->irq;
3069 
3070         netdev->hw_features = NETIF_F_SG | TSO_FLAGS | NETIF_F_GRO |
3071             NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
3072             NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
3073         netdev->features = netdev->hw_features | NETIF_F_HIGHDMA;
3074         netdev->vlan_features = netdev->features & VLAN_FEAT;
3075 
3076         netdev->priv_flags |= IFF_UNICAST_FLT;
3077         netdev->min_mtu = 81;
3078         netdev->max_mtu = ETH_MAX_MTU;
3079 
3080         netdev->netdev_ops = &cxgb4vf_netdev_ops;
3081         netdev->ethtool_ops = &cxgb4vf_ethtool_ops;
3082         netdev->dev_port = pi->port_id;
3083 
3084         /*
3085          * If we haven't been able to contact the firmware, there's
3086          * nothing else we can do for this "port" ...
3087          */
3088         if (!(adapter->flags & CXGB4VF_FW_OK))
3089             continue;
3090 
3091         viid = t4vf_alloc_vi(adapter, port_id);
3092         if (viid < 0) {
3093             dev_err(&pdev->dev,
3094                 "cannot allocate VI for port %d: err=%d\n",
3095                 port_id, viid);
3096             err = viid;
3097             goto err_free_dev;
3098         }
3099         pi->viid = viid;
3100 
3101         /*
3102          * Initialize the hardware/software state for the port.
3103          */
3104         err = t4vf_port_init(adapter, pidx);
3105         if (err) {
3106             dev_err(&pdev->dev, "cannot initialize port %d\n",
3107                 pidx);
3108             goto err_free_dev;
3109         }
3110 
3111         err = t4vf_get_vf_mac_acl(adapter, port_id, &naddr, mac);
3112         if (err) {
3113             dev_err(&pdev->dev,
3114                 "unable to determine MAC ACL address, "
3115                 "continuing anyway.. (status %d)\n", err);
3116         } else if (naddr && adapter->params.vfres.nvi == 1) {
3117             struct sockaddr addr;
3118 
3119             ether_addr_copy(addr.sa_data, mac);
3120             err = cxgb4vf_set_mac_addr(netdev, &addr);
3121             if (err) {
3122                 dev_err(&pdev->dev,
3123                     "unable to set MAC address %pM\n",
3124                     mac);
3125                 goto err_free_dev;
3126             }
3127             dev_info(&pdev->dev,
3128                  "Using assigned MAC ACL: %pM\n", mac);
3129         }
3130     }
3131 
    /* See what interrupts we'll be using.  If we've been configured to
     * use MSI-X interrupts, try to enable them, falling back to MSI
     * interrupts if we can't.  If we can't get MSI interrupts either,
     * we bail out with that error.
     */
    if (msi == MSI_MSIX && enable_msix(adapter) == 0) {
        adapter->flags |= CXGB4VF_USING_MSIX;
    } else {
3140         if (msi == MSI_MSIX) {
            dev_info(adapter->pdev_dev,
                 "Unable to use MSI-X Interrupts; falling back to MSI Interrupts\n");
3144 
3145             /* We're going to need a Forwarded Interrupt Queue so
3146              * that may cut into how many Queue Sets we can
3147              * support.
3148              */
3149             msi = MSI_MSI;
3150             size_nports_qsets(adapter);
3151         }
3152         err = pci_enable_msi(pdev);
3153         if (err) {
            dev_err(&pdev->dev,
                "Unable to allocate MSI Interrupts; err=%d\n", err);
3156             goto err_free_dev;
3157         }
3158         adapter->flags |= CXGB4VF_USING_MSI;
3159     }
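#if 0
    /*
     * Illustrative sketch only: enable_msix() is defined earlier in this
     * file.  A helper of this shape typically requests one MSI-X vector
     * per Ethernet Queue Set plus a few extras, and accepts fewer vectors
     * down to a minimum of one per port.  MSIX_ENTRIES, MSIX_EXTRAS and
     * the msix_info[] bookkeeping follow this driver's conventions, but
     * the body below is a sketch, not a copy of the real function.
     */
    static int enable_msix(struct adapter *adapter)
    {
        struct msix_entry entries[MSIX_ENTRIES];
        struct sge *s = &adapter->sge;
        int i, want, need;

        for (i = 0; i < MSIX_ENTRIES; i++)
            entries[i].entry = i;

        /* Ask for one vector per Queue Set plus the extras, but accept
         * as few as one per port plus the extras.
         */
        want = s->max_ethqsets + MSIX_EXTRAS;
        need = adapter->params.nports + MSIX_EXTRAS;
        want = pci_enable_msix_range(adapter->pdev, entries, need, want);
        if (want < 0)
            return want;    /* couldn't even get the minimum */

        /* If we received fewer vectors than Queue Sets, shrink our
         * Queue Set count to match.
         */
        if (want - MSIX_EXTRAS < s->max_ethqsets)
            s->max_ethqsets = want - MSIX_EXTRAS;

        for (i = 0; i < want; i++)
            adapter->msix_info[i].vec = entries[i].vector;
        return 0;
    }
#endif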
3160 
3161     /* Now that we know how many "ports" we have and what interrupt
3162      * mechanism we're going to use, we can configure our queue resources.
3163      */
3164     cfg_queues(adapter);
3165 
    /*
     * The "card" is now ready to go.  If any errors occur during device
     * registration, we do not fail the whole "card" but rather proceed
     * only with the ports we manage to register successfully.  However,
     * we must register at least one net device.
     */
    for_each_port(adapter, pidx) {
        struct port_info *pi;

        netdev = adapter->port[pidx];
        if (netdev == NULL)
            continue;
        pi = netdev_priv(netdev);
3177 
3178         netif_set_real_num_tx_queues(netdev, pi->nqsets);
3179         netif_set_real_num_rx_queues(netdev, pi->nqsets);
3180 
3181         err = register_netdev(netdev);
3182         if (err) {
            dev_warn(&pdev->dev,
                 "cannot register net device %s, skipping\n",
                 netdev->name);
3185             continue;
3186         }
3187 
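        /* Leave the carrier off for now; it will be asserted only once
         * the physical link actually comes up after the device is opened.
         */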
3188         netif_carrier_off(netdev);
3189         set_bit(pidx, &adapter->registered_device_map);
3190     }
3191     if (adapter->registered_device_map == 0) {
3192         dev_err(&pdev->dev, "could not register any net devices\n");
3193         err = -EINVAL;
3194         goto err_disable_interrupts;
3195     }
3196 
3197     /*
3198      * Set up our debugfs entries.
3199      */
3200     if (!IS_ERR_OR_NULL(cxgb4vf_debugfs_root)) {
3201         adapter->debugfs_root =
3202             debugfs_create_dir(pci_name(pdev),
3203                        cxgb4vf_debugfs_root);
3204         setup_debugfs(adapter);
3205     }
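    /*
     * (The per-device directory created above typically appears as
     * /sys/kernel/debug/cxgb4vf/<domain:bus:dev.fn>/, assuming debugfs is
     * mounted at its usual location.)
     */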
3206 
    /*
     * Print a short notice on the existence and configuration of each
     * new VF network device ...
     */
3211     for_each_port(adapter, pidx) {
3212         dev_info(adapter->pdev_dev, "%s: Chelsio VF NIC PCIe %s\n",
3213              adapter->port[pidx]->name,
3214              (adapter->flags & CXGB4VF_USING_MSIX) ? "MSI-X" :
3215              (adapter->flags & CXGB4VF_USING_MSI)  ? "MSI" : "");
3216     }
3217 
3218     /*
3219      * Return success!
3220      */
3221     return 0;
3222 
3223     /*
3224      * Error recovery and exit code.  Unwind state that's been created
3225      * so far and return the error.
3226      */
3227 err_disable_interrupts:
3228     if (adapter->flags & CXGB4VF_USING_MSIX) {
3229         pci_disable_msix(adapter->pdev);
3230         adapter->flags &= ~CXGB4VF_USING_MSIX;
3231     } else if (adapter->flags & CXGB4VF_USING_MSI) {
3232         pci_disable_msi(adapter->pdev);
3233         adapter->flags &= ~CXGB4VF_USING_MSI;
3234     }
3235 
3236 err_free_dev:
3237     for_each_port(adapter, pidx) {
3238         netdev = adapter->port[pidx];
3239         if (netdev == NULL)
3240             continue;
3241         pi = netdev_priv(netdev);
3242         if (pi->viid)
3243             t4vf_free_vi(adapter, pi->viid);
3244         if (test_bit(pidx, &adapter->registered_device_map))
3245             unregister_netdev(netdev);
3246         free_netdev(netdev);
3247     }
3248 
3249     if (!is_t4(adapter->params.chip))
3250         iounmap(adapter->bar2);
3251 
3252 err_unmap_bar0:
3253     iounmap(adapter->regs);
3254 
3255 err_free_adapter:
3256     kfree(adapter->mbox_log);
3257     kfree(adapter);
3258 
3259 err_release_regions:
3260     pci_release_regions(pdev);
3261     pci_clear_master(pdev);
3262 
3263 err_disable_device:
3264     pci_disable_device(pdev);
3265 
3266     return err;
3267 }
3268 
3269 /*
3270  * "Remove" a device: tear down all kernel and driver state created in the
3271  * "probe" routine and quiesce the device (disable interrupts, etc.).  (Note
3272  * that this is called "remove_one" in the PF Driver.)
3273  */
3274 static void cxgb4vf_pci_remove(struct pci_dev *pdev)
3275 {
3276     struct adapter *adapter = pci_get_drvdata(pdev);
3277     struct hash_mac_addr *entry, *tmp;
3278 
3279     /*
3280      * Tear down driver state associated with device.
3281      */
3282     if (adapter) {
3283         int pidx;
3284 
        /*
         * Stop all of our activity: unregister the network devices,
         * disable interrupts, etc.
         */
3289         for_each_port(adapter, pidx)
3290             if (test_bit(pidx, &adapter->registered_device_map))
3291                 unregister_netdev(adapter->port[pidx]);
3292         t4vf_sge_stop(adapter);
3293         if (adapter->flags & CXGB4VF_USING_MSIX) {
3294             pci_disable_msix(adapter->pdev);
3295             adapter->flags &= ~CXGB4VF_USING_MSIX;
3296         } else if (adapter->flags & CXGB4VF_USING_MSI) {
3297             pci_disable_msi(adapter->pdev);
3298             adapter->flags &= ~CXGB4VF_USING_MSI;
3299         }
3300 
3301         /*
3302          * Tear down our debugfs entries.
3303          */
3304         if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
3305             cleanup_debugfs(adapter);
3306             debugfs_remove_recursive(adapter->debugfs_root);
3307         }
3308 
3309         /*
3310          * Free all of the various resources which we've acquired ...
3311          */
3312         t4vf_free_sge_resources(adapter);
3313         for_each_port(adapter, pidx) {
3314             struct net_device *netdev = adapter->port[pidx];
3315             struct port_info *pi;
3316 
3317             if (netdev == NULL)
3318                 continue;
3319 
3320             pi = netdev_priv(netdev);
3321             if (pi->viid)
3322                 t4vf_free_vi(adapter, pi->viid);
3323             free_netdev(netdev);
3324         }
3325         iounmap(adapter->regs);
3326         if (!is_t4(adapter->params.chip))
3327             iounmap(adapter->bar2);
3328         kfree(adapter->mbox_log);
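        /* Drain the list of MAC addresses that were hashed into the
         * hardware's address filter.  The _safe iterator is required
         * because each entry is freed while the list is being walked.
         */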
3329         list_for_each_entry_safe(entry, tmp, &adapter->mac_hlist,
3330                      list) {
3331             list_del(&entry->list);
3332             kfree(entry);
3333         }
3334         kfree(adapter);
3335     }
3336 
3337     /*
3338      * Disable the device and release its PCI resources.
3339      */
3340     pci_disable_device(pdev);
3341     pci_clear_master(pdev);
3342     pci_release_regions(pdev);
3343 }
3344 
/*
 * "Shutdown" the device: quiesce it, stopping Ingress Packet and Interrupt
 * delivery.
 */
3349 static void cxgb4vf_pci_shutdown(struct pci_dev *pdev)
3350 {
3351     struct adapter *adapter;
3352     int pidx;
3353 
3354     adapter = pci_get_drvdata(pdev);
3355     if (!adapter)
3356         return;
3357 
    /* Unregister all our network devices.  Closing each device disables
     * its Virtual Interface, which shuts down the delivery of ingress
     * packets into the chip for that interface.
     */
3362     for_each_port(adapter, pidx)
3363         if (test_bit(pidx, &adapter->registered_device_map))
3364             unregister_netdev(adapter->port[pidx]);
3365 
    /* Stop all SGE activity and disable our interrupts so that no
     * further DMA or interrupt delivery can occur.
     */
3369     t4vf_sge_stop(adapter);
3370     if (adapter->flags & CXGB4VF_USING_MSIX) {
3371         pci_disable_msix(adapter->pdev);
3372         adapter->flags &= ~CXGB4VF_USING_MSIX;
3373     } else if (adapter->flags & CXGB4VF_USING_MSI) {
3374         pci_disable_msi(adapter->pdev);
3375         adapter->flags &= ~CXGB4VF_USING_MSI;
3376     }
3377 
    /*
     * Free up all of our Queues, which prevents any further DMA (and
     * interrupts) and allows the various internal pathways to drain.
     */
3382     t4vf_free_sge_resources(adapter);
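    /* Clear our "driver data" so that a subsequent ->remove() callback
     * sees a NULL adapter and skips its teardown work.
     */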
3383     pci_set_drvdata(pdev, NULL);
3384 }
3385 
3386 /* Macros needed to support the PCI Device ID Table ...
3387  */
3388 #define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \
3389     static const struct pci_device_id cxgb4vf_pci_tbl[] = {
3390 #define CH_PCI_DEVICE_ID_FUNCTION   0x8
3391 
3392 #define CH_PCI_ID_TABLE_ENTRY(devid) \
3393         { PCI_VDEVICE(CHELSIO, (devid)), 0 }
3394 
3395 #define CH_PCI_DEVICE_ID_TABLE_DEFINE_END { 0, } }
3396 
3397 #include "../cxgb4/t4_pci_id_tbl.h"
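#if 0
/*
 * For reference, a sketch of what the macro machinery above expands to once
 * t4_pci_id_tbl.h has been included.  The shared table composes each entry
 * roughly as (base devid | CH_PCI_DEVICE_ID_FUNCTION << 8), so the function
 * value 0x8 selects the VF variant of every device ID; the specific IDs
 * below are illustrative only.
 */
static const struct pci_device_id cxgb4vf_pci_tbl[] = {
    { PCI_VDEVICE(CHELSIO, 0x4801), 0 },    /* e.g. a T4 VF (PF devid 0x4001) */
    { PCI_VDEVICE(CHELSIO, 0x5801), 0 },    /* e.g. a T5 VF (PF devid 0x5001) */
    /* ... one entry per supported adapter ... */
    { 0, }                                  /* terminator */
};
#endif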
3398 
3399 MODULE_DESCRIPTION(DRV_DESC);
3400 MODULE_AUTHOR("Chelsio Communications");
3401 MODULE_LICENSE("Dual BSD/GPL");
3402 MODULE_DEVICE_TABLE(pci, cxgb4vf_pci_tbl);
3403 
3404 static struct pci_driver cxgb4vf_driver = {
3405     .name       = KBUILD_MODNAME,
3406     .id_table   = cxgb4vf_pci_tbl,
3407     .probe      = cxgb4vf_pci_probe,
3408     .remove     = cxgb4vf_pci_remove,
3409     .shutdown   = cxgb4vf_pci_shutdown,
3410 };
3411 
3412 /*
3413  * Initialize global driver state.
3414  */
3415 static int __init cxgb4vf_module_init(void)
3416 {
3417     int ret;
3418 
3419     /*
3420      * Vet our module parameters.
3421      */
3422     if (msi != MSI_MSIX && msi != MSI_MSI) {
3423         pr_warn("bad module parameter msi=%d; must be %d (MSI-X or MSI) or %d (MSI)\n",
3424             msi, MSI_MSIX, MSI_MSI);
3425         return -EINVAL;
3426     }
3427 
    /* Debugfs support is optional; debugfs itself will warn if this fails */
3429     cxgb4vf_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
3430 
3431     ret = pci_register_driver(&cxgb4vf_driver);
3432     if (ret < 0)
3433         debugfs_remove(cxgb4vf_debugfs_root);
3434     return ret;
3435 }
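/*
 * Usage note (illustrative): the "msi" module parameter vetted above is
 * supplied at module load time, e.g.
 *
 *      modprobe cxgb4vf msi=1          # restrict the driver to plain MSI
 *
 * and its current value can be read back from
 * /sys/module/cxgb4vf/parameters/msi.
 */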
3436 
3437 /*
3438  * Tear down global driver state.
3439  */
3440 static void __exit cxgb4vf_module_exit(void)
3441 {
3442     pci_unregister_driver(&cxgb4vf_driver);
3443     debugfs_remove(cxgb4vf_debugfs_root);
3444 }
3445 
3446 module_init(cxgb4vf_module_init);
3447 module_exit(cxgb4vf_module_exit);