0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * Copyright (C) 2015 Cavium, Inc.
0004  */
0005 
0006 #include <linux/module.h>
0007 #include <linux/interrupt.h>
0008 #include <linux/pci.h>
0009 #include <linux/etherdevice.h>
0010 #include <linux/of.h>
0011 #include <linux/if_vlan.h>
0012 
0013 #include "nic_reg.h"
0014 #include "nic.h"
0015 #include "q_struct.h"
0016 #include "thunder_bgx.h"
0017 
0018 #define DRV_NAME    "nicpf"
0019 #define DRV_VERSION "1.0"
0020 
0021 #define NIC_VF_PER_MBX_REG      64
0022 
0023 struct hw_info {
0024     u8      bgx_cnt;
0025     u8      chans_per_lmac;
0026     u8      chans_per_bgx; /* Rx/Tx chans */
0027     u8      chans_per_rgx;
0028     u8      chans_per_lbk;
0029     u16     cpi_cnt;
0030     u16     rssi_cnt;
0031     u16     rss_ind_tbl_size;
0032     u16     tl4_cnt;
0033     u16     tl3_cnt;
0034     u8      tl2_cnt;
0035     u8      tl1_cnt;
0036     bool        tl1_per_bgx; /* TL1 per BGX or per LMAC */
0037 };
0038 
0039 struct nicpf {
0040     struct pci_dev      *pdev;
0041     struct hw_info          *hw;
0042     u8          node;
0043     unsigned int        flags;
0044     u8          num_vf_en;      /* No of VF enabled */
0045     bool            vf_enabled[MAX_NUM_VFS_SUPPORTED];
0046     void __iomem        *reg_base;       /* Register start address */
0047     u8          num_sqs_en; /* Secondary qsets enabled */
0048     u64         nicvf[MAX_NUM_VFS_SUPPORTED];
0049     u8          vf_sqs[MAX_NUM_VFS_SUPPORTED][MAX_SQS_PER_VF];
0050     u8          pqs_vf[MAX_NUM_VFS_SUPPORTED];
0051     bool            sqs_used[MAX_NUM_VFS_SUPPORTED];
0052     struct pkind_cfg    pkind;
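/* vf_lmac_map[] packs the BGX index into bits [7:4] and the LMAC index
 * into bits [3:0] of a single byte per VF, via the macros below.
 */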
0053 #define NIC_SET_VF_LMAC_MAP(bgx, lmac)  (((bgx & 0xF) << 4) | (lmac & 0xF))
0054 #define NIC_GET_BGX_FROM_VF_LMAC_MAP(map)   ((map >> 4) & 0xF)
0055 #define NIC_GET_LMAC_FROM_VF_LMAC_MAP(map)  (map & 0xF)
0056     u8          *vf_lmac_map;
0057     u16         cpi_base[MAX_NUM_VFS_SUPPORTED];
0058     u16         rssi_base[MAX_NUM_VFS_SUPPORTED];
0059 
0060     /* MSI-X */
0061     u8          num_vec;
0062     unsigned int        irq_allocated[NIC_PF_MSIX_VECTORS];
0063     char            irq_name[NIC_PF_MSIX_VECTORS][20];
0064 };
0065 
0066 /* Supported devices */
0067 static const struct pci_device_id nic_id_table[] = {
0068     { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_NIC_PF) },
0069     { 0, }  /* end of table */
0070 };
0071 
0072 MODULE_AUTHOR("Sunil Goutham");
0073 MODULE_DESCRIPTION("Cavium Thunder NIC Physical Function Driver");
0074 MODULE_LICENSE("GPL v2");
0075 MODULE_VERSION(DRV_VERSION);
0076 MODULE_DEVICE_TABLE(pci, nic_id_table);
0077 
0078 /* The Cavium ThunderX network controller can *only* be found in SoCs
0079  * containing the ThunderX ARM64 CPU implementation.  All accesses to the device
0080  * registers on this platform are implicitly strongly ordered with respect
0081  * to memory accesses. So writeq_relaxed() and readq_relaxed() are safe to use
0082  * with no memory barriers in this driver.  The readq()/writeq() functions add
0083  * explicit ordering operations which in this case are redundant, and only
0084  * add overhead.
0085  */
0086 
0087 /* Register read/write APIs */
0088 static void nic_reg_write(struct nicpf *nic, u64 offset, u64 val)
0089 {
0090     writeq_relaxed(val, nic->reg_base + offset);
0091 }
0092 
0093 static u64 nic_reg_read(struct nicpf *nic, u64 offset)
0094 {
0095     return readq_relaxed(nic->reg_base + offset);
0096 }
0097 
0098 /* PF -> VF mailbox communication APIs */
0099 static void nic_enable_mbx_intr(struct nicpf *nic)
0100 {
0101     int vf_cnt = pci_sriov_get_totalvfs(nic->pdev);
0102 
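/* INTR_MASK(n) builds a mask with the low n bits set (all 64 bits when
 * n >= 64), i.e. one bit per VF covered by a mailbox interrupt register.
 */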
0103 #define INTR_MASK(vfs) ((vfs < 64) ? (BIT_ULL(vfs) - 1) : (~0ull))
0104 
0105     /* Clear it, to avoid spurious interrupts (if any) */
0106     nic_reg_write(nic, NIC_PF_MAILBOX_INT, INTR_MASK(vf_cnt));
0107 
0108     /* Enable mailbox interrupt for all VFs */
0109     nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S, INTR_MASK(vf_cnt));
0110     /* One mailbox intr enable reg per 64 VFs */
0111     if (vf_cnt > 64) {
0112         nic_reg_write(nic, NIC_PF_MAILBOX_INT + sizeof(u64),
0113                   INTR_MASK(vf_cnt - 64));
0114         nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S + sizeof(u64),
0115                   INTR_MASK(vf_cnt - 64));
0116     }
0117 }
0118 
0119 static void nic_clear_mbx_intr(struct nicpf *nic, int vf, int mbx_reg)
0120 {
0121     nic_reg_write(nic, NIC_PF_MAILBOX_INT + (mbx_reg << 3), BIT_ULL(vf));
0122 }
0123 
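/* Each VF owns a small mailbox window (two 64-bit registers) located at a
 * fixed per-VF stride within the PF register space.
 */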
0124 static u64 nic_get_mbx_addr(int vf)
0125 {
0126     return NIC_PF_VF_0_127_MAILBOX_0_1 + (vf << NIC_VF_NUM_SHIFT);
0127 }
0128 
0129 /* Send a mailbox message to VF
0130  * @vf: VF to which this message is to be sent
0131  * @mbx: Message to be sent
0132  */
0133 static void nic_send_msg_to_vf(struct nicpf *nic, int vf, union nic_mbx *mbx)
0134 {
0135     void __iomem *mbx_addr = nic->reg_base + nic_get_mbx_addr(vf);
0136     u64 *msg = (u64 *)mbx;
0137 
0138     /* In first revision HW, the mbox interrupt is triggered
0139      * when the PF writes to MBOX(1); in later revisions, when
0140      * the PF writes to MBOX(0)
0141      */
0142     if (pass1_silicon(nic->pdev)) {
0143         /* see the comment for nic_reg_write()/nic_reg_read()
0144          * functions above
0145          */
0146         writeq_relaxed(msg[0], mbx_addr);
0147         writeq_relaxed(msg[1], mbx_addr + 8);
0148     } else {
0149         writeq_relaxed(msg[1], mbx_addr + 8);
0150         writeq_relaxed(msg[0], mbx_addr);
0151     }
0152 }
0153 
0154 /* Responds to VF's READY message with VF's
0155  * ID, node, MAC address, etc.
0156  * @vf: VF which sent READY message
0157  */
0158 static void nic_mbx_send_ready(struct nicpf *nic, int vf)
0159 {
0160     union nic_mbx mbx = {};
0161     int bgx_idx, lmac;
0162     const char *mac;
0163 
0164     mbx.nic_cfg.msg = NIC_MBOX_MSG_READY;
0165     mbx.nic_cfg.vf_id = vf;
0166 
0167     mbx.nic_cfg.tns_mode = NIC_TNS_BYPASS_MODE;
0168 
0169     if (vf < nic->num_vf_en) {
0170         bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
0171         lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
0172 
0173         mac = bgx_get_lmac_mac(nic->node, bgx_idx, lmac);
0174         if (mac)
0175             ether_addr_copy((u8 *)&mbx.nic_cfg.mac_addr, mac);
0176     }
0177     mbx.nic_cfg.sqs_mode = (vf >= nic->num_vf_en);
0178     mbx.nic_cfg.node_id = nic->node;
0179 
0180     mbx.nic_cfg.loopback_supported = vf < nic->num_vf_en;
0181 
0182     nic_send_msg_to_vf(nic, vf, &mbx);
0183 }
0184 
0185 /* ACKs VF's mailbox message
0186  * @vf: VF to which the ACK is to be sent
0187  */
0188 static void nic_mbx_send_ack(struct nicpf *nic, int vf)
0189 {
0190     union nic_mbx mbx = {};
0191 
0192     mbx.msg.msg = NIC_MBOX_MSG_ACK;
0193     nic_send_msg_to_vf(nic, vf, &mbx);
0194 }
0195 
0196 /* NACKs VF's mailbox message when the PF is not able to
0197  * complete the requested action
0198  * @vf: VF to which the NACK is to be sent
0199  */
0200 static void nic_mbx_send_nack(struct nicpf *nic, int vf)
0201 {
0202     union nic_mbx mbx = {};
0203 
0204     mbx.msg.msg = NIC_MBOX_MSG_NACK;
0205     nic_send_msg_to_vf(nic, vf, &mbx);
0206 }
0207 
0208 /* Flush all in-flight receive packets to memory and
0209  * bring down an active RQ
0210  */
0211 static int nic_rcv_queue_sw_sync(struct nicpf *nic)
0212 {
0213     u16 timeout = ~0x00;
0214 
0215     nic_reg_write(nic, NIC_PF_SW_SYNC_RX, 0x01);
0216     /* Wait till sync cycle is finished */
0217     while (timeout) {
0218         if (nic_reg_read(nic, NIC_PF_SW_SYNC_RX_DONE) & 0x1)
0219             break;
0220         timeout--;
0221     }
0222     nic_reg_write(nic, NIC_PF_SW_SYNC_RX, 0x00);
0223     if (!timeout) {
0224         dev_err(&nic->pdev->dev, "Receive queue software sync failed");
0225         return 1;
0226     }
0227     return 0;
0228 }
0229 
0230 /* Get BGX Rx/Tx stats and respond to VF's request */
0231 static void nic_get_bgx_stats(struct nicpf *nic, struct bgx_stats_msg *bgx)
0232 {
0233     int bgx_idx, lmac;
0234     union nic_mbx mbx = {};
0235 
0236     bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[bgx->vf_id]);
0237     lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[bgx->vf_id]);
0238 
0239     mbx.bgx_stats.msg = NIC_MBOX_MSG_BGX_STATS;
0240     mbx.bgx_stats.vf_id = bgx->vf_id;
0241     mbx.bgx_stats.rx = bgx->rx;
0242     mbx.bgx_stats.idx = bgx->idx;
0243     if (bgx->rx)
0244         mbx.bgx_stats.stats = bgx_get_rx_stats(nic->node, bgx_idx,
0245                                 lmac, bgx->idx);
0246     else
0247         mbx.bgx_stats.stats = bgx_get_tx_stats(nic->node, bgx_idx,
0248                                 lmac, bgx->idx);
0249     nic_send_msg_to_vf(nic, bgx->vf_id, &mbx);
0250 }
0251 
0252 /* Update hardware min/max frame size */
0253 static int nic_update_hw_frs(struct nicpf *nic, int new_frs, int vf)
0254 {
0255     int bgx, lmac, lmac_cnt;
0256     u64 lmac_credits;
0257 
0258     if ((new_frs > NIC_HW_MAX_FRS) || (new_frs < NIC_HW_MIN_FRS))
0259         return 1;
0260 
0261     bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
0262     lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
0263     lmac += bgx * MAX_LMAC_PER_BGX;
0264 
0265     new_frs += VLAN_ETH_HLEN + ETH_FCS_LEN + 4;
0266 
0267     /* Update corresponding LMAC credits */
0268     lmac_cnt = bgx_get_lmac_count(nic->node, bgx);
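    /* The credit count lives in bits [31:12]: the per-LMAC share of the
     * 48KB BGX Tx buffer minus one max-sized frame, in 16-byte units.
     */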
0269     lmac_credits = nic_reg_read(nic, NIC_PF_LMAC_0_7_CREDIT + (lmac * 8));
0270     lmac_credits &= ~(0xFFFFFULL << 12);
0271     lmac_credits |= (((((48 * 1024) / lmac_cnt) - new_frs) / 16) << 12);
0272     nic_reg_write(nic, NIC_PF_LMAC_0_7_CREDIT + (lmac * 8), lmac_credits);
0273 
0274     /* Enforce MTU in HW
0275      * This config is supported only from 88xx pass 2.0 onwards.
0276      */
0277     if (!pass1_silicon(nic->pdev))
0278         nic_reg_write(nic,
0279                   NIC_PF_LMAC_0_7_CFG2 + (lmac * 8), new_frs);
0280     return 0;
0281 }
0282 
0283 /* Set minimum transmit packet size */
0284 static void nic_set_tx_pkt_pad(struct nicpf *nic, int size)
0285 {
0286     int lmac, max_lmac;
0287     u16 sdevid;
0288     u64 lmac_cfg;
0289 
0290     /* There is an issue in HW wherein, while sending GSO sized
0291      * pkts as part of TSO, if the pkt len falls below this size the
0292      * NIC will zero-pad the packet and also update the IP total length.
0293      * Hence set this value to less than the min pkt size of MAC+IP+TCP
0294      * headers; BGX will do the padding to transmit a 64 byte pkt.
0295      */
0296     if (size > 52)
0297         size = 52;
0298 
0299     pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid);
0300     /* 81xx's RGX has only one LMAC */
0301     if (sdevid == PCI_SUBSYS_DEVID_81XX_NIC_PF)
0302         max_lmac = ((nic->hw->bgx_cnt - 1) * MAX_LMAC_PER_BGX) + 1;
0303     else
0304         max_lmac = nic->hw->bgx_cnt * MAX_LMAC_PER_BGX;
0305 
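    /* The min packet size field sits in LMAC_CFG bits [5:2] and is written
     * as size / 4, i.e. in units of 4 bytes.
     */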
0306     for (lmac = 0; lmac < max_lmac; lmac++) {
0307         lmac_cfg = nic_reg_read(nic, NIC_PF_LMAC_0_7_CFG | (lmac << 3));
0308         lmac_cfg &= ~(0xF << 2);
0309         lmac_cfg |= ((size / 4) << 2);
0310         nic_reg_write(nic, NIC_PF_LMAC_0_7_CFG | (lmac << 3), lmac_cfg);
0311     }
0312 }
0313 
0314 /* Function to check number of LMACs present and set VF::LMAC mapping.
0315  * Mapping will be used while initializing channels.
0316  */
0317 static void nic_set_lmac_vf_mapping(struct nicpf *nic)
0318 {
0319     unsigned bgx_map = bgx_get_map(nic->node);
0320     int bgx, next_bgx_lmac = 0;
0321     int lmac, lmac_cnt = 0;
0322     u64 lmac_credit;
0323 
0324     nic->num_vf_en = 0;
0325 
0326     for (bgx = 0; bgx < nic->hw->bgx_cnt; bgx++) {
0327         if (!(bgx_map & (1 << bgx)))
0328             continue;
0329         lmac_cnt = bgx_get_lmac_count(nic->node, bgx);
0330         for (lmac = 0; lmac < lmac_cnt; lmac++)
0331             nic->vf_lmac_map[next_bgx_lmac++] =
0332                         NIC_SET_VF_LMAC_MAP(bgx, lmac);
0333         nic->num_vf_en += lmac_cnt;
0334 
0335         /* Program LMAC credits */
0336         lmac_credit = (1ull << 1); /* channel credit enable */
0337         lmac_credit |= (0x1ff << 2); /* Max outstanding pkt count */
0338         /* 48KB BGX Tx buffer size, each unit is of size 16bytes */
0339         lmac_credit |= (((((48 * 1024) / lmac_cnt) -
0340                 NIC_HW_MAX_FRS) / 16) << 12);
0341         lmac = bgx * MAX_LMAC_PER_BGX;
0342         for (; lmac < lmac_cnt + (bgx * MAX_LMAC_PER_BGX); lmac++)
0343             nic_reg_write(nic,
0344                       NIC_PF_LMAC_0_7_CREDIT + (lmac * 8),
0345                       lmac_credit);
0346 
0347         /* On CN81XX there are only 8 VFs but the max possible number
0348          * of interfaces is 9.
0349          */
0350         if (nic->num_vf_en >= pci_sriov_get_totalvfs(nic->pdev)) {
0351             nic->num_vf_en = pci_sriov_get_totalvfs(nic->pdev);
0352             break;
0353         }
0354     }
0355 }
0356 
0357 static void nic_get_hw_info(struct nicpf *nic)
0358 {
0359     u16 sdevid;
0360     struct hw_info *hw = nic->hw;
0361 
0362     pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid);
0363 
0364     switch (sdevid) {
0365     case PCI_SUBSYS_DEVID_88XX_NIC_PF:
0366         hw->bgx_cnt = MAX_BGX_PER_CN88XX;
0367         hw->chans_per_lmac = 16;
0368         hw->chans_per_bgx = 128;
0369         hw->cpi_cnt = 2048;
0370         hw->rssi_cnt = 4096;
0371         hw->rss_ind_tbl_size = NIC_MAX_RSS_IDR_TBL_SIZE;
0372         hw->tl3_cnt = 256;
0373         hw->tl2_cnt = 64;
0374         hw->tl1_cnt = 2;
0375         hw->tl1_per_bgx = true;
0376         break;
0377     case PCI_SUBSYS_DEVID_81XX_NIC_PF:
0378         hw->bgx_cnt = MAX_BGX_PER_CN81XX;
0379         hw->chans_per_lmac = 8;
0380         hw->chans_per_bgx = 32;
0381         hw->chans_per_rgx = 8;
0382         hw->chans_per_lbk = 24;
0383         hw->cpi_cnt = 512;
0384         hw->rssi_cnt = 256;
0385         hw->rss_ind_tbl_size = 32; /* Max RSSI / Max interfaces */
0386         hw->tl3_cnt = 64;
0387         hw->tl2_cnt = 16;
0388         hw->tl1_cnt = 10;
0389         hw->tl1_per_bgx = false;
0390         break;
0391     case PCI_SUBSYS_DEVID_83XX_NIC_PF:
0392         hw->bgx_cnt = MAX_BGX_PER_CN83XX;
0393         hw->chans_per_lmac = 8;
0394         hw->chans_per_bgx = 32;
0395         hw->chans_per_lbk = 64;
0396         hw->cpi_cnt = 2048;
0397         hw->rssi_cnt = 1024;
0398         hw->rss_ind_tbl_size = 64; /* Max RSSI / Max interfaces */
0399         hw->tl3_cnt = 256;
0400         hw->tl2_cnt = 64;
0401         hw->tl1_cnt = 18;
0402         hw->tl1_per_bgx = false;
0403         break;
0404     }
0405     hw->tl4_cnt = MAX_QUEUES_PER_QSET * pci_sriov_get_totalvfs(nic->pdev);
0406 }
0407 
0408 #define BGX0_BLOCK 8
0409 #define BGX1_BLOCK 9
0410 
0411 static void nic_init_hw(struct nicpf *nic)
0412 {
0413     int i;
0414     u64 cqm_cfg;
0415 
0416     /* Enable NIC HW block */
0417     nic_reg_write(nic, NIC_PF_CFG, 0x3);
0418 
0419     /* Enable backpressure */
0420     nic_reg_write(nic, NIC_PF_BP_CFG, (1ULL << 6) | 0x03);
0421 
0422     /* TNS and TNS bypass modes are present only on 88xx.
0423      * Also, the offset of this CSR has changed on 81xx and 83xx.
0424      */
0425     if (nic->pdev->subsystem_device == PCI_SUBSYS_DEVID_88XX_NIC_PF) {
0426         /* Disable TNS mode on both interfaces */
0427         nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG,
0428                   (NIC_TNS_BYPASS_MODE << 7) |
0429                   BGX0_BLOCK | (1ULL << 16));
0430         nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG | (1 << 8),
0431                   (NIC_TNS_BYPASS_MODE << 7) |
0432                   BGX1_BLOCK | (1ULL << 16));
0433     } else {
0434         /* Configure timestamp generation timeout to 10us */
0435         for (i = 0; i < nic->hw->bgx_cnt; i++)
0436             nic_reg_write(nic, NIC_PF_INTFX_SEND_CFG | (i << 3),
0437                       (1ULL << 16));
0438     }
0439 
0440     nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG,
0441               (1ULL << 63) | BGX0_BLOCK);
0442     nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG + (1 << 8),
0443               (1ULL << 63) | BGX1_BLOCK);
0444 
0445     /* PKIND configuration */
0446     nic->pkind.minlen = 0;
0447     nic->pkind.maxlen = NIC_HW_MAX_FRS + VLAN_ETH_HLEN + ETH_FCS_LEN + 4;
0448     nic->pkind.lenerr_en = 1;
0449     nic->pkind.rx_hdr = 0;
0450     nic->pkind.hdr_sl = 0;
0451 
0452     for (i = 0; i < NIC_MAX_PKIND; i++)
0453         nic_reg_write(nic, NIC_PF_PKIND_0_15_CFG | (i << 3),
0454                   *(u64 *)&nic->pkind);
0455 
0456     nic_set_tx_pkt_pad(nic, NIC_HW_MIN_FRS);
0457 
0458     /* Timer config */
0459     nic_reg_write(nic, NIC_PF_INTR_TIMER_CFG, NICPF_CLK_PER_INT_TICK);
0460 
0461     /* Enable VLAN ethertype matching and stripping */
0462     nic_reg_write(nic, NIC_PF_RX_ETYPE_0_7,
0463               (2 << 19) | (ETYPE_ALG_VLAN_STRIP << 16) | ETH_P_8021Q);
0464 
0465     /* Only raise the CQM drop level; the HW default may already be higher on future chips */
0466     cqm_cfg = nic_reg_read(nic, NIC_PF_CQM_CFG);
0467     if (cqm_cfg < NICPF_CQM_MIN_DROP_LEVEL)
0468         nic_reg_write(nic, NIC_PF_CQM_CFG, NICPF_CQM_MIN_DROP_LEVEL);
0469 }
0470 
0471 /* Channel parse index configuration */
0472 static void nic_config_cpi(struct nicpf *nic, struct cpi_cfg_msg *cfg)
0473 {
0474     struct hw_info *hw = nic->hw;
0475     u32 vnic, bgx, lmac, chan;
0476     u32 padd, cpi_count = 0;
0477     u64 cpi_base, cpi, rssi_base, rssi;
0478     u8  qset, rq_idx = 0;
0479 
0480     vnic = cfg->vf_id;
0481     bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]);
0482     lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]);
0483 
0484     chan = (lmac * hw->chans_per_lmac) + (bgx * hw->chans_per_bgx);
0485     cpi_base = vnic * NIC_MAX_CPI_PER_LMAC;
0486     rssi_base = vnic * hw->rss_ind_tbl_size;
0487 
0488     /* Rx channel configuration */
0489     nic_reg_write(nic, NIC_PF_CHAN_0_255_RX_BP_CFG | (chan << 3),
0490               (1ull << 63) | (vnic << 0));
0491     nic_reg_write(nic, NIC_PF_CHAN_0_255_RX_CFG | (chan << 3),
0492               ((u64)cfg->cpi_alg << 62) | (cpi_base << 48));
0493 
0494     if (cfg->cpi_alg == CPI_ALG_NONE)
0495         cpi_count = 1;
0496     else if (cfg->cpi_alg == CPI_ALG_VLAN) /* 3 bits of PCP */
0497         cpi_count = 8;
0498     else if (cfg->cpi_alg == CPI_ALG_VLAN16) /* 3 bits PCP + DEI */
0499         cpi_count = 16;
0500     else if (cfg->cpi_alg == CPI_ALG_DIFF) /* 6bits DSCP */
0501         cpi_count = NIC_MAX_CPI_PER_LMAC;
0502 
0503     /* RSS Qset, Qidx mapping */
0504     qset = cfg->vf_id;
0505     rssi = rssi_base;
0506     for (; rssi < (rssi_base + cfg->rq_cnt); rssi++) {
0507         nic_reg_write(nic, NIC_PF_RSSI_0_4097_RQ | (rssi << 3),
0508                   (qset << 3) | rq_idx);
0509         rq_idx++;
0510     }
0511 
0512     rssi = 0;
0513     cpi = cpi_base;
0514     for (; cpi < (cpi_base + cpi_count); cpi++) {
0515         /* Determine port to channel adder */
0516         if (cfg->cpi_alg != CPI_ALG_DIFF)
0517             padd = cpi % cpi_count;
0518         else
0519             padd = cpi % 8; /* 3 bits CS out of 6bits DSCP */
0520 
0521         /* Leave RSS_SIZE as '0' to disable RSS */
0522         if (pass1_silicon(nic->pdev)) {
0523             nic_reg_write(nic, NIC_PF_CPI_0_2047_CFG | (cpi << 3),
0524                       (vnic << 24) | (padd << 16) |
0525                       (rssi_base + rssi));
0526         } else {
0527             /* Set MPI_ALG to '0' to disable MCAM parsing */
0528             nic_reg_write(nic, NIC_PF_CPI_0_2047_CFG | (cpi << 3),
0529                       (padd << 16));
0530             /* MPI index is same as CPI if MPI_ALG is not enabled */
0531             nic_reg_write(nic, NIC_PF_MPI_0_2047_CFG | (cpi << 3),
0532                       (vnic << 24) | (rssi_base + rssi));
0533         }
0534 
0535         if ((rssi + 1) >= cfg->rq_cnt)
0536             continue;
0537 
0538         if (cfg->cpi_alg == CPI_ALG_VLAN)
0539             rssi++;
0540         else if (cfg->cpi_alg == CPI_ALG_VLAN16)
0541             rssi = ((cpi - cpi_base) & 0xe) >> 1;
0542         else if (cfg->cpi_alg == CPI_ALG_DIFF)
0543             rssi = ((cpi - cpi_base) & 0x38) >> 3;
0544     }
0545     nic->cpi_base[cfg->vf_id] = cpi_base;
0546     nic->rssi_base[cfg->vf_id] = rssi_base;
0547 }
0548 
0549 /* Responds to VF with its RSS indirection table size */
0550 static void nic_send_rss_size(struct nicpf *nic, int vf)
0551 {
0552     union nic_mbx mbx = {};
0553 
0554     mbx.rss_size.msg = NIC_MBOX_MSG_RSS_SIZE;
0555     mbx.rss_size.ind_tbl_size = nic->hw->rss_ind_tbl_size;
0556     nic_send_msg_to_vf(nic, vf, &mbx);
0557 }
0558 
0559 /* Receive side scaling configuration
0560  * Configures:
0561  * - RSS index
0562  * - indir table i.e. hash::RQ mapping
0563  * - number of hash bits to consider
0564  */
0565 static void nic_config_rss(struct nicpf *nic, struct rss_cfg_msg *cfg)
0566 {
0567     u8  qset, idx = 0;
0568     u64 cpi_cfg, cpi_base, rssi_base, rssi;
0569     u64 idx_addr;
0570 
0571     rssi_base = nic->rssi_base[cfg->vf_id] + cfg->tbl_offset;
0572 
0573     rssi = rssi_base;
0574 
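    /* Each indirection table entry selects an RQ in bits [2:0] and, in its
     * upper bits, which secondary Qset it belongs to (0 means the primary
     * VF's own Qset).
     */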
0575     for (; rssi < (rssi_base + cfg->tbl_len); rssi++) {
0576         u8 svf = cfg->ind_tbl[idx] >> 3;
0577 
0578         if (svf)
0579             qset = nic->vf_sqs[cfg->vf_id][svf - 1];
0580         else
0581             qset = cfg->vf_id;
0582         nic_reg_write(nic, NIC_PF_RSSI_0_4097_RQ | (rssi << 3),
0583                   (qset << 3) | (cfg->ind_tbl[idx] & 0x7));
0584         idx++;
0585     }
0586 
0587     cpi_base = nic->cpi_base[cfg->vf_id];
0588     if (pass1_silicon(nic->pdev))
0589         idx_addr = NIC_PF_CPI_0_2047_CFG;
0590     else
0591         idx_addr = NIC_PF_MPI_0_2047_CFG;
0592     cpi_cfg = nic_reg_read(nic, idx_addr | (cpi_base << 3));
0593     cpi_cfg &= ~(0xFULL << 20);
0594     cpi_cfg |= (cfg->hash_bits << 20);
0595     nic_reg_write(nic, idx_addr | (cpi_base << 3), cpi_cfg);
0596 }
0597 
0598 /* 4 level transmit side scheduler configuration
0599  * for TNS bypass mode
0600  *
0601  * Sample configuration for SQ0 on 88xx
0602  * VNIC0-SQ0 -> TL4(0)   -> TL3[0]   -> TL2[0]  -> TL1[0] -> BGX0
0603  * VNIC1-SQ0 -> TL4(8)   -> TL3[2]   -> TL2[0]  -> TL1[0] -> BGX0
0604  * VNIC2-SQ0 -> TL4(16)  -> TL3[4]   -> TL2[1]  -> TL1[0] -> BGX0
0605  * VNIC3-SQ0 -> TL4(24)  -> TL3[6]   -> TL2[1]  -> TL1[0] -> BGX0
0606  * VNIC4-SQ0 -> TL4(512) -> TL3[128] -> TL2[32] -> TL1[1] -> BGX1
0607  * VNIC5-SQ0 -> TL4(520) -> TL3[130] -> TL2[32] -> TL1[1] -> BGX1
0608  * VNIC6-SQ0 -> TL4(528) -> TL3[132] -> TL2[33] -> TL1[1] -> BGX1
0609  * VNIC7-SQ0 -> TL4(536) -> TL3[134] -> TL2[33] -> TL1[1] -> BGX1
0610  */
0611 static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic,
0612                    struct sq_cfg_msg *sq)
0613 {
0614     struct hw_info *hw = nic->hw;
0615     u32 bgx, lmac, chan;
0616     u32 tl2, tl3, tl4;
0617     u32 rr_quantum;
0618     u8 sq_idx = sq->sq_num;
0619     u8 pqs_vnic;
0620     int svf;
0621 
0622     if (sq->sqs_mode)
0623         pqs_vnic = nic->pqs_vf[vnic];
0624     else
0625         pqs_vnic = vnic;
0626 
0627     bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[pqs_vnic]);
0628     lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[pqs_vnic]);
0629 
0630     /* 24 bytes for FCS, IPG and preamble */
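    /* rr_quantum is the round-robin quantum programmed into the TL4/TL3/TL2
     * scheduler config registers below, in units of 4 bytes.
     */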
0631     rr_quantum = ((NIC_HW_MAX_FRS + 24) / 4);
0632 
0633     /* For 88xx, TL4s 0-511 transmit via BGX0 and
0634      * TL4s 512-1023 transmit via BGX1.
0635      */
0636     if (hw->tl1_per_bgx) {
0637         tl4 = bgx * (hw->tl4_cnt / hw->bgx_cnt);
0638         if (!sq->sqs_mode) {
0639             tl4 += (lmac * MAX_QUEUES_PER_QSET);
0640         } else {
0641             for (svf = 0; svf < MAX_SQS_PER_VF; svf++) {
0642                 if (nic->vf_sqs[pqs_vnic][svf] == vnic)
0643                     break;
0644             }
0645             tl4 += (MAX_LMAC_PER_BGX * MAX_QUEUES_PER_QSET);
0646             tl4 += (lmac * MAX_QUEUES_PER_QSET * MAX_SQS_PER_VF);
0647             tl4 += (svf * MAX_QUEUES_PER_QSET);
0648         }
0649     } else {
0650         tl4 = (vnic * MAX_QUEUES_PER_QSET);
0651     }
0652     tl4 += sq_idx;
0653 
0654     tl3 = tl4 / (hw->tl4_cnt / hw->tl3_cnt);
0655     nic_reg_write(nic, NIC_PF_QSET_0_127_SQ_0_7_CFG2 |
0656               ((u64)vnic << NIC_QS_ID_SHIFT) |
0657               ((u32)sq_idx << NIC_Q_NUM_SHIFT), tl4);
0658     nic_reg_write(nic, NIC_PF_TL4_0_1023_CFG | (tl4 << 3),
0659               ((u64)vnic << 27) | ((u32)sq_idx << 24) | rr_quantum);
0660 
0661     nic_reg_write(nic, NIC_PF_TL3_0_255_CFG | (tl3 << 3), rr_quantum);
0662 
0663     /* On 88xx channels 0-127 are for BGX0 and
0664      * channels 128-255 for BGX1.
0665      *
0666      * On 81xx/83xx the TL3_CHAN reg should be configured with the channel
0667      * within the LMAC, i.e. 0-7, and not the actual channel number like on 88xx.
0668      */
0669     chan = (lmac * hw->chans_per_lmac) + (bgx * hw->chans_per_bgx);
0670     if (hw->tl1_per_bgx)
0671         nic_reg_write(nic, NIC_PF_TL3_0_255_CHAN | (tl3 << 3), chan);
0672     else
0673         nic_reg_write(nic, NIC_PF_TL3_0_255_CHAN | (tl3 << 3), 0);
0674 
0675     /* Enable backpressure on the channel */
0676     nic_reg_write(nic, NIC_PF_CHAN_0_255_TX_CFG | (chan << 3), 1);
0677 
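    /* Four TL3s aggregate into one TL2 (e.g. 256 TL3s -> 64 TL2s on 88xx) */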
0678     tl2 = tl3 >> 2;
0679     nic_reg_write(nic, NIC_PF_TL3A_0_63_CFG | (tl2 << 3), tl2);
0680     nic_reg_write(nic, NIC_PF_TL2_0_63_CFG | (tl2 << 3), rr_quantum);
0681     /* No priorities as of now */
0682     nic_reg_write(nic, NIC_PF_TL2_0_63_PRI | (tl2 << 3), 0x00);
0683 
0684     /* Unlike 88xx, where TL2s 0-31 transmit to TL1 '0' and the rest to TL1 '1',
0685      * on 81xx/83xx a TL2 needs to be configured to transmit to one of the
0686      * possible LMACs.
0687      *
0688      * This register doesn't exist on 88xx.
0689      */
0690     if (!hw->tl1_per_bgx)
0691         nic_reg_write(nic, NIC_PF_TL2_LMAC | (tl2 << 3),
0692                   lmac + (bgx * MAX_LMAC_PER_BGX));
0693 }
0694 
0695 /* Send primary nicvf pointer to secondary QS's VF */
0696 static void nic_send_pnicvf(struct nicpf *nic, int sqs)
0697 {
0698     union nic_mbx mbx = {};
0699 
0700     mbx.nicvf.msg = NIC_MBOX_MSG_PNICVF_PTR;
0701     mbx.nicvf.nicvf = nic->nicvf[nic->pqs_vf[sqs]];
0702     nic_send_msg_to_vf(nic, sqs, &mbx);
0703 }
0704 
0705 /* Send SQS's nicvf pointer to primary QS's VF */
0706 static void nic_send_snicvf(struct nicpf *nic, struct nicvf_ptr *nicvf)
0707 {
0708     union nic_mbx mbx = {};
0709     int sqs_id = nic->vf_sqs[nicvf->vf_id][nicvf->sqs_id];
0710 
0711     mbx.nicvf.msg = NIC_MBOX_MSG_SNICVF_PTR;
0712     mbx.nicvf.sqs_id = nicvf->sqs_id;
0713     mbx.nicvf.nicvf = nic->nicvf[sqs_id];
0714     nic_send_msg_to_vf(nic, nicvf->vf_id, &mbx);
0715 }
0716 
0717 /* Find next available Qset that can be assigned as a
0718  * secondary Qset to a VF.
0719  */
0720 static int nic_nxt_avail_sqs(struct nicpf *nic)
0721 {
0722     int sqs;
0723 
0724     for (sqs = 0; sqs < nic->num_sqs_en; sqs++) {
0725         if (!nic->sqs_used[sqs])
0726             nic->sqs_used[sqs] = true;
0727         else
0728             continue;
0729         return sqs + nic->num_vf_en;
0730     }
0731     return -1;
0732 }
0733 
0734 /* Allocate additional Qsets for requested VF */
0735 static void nic_alloc_sqs(struct nicpf *nic, struct sqs_alloc *sqs)
0736 {
0737     union nic_mbx mbx = {};
0738     int idx, alloc_qs = 0;
0739     int sqs_id;
0740 
0741     if (!nic->num_sqs_en)
0742         goto send_mbox;
0743 
0744     for (idx = 0; idx < sqs->qs_count; idx++) {
0745         sqs_id = nic_nxt_avail_sqs(nic);
0746         if (sqs_id < 0)
0747             break;
0748         nic->vf_sqs[sqs->vf_id][idx] = sqs_id;
0749         nic->pqs_vf[sqs_id] = sqs->vf_id;
0750         alloc_qs++;
0751     }
0752 
0753 send_mbox:
0754     mbx.sqs_alloc.msg = NIC_MBOX_MSG_ALLOC_SQS;
0755     mbx.sqs_alloc.vf_id = sqs->vf_id;
0756     mbx.sqs_alloc.qs_count = alloc_qs;
0757     nic_send_msg_to_vf(nic, sqs->vf_id, &mbx);
0758 }
0759 
0760 static int nic_config_loopback(struct nicpf *nic, struct set_loopback *lbk)
0761 {
0762     int bgx_idx, lmac_idx;
0763 
0764     if (lbk->vf_id >= nic->num_vf_en)
0765         return -1;
0766 
0767     bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lbk->vf_id]);
0768     lmac_idx = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lbk->vf_id]);
0769 
0770     bgx_lmac_internal_loopback(nic->node, bgx_idx, lmac_idx, lbk->enable);
0771 
0772     /* Enable moving average calculation.
0773      * Keep the LVL/AVG delay at the HW enforced minimum so that not too many
0774      * packets sneak in between average calculations.
0775      */
0776     nic_reg_write(nic, NIC_PF_CQ_AVG_CFG,
0777               (BIT_ULL(20) | 0x2ull << 14 | 0x1));
0778     nic_reg_write(nic, NIC_PF_RRM_AVG_CFG,
0779               (BIT_ULL(20) | 0x3ull << 14 | 0x1));
0780 
0781     return 0;
0782 }
0783 
0784 /* Reset statistics counters */
0785 static int nic_reset_stat_counters(struct nicpf *nic,
0786                    int vf, struct reset_stat_cfg *cfg)
0787 {
0788     int i, stat, qnum;
0789     u64 reg_addr;
0790 
0791     for (i = 0; i < RX_STATS_ENUM_LAST; i++) {
0792         if (cfg->rx_stat_mask & BIT(i)) {
0793             reg_addr = NIC_PF_VNIC_0_127_RX_STAT_0_13 |
0794                    (vf << NIC_QS_ID_SHIFT) |
0795                    (i << 3);
0796             nic_reg_write(nic, reg_addr, 0);
0797         }
0798     }
0799 
0800     for (i = 0; i < TX_STATS_ENUM_LAST; i++) {
0801         if (cfg->tx_stat_mask & BIT(i)) {
0802             reg_addr = NIC_PF_VNIC_0_127_TX_STAT_0_4 |
0803                    (vf << NIC_QS_ID_SHIFT) |
0804                    (i << 3);
0805             nic_reg_write(nic, reg_addr, 0);
0806         }
0807     }
0808 
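    /* The RQ/SQ stat masks cover 8 queues x 2 stat registers each:
     * bit i selects stat (i & 1) of queue (i >> 1).
     */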
0809     for (i = 0; i <= 15; i++) {
0810         qnum = i >> 1;
0811         stat = i & 1 ? 1 : 0;
0812         reg_addr = (vf << NIC_QS_ID_SHIFT) |
0813                (qnum << NIC_Q_NUM_SHIFT) | (stat << 3);
0814         if (cfg->rq_stat_mask & BIT(i)) {
0815             reg_addr |= NIC_PF_QSET_0_127_RQ_0_7_STAT_0_1;
0816             nic_reg_write(nic, reg_addr, 0);
0817         }
0818         if (cfg->sq_stat_mask & BIT(i)) {
0819             reg_addr |= NIC_PF_QSET_0_127_SQ_0_7_STAT_0_1;
0820             nic_reg_write(nic, reg_addr, 0);
0821         }
0822     }
0823 
0824     return 0;
0825 }
0826 
0827 static void nic_enable_tunnel_parsing(struct nicpf *nic, int vf)
0828 {
0829     u64 prot_def = (IPV6_PROT << 32) | (IPV4_PROT << 16) | ET_PROT;
0830     u64 vxlan_prot_def = (IPV6_PROT_DEF << 32) |
0831                   (IPV4_PROT_DEF) << 16 | ET_PROT_DEF;
0832 
0833     /* Configure tunnel parsing parameters */
0834     nic_reg_write(nic, NIC_PF_RX_GENEVE_DEF,
0835               (1ULL << 63 | UDP_GENEVE_PORT_NUM));
0836     nic_reg_write(nic, NIC_PF_RX_GENEVE_PROT_DEF,
0837               ((7ULL << 61) | prot_def));
0838     nic_reg_write(nic, NIC_PF_RX_NVGRE_PROT_DEF,
0839               ((7ULL << 61) | prot_def));
0840     nic_reg_write(nic, NIC_PF_RX_VXLAN_DEF_0_1,
0841               ((1ULL << 63) | UDP_VXLAN_PORT_NUM));
0842     nic_reg_write(nic, NIC_PF_RX_VXLAN_PROT_DEF,
0843               ((0xfULL << 60) | vxlan_prot_def));
0844 }
0845 
0846 static void nic_enable_vf(struct nicpf *nic, int vf, bool enable)
0847 {
0848     int bgx, lmac;
0849 
0850     nic->vf_enabled[vf] = enable;
0851 
0852     if (vf >= nic->num_vf_en)
0853         return;
0854 
0855     bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
0856     lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
0857 
0858     bgx_lmac_rx_tx_enable(nic->node, bgx, lmac, enable);
0859 }
0860 
0861 static void nic_pause_frame(struct nicpf *nic, int vf, struct pfc *cfg)
0862 {
0863     int bgx, lmac;
0864     struct pfc pfc;
0865     union nic_mbx mbx = {};
0866 
0867     if (vf >= nic->num_vf_en)
0868         return;
0869     bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
0870     lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
0871 
0872     if (cfg->get) {
0873         bgx_lmac_get_pfc(nic->node, bgx, lmac, &pfc);
0874         mbx.pfc.msg = NIC_MBOX_MSG_PFC;
0875         mbx.pfc.autoneg = pfc.autoneg;
0876         mbx.pfc.fc_rx = pfc.fc_rx;
0877         mbx.pfc.fc_tx = pfc.fc_tx;
0878         nic_send_msg_to_vf(nic, vf, &mbx);
0879     } else {
0880         bgx_lmac_set_pfc(nic->node, bgx, lmac, cfg);
0881         nic_mbx_send_ack(nic, vf);
0882     }
0883 }
0884 
0885 /* Enable or disable HW timestamping by BGX for pkts received on an LMAC */
0886 static void nic_config_timestamp(struct nicpf *nic, int vf, struct set_ptp *ptp)
0887 {
0888     struct pkind_cfg *pkind;
0889     u8 lmac, bgx_idx;
0890     u64 pkind_val, pkind_idx;
0891 
0892     if (vf >= nic->num_vf_en)
0893         return;
0894 
0895     bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
0896     lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
0897 
0898     pkind_idx = lmac + bgx_idx * MAX_LMAC_PER_BGX;
0899     pkind_val = nic_reg_read(nic, NIC_PF_PKIND_0_15_CFG | (pkind_idx << 3));
0900     pkind = (struct pkind_cfg *)&pkind_val;
0901 
0902     if (ptp->enable && !pkind->hdr_sl) {
0903         /* Skip length to exclude the 8 byte timestamp while parsing the pkt.
0904          * If not configured, it will result in L2 errors.
0905          */
0906         pkind->hdr_sl = 4;
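        /* hdr_sl is in 2-byte units, so 4 skips the 8 byte timestamp
         * (see the maxlen adjustment below).
         */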
0907         /* Adjust max packet length allowed */
0908         pkind->maxlen += (pkind->hdr_sl * 2);
0909         bgx_config_timestamping(nic->node, bgx_idx, lmac, true);
0910         nic_reg_write(nic, NIC_PF_RX_ETYPE_0_7 | (1 << 3),
0911                   (ETYPE_ALG_ENDPARSE << 16) | ETH_P_1588);
0912     } else if (!ptp->enable && pkind->hdr_sl) {
0913         pkind->maxlen -= (pkind->hdr_sl * 2);
0914         pkind->hdr_sl = 0;
0915         bgx_config_timestamping(nic->node, bgx_idx, lmac, false);
0916         nic_reg_write(nic, NIC_PF_RX_ETYPE_0_7 | (1 << 3),
0917                   (ETYPE_ALG_SKIP << 16) | ETH_P_8021Q);
0918     }
0919 
0920     nic_reg_write(nic, NIC_PF_PKIND_0_15_CFG | (pkind_idx << 3), pkind_val);
0921 }
0922 
0923 /* Get BGX LMAC link status and update the corresponding VF
0924  * if there is a change. Valid only if the internal L2 switch
0925  * is not present; otherwise the VF link is always treated as up.
0926  */
0927 static void nic_link_status_get(struct nicpf *nic, u8 vf)
0928 {
0929     union nic_mbx mbx = {};
0930     struct bgx_link_status link;
0931     u8 bgx, lmac;
0932 
0933     mbx.link_status.msg = NIC_MBOX_MSG_BGX_LINK_CHANGE;
0934 
0935     /* Get BGX, LMAC indices for the VF */
0936     bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
0937     lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
0938 
0939     /* Get interface link status */
0940     bgx_get_lmac_link_state(nic->node, bgx, lmac, &link);
0941 
0942     /* Send a mbox message to VF with current link status */
0943     mbx.link_status.link_up = link.link_up;
0944     mbx.link_status.duplex = link.duplex;
0945     mbx.link_status.speed = link.speed;
0946     mbx.link_status.mac_type = link.mac_type;
0947 
0948     /* reply with link status */
0949     nic_send_msg_to_vf(nic, vf, &mbx);
0950 }
0951 
0952 /* Interrupt handler to handle mailbox messages from VFs */
0953 static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
0954 {
0955     union nic_mbx mbx = {};
0956     u64 *mbx_data;
0957     u64 mbx_addr;
0958     u64 reg_addr;
0959     u64 cfg;
0960     int bgx, lmac;
0961     int i;
0962     int ret = 0;
0963 
0964     mbx_addr = nic_get_mbx_addr(vf);
0965     mbx_data = (u64 *)&mbx;
0966 
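    /* Copy the VF's message (at most 16 bytes, enforced by the
     * BUILD_BUG_ON() in nic_probe()) out of its mailbox registers.
     */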
0967     for (i = 0; i < NIC_PF_VF_MAILBOX_SIZE; i++) {
0968         *mbx_data = nic_reg_read(nic, mbx_addr);
0969         mbx_data++;
0970         mbx_addr += sizeof(u64);
0971     }
0972 
0973     dev_dbg(&nic->pdev->dev, "%s: Mailbox msg 0x%02x from VF%d\n",
0974         __func__, mbx.msg.msg, vf);
0975     switch (mbx.msg.msg) {
0976     case NIC_MBOX_MSG_READY:
0977         nic_mbx_send_ready(nic, vf);
0978         return;
0979     case NIC_MBOX_MSG_QS_CFG:
0980         reg_addr = NIC_PF_QSET_0_127_CFG |
0981                (mbx.qs.num << NIC_QS_ID_SHIFT);
0982         cfg = mbx.qs.cfg;
0983         /* Check if it's a secondary Qset */
0984         if (vf >= nic->num_vf_en) {
0985             cfg = cfg & (~0x7FULL);
0986             /* Assign this Qset to primary Qset's VF */
0987             cfg |= nic->pqs_vf[vf];
0988         }
0989         nic_reg_write(nic, reg_addr, cfg);
0990         break;
0991     case NIC_MBOX_MSG_RQ_CFG:
0992         reg_addr = NIC_PF_QSET_0_127_RQ_0_7_CFG |
0993                (mbx.rq.qs_num << NIC_QS_ID_SHIFT) |
0994                (mbx.rq.rq_num << NIC_Q_NUM_SHIFT);
0995         nic_reg_write(nic, reg_addr, mbx.rq.cfg);
0996         /* Enable CQE_RX2_S extension in CQE_RX descriptor.
0997          * This gets appended by default on 81xx/83xx chips;
0998          * for consistency, enable the same on 88xx pass2
0999          * where this was introduced.
1000          */
1001         if (pass2_silicon(nic->pdev))
1002             nic_reg_write(nic, NIC_PF_RX_CFG, 0x01);
1003         if (!pass1_silicon(nic->pdev))
1004             nic_enable_tunnel_parsing(nic, vf);
1005         break;
1006     case NIC_MBOX_MSG_RQ_BP_CFG:
1007         reg_addr = NIC_PF_QSET_0_127_RQ_0_7_BP_CFG |
1008                (mbx.rq.qs_num << NIC_QS_ID_SHIFT) |
1009                (mbx.rq.rq_num << NIC_Q_NUM_SHIFT);
1010         nic_reg_write(nic, reg_addr, mbx.rq.cfg);
1011         break;
1012     case NIC_MBOX_MSG_RQ_SW_SYNC:
1013         ret = nic_rcv_queue_sw_sync(nic);
1014         break;
1015     case NIC_MBOX_MSG_RQ_DROP_CFG:
1016         reg_addr = NIC_PF_QSET_0_127_RQ_0_7_DROP_CFG |
1017                (mbx.rq.qs_num << NIC_QS_ID_SHIFT) |
1018                (mbx.rq.rq_num << NIC_Q_NUM_SHIFT);
1019         nic_reg_write(nic, reg_addr, mbx.rq.cfg);
1020         break;
1021     case NIC_MBOX_MSG_SQ_CFG:
1022         reg_addr = NIC_PF_QSET_0_127_SQ_0_7_CFG |
1023                (mbx.sq.qs_num << NIC_QS_ID_SHIFT) |
1024                (mbx.sq.sq_num << NIC_Q_NUM_SHIFT);
1025         nic_reg_write(nic, reg_addr, mbx.sq.cfg);
1026         nic_tx_channel_cfg(nic, mbx.qs.num, &mbx.sq);
1027         break;
1028     case NIC_MBOX_MSG_SET_MAC:
1029         if (vf >= nic->num_vf_en) {
1030             ret = -1; /* NACK */
1031             break;
1032         }
1033         lmac = mbx.mac.vf_id;
1034         bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lmac]);
1035         lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lmac]);
1036         bgx_set_lmac_mac(nic->node, bgx, lmac, mbx.mac.mac_addr);
1037         break;
1038     case NIC_MBOX_MSG_SET_MAX_FRS:
1039         ret = nic_update_hw_frs(nic, mbx.frs.max_frs,
1040                     mbx.frs.vf_id);
1041         break;
1042     case NIC_MBOX_MSG_CPI_CFG:
1043         nic_config_cpi(nic, &mbx.cpi_cfg);
1044         break;
1045     case NIC_MBOX_MSG_RSS_SIZE:
1046         nic_send_rss_size(nic, vf);
1047         return;
1048     case NIC_MBOX_MSG_RSS_CFG:
1049     case NIC_MBOX_MSG_RSS_CFG_CONT:
1050         nic_config_rss(nic, &mbx.rss_cfg);
1051         break;
1052     case NIC_MBOX_MSG_CFG_DONE:
1053         /* Last message of VF config msg sequence */
1054         nic_enable_vf(nic, vf, true);
1055         break;
1056     case NIC_MBOX_MSG_SHUTDOWN:
1057         /* First msg in VF teardown sequence */
1058         if (vf >= nic->num_vf_en)
1059             nic->sqs_used[vf - nic->num_vf_en] = false;
1060         nic->pqs_vf[vf] = 0;
1061         nic_enable_vf(nic, vf, false);
1062         break;
1063     case NIC_MBOX_MSG_ALLOC_SQS:
1064         nic_alloc_sqs(nic, &mbx.sqs_alloc);
1065         return;
1066     case NIC_MBOX_MSG_NICVF_PTR:
1067         nic->nicvf[vf] = mbx.nicvf.nicvf;
1068         break;
1069     case NIC_MBOX_MSG_PNICVF_PTR:
1070         nic_send_pnicvf(nic, vf);
1071         return;
1072     case NIC_MBOX_MSG_SNICVF_PTR:
1073         nic_send_snicvf(nic, &mbx.nicvf);
1074         return;
1075     case NIC_MBOX_MSG_BGX_STATS:
1076         nic_get_bgx_stats(nic, &mbx.bgx_stats);
1077         return;
1078     case NIC_MBOX_MSG_LOOPBACK:
1079         ret = nic_config_loopback(nic, &mbx.lbk);
1080         break;
1081     case NIC_MBOX_MSG_RESET_STAT_COUNTER:
1082         ret = nic_reset_stat_counters(nic, vf, &mbx.reset_stat);
1083         break;
1084     case NIC_MBOX_MSG_PFC:
1085         nic_pause_frame(nic, vf, &mbx.pfc);
1086         return;
1087     case NIC_MBOX_MSG_PTP_CFG:
1088         nic_config_timestamp(nic, vf, &mbx.ptp);
1089         break;
1090     case NIC_MBOX_MSG_RESET_XCAST:
1091         if (vf >= nic->num_vf_en) {
1092             ret = -1; /* NACK */
1093             break;
1094         }
1095         bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
1096         lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
1097         bgx_reset_xcast_mode(nic->node, bgx, lmac,
1098                      vf < NIC_VF_PER_MBX_REG ? vf :
1099                      vf - NIC_VF_PER_MBX_REG);
1100         break;
1101 
1102     case NIC_MBOX_MSG_ADD_MCAST:
1103         if (vf >= nic->num_vf_en) {
1104             ret = -1; /* NACK */
1105             break;
1106         }
1107         bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
1108         lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
1109         bgx_set_dmac_cam_filter(nic->node, bgx, lmac,
1110                     mbx.xcast.mac,
1111                     vf < NIC_VF_PER_MBX_REG ? vf :
1112                     vf - NIC_VF_PER_MBX_REG);
1113         break;
1114 
1115     case NIC_MBOX_MSG_SET_XCAST:
1116         if (vf >= nic->num_vf_en) {
1117             ret = -1; /* NACK */
1118             break;
1119         }
1120         bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
1121         lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
1122         bgx_set_xcast_mode(nic->node, bgx, lmac, mbx.xcast.mode);
1123         break;
1124     case NIC_MBOX_MSG_BGX_LINK_CHANGE:
1125         if (vf >= nic->num_vf_en) {
1126             ret = -1; /* NACK */
1127             break;
1128         }
1129         nic_link_status_get(nic, vf);
1130         return;
1131     default:
1132         dev_err(&nic->pdev->dev,
1133             "Invalid msg from VF%d, msg 0x%x\n", vf, mbx.msg.msg);
1134         break;
1135     }
1136 
1137     if (!ret) {
1138         nic_mbx_send_ack(nic, vf);
1139     } else if (mbx.msg.msg != NIC_MBOX_MSG_READY) {
1140         dev_err(&nic->pdev->dev, "NACK for MBOX 0x%02x from VF %d\n",
1141             mbx.msg.msg, vf);
1142         nic_mbx_send_nack(nic, vf);
1143     }
1144 }
1145 
1146 static irqreturn_t nic_mbx_intr_handler(int irq, void *nic_irq)
1147 {
1148     struct nicpf *nic = (struct nicpf *)nic_irq;
1149     int mbx;
1150     u64 intr;
1151     u8  vf;
1152 
1153     if (irq == nic->irq_allocated[NIC_PF_INTR_ID_MBOX0])
1154         mbx = 0;
1155     else
1156         mbx = 1;
1157 
1158     intr = nic_reg_read(nic, NIC_PF_MAILBOX_INT + (mbx << 3));
1159     dev_dbg(&nic->pdev->dev, "PF interrupt Mbox%d 0x%llx\n", mbx, intr);
1160     for (vf = 0; vf < NIC_VF_PER_MBX_REG; vf++) {
1161         if (intr & (1ULL << vf)) {
1162             dev_dbg(&nic->pdev->dev, "Intr from VF %d\n",
1163                 vf + (mbx * NIC_VF_PER_MBX_REG));
1164 
1165             nic_handle_mbx_intr(nic, vf +
1166                         (mbx * NIC_VF_PER_MBX_REG));
1167             nic_clear_mbx_intr(nic, vf, mbx);
1168         }
1169     }
1170     return IRQ_HANDLED;
1171 }
1172 
1173 static void nic_free_all_interrupts(struct nicpf *nic)
1174 {
1175     int irq;
1176 
1177     for (irq = 0; irq < nic->num_vec; irq++) {
1178         if (nic->irq_allocated[irq])
1179             free_irq(nic->irq_allocated[irq], nic);
1180         nic->irq_allocated[irq] = 0;
1181     }
1182 }
1183 
1184 static int nic_register_interrupts(struct nicpf *nic)
1185 {
1186     int i, ret, irq;
1187     nic->num_vec = pci_msix_vec_count(nic->pdev);
1188 
1189     /* Enable MSI-X */
1190     ret = pci_alloc_irq_vectors(nic->pdev, nic->num_vec, nic->num_vec,
1191                     PCI_IRQ_MSIX);
1192     if (ret < 0) {
1193         dev_err(&nic->pdev->dev,
1194             "Request for #%d msix vectors failed, returned %d\n",
1195                nic->num_vec, ret);
1196         return ret;
1197     }
1198 
1199     /* Register mailbox interrupt handler */
1200     for (i = NIC_PF_INTR_ID_MBOX0; i < nic->num_vec; i++) {
1201         sprintf(nic->irq_name[i],
1202             "NICPF Mbox%d", (i - NIC_PF_INTR_ID_MBOX0));
1203 
1204         irq = pci_irq_vector(nic->pdev, i);
1205         ret = request_irq(irq, nic_mbx_intr_handler, 0,
1206                   nic->irq_name[i], nic);
1207         if (ret)
1208             goto fail;
1209 
1210         nic->irq_allocated[i] = irq;
1211     }
1212 
1213     /* Enable mailbox interrupt */
1214     nic_enable_mbx_intr(nic);
1215     return 0;
1216 
1217 fail:
1218     dev_err(&nic->pdev->dev, "Request irq failed\n");
1219     nic_free_all_interrupts(nic);
1220     pci_free_irq_vectors(nic->pdev);
1221     nic->num_vec = 0;
1222     return ret;
1223 }
1224 
1225 static void nic_unregister_interrupts(struct nicpf *nic)
1226 {
1227     nic_free_all_interrupts(nic);
1228     pci_free_irq_vectors(nic->pdev);
1229     nic->num_vec = 0;
1230 }
1231 
1232 static int nic_num_sqs_en(struct nicpf *nic, int vf_en)
1233 {
1234     int pos, sqs_per_vf = MAX_SQS_PER_VF_SINGLE_NODE;
1235     u16 total_vf;
1236 
1237     /* Secondary Qsets are needed only if CPU count is
1238      * more than MAX_QUEUES_PER_QSET.
1239      */
1240     if (num_online_cpus() <= MAX_QUEUES_PER_QSET)
1241         return 0;
1242 
1243     /* Check if it's a multi-node environment */
1244     if (nr_node_ids > 1)
1245         sqs_per_vf = MAX_SQS_PER_VF;
1246 
1247     pos = pci_find_ext_capability(nic->pdev, PCI_EXT_CAP_ID_SRIOV);
1248     pci_read_config_word(nic->pdev, (pos + PCI_SRIOV_TOTAL_VF), &total_vf);
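    /* Use the leftover VFs (beyond the primary ones) as secondary Qsets,
     * capped at sqs_per_vf per primary VF.
     */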
1249     return min(total_vf - vf_en, vf_en * sqs_per_vf);
1250 }
1251 
1252 static int nic_sriov_init(struct pci_dev *pdev, struct nicpf *nic)
1253 {
1254     int pos = 0;
1255     int vf_en;
1256     int err;
1257     u16 total_vf_cnt;
1258 
1259     pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
1260     if (!pos) {
1261         dev_err(&pdev->dev, "SRIOV capability is not found in PCIe config space\n");
1262         return -ENODEV;
1263     }
1264 
1265     pci_read_config_word(pdev, (pos + PCI_SRIOV_TOTAL_VF), &total_vf_cnt);
1266     if (total_vf_cnt < nic->num_vf_en)
1267         nic->num_vf_en = total_vf_cnt;
1268 
1269     if (!total_vf_cnt)
1270         return 0;
1271 
1272     vf_en = nic->num_vf_en;
1273     nic->num_sqs_en = nic_num_sqs_en(nic, nic->num_vf_en);
1274     vf_en += nic->num_sqs_en;
1275 
1276     err = pci_enable_sriov(pdev, vf_en);
1277     if (err) {
1278         dev_err(&pdev->dev, "SRIOV enable failed, num VF is %d\n",
1279             vf_en);
1280         nic->num_vf_en = 0;
1281         return err;
1282     }
1283 
1284     dev_info(&pdev->dev, "SRIOV enabled, number of VF available %d\n",
1285          vf_en);
1286 
1287     nic->flags |= NIC_SRIOV_ENABLED;
1288     return 0;
1289 }
1290 
1291 static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
1292 {
1293     struct device *dev = &pdev->dev;
1294     struct nicpf *nic;
1295     u8     max_lmac;
1296     int    err;
1297 
1298     BUILD_BUG_ON(sizeof(union nic_mbx) > 16);
1299 
1300     nic = devm_kzalloc(dev, sizeof(*nic), GFP_KERNEL);
1301     if (!nic)
1302         return -ENOMEM;
1303 
1304     nic->hw = devm_kzalloc(dev, sizeof(struct hw_info), GFP_KERNEL);
1305     if (!nic->hw)
1306         return -ENOMEM;
1307 
1308     pci_set_drvdata(pdev, nic);
1309 
1310     nic->pdev = pdev;
1311 
1312     err = pci_enable_device(pdev);
1313     if (err) {
1314         pci_set_drvdata(pdev, NULL);
1315         return dev_err_probe(dev, err, "Failed to enable PCI device\n");
1316     }
1317 
1318     err = pci_request_regions(pdev, DRV_NAME);
1319     if (err) {
1320         dev_err(dev, "PCI request regions failed 0x%x\n", err);
1321         goto err_disable_device;
1322     }
1323 
1324     err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
1325     if (err) {
1326         dev_err(dev, "Unable to get usable DMA configuration\n");
1327         goto err_release_regions;
1328     }
1329 
1330     /* Map the PF's configuration registers */
1331     nic->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0);
1332     if (!nic->reg_base) {
1333         dev_err(dev, "Cannot map config register space, aborting\n");
1334         err = -ENOMEM;
1335         goto err_release_regions;
1336     }
1337 
1338     nic->node = nic_get_node_id(pdev);
1339 
1340     /* Get HW capability info */
1341     nic_get_hw_info(nic);
1342 
1343     /* Allocate memory for LMAC tracking elements */
1344     err = -ENOMEM;
1345     max_lmac = nic->hw->bgx_cnt * MAX_LMAC_PER_BGX;
1346 
1347     nic->vf_lmac_map = devm_kmalloc_array(dev, max_lmac, sizeof(u8),
1348                           GFP_KERNEL);
1349     if (!nic->vf_lmac_map)
1350         goto err_release_regions;
1351 
1352     /* Initialize hardware */
1353     nic_init_hw(nic);
1354 
1355     nic_set_lmac_vf_mapping(nic);
1356 
1357     /* Register interrupts */
1358     err = nic_register_interrupts(nic);
1359     if (err)
1360         goto err_release_regions;
1361 
1362     /* Configure SRIOV */
1363     err = nic_sriov_init(pdev, nic);
1364     if (err)
1365         goto err_unregister_interrupts;
1366 
1367     return 0;
1368 
1369 err_unregister_interrupts:
1370     nic_unregister_interrupts(nic);
1371 err_release_regions:
1372     pci_release_regions(pdev);
1373 err_disable_device:
1374     pci_disable_device(pdev);
1375     pci_set_drvdata(pdev, NULL);
1376     return err;
1377 }
1378 
1379 static void nic_remove(struct pci_dev *pdev)
1380 {
1381     struct nicpf *nic = pci_get_drvdata(pdev);
1382 
1383     if (!nic)
1384         return;
1385 
1386     if (nic->flags & NIC_SRIOV_ENABLED)
1387         pci_disable_sriov(pdev);
1388 
1389     nic_unregister_interrupts(nic);
1390     pci_release_regions(pdev);
1391 
1392     pci_disable_device(pdev);
1393     pci_set_drvdata(pdev, NULL);
1394 }
1395 
1396 static struct pci_driver nic_driver = {
1397     .name = DRV_NAME,
1398     .id_table = nic_id_table,
1399     .probe = nic_probe,
1400     .remove = nic_remove,
1401 };
1402 
1403 static int __init nic_init_module(void)
1404 {
1405     pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION);
1406 
1407     return pci_register_driver(&nic_driver);
1408 }
1409 
1410 static void __exit nic_cleanup_module(void)
1411 {
1412     pci_unregister_driver(&nic_driver);
1413 }
1414 
1415 module_init(nic_init_module);
1416 module_exit(nic_cleanup_module);