intel/fm10k/fm10k_main.c

0001 // SPDX-License-Identifier: GPL-2.0
0002 /* Copyright(c) 2013 - 2019 Intel Corporation. */
0003
0004 #include <linux/types.h>
0005 #include <linux/module.h>
0006 #include <net/ipv6.h>
0007 #include <net/ip.h>
0008 #include <net/tcp.h>
0009 #include <linux/if_macvlan.h>
0010 #include <linux/prefetch.h>
0011
0012 #include "fm10k.h"
0013
0014 #define DRV_SUMMARY "Intel(R) Ethernet Switch Host Interface Driver"
0015 char fm10k_driver_name[] = "fm10k";
0016 static const char fm10k_driver_string[] = DRV_SUMMARY;
0017 static const char fm10k_copyright[] =
0018     "Copyright(c) 2013 - 2019 Intel Corporation.";
0019
0020 MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
0021 MODULE_DESCRIPTION(DRV_SUMMARY);
0022 MODULE_LICENSE("GPL v2");
0023
0024 /* single workqueue for entire fm10k driver */
0025 struct workqueue_struct *fm10k_workqueue;
0026
0027 /**
0028  * fm10k_init_module - Driver Registration Routine
0029  *
0030  * fm10k_init_module is the first routine called when the driver is
0031  * loaded.  All it does is register with the PCI subsystem.
0032  **/
0033 static int __init fm10k_init_module(void)
0034 {
0035     pr_info("%s\n", fm10k_driver_string);
0036     pr_info("%s\n", fm10k_copyright);
0037
0038     /* create driver workqueue */
0039     fm10k_workqueue = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0,
0040                       fm10k_driver_name);
0041     if (!fm10k_workqueue)
0042         return -ENOMEM;
0043
0044     fm10k_dbg_init();
0045
0046     return fm10k_register_pci_driver();
0047 }
0048 module_init(fm10k_init_module);
0049
0050 /**
0051  * fm10k_exit_module - Driver Exit Cleanup Routine
0052  *
0053  * fm10k_exit_module is called just before the driver is removed
0054  * from memory.
0055  **/
0056 static void __exit fm10k_exit_module(void)
0057 {
0058     fm10k_unregister_pci_driver();
0059
0060     fm10k_dbg_exit();
0061
0062     /* destroy driver workqueue */
0063     destroy_workqueue(fm10k_workqueue);
0064 }
0065 module_exit(fm10k_exit_module);
0066
0067 static bool fm10k_alloc_mapped_page(struct fm10k_ring *rx_ring,
0068                     struct fm10k_rx_buffer *bi)
0069 {
0070     struct page *page = bi->page;
0071     dma_addr_t dma;
0072
0073     /* Only page will be NULL if buffer was consumed */
0074     if (likely(page))
0075         return true;
0076
0077     /* alloc new page for storage */
0078     page = dev_alloc_page();
0079     if (unlikely(!page)) {
0080         rx_ring->rx_stats.alloc_failed++;
0081         return false;
0082     }
0083
0084     /* map page for use */
0085     dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
0086
0087     /* if mapping failed free memory back to system since
0088      * there isn't much point in holding memory we can't use
0089      */
0090     if (dma_mapping_error(rx_ring->dev, dma)) {
0091         __free_page(page);
0092
0093         rx_ring->rx_stats.alloc_failed++;
0094         return false;
0095     }
0096
0097     bi->dma = dma;
0098     bi->page = page;
0099     bi->page_offset = 0;
0100
0101     return true;
0102 }
0103
0104 /**
0105  * fm10k_alloc_rx_buffers - Replace used receive buffers
0106  * @rx_ring: ring to place buffers on
0107  * @cleaned_count: number of buffers to replace
0108  **/
0109 void fm10k_alloc_rx_buffers(struct fm10k_ring *rx_ring, u16 cleaned_count)
0110 {
0111     union fm10k_rx_desc *rx_desc;
0112     struct fm10k_rx_buffer *bi;
0113     u16 i = rx_ring->next_to_use;
0114
0115     /* nothing to do */
0116     if (!cleaned_count)
0117         return;
0118
0119     rx_desc = FM10K_RX_DESC(rx_ring, i);
0120     bi = &rx_ring->rx_buffer[i];
0121     i -= rx_ring->count;
0122
0123     do {
0124         if (!fm10k_alloc_mapped_page(rx_ring, bi))
0125             break;
0126
0127         /* Refresh the desc even if buffer_addrs didn't change
0128          * because each write-back erases this info.
0129          */
0130         rx_desc->q.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
0131
0132         rx_desc++;
0133         bi++;
0134         i++;
0135         if (unlikely(!i)) {
0136             rx_desc = FM10K_RX_DESC(rx_ring, 0);
0137             bi = rx_ring->rx_buffer;
0138             i -= rx_ring->count;
0139         }
0140
0141         /* clear the status bits for the next_to_use descriptor */
0142         rx_desc->d.staterr = 0;
0143
0144         cleaned_count--;
0145     } while (cleaned_count);
0146
0147     i += rx_ring->count;
0148
0149     if (rx_ring->next_to_use != i) {
0150         /* record the next descriptor to use */
0151         rx_ring->next_to_use = i;
0152
0153         /* update next to alloc since we have filled the ring */
0154         rx_ring->next_to_alloc = i;
0155
0156         /* Force memory writes to complete before letting h/w
0157          * know there are new descriptors to fetch.  (Only
0158          * applicable for weak-ordered memory model archs,
0159          * such as IA-64).
0160          */
0161         wmb();
0162
0163         /* notify hardware of new descriptors */
0164         writel(i, rx_ring->tail);
0165     }
0166 }
0167
0168 /**
0169  * fm10k_reuse_rx_page - page flip buffer and store it back on the ring
0170  * @rx_ring: rx descriptor ring to store buffers on
0171  * @old_buff: donor buffer to have page reused
0172  *
0173  * Synchronizes page for reuse by the interface
0174  **/
0175 static void fm10k_reuse_rx_page(struct fm10k_ring *rx_ring,
0176                 struct fm10k_rx_buffer *old_buff)
0177 {
0178     struct fm10k_rx_buffer *new_buff;
0179     u16 nta = rx_ring->next_to_alloc;
0180
0181     new_buff = &rx_ring->rx_buffer[nta];
0182
0183     /* update, and store next to alloc */
0184     nta++;
0185     rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
0186
0187     /* transfer page from old buffer to new buffer */
0188     *new_buff = *old_buff;
0189
0190     /* sync the buffer for use by the device */
0191     dma_sync_single_range_for_device(rx_ring->dev, old_buff->dma,
0192                      old_buff->page_offset,
0193                      FM10K_RX_BUFSZ,
0194                      DMA_FROM_DEVICE);
0195 }
0196
0197 static bool fm10k_can_reuse_rx_page(struct fm10k_rx_buffer *rx_buffer,
0198                     struct page *page,
0199                     unsigned int __maybe_unused truesize)
0200 {
0201     /* avoid re-using remote and pfmemalloc pages */
0202     if (!dev_page_is_reusable(page))
0203         return false;
0204
0205 #if (PAGE_SIZE < 8192)
0206     /* if we are only owner of page we can reuse it */
0207     if (unlikely(page_count(page) != 1))
0208         return false;
0209
0210     /* flip page offset to other buffer */
0211     rx_buffer->page_offset ^= FM10K_RX_BUFSZ;
0212 #else
0213     /* move offset up to the next cache line */
0214     rx_buffer->page_offset += truesize;
0215
0216     if (rx_buffer->page_offset > (PAGE_SIZE - FM10K_RX_BUFSZ))
0217         return false;
0218 #endif
0219
0220     /* Even if we own the page, we are not allowed to use atomic_set()
0221      * This would break get_page_unless_zero() users.
0222      */
0223     page_ref_inc(page);
0224
0225     return true;
0226 }
0227
0228 /**
0229  * fm10k_add_rx_frag - Add contents of Rx buffer to sk_buff
0230  * @rx_buffer: buffer containing page to add
0231  * @size: packet size from rx_desc
0232  * @rx_desc: descriptor containing length of buffer written by hardware
0233  * @skb: sk_buff to place the data into
0234  *
0235  * This function will add the data contained in rx_buffer->page to the skb.
0236  * This is done either through a direct copy if the data in the buffer is
0237  * less than the skb header size, otherwise it will just attach the page as
0238  * a frag to the skb.
0239  *
0240  * The function will then update the page offset if necessary and return
0241  * true if the buffer can be reused by the interface.
0242  **/
0243 static bool fm10k_add_rx_frag(struct fm10k_rx_buffer *rx_buffer,
0244                   unsigned int size,
0245                   union fm10k_rx_desc *rx_desc,
0246                   struct sk_buff *skb)
0247 {
0248     struct page *page = rx_buffer->page;
0249     unsigned char *va = page_address(page) + rx_buffer->page_offset;
0250 #if (PAGE_SIZE < 8192)
0251     unsigned int truesize = FM10K_RX_BUFSZ;
0252 #else
0253     unsigned int truesize = ALIGN(size, 512);
0254 #endif
0255     unsigned int pull_len;
0256
0257     if (unlikely(skb_is_nonlinear(skb)))
0258         goto add_tail_frag;
0259
0260     if (likely(size <= FM10K_RX_HDR_LEN)) {
0261         memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
0262
0263         /* page is reusable, we can reuse buffer as-is */
0264         if (dev_page_is_reusable(page))
0265             return true;
0266
0267         /* this page cannot be reused so discard it */
0268         __free_page(page);
0269         return false;
0270     }
0271
0272     /* we need the header to contain the greater of either ETH_HLEN or
0273      * 60 bytes if the skb->len is less than 60 for skb_pad.
0274      */
0275     pull_len = eth_get_headlen(skb->dev, va, FM10K_RX_HDR_LEN);
0276
0277     /* align pull length to size of long to optimize memcpy performance */
0278     memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long)));
0279
0280     /* update all of the pointers */
0281     va += pull_len;
0282     size -= pull_len;
0283
0284 add_tail_frag:
0285     skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
0286             (unsigned long)va & ~PAGE_MASK, size, truesize);
0287
0288     return fm10k_can_reuse_rx_page(rx_buffer, page, truesize);
0289 }
0290
0291 static struct sk_buff *fm10k_fetch_rx_buffer(struct fm10k_ring *rx_ring,
0292                          union fm10k_rx_desc *rx_desc,
0293                          struct sk_buff *skb)
0294 {
0295     unsigned int size = le16_to_cpu(rx_desc->w.length);
0296     struct fm10k_rx_buffer *rx_buffer;
0297     struct page *page;
0298
0299     rx_buffer = &rx_ring->rx_buffer[rx_ring->next_to_clean];
0300     page = rx_buffer->page;
0301     prefetchw(page);
0302
0303     if (likely(!skb)) {
0304         void *page_addr = page_address(page) +
0305                   rx_buffer->page_offset;
0306
0307         /* prefetch first cache line of first page */
0308         net_prefetch(page_addr);
0309
0310         /* allocate a skb to store the frags */
0311         skb = napi_alloc_skb(&rx_ring->q_vector->napi,
0312                      FM10K_RX_HDR_LEN);
0313         if (unlikely(!skb)) {
0314             rx_ring->rx_stats.alloc_failed++;
0315             return NULL;
0316         }
0317
0318         /* we will be copying header into skb->data in
0319          * pskb_may_pull so it is in our interest to prefetch
0320          * it now to avoid a possible cache miss
0321          */
0322         prefetchw(skb->data);
0323     }
0324
0325     /* we are reusing so sync this buffer for CPU use */
0326     dma_sync_single_range_for_cpu(rx_ring->dev,
0327                       rx_buffer->dma,
0328                       rx_buffer->page_offset,
0329                       size,
0330                       DMA_FROM_DEVICE);
0331
0332     /* pull page into skb */
0333     if (fm10k_add_rx_frag(rx_buffer, size, rx_desc, skb)) {
0334         /* hand second half of page back to the ring */
0335         fm10k_reuse_rx_page(rx_ring, rx_buffer);
0336     } else {
0337         /* we are not reusing the buffer so unmap it */
0338         dma_unmap_page(rx_ring->dev, rx_buffer->dma,
0339                    PAGE_SIZE, DMA_FROM_DEVICE);
0340     }
0341
0342     /* clear contents of rx_buffer */
0343     rx_buffer->page = NULL;
0344
0345     return skb;
0346 }
0347
0348 static inline void fm10k_rx_checksum(struct fm10k_ring *ring,
0349                      union fm10k_rx_desc *rx_desc,
0350                      struct sk_buff *skb)
0351 {
0352     skb_checksum_none_assert(skb);
0353
0354     /* Rx checksum disabled via ethtool */
0355     if (!(ring->netdev->features & NETIF_F_RXCSUM))
0356         return;
0357
0358     /* TCP/UDP checksum error bit is set */
0359     if (fm10k_test_staterr(rx_desc,
0360                    FM10K_RXD_STATUS_L4E |
0361                    FM10K_RXD_STATUS_L4E2 |
0362                    FM10K_RXD_STATUS_IPE |
0363                    FM10K_RXD_STATUS_IPE2)) {
0364         ring->rx_stats.csum_err++;
0365         return;
0366     }
0367
0368     /* It must be a TCP or UDP packet with a valid checksum */
0369     if (fm10k_test_staterr(rx_desc, FM10K_RXD_STATUS_L4CS2))
0370         skb->encapsulation = true;
0371     else if (!fm10k_test_staterr(rx_desc, FM10K_RXD_STATUS_L4CS))
0372         return;
0373
0374     skb->ip_summed = CHECKSUM_UNNECESSARY;
0375
0376     ring->rx_stats.csum_good++;
0377 }
0378
0379 #define FM10K_RSS_L4_TYPES_MASK \
0380     (BIT(FM10K_RSSTYPE_IPV4_TCP) | \
0381      BIT(FM10K_RSSTYPE_IPV4_UDP) | \
0382      BIT(FM10K_RSSTYPE_IPV6_TCP) | \
0383      BIT(FM10K_RSSTYPE_IPV6_UDP))
0384
0385 static inline void fm10k_rx_hash(struct fm10k_ring *ring,
0386                  union fm10k_rx_desc *rx_desc,
0387                  struct sk_buff *skb)
0388 {
0389     u16 rss_type;
0390
0391     if (!(ring->netdev->features & NETIF_F_RXHASH))
0392         return;
0393
0394     rss_type = le16_to_cpu(rx_desc->w.pkt_info) & FM10K_RXD_RSSTYPE_MASK;
0395     if (!rss_type)
0396         return;
0397
0398     skb_set_hash(skb, le32_to_cpu(rx_desc->d.rss),
0399              (BIT(rss_type) & FM10K_RSS_L4_TYPES_MASK) ?
0400              PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
0401 }
0402
0403 static void fm10k_type_trans(struct fm10k_ring *rx_ring,
0404                  union fm10k_rx_desc __maybe_unused *rx_desc,
0405                  struct sk_buff *skb)
0406 {
0407     struct net_device *dev = rx_ring->netdev;
0408     struct fm10k_l2_accel *l2_accel = rcu_dereference_bh(rx_ring->l2_accel);
0409
0410     /* check to see if DGLORT belongs to a MACVLAN */
0411     if (l2_accel) {
0412         u16 idx = le16_to_cpu(FM10K_CB(skb)->fi.w.dglort) - 1;
0413
0414         idx -= l2_accel->dglort;
0415         if (idx < l2_accel->size && l2_accel->macvlan[idx])
0416             dev = l2_accel->macvlan[idx];
0417         else
0418             l2_accel = NULL;
0419     }
0420
0421     /* Record Rx queue, or update macvlan statistics */
0422     if (!l2_accel)
0423         skb_record_rx_queue(skb, rx_ring->queue_index);
0424     else
0425         macvlan_count_rx(netdev_priv(dev), skb->len + ETH_HLEN, true,
0426                  false);
0427
0428     skb->protocol = eth_type_trans(skb, dev);
0429 }
0430
0431 /**
0432  * fm10k_process_skb_fields - Populate skb header fields from Rx descriptor
0433  * @rx_ring: rx descriptor ring packet is being transacted on
0434  * @rx_desc: pointer to the EOP Rx descriptor
0435  * @skb: pointer to current skb being populated
0436  *
0437  * This function checks the ring, descriptor, and packet information in
0438  * order to populate the hash, checksum, VLAN, timestamp, protocol, and
0439  * other fields within the skb.
0440  **/
0441 static unsigned int fm10k_process_skb_fields(struct fm10k_ring *rx_ring,
0442                          union fm10k_rx_desc *rx_desc,
0443                          struct sk_buff *skb)
0444 {
0445     unsigned int len = skb->len;
0446
0447     fm10k_rx_hash(rx_ring, rx_desc, skb);
0448
0449     fm10k_rx_checksum(rx_ring, rx_desc, skb);
0450
0451     FM10K_CB(skb)->tstamp = rx_desc->q.timestamp;
0452
0453     FM10K_CB(skb)->fi.w.vlan = rx_desc->w.vlan;
0454
0455     FM10K_CB(skb)->fi.d.glort = rx_desc->d.glort;
0456
0457     if (rx_desc->w.vlan) {
0458         u16 vid = le16_to_cpu(rx_desc->w.vlan);
0459
0460         if ((vid & VLAN_VID_MASK) != rx_ring->vid)
0461             __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
0462         else if (vid & VLAN_PRIO_MASK)
0463             __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
0464                            vid & VLAN_PRIO_MASK);
0465     }
0466
0467     fm10k_type_trans(rx_ring, rx_desc, skb);
0468
0469     return len;
0470 }
0471
0472 /**
0473  * fm10k_is_non_eop - process handling of non-EOP buffers
0474  * @rx_ring: Rx ring being processed
0475  * @rx_desc: Rx descriptor for current buffer
0476  *
0477  * This function updates next to clean.  If the buffer is an EOP buffer
0478  * this function exits returning false, otherwise it will place the
0479  * sk_buff in the next buffer to be chained and return true indicating
0480  * that this is in fact a non-EOP buffer.
0481  **/
0482 static bool fm10k_is_non_eop(struct fm10k_ring *rx_ring,
0483                  union fm10k_rx_desc *rx_desc)
0484 {
0485     u32 ntc = rx_ring->next_to_clean + 1;
0486
0487     /* fetch, update, and store next to clean */
0488     ntc = (ntc < rx_ring->count) ? ntc : 0;
0489     rx_ring->next_to_clean = ntc;
0490
0491     prefetch(FM10K_RX_DESC(rx_ring, ntc));
0492
0493     if (likely(fm10k_test_staterr(rx_desc, FM10K_RXD_STATUS_EOP)))
0494         return false;
0495
0496     return true;
0497 }
0498
0499 /**
0500  * fm10k_cleanup_headers - Correct corrupted or empty headers
0501  * @rx_ring: rx descriptor ring packet is being transacted on
0502  * @rx_desc: pointer to the EOP Rx descriptor
0503  * @skb: pointer to current skb being fixed
0504  *
0505  * Address the case where we are pulling data in on pages only
0506  * and as such no data is present in the skb header.
0507  *
0508  * In addition if skb is not at least 60 bytes we need to pad it so that
0509  * it is large enough to qualify as a valid Ethernet frame.
0510  *
0511  * Returns true if an error was encountered and skb was freed.
0512  **/
0513 static bool fm10k_cleanup_headers(struct fm10k_ring *rx_ring,
0514                   union fm10k_rx_desc *rx_desc,
0515                   struct sk_buff *skb)
0516 {
0517     if (unlikely((fm10k_test_staterr(rx_desc,
0518                      FM10K_RXD_STATUS_RXE)))) {
0519 #define FM10K_TEST_RXD_BIT(rxd, bit) \
0520     ((rxd)->w.csum_err & cpu_to_le16(bit))
0521         if (FM10K_TEST_RXD_BIT(rx_desc, FM10K_RXD_ERR_SWITCH_ERROR))
0522             rx_ring->rx_stats.switch_errors++;
0523         if (FM10K_TEST_RXD_BIT(rx_desc, FM10K_RXD_ERR_NO_DESCRIPTOR))
0524             rx_ring->rx_stats.drops++;
0525         if (FM10K_TEST_RXD_BIT(rx_desc, FM10K_RXD_ERR_PP_ERROR))
0526             rx_ring->rx_stats.pp_errors++;
0527         if (FM10K_TEST_RXD_BIT(rx_desc, FM10K_RXD_ERR_SWITCH_READY))
0528             rx_ring->rx_stats.link_errors++;
0529         if (FM10K_TEST_RXD_BIT(rx_desc, FM10K_RXD_ERR_TOO_BIG))
0530             rx_ring->rx_stats.length_errors++;
0531         dev_kfree_skb_any(skb);
0532         rx_ring->rx_stats.errors++;
0533         return true;
0534     }
0535
0536     /* if eth_skb_pad returns an error the skb was freed */
0537     if (eth_skb_pad(skb))
0538         return true;
0539
0540     return false;
0541 }
0542
0543 /**
0544  * fm10k_receive_skb - helper function to handle rx indications
0545  * @q_vector: structure containing interrupt and ring information
0546  * @skb: packet to send up
0547  **/
0548 static void fm10k_receive_skb(struct fm10k_q_vector *q_vector,
0549                   struct sk_buff *skb)
0550 {
0551     napi_gro_receive(&q_vector->napi, skb);
0552 }
0553
0554 static int fm10k_clean_rx_irq(struct fm10k_q_vector *q_vector,
0555                   struct fm10k_ring *rx_ring,
0556                   int budget)
0557 {
0558     struct sk_buff *skb = rx_ring->skb;
0559     unsigned int total_bytes = 0, total_packets = 0;
0560     u16 cleaned_count = fm10k_desc_unused(rx_ring);
0561
0562     while (likely(total_packets < budget)) {
0563         union fm10k_rx_desc *rx_desc;
0564
0565         /* return some buffers to hardware, one at a time is too slow */
0566         if (cleaned_count >= FM10K_RX_BUFFER_WRITE) {
0567             fm10k_alloc_rx_buffers(rx_ring, cleaned_count);
0568             cleaned_count = 0;
0569         }
0570
0571         rx_desc = FM10K_RX_DESC(rx_ring, rx_ring->next_to_clean);
0572
0573         if (!rx_desc->d.staterr)
0574             break;
0575
0576         /* This memory barrier is needed to keep us from reading
0577          * any other fields out of the rx_desc until we know the
0578          * descriptor has been written back
0579          */
0580         dma_rmb();
0581
0582         /* retrieve a buffer from the ring */
0583         skb = fm10k_fetch_rx_buffer(rx_ring, rx_desc, skb);
0584
0585         /* exit if we failed to retrieve a buffer */
0586         if (!skb)
0587             break;
0588
0589         cleaned_count++;
0590
0591         /* fetch next buffer in frame if non-eop */
0592         if (fm10k_is_non_eop(rx_ring, rx_desc))
0593             continue;
0594
0595         /* verify the packet layout is correct */
0596         if (fm10k_cleanup_headers(rx_ring, rx_desc, skb)) {
0597             skb = NULL;
0598             continue;
0599         }
0600
0601         /* populate checksum, timestamp, VLAN, and protocol */
0602         total_bytes += fm10k_process_skb_fields(rx_ring, rx_desc, skb);
0603
0604         fm10k_receive_skb(q_vector, skb);
0605
0606         /* reset skb pointer */
0607         skb = NULL;
0608
0609         /* update budget accounting */
0610         total_packets++;
0611     }
0612
0613     /* place incomplete frames back on ring for completion */
0614     rx_ring->skb = skb;
0615
0616     u64_stats_update_begin(&rx_ring->syncp);
0617     rx_ring->stats.packets += total_packets;
0618     rx_ring->stats.bytes += total_bytes;
0619     u64_stats_update_end(&rx_ring->syncp);
0620     q_vector->rx.total_packets += total_packets;
0621     q_vector->rx.total_bytes += total_bytes;
0622
0623     return total_packets;
0624 }
0625
0626 #define VXLAN_HLEN (sizeof(struct udphdr) + 8)
0627 static struct ethhdr *fm10k_port_is_vxlan(struct sk_buff *skb)
0628 {
0629     struct fm10k_intfc *interface = netdev_priv(skb->dev);
0630
0631     if (interface->vxlan_port != udp_hdr(skb)->dest)
0632         return NULL;
0633
0634     /* return offset of udp_hdr plus 8 bytes for VXLAN header */
0635     return (struct ethhdr *)(skb_transport_header(skb) + VXLAN_HLEN);
0636 }
0637
0638 #define FM10K_NVGRE_RESERVED0_FLAGS htons(0x9FFF)
0639 #define NVGRE_TNI htons(0x2000)
0640 struct fm10k_nvgre_hdr {
0641     __be16 flags;
0642     __be16 proto;
0643     __be32 tni;
0644 };
0645
0646 static struct ethhdr *fm10k_gre_is_nvgre(struct sk_buff *skb)
0647 {
0648     struct fm10k_nvgre_hdr *nvgre_hdr;
0649     int hlen = ip_hdrlen(skb);
0650
0651     /* currently only IPv4 is supported due to hlen above */
0652     if (vlan_get_protocol(skb) != htons(ETH_P_IP))
0653         return NULL;
0654
0655     /* our transport header should be NVGRE */
0656     nvgre_hdr = (struct fm10k_nvgre_hdr *)(skb_network_header(skb) + hlen);
0657
0658     /* verify all reserved flags are 0 */
0659     if (nvgre_hdr->flags & FM10K_NVGRE_RESERVED0_FLAGS)
0660         return NULL;
0661
0662     /* report start of ethernet header */
0663     if (nvgre_hdr->flags & NVGRE_TNI)
0664         return (struct ethhdr *)(nvgre_hdr + 1);
0665
0666     return (struct ethhdr *)(&nvgre_hdr->tni);
0667 }
0668
0669 __be16 fm10k_tx_encap_offload(struct sk_buff *skb)
0670 {
0671     u8 l4_hdr = 0, inner_l4_hdr = 0, inner_l4_hlen;
0672     struct ethhdr *eth_hdr;
0673
0674     if (skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
0675         skb->inner_protocol != htons(ETH_P_TEB))
0676         return 0;
0677
0678     switch (vlan_get_protocol(skb)) {
0679     case htons(ETH_P_IP):
0680         l4_hdr = ip_hdr(skb)->protocol;
0681         break;
0682     case htons(ETH_P_IPV6):
0683         l4_hdr = ipv6_hdr(skb)->nexthdr;
0684         break;
0685     default:
0686         return 0;
0687     }
0688
0689     switch (l4_hdr) {
0690     case IPPROTO_UDP:
0691         eth_hdr = fm10k_port_is_vxlan(skb);
0692         break;
0693     case IPPROTO_GRE:
0694         eth_hdr = fm10k_gre_is_nvgre(skb);
0695         break;
0696     default:
0697         return 0;
0698     }
0699
0700     if (!eth_hdr)
0701         return 0;
0702
0703     switch (eth_hdr->h_proto) {
0704     case htons(ETH_P_IP):
0705         inner_l4_hdr = inner_ip_hdr(skb)->protocol;
0706         break;
0707     case htons(ETH_P_IPV6):
0708         inner_l4_hdr = inner_ipv6_hdr(skb)->nexthdr;
0709         break;
0710     default:
0711         return 0;
0712     }
0713
0714     switch (inner_l4_hdr) {
0715     case IPPROTO_TCP:
0716         inner_l4_hlen = inner_tcp_hdrlen(skb);
0717         break;
0718     case IPPROTO_UDP:
0719         inner_l4_hlen = 8;
0720         break;
0721     default:
0722         return 0;
0723     }
0724
0725     /* The hardware allows tunnel offloads only if the combined inner and
0726      * outer header is 184 bytes or less
0727      */
0728     if (skb_inner_transport_header(skb) + inner_l4_hlen -
0729         skb_mac_header(skb) > FM10K_TUNNEL_HEADER_LENGTH)
0730         return 0;
0731
0732     return eth_hdr->h_proto;
0733 }
0734
0735 static int fm10k_tso(struct fm10k_ring *tx_ring,
0736              struct fm10k_tx_buffer *first)
0737 {
0738     struct sk_buff *skb = first->skb;
0739     struct fm10k_tx_desc *tx_desc;
0740     unsigned char *th;
0741     u8 hdrlen;
0742
0743     if (skb->ip_summed != CHECKSUM_PARTIAL)
0744         return 0;
0745
0746     if (!skb_is_gso(skb))
0747         return 0;
0748
0749     /* compute header lengths */
0750     if (skb->encapsulation) {
0751         if (!fm10k_tx_encap_offload(skb))
0752             goto err_vxlan;
0753         th = skb_inner_transport_header(skb);
0754     } else {
0755         th = skb_transport_header(skb);
0756     }
0757
0758     /* compute offset from SOF to transport header and add header len */
0759     hdrlen = (th - skb->data) + (((struct tcphdr *)th)->doff << 2);
0760
0761     first->tx_flags |= FM10K_TX_FLAGS_CSUM;
0762
0763     /* update gso size and bytecount with header size */
0764     first->gso_segs = skb_shinfo(skb)->gso_segs;
0765     first->bytecount += (first->gso_segs - 1) * hdrlen;
0766
0767     /* populate Tx descriptor header size and mss */
0768     tx_desc = FM10K_TX_DESC(tx_ring, tx_ring->next_to_use);
0769     tx_desc->hdrlen = hdrlen;
0770     tx_desc->mss = cpu_to_le16(skb_shinfo(skb)->gso_size);
0771
0772     return 1;
0773
0774 err_vxlan:
0775     tx_ring->netdev->features &= ~NETIF_F_GSO_UDP_TUNNEL;
0776     if (net_ratelimit())
0777         netdev_err(tx_ring->netdev,
0778                "TSO requested for unsupported tunnel, disabling offload\n");
0779     return -1;
0780 }
0781
0782 static void fm10k_tx_csum(struct fm10k_ring *tx_ring,
0783               struct fm10k_tx_buffer *first)
0784 {
0785     struct sk_buff *skb = first->skb;
0786     struct fm10k_tx_desc *tx_desc;
0787     union {
0788         struct iphdr *ipv4;
0789         struct ipv6hdr *ipv6;
0790         u8 *raw;
0791     } network_hdr;
0792     u8 *transport_hdr;
0793     __be16 frag_off;
0794     __be16 protocol;
0795     u8 l4_hdr = 0;
0796
0797     if (skb->ip_summed != CHECKSUM_PARTIAL)
0798         goto no_csum;
0799
0800     if (skb->encapsulation) {
0801         protocol = fm10k_tx_encap_offload(skb);
0802         if (!protocol) {
0803             if (skb_checksum_help(skb)) {
0804                 dev_warn(tx_ring->dev,
0805                      "failed to offload encap csum!\n");
0806                 tx_ring->tx_stats.csum_err++;
0807             }
0808             goto no_csum;
0809         }
0810         network_hdr.raw = skb_inner_network_header(skb);
0811         transport_hdr = skb_inner_transport_header(skb);
0812     } else {
0813         protocol = vlan_get_protocol(skb);
0814         network_hdr.raw = skb_network_header(skb);
0815         transport_hdr = skb_transport_header(skb);
0816     }
0817
0818     switch (protocol) {
0819     case htons(ETH_P_IP):
0820         l4_hdr = network_hdr.ipv4->protocol;
0821         break;
0822     case htons(ETH_P_IPV6):
0823         l4_hdr = network_hdr.ipv6->nexthdr;
0824         if (likely((transport_hdr - network_hdr.raw) ==
0825                sizeof(struct ipv6hdr)))
0826             break;
0827         ipv6_skip_exthdr(skb, network_hdr.raw - skb->data +
0828                       sizeof(struct ipv6hdr),
0829                  &l4_hdr, &frag_off);
0830         if (unlikely(frag_off))
0831             l4_hdr = NEXTHDR_FRAGMENT;
0832         break;
0833     default:
0834         break;
0835     }
0836
0837     switch (l4_hdr) {
0838     case IPPROTO_TCP:
0839     case IPPROTO_UDP:
0840         break;
0841     case IPPROTO_GRE:
0842         if (skb->encapsulation)
0843             break;
0844         fallthrough;
0845     default:
0846         if (unlikely(net_ratelimit())) {
0847             dev_warn(tx_ring->dev,
0848                  "partial checksum, version=%d l4 proto=%x\n",
0849                  protocol, l4_hdr);
0850         }
0851         skb_checksum_help(skb);
0852         tx_ring->tx_stats.csum_err++;
0853         goto no_csum;
0854     }
0855
0856     /* update TX checksum flag */
0857     first->tx_flags |= FM10K_TX_FLAGS_CSUM;
0858     tx_ring->tx_stats.csum_good++;
0859
0860 no_csum:
0861     /* populate Tx descriptor header size and mss */
0862     tx_desc = FM10K_TX_DESC(tx_ring, tx_ring->next_to_use);
0863     tx_desc->hdrlen = 0;
0864     tx_desc->mss = 0;
0865 }
0866
0867 #define FM10K_SET_FLAG(_input, _flag, _result) \
0868     ((_flag <= _result) ? \
0869      ((u32)(_input & _flag) * (_result / _flag)) : \
0870      ((u32)(_input & _flag) / (_flag / _result)))
0871
0872 static u8 fm10k_tx_desc_flags(struct sk_buff *skb, u32 tx_flags)
0873 {
0874     /* set type for advanced descriptor with frame checksum insertion */
0875     u32 desc_flags = 0;
0876
0877     /* set checksum offload bits */
0878     desc_flags |= FM10K_SET_FLAG(tx_flags, FM10K_TX_FLAGS_CSUM,
0879                      FM10K_TXD_FLAG_CSUM);
0880
0881     return desc_flags;
0882 }
0883
0884 static bool fm10k_tx_desc_push(struct fm10k_ring *tx_ring,
0885                    struct fm10k_tx_desc *tx_desc, u16 i,
0886                    dma_addr_t dma, unsigned int size, u8 desc_flags)
0887 {
0888     /* set RS and INT for last frame in a cache line */
0889     if ((++i & (FM10K_TXD_WB_FIFO_SIZE - 1)) == 0)
0890         desc_flags |= FM10K_TXD_FLAG_RS | FM10K_TXD_FLAG_INT;
0891
0892     /* record values to descriptor */
0893     tx_desc->buffer_addr = cpu_to_le64(dma);
0894     tx_desc->flags = desc_flags;
0895     tx_desc->buflen = cpu_to_le16(size);
0896
0897     /* return true if we just wrapped the ring */
0898     return i == tx_ring->count;
0899 }
0900
0901 static int __fm10k_maybe_stop_tx(struct fm10k_ring *tx_ring, u16 size)
0902 {
0903     netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
0904
0905     /* Memory barrier before checking head and tail */
0906     smp_mb();
0907
0908     /* Check again in a case another CPU has just made room available */
0909     if (likely(fm10k_desc_unused(tx_ring) < size))
0910         return -EBUSY;
0911
0912     /* A reprieve! - use start_queue because it doesn't call schedule */
0913     netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
0914     ++tx_ring->tx_stats.restart_queue;
0915     return 0;
0916 }
0917
0918 static inline int fm10k_maybe_stop_tx(struct fm10k_ring *tx_ring, u16 size)
0919 {
0920     if (likely(fm10k_desc_unused(tx_ring) >= size))
0921         return 0;
0922     return __fm10k_maybe_stop_tx(tx_ring, size);
0923 }
0924
0925 static void fm10k_tx_map(struct fm10k_ring *tx_ring,
0926              struct fm10k_tx_buffer *first)
0927 {
0928     struct sk_buff *skb = first->skb;
0929     struct fm10k_tx_buffer *tx_buffer;
0930     struct fm10k_tx_desc *tx_desc;
0931     skb_frag_t *frag;
0932     unsigned char *data;
0933     dma_addr_t dma;
0934     unsigned int data_len, size;
0935     u32 tx_flags = first->tx_flags;
0936     u16 i = tx_ring->next_to_use;
0937     u8 flags = fm10k_tx_desc_flags(skb, tx_flags);
0938
0939     tx_desc = FM10K_TX_DESC(tx_ring, i);
0940
0941     /* add HW VLAN tag */
0942     if (skb_vlan_tag_present(skb))
0943         tx_desc->vlan = cpu_to_le16(skb_vlan_tag_get(skb));
0944     else
0945         tx_desc->vlan = 0;
0946
0947     size = skb_headlen(skb);
0948     data = skb->data;
0949
0950     dma = dma_map_single(tx_ring->dev, data, size, DMA_TO_DEVICE);
0951
0952     data_len = skb->data_len;
0953     tx_buffer = first;
0954
0955     for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
0956         if (dma_mapping_error(tx_ring->dev, dma))
0957             goto dma_error;
0958
0959         /* record length, and DMA address */
0960         dma_unmap_len_set(tx_buffer, len, size);
0961         dma_unmap_addr_set(tx_buffer, dma, dma);
0962
0963         while (unlikely(size > FM10K_MAX_DATA_PER_TXD)) {
0964             if (fm10k_tx_desc_push(tx_ring, tx_desc++, i++, dma,
0965                            FM10K_MAX_DATA_PER_TXD, flags)) {
0966                 tx_desc = FM10K_TX_DESC(tx_ring, 0);
0967                 i = 0;
0968             }
0969
0970             dma += FM10K_MAX_DATA_PER_TXD;
0971             size -= FM10K_MAX_DATA_PER_TXD;
0972         }
0973
0974         if (likely(!data_len))
0975             break;
0976
0977         if (fm10k_tx_desc_push(tx_ring, tx_desc++, i++,
0978                        dma, size, flags)) {
0979             tx_desc = FM10K_TX_DESC(tx_ring, 0);
0980             i = 0;
0981         }
0982
0983         size = skb_frag_size(frag);
0984         data_len -= size;
0985
0986         dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
0987                        DMA_TO_DEVICE);
0988
0989         tx_buffer = &tx_ring->tx_buffer[i];
0990     }
0991
0992     /* write last descriptor with LAST bit set */
0993     flags |= FM10K_TXD_FLAG_LAST;
0994
0995     if (fm10k_tx_desc_push(tx_ring, tx_desc, i++, dma, size, flags))
0996         i = 0;
0997
0998     /* record bytecount for BQL */
0999     netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
1000
1001     /* record SW timestamp if HW timestamp is not available */
1002     skb_tx_timestamp(first->skb);
1003
1004     /* Force memory writes to complete before letting h/w know there
1005      * are new descriptors to fetch.  (Only applicable for weak-ordered
1006      * memory model archs, such as IA-64).
1007      *
1008      * We also need this memory barrier to make certain all of the
1009      * status bits have been updated before next_to_watch is written.
1010      */
1011     wmb();
1012
1013     /* set next_to_watch value indicating a packet is present */
1014     first->next_to_watch = tx_desc;
1015
1016     tx_ring->next_to_use = i;
1017
1018     /* Make sure there is space in the ring for the next send. */
1019     fm10k_maybe_stop_tx(tx_ring, DESC_NEEDED);
1020
1021     /* notify HW of packet */
1022     if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) {
1023         writel(i, tx_ring->tail);
1024     }
1025
1026     return;
1027 dma_error:
1028     dev_err(tx_ring->dev, "TX DMA map failed\n");
1029
1030     /* clear dma mappings for failed tx_buffer map */
1031     for (;;) {
1032         tx_buffer = &tx_ring->tx_buffer[i];
1033         fm10k_unmap_and_free_tx_resource(tx_ring, tx_buffer);
1034         if (tx_buffer == first)
1035             break;
1036         if (i == 0)
1037             i = tx_ring->count;
1038         i--;
1039     }
1040
1041     tx_ring->next_to_use = i;
1042 }
1043
1044 netdev_tx_t fm10k_xmit_frame_ring(struct sk_buff *skb,
1045                   struct fm10k_ring *tx_ring)
1046 {
1047     u16 count = TXD_USE_COUNT(skb_headlen(skb));
1048     struct fm10k_tx_buffer *first;
1049     unsigned short f;
1050     u32 tx_flags = 0;
1051     int tso;
1052
1053     /* need: 1 descriptor per page * PAGE_SIZE/FM10K_MAX_DATA_PER_TXD,
1054      *       + 1 desc for skb_headlen/FM10K_MAX_DATA_PER_TXD,
1055      *       + 2 desc gap to keep tail from touching head
1056      * otherwise try next time
1057      */
1058     for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
1059         skb_frag_t *frag = &skb_shinfo(skb)->frags[f];
1060
1061         count += TXD_USE_COUNT(skb_frag_size(frag));
1062     }
1063
1064     if (fm10k_maybe_stop_tx(tx_ring, count + 3)) {
1065         tx_ring->tx_stats.tx_busy++;
1066         return NETDEV_TX_BUSY;
1067     }
1068
1069     /* record the location of the first descriptor for this packet */
1070     first = &tx_ring->tx_buffer[tx_ring->next_to_use];
1071     first->skb = skb;
1072     first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN);
1073     first->gso_segs = 1;
1074
1075     /* record initial flags and protocol */
1076     first->tx_flags = tx_flags;
1077
1078     tso = fm10k_tso(tx_ring, first);
1079     if (tso < 0)
1080         goto out_drop;
1081     else if (!tso)
1082         fm10k_tx_csum(tx_ring, first);
1083
1084     fm10k_tx_map(tx_ring, first);
1085
1086     return NETDEV_TX_OK;
1087
1088 out_drop:
1089     dev_kfree_skb_any(first->skb);
1090     first->skb = NULL;
1091
1092     return NETDEV_TX_OK;
1093 }
1094
1095 static u64 fm10k_get_tx_completed(struct fm10k_ring *ring)
1096 {
1097     return ring->stats.packets;
1098 }
1099
1100 /**
1101  * fm10k_get_tx_pending - how many Tx descriptors not processed
1102  * @ring: the ring structure
1103  * @in_sw: is tx_pending being checked in SW or in HW?
1104  */
1105 u64 fm10k_get_tx_pending(struct fm10k_ring *ring, bool in_sw)
1106 {
1107     struct fm10k_intfc *interface = ring->q_vector->interface;
1108     struct fm10k_hw *hw = &interface->hw;
1109     u32 head, tail;
1110
1111     if (likely(in_sw)) {
1112         head = ring->next_to_clean;
1113         tail = ring->next_to_use;
1114     } else {
1115         head = fm10k_read_reg(hw, FM10K_TDH(ring->reg_idx));
1116         tail = fm10k_read_reg(hw, FM10K_TDT(ring->reg_idx));
1117     }
1118
1119     return ((head <= tail) ? tail : tail + ring->count) - head;
1120 }
1121
1122 bool fm10k_check_tx_hang(struct fm10k_ring *tx_ring)
1123 {
1124     u32 tx_done = fm10k_get_tx_completed(tx_ring);
1125     u32 tx_done_old = tx_ring->tx_stats.tx_done_old;
1126     u32 tx_pending = fm10k_get_tx_pending(tx_ring, true);
1127
1128     clear_check_for_tx_hang(tx_ring);
1129
1130     /* Check for a hung queue, but be thorough. This verifies
1131      * that a transmit has been completed since the previous
1132      * check AND there is at least one packet pending. By
1133      * requiring this to fail twice we avoid races with
1134      * clearing the ARMED bit and conditions where we
1135      * run the check_tx_hang logic with a transmit completion
1136      * pending but without time to complete it yet.
1137      */
1138     if (!tx_pending || (tx_done_old != tx_done)) {
1139         /* update completed stats and continue */
1140         tx_ring->tx_stats.tx_done_old = tx_done;
1141         /* reset the countdown */
1142         clear_bit(__FM10K_HANG_CHECK_ARMED, tx_ring->state);
1143
1144         return false;
1145     }
1146
1147     /* make sure it is true for two checks in a row */
1148     return test_and_set_bit(__FM10K_HANG_CHECK_ARMED, tx_ring->state);
1149 }
1150
1151 /**
1152  * fm10k_tx_timeout_reset - initiate reset due to Tx timeout
1153  * @interface: driver private struct
1154  **/
1155 void fm10k_tx_timeout_reset(struct fm10k_intfc *interface)
1156 {
1157     /* Do the reset outside of interrupt context */
1158     if (!test_bit(__FM10K_DOWN, interface->state)) {
1159         interface->tx_timeout_count++;
1160         set_bit(FM10K_FLAG_RESET_REQUESTED, interface->flags);
1161         fm10k_service_event_schedule(interface);
1162     }
1163 }
1164
1165 /**
1166  * fm10k_clean_tx_irq - Reclaim resources after transmit completes
1167  * @q_vector: structure containing interrupt and ring information
1168  * @tx_ring: tx ring to clean
1169  * @napi_budget: Used to determine if we are in netpoll
1170  **/
1171 static bool fm10k_clean_tx_irq(struct fm10k_q_vector *q_vector,
1172                    struct fm10k_ring *tx_ring, int napi_budget)
1173 {
1174     struct fm10k_intfc *interface = q_vector->interface;
1175     struct fm10k_tx_buffer *tx_buffer;
1176     struct fm10k_tx_desc *tx_desc;
1177     unsigned int total_bytes = 0, total_packets = 0;
1178     unsigned int budget = q_vector->tx.work_limit;
1179     unsigned int i = tx_ring->next_to_clean;
1180
1181     if (test_bit(__FM10K_DOWN, interface->state))
1182         return true;
1183
1184     tx_buffer = &tx_ring->tx_buffer[i];
1185     tx_desc = FM10K_TX_DESC(tx_ring, i);
1186     i -= tx_ring->count;
1187
1188     do {
1189         struct fm10k_tx_desc *eop_desc = tx_buffer->next_to_watch;
1190
1191         /* if next_to_watch is not set then there is no work pending */
1192         if (!eop_desc)
1193             break;
1194
1195         /* prevent any other reads prior to eop_desc */
1196         smp_rmb();
1197
1198         /* if DD is not set pending work has not been completed */
1199         if (!(eop_desc->flags & FM10K_TXD_FLAG_DONE))
1200             break;
1201
1202         /* clear next_to_watch to prevent false hangs */
1203         tx_buffer->next_to_watch = NULL;
1204
1205         /* update the statistics for this packet */
1206         total_bytes += tx_buffer->bytecount;
1207         total_packets += tx_buffer->gso_segs;
1208
1209         /* free the skb */
1210         napi_consume_skb(tx_buffer->skb, napi_budget);
1211
1212         /* unmap skb header data */
1213         dma_unmap_single(tx_ring->dev,
1214                  dma_unmap_addr(tx_buffer, dma),
1215                  dma_unmap_len(tx_buffer, len),
1216                  DMA_TO_DEVICE);
1217
1218         /* clear tx_buffer data */
1219         tx_buffer->skb = NULL;
1220         dma_unmap_len_set(tx_buffer, len, 0);
1221
1222         /* unmap remaining buffers */
1223         while (tx_desc != eop_desc) {
1224             tx_buffer++;
1225             tx_desc++;
1226             i++;
1227             if (unlikely(!i)) {
1228                 i -= tx_ring->count;
1229                 tx_buffer = tx_ring->tx_buffer;
1230                 tx_desc = FM10K_TX_DESC(tx_ring, 0);
1231             }
1232
1233             /* unmap any remaining paged data */
1234             if (dma_unmap_len(tx_buffer, len)) {
1235                 dma_unmap_page(tx_ring->dev,
1236                            dma_unmap_addr(tx_buffer, dma),
1237                            dma_unmap_len(tx_buffer, len),
1238                            DMA_TO_DEVICE);
1239                 dma_unmap_len_set(tx_buffer, len, 0);
1240             }
1241         }
1242
1243         /* move us one more past the eop_desc for start of next pkt */
1244         tx_buffer++;
1245         tx_desc++;
1246         i++;
1247         if (unlikely(!i)) {
1248             i -= tx_ring->count;
1249             tx_buffer = tx_ring->tx_buffer;
1250             tx_desc = FM10K_TX_DESC(tx_ring, 0);
1251         }
1252
1253         /* issue prefetch for next Tx descriptor */
1254         prefetch(tx_desc);
1255
1256         /* update budget accounting */
1257         budget--;
1258     } while (likely(budget));
1259
1260     i += tx_ring->count;
1261     tx_ring->next_to_clean = i;
1262     u64_stats_update_begin(&tx_ring->syncp);
1263     tx_ring->stats.bytes += total_bytes;
1264     tx_ring->stats.packets += total_packets;
1265     u64_stats_update_end(&tx_ring->syncp);
1266     q_vector->tx.total_bytes += total_bytes;
1267     q_vector->tx.total_packets += total_packets;
1268
1269     if (check_for_tx_hang(tx_ring) && fm10k_check_tx_hang(tx_ring)) {
1270         /* schedule immediate reset if we believe we hung */
1271         struct fm10k_hw *hw = &interface->hw;
1272
1273         netif_err(interface, drv, tx_ring->netdev,
1274               "Detected Tx Unit Hang\n"
1275               "  Tx Queue             <%d>\n"
1276               "  TDH, TDT             <%x>, <%x>\n"
1277               "  next_to_use          <%x>\n"
1278               "  next_to_clean        <%x>\n",
1279               tx_ring->queue_index,
1280               fm10k_read_reg(hw, FM10K_TDH(tx_ring->reg_idx)),
1281               fm10k_read_reg(hw, FM10K_TDT(tx_ring->reg_idx)),
1282               tx_ring->next_to_use, i);
1283
1284         netif_stop_subqueue(tx_ring->netdev,
1285                     tx_ring->queue_index);
1286
1287         netif_info(interface, probe, tx_ring->netdev,
1288                "tx hang %d detected on queue %d, resetting interface\n",
1289                interface->tx_timeout_count + 1,
1290                tx_ring->queue_index);
1291
1292         fm10k_tx_timeout_reset(interface);
1293
1294         /* the netdev is about to reset, no point in enabling stuff */
1295         return true;
1296     }
1297
1298     /* notify netdev of completed buffers */
1299     netdev_tx_completed_queue(txring_txq(tx_ring),
1300                   total_packets, total_bytes);
1301
1302 #define TX_WAKE_THRESHOLD min_t(u16, FM10K_MIN_TXD - 1, DESC_NEEDED * 2)
1303     if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
1304              (fm10k_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) {
1305         /* Make sure that anybody stopping the queue after this
1306          * sees the new next_to_clean.
1307          */
1308         smp_mb();
1309         if (__netif_subqueue_stopped(tx_ring->netdev,
1310                          tx_ring->queue_index) &&
1311             !test_bit(__FM10K_DOWN, interface->state)) {
1312             netif_wake_subqueue(tx_ring->netdev,
1313                         tx_ring->queue_index);
1314             ++tx_ring->tx_stats.restart_queue;
1315         }
1316     }
1317
1318     return !!budget;
1319 }
1320
1321 /**
1322  * fm10k_update_itr - update the dynamic ITR value based on packet size
1323  *
1324  *      Stores a new ITR value based on strictly on packet size.  The
1325  *      divisors and thresholds used by this function were determined based
1326  *      on theoretical maximum wire speed and testing data, in order to
1327  *      minimize response time while increasing bulk throughput.
1328  *
1329  * @ring_container: Container for rings to have ITR updated
1330  **/
1331 static void fm10k_update_itr(struct fm10k_ring_container *ring_container)
1332 {
1333     unsigned int avg_wire_size, packets, itr_round;
1334
1335     /* Only update ITR if we are using adaptive setting */
1336     if (!ITR_IS_ADAPTIVE(ring_container->itr))
1337         goto clear_counts;
1338
1339     packets = ring_container->total_packets;
1340     if (!packets)
1341         goto clear_counts;
1342
1343     avg_wire_size = ring_container->total_bytes / packets;
1344
1345     /* The following is a crude approximation of:
1346      *  wmem_default / (size + overhead) = desired_pkts_per_int
1347      *  rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
1348      *  (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
1349      *
1350      * Assuming wmem_default is 212992 and overhead is 640 bytes per
1351      * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
1352      * formula down to
1353      *
1354      *  (34 * (size + 24)) / (size + 640) = ITR
1355      *
1356      * We first do some math on the packet size and then finally bitshift
1357      * by 8 after rounding up. We also have to account for PCIe link speed
1358      * difference as ITR scales based on this.
1359      */
1360     if (avg_wire_size <= 360) {
1361         /* Start at 250K ints/sec and gradually drop to 77K ints/sec */
1362         avg_wire_size *= 8;
1363         avg_wire_size += 376;
1364     } else if (avg_wire_size <= 1152) {
1365         /* 77K ints/sec to 45K ints/sec */
1366         avg_wire_size *= 3;
1367         avg_wire_size += 2176;
1368     } else if (avg_wire_size <= 1920) {
1369         /* 45K ints/sec to 38K ints/sec */
1370         avg_wire_size += 4480;
1371     } else {
1372         /* plateau at a limit of 38K ints/sec */
1373         avg_wire_size = 6656;
1374     }
1375
1376     /* Perform final bitshift for division after rounding up to ensure
1377      * that the calculation will never get below a 1. The bit shift
1378      * accounts for changes in the ITR due to PCIe link speed.
1379      */
1380     itr_round = READ_ONCE(ring_container->itr_scale) + 8;
1381     avg_wire_size += BIT(itr_round) - 1;
1382     avg_wire_size >>= itr_round;
1383
1384     /* write back value and retain adaptive flag */
1385     ring_container->itr = avg_wire_size | FM10K_ITR_ADAPTIVE;
1386
1387 clear_counts:
1388     ring_container->total_bytes = 0;
1389     ring_container->total_packets = 0;
1390 }
1391
1392 static void fm10k_qv_enable(struct fm10k_q_vector *q_vector)
1393 {
1394     /* Enable auto-mask and clear the current mask */
1395     u32 itr = FM10K_ITR_ENABLE;
1396
1397     /* Update Tx ITR */
1398     fm10k_update_itr(&q_vector->tx);
1399
1400     /* Update Rx ITR */
1401     fm10k_update_itr(&q_vector->rx);
1402
1403     /* Store Tx itr in timer slot 0 */
1404     itr |= (q_vector->tx.itr & FM10K_ITR_MAX);
1405
1406     /* Shift Rx itr to timer slot 1 */
1407     itr |= (q_vector->rx.itr & FM10K_ITR_MAX) << FM10K_ITR_INTERVAL1_SHIFT;
1408
1409     /* Write the final value to the ITR register */
1410     writel(itr, q_vector->itr);
1411 }
1412
1413 static int fm10k_poll(struct napi_struct *napi, int budget)
1414 {
1415     struct fm10k_q_vector *q_vector =
1416                    container_of(napi, struct fm10k_q_vector, napi);
1417     struct fm10k_ring *ring;
1418     int per_ring_budget, work_done = 0;
1419     bool clean_complete = true;
1420
1421     fm10k_for_each_ring(ring, q_vector->tx) {
1422         if (!fm10k_clean_tx_irq(q_vector, ring, budget))
1423             clean_complete = false;
1424     }
1425
1426     /* Handle case where we are called by netpoll with a budget of 0 */
1427     if (budget <= 0)
1428         return budget;
1429
1430     /* attempt to distribute budget to each queue fairly, but don't
1431      * allow the budget to go below 1 because we'll exit polling
1432      */
1433     if (q_vector->rx.count > 1)
1434         per_ring_budget = max(budget / q_vector->rx.count, 1);
1435     else
1436         per_ring_budget = budget;
1437
1438     fm10k_for_each_ring(ring, q_vector->rx) {
1439         int work = fm10k_clean_rx_irq(q_vector, ring, per_ring_budget);
1440
1441         work_done += work;
1442         if (work >= per_ring_budget)
1443             clean_complete = false;
1444     }
1445
1446     /* If all work not completed, return budget and keep polling */
1447     if (!clean_complete)
1448         return budget;
1449
1450     /* Exit the polling mode, but don't re-enable interrupts if stack might
1451      * poll us due to busy-polling
1452      */
1453     if (likely(napi_complete_done(napi, work_done)))
1454         fm10k_qv_enable(q_vector);
1455
1456     return min(work_done, budget - 1);
1457 }
1458
1459 /**
1460  * fm10k_set_qos_queues: Allocate queues for a QOS-enabled device
1461  * @interface: board private structure to initialize
1462  *
1463  * When QoS (Quality of Service) is enabled, allocate queues for
1464  * each traffic class.  If multiqueue isn't available,then abort QoS
1465  * initialization.
1466  *
1467  * This function handles all combinations of Qos and RSS.
1468  *
1469  **/
1470 static bool fm10k_set_qos_queues(struct fm10k_intfc *interface)
1471 {
1472     struct net_device *dev = interface->netdev;
1473     struct fm10k_ring_feature *f;
1474     int rss_i, i;
1475     int pcs;
1476
1477     /* Map queue offset and counts onto allocated tx queues */
1478     pcs = netdev_get_num_tc(dev);
1479
1480     if (pcs <= 1)
1481         return false;
1482
1483     /* set QoS mask and indices */
1484     f = &interface->ring_feature[RING_F_QOS];
1485     f->indices = pcs;
1486     f->mask = BIT(fls(pcs - 1)) - 1;
1487
1488     /* determine the upper limit for our current DCB mode */
1489     rss_i = interface->hw.mac.max_queues / pcs;
1490     rss_i = BIT(fls(rss_i) - 1);
1491
1492     /* set RSS mask and indices */
1493     f = &interface->ring_feature[RING_F_RSS];
1494     rss_i = min_t(u16, rss_i, f->limit);
1495     f->indices = rss_i;
1496     f->mask = BIT(fls(rss_i - 1)) - 1;
1497
1498     /* configure pause class to queue mapping */
1499     for (i = 0; i < pcs; i++)
1500         netdev_set_tc_queue(dev, i, rss_i, rss_i * i);
1501
1502     interface->num_rx_queues = rss_i * pcs;
1503     interface->num_tx_queues = rss_i * pcs;
1504
1505     return true;
1506 }
1507
1508 /**
1509  * fm10k_set_rss_queues: Allocate queues for RSS
1510  * @interface: board private structure to initialize
1511  *
1512  * This is our "base" multiqueue mode.  RSS (Receive Side Scaling) will try
1513  * to allocate one Rx queue per CPU, and if available, one Tx queue per CPU.
1514  *
1515  **/
1516 static bool fm10k_set_rss_queues(struct fm10k_intfc *interface)
1517 {
1518     struct fm10k_ring_feature *f;
1519     u16 rss_i;
1520
1521     f = &interface->ring_feature[RING_F_RSS];
1522     rss_i = min_t(u16, interface->hw.mac.max_queues, f->limit);
1523
1524     /* record indices and power of 2 mask for RSS */
1525     f->indices = rss_i;
1526     f->mask = BIT(fls(rss_i - 1)) - 1;
1527
1528     interface->num_rx_queues = rss_i;
1529     interface->num_tx_queues = rss_i;
1530
1531     return true;
1532 }
1533
1534 /**
1535  * fm10k_set_num_queues: Allocate queues for device, feature dependent
1536  * @interface: board private structure to initialize
1537  *
1538  * This is the top level queue allocation routine.  The order here is very
1539  * important, starting with the "most" number of features turned on at once,
1540  * and ending with the smallest set of features.  This way large combinations
1541  * can be allocated if they're turned on, and smaller combinations are the
1542  * fall through conditions.
1543  *
1544  **/
1545 static void fm10k_set_num_queues(struct fm10k_intfc *interface)
1546 {
1547     /* Attempt to setup QoS and RSS first */
1548     if (fm10k_set_qos_queues(interface))
1549         return;
1550
1551     /* If we don't have QoS, just fallback to only RSS. */
1552     fm10k_set_rss_queues(interface);
1553 }
1554
1555 /**
1556  * fm10k_reset_num_queues - Reset the number of queues to zero
1557  * @interface: board private structure
1558  *
1559  * This function should be called whenever we need to reset the number of
1560  * queues after an error condition.
1561  */
1562 static void fm10k_reset_num_queues(struct fm10k_intfc *interface)
1563 {
1564     interface->num_tx_queues = 0;
1565     interface->num_rx_queues = 0;
1566     interface->num_q_vectors = 0;
1567 }
1568
1569 /**
1570  * fm10k_alloc_q_vector - Allocate memory for a single interrupt vector
1571  * @interface: board private structure to initialize
1572  * @v_count: q_vectors allocated on interface, used for ring interleaving
1573  * @v_idx: index of vector in interface struct
1574  * @txr_count: total number of Tx rings to allocate
1575  * @txr_idx: index of first Tx ring to allocate
1576  * @rxr_count: total number of Rx rings to allocate
1577  * @rxr_idx: index of first Rx ring to allocate
1578  *
1579  * We allocate one q_vector.  If allocation fails we return -ENOMEM.
1580  **/
1581 static int fm10k_alloc_q_vector(struct fm10k_intfc *interface,
1582                 unsigned int v_count, unsigned int v_idx,
1583                 unsigned int txr_count, unsigned int txr_idx,
1584                 unsigned int rxr_count, unsigned int rxr_idx)
1585 {
1586     struct fm10k_q_vector *q_vector;
1587     struct fm10k_ring *ring;
1588     int ring_count;
1589
1590     ring_count = txr_count + rxr_count;
1591
1592     /* allocate q_vector and rings */
1593     q_vector = kzalloc(struct_size(q_vector, ring, ring_count), GFP_KERNEL);
1594     if (!q_vector)
1595         return -ENOMEM;
1596
1597     /* initialize NAPI */
1598     netif_napi_add(interface->netdev, &q_vector->napi,
1599                fm10k_poll, NAPI_POLL_WEIGHT);
1600
1601     /* tie q_vector and interface together */
1602     interface->q_vector[v_idx] = q_vector;
1603     q_vector->interface = interface;
1604     q_vector->v_idx = v_idx;
1605
1606     /* initialize pointer to rings */
1607     ring = q_vector->ring;
1608
1609     /* save Tx ring container info */
1610     q_vector->tx.ring = ring;
1611     q_vector->tx.work_limit = FM10K_DEFAULT_TX_WORK;
1612     q_vector->tx.itr = interface->tx_itr;
1613     q_vector->tx.itr_scale = interface->hw.mac.itr_scale;
1614     q_vector->tx.count = txr_count;
1615
1616     while (txr_count) {
1617         /* assign generic ring traits */
1618         ring->dev = &interface->pdev->dev;
1619         ring->netdev = interface->netdev;
1620
1621         /* configure backlink on ring */
1622         ring->q_vector = q_vector;
1623
1624         /* apply Tx specific ring traits */
1625         ring->count = interface->tx_ring_count;
1626         ring->queue_index = txr_idx;
1627
1628         /* assign ring to interface */
1629         interface->tx_ring[txr_idx] = ring;
1630
1631         /* update count and index */
1632         txr_count--;
1633         txr_idx += v_count;
1634
1635         /* push pointer to next ring */
1636         ring++;
1637     }
1638
1639     /* save Rx ring container info */
1640     q_vector->rx.ring = ring;
1641     q_vector->rx.itr = interface->rx_itr;
1642     q_vector->rx.itr_scale = interface->hw.mac.itr_scale;
1643     q_vector->rx.count = rxr_count;
1644
1645     while (rxr_count) {
1646         /* assign generic ring traits */
1647         ring->dev = &interface->pdev->dev;
1648         ring->netdev = interface->netdev;
1649         rcu_assign_pointer(ring->l2_accel, interface->l2_accel);
1650
1651         /* configure backlink on ring */
1652         ring->q_vector = q_vector;
1653
1654         /* apply Rx specific ring traits */
1655         ring->count = interface->rx_ring_count;
1656         ring->queue_index = rxr_idx;
1657
1658         /* assign ring to interface */
1659         interface->rx_ring[rxr_idx] = ring;
1660
1661         /* update count and index */
1662         rxr_count--;
1663         rxr_idx += v_count;
1664
1665         /* push pointer to next ring */
1666         ring++;
1667     }
1668
1669     fm10k_dbg_q_vector_init(q_vector);
1670
1671     return 0;
1672 }
1673
1674 /**
1675  * fm10k_free_q_vector - Free memory allocated for specific interrupt vector
1676  * @interface: board private structure to initialize
1677  * @v_idx: Index of vector to be freed
1678  *
1679  * This function frees the memory allocated to the q_vector.  In addition if
1680  * NAPI is enabled it will delete any references to the NAPI struct prior
1681  * to freeing the q_vector.
1682  **/
1683 static void fm10k_free_q_vector(struct fm10k_intfc *interface, int v_idx)
1684 {
1685     struct fm10k_q_vector *q_vector = interface->q_vector[v_idx];
1686     struct fm10k_ring *ring;
1687
1688     fm10k_dbg_q_vector_exit(q_vector);
1689
1690     fm10k_for_each_ring(ring, q_vector->tx)
1691         interface->tx_ring[ring->queue_index] = NULL;
1692
1693     fm10k_for_each_ring(ring, q_vector->rx)
1694         interface->rx_ring[ring->queue_index] = NULL;
1695
1696     interface->q_vector[v_idx] = NULL;
1697     netif_napi_del(&q_vector->napi);
1698     kfree_rcu(q_vector, rcu);
1699 }
1700
1701 /**
1702  * fm10k_alloc_q_vectors - Allocate memory for interrupt vectors
1703  * @interface: board private structure to initialize
1704  *
1705  * We allocate one q_vector per queue interrupt.  If allocation fails we
1706  * return -ENOMEM.
1707  **/
1708 static int fm10k_alloc_q_vectors(struct fm10k_intfc *interface)
1709 {
1710     unsigned int q_vectors = interface->num_q_vectors;
1711     unsigned int rxr_remaining = interface->num_rx_queues;
1712     unsigned int txr_remaining = interface->num_tx_queues;
1713     unsigned int rxr_idx = 0, txr_idx = 0, v_idx = 0;
1714     int err;
1715
1716     if (q_vectors >= (rxr_remaining + txr_remaining)) {
1717         for (; rxr_remaining; v_idx++) {
1718             err = fm10k_alloc_q_vector(interface, q_vectors, v_idx,
1719                            0, 0, 1, rxr_idx);
1720             if (err)
1721                 goto err_out;
1722
1723             /* update counts and index */
1724             rxr_remaining--;
1725             rxr_idx++;
1726         }
1727     }
1728
1729     for (; v_idx < q_vectors; v_idx++) {
1730         int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
1731         int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
1732
1733         err = fm10k_alloc_q_vector(interface, q_vectors, v_idx,
1734                        tqpv, txr_idx,
1735                        rqpv, rxr_idx);
1736
1737         if (err)
1738             goto err_out;
1739
1740         /* update counts and index */
1741         rxr_remaining -= rqpv;
1742         txr_remaining -= tqpv;
1743         rxr_idx++;
1744         txr_idx++;
1745     }
1746
1747     return 0;
1748
1749 err_out:
1750     fm10k_reset_num_queues(interface);
1751
1752     while (v_idx--)
1753         fm10k_free_q_vector(interface, v_idx);
1754
1755     return -ENOMEM;
1756 }
1757
1758 /**
1759  * fm10k_free_q_vectors - Free memory allocated for interrupt vectors
1760  * @interface: board private structure to initialize
1761  *
1762  * This function frees the memory allocated to the q_vectors.  In addition if
1763  * NAPI is enabled it will delete any references to the NAPI struct prior
1764  * to freeing the q_vector.
1765  **/
1766 static void fm10k_free_q_vectors(struct fm10k_intfc *interface)
1767 {
1768     int v_idx = interface->num_q_vectors;
1769
1770     fm10k_reset_num_queues(interface);
1771
1772     while (v_idx--)
1773         fm10k_free_q_vector(interface, v_idx);
1774 }
1775
1776 /**
1777  * fm10k_reset_msix_capability - reset MSI-X capability
1778  * @interface: board private structure to initialize
1779  *
1780  * Reset the MSI-X capability back to its starting state
1781  **/
1782 static void fm10k_reset_msix_capability(struct fm10k_intfc *interface)
1783 {
1784     pci_disable_msix(interface->pdev);
1785     kfree(interface->msix_entries);
1786     interface->msix_entries = NULL;
1787 }
1788
1789 /**
1790  * fm10k_init_msix_capability - configure MSI-X capability
1791  * @interface: board private structure to initialize
1792  *
1793  * Attempt to configure the interrupts using the best available
1794  * capabilities of the hardware and the kernel.
1795  **/
1796 static int fm10k_init_msix_capability(struct fm10k_intfc *interface)
1797 {
1798     struct fm10k_hw *hw = &interface->hw;
1799     int v_budget, vector;
1800
1801     /* It's easy to be greedy for MSI-X vectors, but it really
1802      * doesn't do us much good if we have a lot more vectors
1803      * than CPU's.  So let's be conservative and only ask for
1804      * (roughly) the same number of vectors as there are CPU's.
1805      * the default is to use pairs of vectors
1806      */
1807     v_budget = max(interface->num_rx_queues, interface->num_tx_queues);
1808     v_budget = min_t(u16, v_budget, num_online_cpus());
1809
1810     /* account for vectors not related to queues */
1811     v_budget += NON_Q_VECTORS;
1812
1813     /* At the same time, hardware can only support a maximum of
1814      * hw.mac->max_msix_vectors vectors.  With features
1815      * such as RSS and VMDq, we can easily surpass the number of Rx and Tx
1816      * descriptor queues supported by our device.  Thus, we cap it off in
1817      * those rare cases where the cpu count also exceeds our vector limit.
1818      */
1819     v_budget = min_t(int, v_budget, hw->mac.max_msix_vectors);
1820
1821     /* A failure in MSI-X entry allocation is fatal. */
1822     interface->msix_entries = kcalloc(v_budget, sizeof(struct msix_entry),
1823                       GFP_KERNEL);
1824     if (!interface->msix_entries)
1825         return -ENOMEM;
1826
1827     /* populate entry values */
1828     for (vector = 0; vector < v_budget; vector++)
1829         interface->msix_entries[vector].entry = vector;
1830
1831     /* Attempt to enable MSI-X with requested value */
1832     v_budget = pci_enable_msix_range(interface->pdev,
1833                      interface->msix_entries,
1834                      MIN_MSIX_COUNT(hw),
1835                      v_budget);
1836     if (v_budget < 0) {
1837         kfree(interface->msix_entries);
1838         interface->msix_entries = NULL;
1839         return v_budget;
1840     }
1841
1842     /* record the number of queues available for q_vectors */
1843     interface->num_q_vectors = v_budget - NON_Q_VECTORS;
1844
1845     return 0;
1846 }
1847
1848 /**
1849  * fm10k_cache_ring_qos - Descriptor ring to register mapping for QoS
1850  * @interface: Interface structure continaining rings and devices
1851  *
1852  * Cache the descriptor ring offsets for Qos
1853  **/
1854 static bool fm10k_cache_ring_qos(struct fm10k_intfc *interface)
1855 {
1856     struct net_device *dev = interface->netdev;
1857     int pc, offset, rss_i, i;
1858     u16 pc_stride = interface->ring_feature[RING_F_QOS].mask + 1;
1859     u8 num_pcs = netdev_get_num_tc(dev);
1860
1861     if (num_pcs <= 1)
1862         return false;
1863
1864     rss_i = interface->ring_feature[RING_F_RSS].indices;
1865
1866     for (pc = 0, offset = 0; pc < num_pcs; pc++, offset += rss_i) {
1867         int q_idx = pc;
1868
1869         for (i = 0; i < rss_i; i++) {
1870             interface->tx_ring[offset + i]->reg_idx = q_idx;
1871             interface->tx_ring[offset + i]->qos_pc = pc;
1872             interface->rx_ring[offset + i]->reg_idx = q_idx;
1873             interface->rx_ring[offset + i]->qos_pc = pc;
1874             q_idx += pc_stride;
1875         }
1876     }
1877
1878     return true;
1879 }
1880
1881 /**
1882  * fm10k_cache_ring_rss - Descriptor ring to register mapping for RSS
1883  * @interface: Interface structure continaining rings and devices
1884  *
1885  * Cache the descriptor ring offsets for RSS
1886  **/
1887 static void fm10k_cache_ring_rss(struct fm10k_intfc *interface)
1888 {
1889     int i;
1890
1891     for (i = 0; i < interface->num_rx_queues; i++)
1892         interface->rx_ring[i]->reg_idx = i;
1893
1894     for (i = 0; i < interface->num_tx_queues; i++)
1895         interface->tx_ring[i]->reg_idx = i;
1896 }
1897
1898 /**
1899  * fm10k_assign_rings - Map rings to network devices
1900  * @interface: Interface structure containing rings and devices
1901  *
1902  * This function is meant to go though and configure both the network
1903  * devices so that they contain rings, and configure the rings so that
1904  * they function with their network devices.
1905  **/
1906 static void fm10k_assign_rings(struct fm10k_intfc *interface)
1907 {
1908     if (fm10k_cache_ring_qos(interface))
1909         return;
1910
1911     fm10k_cache_ring_rss(interface);
1912 }
1913
1914 static void fm10k_init_reta(struct fm10k_intfc *interface)
1915 {
1916     u16 i, rss_i = interface->ring_feature[RING_F_RSS].indices;
1917     u32 reta;
1918
1919     /* If the Rx flow indirection table has been configured manually, we
1920      * need to maintain it when possible.
1921      */
1922     if (netif_is_rxfh_configured(interface->netdev)) {
1923         for (i = FM10K_RETA_SIZE; i--;) {
1924             reta = interface->reta[i];
1925             if ((((reta << 24) >> 24) < rss_i) &&
1926                 (((reta << 16) >> 24) < rss_i) &&
1927                 (((reta <<  8) >> 24) < rss_i) &&
1928                 (((reta)       >> 24) < rss_i))
1929                 continue;
1930
1931             /* this should never happen */
1932             dev_err(&interface->pdev->dev,
1933                 "RSS indirection table assigned flows out of queue bounds. Reconfiguring.\n");
1934             goto repopulate_reta;
1935         }
1936
1937         /* do nothing if all of the elements are in bounds */
1938         return;
1939     }
1940
1941 repopulate_reta:
1942     fm10k_write_reta(interface, NULL);
1943 }
1944
1945 /**
1946  * fm10k_init_queueing_scheme - Determine proper queueing scheme
1947  * @interface: board private structure to initialize
1948  *
1949  * We determine which queueing scheme to use based on...
1950  * - Hardware queue count (num_*_queues)
1951  *   - defined by miscellaneous hardware support/features (RSS, etc.)
1952  **/
1953 int fm10k_init_queueing_scheme(struct fm10k_intfc *interface)
1954 {
1955     int err;
1956
1957     /* Number of supported queues */
1958     fm10k_set_num_queues(interface);
1959
1960     /* Configure MSI-X capability */
1961     err = fm10k_init_msix_capability(interface);
1962     if (err) {
1963         dev_err(&interface->pdev->dev,
1964             "Unable to initialize MSI-X capability\n");
1965         goto err_init_msix;
1966     }
1967
1968     /* Allocate memory for queues */
1969     err = fm10k_alloc_q_vectors(interface);
1970     if (err) {
1971         dev_err(&interface->pdev->dev,
1972             "Unable to allocate queue vectors\n");
1973         goto err_alloc_q_vectors;
1974     }
1975
1976     /* Map rings to devices, and map devices to physical queues */
1977     fm10k_assign_rings(interface);
1978
1979     /* Initialize RSS redirection table */
1980     fm10k_init_reta(interface);
1981
1982     return 0;
1983
1984 err_alloc_q_vectors:
1985     fm10k_reset_msix_capability(interface);
1986 err_init_msix:
1987     fm10k_reset_num_queues(interface);
1988     return err;
1989 }
1990
1991 /**
1992  * fm10k_clear_queueing_scheme - Clear the current queueing scheme settings
1993  * @interface: board private structure to clear queueing scheme on
1994  *
1995  * We go through and clear queueing specific resources and reset the structure
1996  * to pre-load conditions
1997  **/
1998 void fm10k_clear_queueing_scheme(struct fm10k_intfc *interface)
1999 {
2000     fm10k_free_q_vectors(interface);
2001     fm10k_reset_msix_capability(interface);
2002 }