0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /****************************************************************************
0003  * Driver for Solarflare network controllers and boards
0004  * Copyright 2018 Solarflare Communications Inc.
0005  *
0006  * This program is free software; you can redistribute it and/or modify it
0007  * under the terms of the GNU General Public License version 2 as published
0008  * by the Free Software Foundation, incorporated herein by reference.
0009  */
0010 
0011 #include "net_driver.h"
0012 #include <linux/module.h>
0013 #include <linux/iommu.h>
0014 #include "efx.h"
0015 #include "nic.h"
0016 #include "rx_common.h"
0017 
0018 /* This is the percentage fill level below which new RX descriptors
0019  * will be added to the RX descriptor ring.
0020  */
0021 static unsigned int rx_refill_threshold;
0022 module_param(rx_refill_threshold, uint, 0444);
0023 MODULE_PARM_DESC(rx_refill_threshold,
0024          "RX descriptor ring refill threshold (%)");
0025 
0026 /* RX maximum head room required.
0027  *
0028  * This must be at least 1 to prevent overflow, plus one packet-worth
0029  * to allow pipelined receives.
0030  */
0031 #define EFX_RXD_HEAD_ROOM (1 + EFX_RX_MAX_FRAGS)
0032 
0033 static void efx_unmap_rx_buffer(struct efx_nic *efx,
0034                 struct efx_rx_buffer *rx_buf);
0035 
0036 /* Check the RX page recycle ring for a page that can be reused. */
0037 static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue)
0038 {
0039     struct efx_nic *efx = rx_queue->efx;
0040     struct efx_rx_page_state *state;
0041     unsigned int index;
0042     struct page *page;
0043 
0044     if (unlikely(!rx_queue->page_ring))
0045         return NULL;
0046     index = rx_queue->page_remove & rx_queue->page_ptr_mask;
0047     page = rx_queue->page_ring[index];
0048     if (page == NULL)
0049         return NULL;
0050 
0051     rx_queue->page_ring[index] = NULL;
0052     /* page_remove cannot exceed page_add. */
0053     if (rx_queue->page_remove != rx_queue->page_add)
0054         ++rx_queue->page_remove;
0055 
0056     /* If page_count is 1 then we hold the only reference to this page. */
0057     if (page_count(page) == 1) {
0058         ++rx_queue->page_recycle_count;
0059         return page;
0060     } else {
0061         state = page_address(page);
0062         dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
0063                    PAGE_SIZE << efx->rx_buffer_order,
0064                    DMA_FROM_DEVICE);
0065         put_page(page);
0066         ++rx_queue->page_recycle_failed;
0067     }
0068 
0069     return NULL;
0070 }
0071 
0072 /* Attempt to recycle the page if there is an RX recycle ring; the page can
0073  * only be added if this is the final RX buffer, to prevent pages being used in
0074  * the descriptor ring and appearing in the recycle ring simultaneously.
0075  */
0076 static void efx_recycle_rx_page(struct efx_channel *channel,
0077                 struct efx_rx_buffer *rx_buf)
0078 {
0079     struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
0080     struct efx_nic *efx = rx_queue->efx;
0081     struct page *page = rx_buf->page;
0082     unsigned int index;
0083 
0084     /* Only recycle the page after processing the final buffer. */
0085     if (!(rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE))
0086         return;
0087 
0088     index = rx_queue->page_add & rx_queue->page_ptr_mask;
0089     if (rx_queue->page_ring[index] == NULL) {
0090         unsigned int read_index = rx_queue->page_remove &
0091             rx_queue->page_ptr_mask;
0092 
0093         /* The next slot in the recycle ring is available, but
0094          * increment page_remove if the read pointer currently
0095          * points here.
0096          */
0097         if (read_index == index)
0098             ++rx_queue->page_remove;
0099         rx_queue->page_ring[index] = page;
0100         ++rx_queue->page_add;
0101         return;
0102     }
0103     ++rx_queue->page_recycle_full;
0104     efx_unmap_rx_buffer(efx, rx_buf);
0105     put_page(rx_buf->page);
0106 }
0107 
0108 /* Recycle the pages that are used by buffers that have just been received. */
0109 void efx_siena_recycle_rx_pages(struct efx_channel *channel,
0110                 struct efx_rx_buffer *rx_buf,
0111                 unsigned int n_frags)
0112 {
0113     struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
0114 
0115     if (unlikely(!rx_queue->page_ring))
0116         return;
0117 
0118     do {
0119         efx_recycle_rx_page(channel, rx_buf);
0120         rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
0121     } while (--n_frags);
0122 }
0123 
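     /* Discard a packet that will not be passed up the stack: return its pages
      * to the recycle ring where possible and drop the buffer references.
      */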
0124 void efx_siena_discard_rx_packet(struct efx_channel *channel,
0125                  struct efx_rx_buffer *rx_buf,
0126                  unsigned int n_frags)
0127 {
0128     struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
0129 
0130     efx_siena_recycle_rx_pages(channel, rx_buf, n_frags);
0131 
0132     efx_siena_free_rx_buffers(rx_queue, rx_buf, n_frags);
0133 }
0134 
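     /* Allocate the page recycle ring as a power-of-two array sized from
      * efx_rx_recycle_ring_size().  If the allocation fails, page_ring is left
      * NULL and the fast path simply skips recycling.
      */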
0135 static void efx_init_rx_recycle_ring(struct efx_rx_queue *rx_queue)
0136 {
0137     unsigned int bufs_in_recycle_ring, page_ring_size;
0138     struct efx_nic *efx = rx_queue->efx;
0139 
0140     bufs_in_recycle_ring = efx_rx_recycle_ring_size(efx);
0141     page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring /
0142                         efx->rx_bufs_per_page);
0143     rx_queue->page_ring = kcalloc(page_ring_size,
0144                       sizeof(*rx_queue->page_ring), GFP_KERNEL);
0145     if (!rx_queue->page_ring)
0146         rx_queue->page_ptr_mask = 0;
0147     else
0148         rx_queue->page_ptr_mask = page_ring_size - 1;
0149 }
0150 
0151 static void efx_fini_rx_recycle_ring(struct efx_rx_queue *rx_queue)
0152 {
0153     struct efx_nic *efx = rx_queue->efx;
0154     int i;
0155 
0156     if (unlikely(!rx_queue->page_ring))
0157         return;
0158 
0159     /* Unmap and release the pages in the recycle ring. Remove the ring. */
0160     for (i = 0; i <= rx_queue->page_ptr_mask; i++) {
0161         struct page *page = rx_queue->page_ring[i];
0162         struct efx_rx_page_state *state;
0163 
0164         if (page == NULL)
0165             continue;
0166 
0167         state = page_address(page);
0168         dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
0169                    PAGE_SIZE << efx->rx_buffer_order,
0170                    DMA_FROM_DEVICE);
0171         put_page(page);
0172     }
0173     kfree(rx_queue->page_ring);
0174     rx_queue->page_ring = NULL;
0175 }
0176 
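     /* Release a buffer that is still owned by the RX queue at teardown time. */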
0177 static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue,
0178                    struct efx_rx_buffer *rx_buf)
0179 {
0180     /* Release the page reference we hold for the buffer. */
0181     if (rx_buf->page)
0182         put_page(rx_buf->page);
0183 
0184     /* If this is the last buffer in a page, unmap and free it. */
0185     if (rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE) {
0186         efx_unmap_rx_buffer(rx_queue->efx, rx_buf);
0187         efx_siena_free_rx_buffers(rx_queue, rx_buf, 1);
0188     }
0189     rx_buf->page = NULL;
0190 }
0191 
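     /* Allocate the software buffer array and the hardware descriptor ring for
      * an RX queue.  The ring size is rounded up to a power of two so that
      * ptr_mask can be used to wrap indices.
      */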
0192 int efx_siena_probe_rx_queue(struct efx_rx_queue *rx_queue)
0193 {
0194     struct efx_nic *efx = rx_queue->efx;
0195     unsigned int entries;
0196     int rc;
0197 
0198     /* Create the smallest power-of-two aligned ring */
0199     entries = max(roundup_pow_of_two(efx->rxq_entries), EFX_MIN_DMAQ_SIZE);
0200     EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE);
0201     rx_queue->ptr_mask = entries - 1;
0202 
0203     netif_dbg(efx, probe, efx->net_dev,
0204           "creating RX queue %d size %#x mask %#x\n",
0205           efx_rx_queue_index(rx_queue), efx->rxq_entries,
0206           rx_queue->ptr_mask);
0207 
0208     /* Allocate RX buffers */
0209     rx_queue->buffer = kcalloc(entries, sizeof(*rx_queue->buffer),
0210                    GFP_KERNEL);
0211     if (!rx_queue->buffer)
0212         return -ENOMEM;
0213 
0214     rc = efx_nic_probe_rx(rx_queue);
0215     if (rc) {
0216         kfree(rx_queue->buffer);
0217         rx_queue->buffer = NULL;
0218     }
0219 
0220     return rc;
0221 }
0222 
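     /* Reset the queue's fill state, set up the page recycle ring and refill
      * thresholds, register the XDP RX queue info and program the descriptor
      * ring on the NIC.
      */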
0223 void efx_siena_init_rx_queue(struct efx_rx_queue *rx_queue)
0224 {
0225     unsigned int max_fill, trigger, max_trigger;
0226     struct efx_nic *efx = rx_queue->efx;
0227     int rc = 0;
0228 
0229     netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
0230           "initialising RX queue %d\n", efx_rx_queue_index(rx_queue));
0231 
0232     /* Initialise ptr fields */
0233     rx_queue->added_count = 0;
0234     rx_queue->notified_count = 0;
0235     rx_queue->removed_count = 0;
0236     rx_queue->min_fill = -1U;
0237     efx_init_rx_recycle_ring(rx_queue);
0238 
0239     rx_queue->page_remove = 0;
0240     rx_queue->page_add = rx_queue->page_ptr_mask + 1;
0241     rx_queue->page_recycle_count = 0;
0242     rx_queue->page_recycle_failed = 0;
0243     rx_queue->page_recycle_full = 0;
0244 
0245     /* Initialise limit fields */
0246     max_fill = efx->rxq_entries - EFX_RXD_HEAD_ROOM;
0247     max_trigger =
0248         max_fill - efx->rx_pages_per_batch * efx->rx_bufs_per_page;
0249     if (rx_refill_threshold != 0) {
0250         trigger = max_fill * min(rx_refill_threshold, 100U) / 100U;
0251         if (trigger > max_trigger)
0252             trigger = max_trigger;
0253     } else {
0254         trigger = max_trigger;
0255     }
0256 
0257     rx_queue->max_fill = max_fill;
0258     rx_queue->fast_fill_trigger = trigger;
0259     rx_queue->refill_enabled = true;
0260 
0261     /* Initialise XDP queue information */
0262     rc = xdp_rxq_info_reg(&rx_queue->xdp_rxq_info, efx->net_dev,
0263                   rx_queue->core_index, 0);
0264 
0265     if (rc) {
0266         netif_err(efx, rx_err, efx->net_dev,
0267               "Failure to initialise XDP queue information rc=%d\n",
0268               rc);
0269         efx->xdp_rxq_info_failed = true;
0270     } else {
0271         rx_queue->xdp_rxq_info_valid = true;
0272     }
0273 
0274     /* Set up RX descriptor ring */
0275     efx_nic_init_rx(rx_queue);
0276 }
0277 
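     /* Stop the slow-fill timer, release any buffers still outstanding between
      * the read and write pointers, drain the page recycle ring and unregister
      * the XDP RX queue info.
      */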
0278 void efx_siena_fini_rx_queue(struct efx_rx_queue *rx_queue)
0279 {
0280     struct efx_rx_buffer *rx_buf;
0281     int i;
0282 
0283     netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
0284           "shutting down RX queue %d\n", efx_rx_queue_index(rx_queue));
0285 
0286     del_timer_sync(&rx_queue->slow_fill);
0287 
0288     /* Release RX buffers from the current read ptr to the write ptr */
0289     if (rx_queue->buffer) {
0290         for (i = rx_queue->removed_count; i < rx_queue->added_count;
0291              i++) {
0292             unsigned int index = i & rx_queue->ptr_mask;
0293 
0294             rx_buf = efx_rx_buffer(rx_queue, index);
0295             efx_fini_rx_buffer(rx_queue, rx_buf);
0296         }
0297     }
0298 
0299     efx_fini_rx_recycle_ring(rx_queue);
0300 
0301     if (rx_queue->xdp_rxq_info_valid)
0302         xdp_rxq_info_unreg(&rx_queue->xdp_rxq_info);
0303 
0304     rx_queue->xdp_rxq_info_valid = false;
0305 }
0306 
0307 void efx_siena_remove_rx_queue(struct efx_rx_queue *rx_queue)
0308 {
0309     netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
0310           "destroying RX queue %d\n", efx_rx_queue_index(rx_queue));
0311 
0312     efx_nic_remove_rx(rx_queue);
0313 
0314     kfree(rx_queue->buffer);
0315     rx_queue->buffer = NULL;
0316 }
0317 
0318 /* Unmap a DMA-mapped page.  This function is only called for the final RX
0319  * buffer in a page.
0320  */
0321 static void efx_unmap_rx_buffer(struct efx_nic *efx,
0322                 struct efx_rx_buffer *rx_buf)
0323 {
0324     struct page *page = rx_buf->page;
0325 
0326     if (page) {
0327         struct efx_rx_page_state *state = page_address(page);
0328 
0329         dma_unmap_page(&efx->pci_dev->dev,
0330                    state->dma_addr,
0331                    PAGE_SIZE << efx->rx_buffer_order,
0332                    DMA_FROM_DEVICE);
0333     }
0334 }
0335 
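     /* Drop the page references held by @num_bufs consecutive RX buffers,
      * starting at @rx_buf.
      */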
0336 void efx_siena_free_rx_buffers(struct efx_rx_queue *rx_queue,
0337                    struct efx_rx_buffer *rx_buf,
0338                    unsigned int num_bufs)
0339 {
0340     do {
0341         if (rx_buf->page) {
0342             put_page(rx_buf->page);
0343             rx_buf->page = NULL;
0344         }
0345         rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
0346     } while (--num_bufs);
0347 }
0348 
0349 void efx_siena_rx_slow_fill(struct timer_list *t)
0350 {
0351     struct efx_rx_queue *rx_queue = from_timer(rx_queue, t, slow_fill);
0352 
0353     /* Post an event to cause NAPI to run and refill the queue */
0354     efx_nic_generate_fill_event(rx_queue);
0355     ++rx_queue->slow_fill_count;
0356 }
0357 
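     /* Arm the slow-fill timer: roughly 10ms later efx_siena_rx_slow_fill()
      * will post a fill event so that NAPI refills the queue.  Used when
      * buffer allocation fails in the fast path.
      */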
0358 static void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue)
0359 {
0360     mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(10));
0361 }
0362 
0363 /** efx_init_rx_buffers - create EFX_RX_BATCH page-based RX buffers
0364  * @rx_queue:       Efx RX queue
0365  * @atomic:         controls memory allocation flags (GFP_ATOMIC vs GFP_KERNEL)
0366  *
0367  * This allocates a batch of pages, maps them for DMA, and populates
0368  * struct efx_rx_buffers for each one. Return a negative error code or
0369  * 0 on success. If a single page can be used for multiple buffers,
0370  * then the page will either be inserted fully, or not at all.
0371  */
0372 static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic)
0373 {
0374     unsigned int page_offset, index, count;
0375     struct efx_nic *efx = rx_queue->efx;
0376     struct efx_rx_page_state *state;
0377     struct efx_rx_buffer *rx_buf;
0378     dma_addr_t dma_addr;
0379     struct page *page;
0380 
0381     count = 0;
0382     do {
0383         page = efx_reuse_page(rx_queue);
0384         if (page == NULL) {
0385             page = alloc_pages(__GFP_COMP |
0386                        (atomic ? GFP_ATOMIC : GFP_KERNEL),
0387                        efx->rx_buffer_order);
0388             if (unlikely(page == NULL))
0389                 return -ENOMEM;
0390             dma_addr =
0391                 dma_map_page(&efx->pci_dev->dev, page, 0,
0392                          PAGE_SIZE << efx->rx_buffer_order,
0393                          DMA_FROM_DEVICE);
0394             if (unlikely(dma_mapping_error(&efx->pci_dev->dev,
0395                                dma_addr))) {
0396                 __free_pages(page, efx->rx_buffer_order);
0397                 return -EIO;
0398             }
0399             state = page_address(page);
0400             state->dma_addr = dma_addr;
0401         } else {
0402             state = page_address(page);
0403             dma_addr = state->dma_addr;
0404         }
0405 
0406         dma_addr += sizeof(struct efx_rx_page_state);
0407         page_offset = sizeof(struct efx_rx_page_state);
0408 
0409         do {
0410             index = rx_queue->added_count & rx_queue->ptr_mask;
0411             rx_buf = efx_rx_buffer(rx_queue, index);
0412             rx_buf->dma_addr = dma_addr + efx->rx_ip_align +
0413                        EFX_XDP_HEADROOM;
0414             rx_buf->page = page;
0415             rx_buf->page_offset = page_offset + efx->rx_ip_align +
0416                           EFX_XDP_HEADROOM;
0417             rx_buf->len = efx->rx_dma_len;
0418             rx_buf->flags = 0;
0419             ++rx_queue->added_count;
0420             get_page(page);
0421             dma_addr += efx->rx_page_buf_step;
0422             page_offset += efx->rx_page_buf_step;
0423         } while (page_offset + efx->rx_page_buf_step <= PAGE_SIZE);
0424 
0425         rx_buf->flags = EFX_RX_BUF_LAST_IN_PAGE;
0426     } while (++count < efx->rx_pages_per_batch);
0427 
0428     return 0;
0429 }
0430 
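     /* Work out how each DMA-mapped (possibly compound) page is carved up into
      * RX buffers: rx_page_buf_step is the per-buffer stride including IP
      * alignment and XDP head/tailroom, and the struct efx_rx_page_state kept
      * at the start of each page is excluded from the usable space.
      *
      * Purely illustrative arithmetic, with assumed values not taken from this
      * driver: for a 4096-byte order-0 page, a 64-byte page state and a
      * 1920-byte rx_page_buf_step, rx_bufs_per_page = (4096 - 64) / 1920 = 2
      * and rx_pages_per_batch = DIV_ROUND_UP(EFX_RX_PREFERRED_BATCH, 2).
      */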
0431 void efx_siena_rx_config_page_split(struct efx_nic *efx)
0432 {
0433     efx->rx_page_buf_step = ALIGN(efx->rx_dma_len + efx->rx_ip_align +
0434                       EFX_XDP_HEADROOM + EFX_XDP_TAILROOM,
0435                       EFX_RX_BUF_ALIGNMENT);
0436     efx->rx_bufs_per_page = efx->rx_buffer_order ? 1 :
0437         ((PAGE_SIZE - sizeof(struct efx_rx_page_state)) /
0438         efx->rx_page_buf_step);
0439     efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) /
0440         efx->rx_bufs_per_page;
0441     efx->rx_pages_per_batch = DIV_ROUND_UP(EFX_RX_PREFERRED_BATCH,
0442                            efx->rx_bufs_per_page);
0443 }
0444 
0445 /** efx_siena_fast_push_rx_descriptors - push new RX descriptors quickly
0446  * @rx_queue:       RX descriptor queue
0447  * @atomic:         controls memory allocation flags (GFP_ATOMIC vs GFP_KERNEL)
0448  *
0449  * This will aim to fill the RX descriptor queue up to @rx_queue->max_fill.
0450  * If there is insufficient atomic memory to do so, a slow fill is scheduled.
0451  *
0452  * The caller must provide serialisation (none is used here). In practice,
0453  * this means this function must run from the NAPI handler, or be called
0454  * when NAPI is disabled.
0455  */
0456 void efx_siena_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue,
0457                     bool atomic)
0458 {
0459     struct efx_nic *efx = rx_queue->efx;
0460     unsigned int fill_level, batch_size;
0461     int space, rc = 0;
0462 
0463     if (!rx_queue->refill_enabled)
0464         return;
0465 
0466     /* Calculate current fill level, and exit if we don't need to fill */
0467     fill_level = (rx_queue->added_count - rx_queue->removed_count);
0468     EFX_WARN_ON_ONCE_PARANOID(fill_level > rx_queue->efx->rxq_entries);
0469     if (fill_level >= rx_queue->fast_fill_trigger)
0470         goto out;
0471 
0472     /* Record minimum fill level */
0473     if (unlikely(fill_level < rx_queue->min_fill)) {
0474         if (fill_level)
0475             rx_queue->min_fill = fill_level;
0476     }
0477 
0478     batch_size = efx->rx_pages_per_batch * efx->rx_bufs_per_page;
0479     space = rx_queue->max_fill - fill_level;
0480     EFX_WARN_ON_ONCE_PARANOID(space < batch_size);
0481 
0482     netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
0483            "RX queue %d fast-filling descriptor ring from"
0484            " level %d to level %d\n",
0485            efx_rx_queue_index(rx_queue), fill_level,
0486            rx_queue->max_fill);
0487 
0488     do {
0489         rc = efx_init_rx_buffers(rx_queue, atomic);
0490         if (unlikely(rc)) {
0491             /* Ensure that we don't leave the rx queue empty */
0492             efx_schedule_slow_fill(rx_queue);
0493             goto out;
0494         }
0495     } while ((space -= batch_size) >= batch_size);
0496 
0497     netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
0498            "RX queue %d fast-filled descriptor ring "
0499            "to level %d\n", efx_rx_queue_index(rx_queue),
0500            rx_queue->added_count - rx_queue->removed_count);
0501 
0502  out:
0503     if (rx_queue->notified_count != rx_queue->added_count)
0504         efx_nic_notify_rx_desc(rx_queue);
0505 }
0506 
0507 /* Pass a received packet up through GRO.  GRO can handle pages
0508  * regardless of checksum state and skbs with a good checksum.
0509  */
0510 void
0511 efx_siena_rx_packet_gro(struct efx_channel *channel,
0512             struct efx_rx_buffer *rx_buf,
0513             unsigned int n_frags, u8 *eh, __wsum csum)
0514 {
0515     struct napi_struct *napi = &channel->napi_str;
0516     struct efx_nic *efx = channel->efx;
0517     struct sk_buff *skb;
0518 
0519     skb = napi_get_frags(napi);
0520     if (unlikely(!skb)) {
0521         struct efx_rx_queue *rx_queue;
0522 
0523         rx_queue = efx_channel_get_rx_queue(channel);
0524         efx_siena_free_rx_buffers(rx_queue, rx_buf, n_frags);
0525         return;
0526     }
0527 
0528     if (efx->net_dev->features & NETIF_F_RXHASH)
0529         skb_set_hash(skb, efx_rx_buf_hash(efx, eh),
0530                  PKT_HASH_TYPE_L3);
0531     if (csum) {
0532         skb->csum = csum;
0533         skb->ip_summed = CHECKSUM_COMPLETE;
0534     } else {
0535         skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ?
0536                   CHECKSUM_UNNECESSARY : CHECKSUM_NONE);
0537     }
0538     skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL);
0539 
0540     for (;;) {
0541         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
0542                    rx_buf->page, rx_buf->page_offset,
0543                    rx_buf->len);
0544         rx_buf->page = NULL;
0545         skb->len += rx_buf->len;
0546         if (skb_shinfo(skb)->nr_frags == n_frags)
0547             break;
0548 
0549         rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf);
0550     }
0551 
0552     skb->data_len = skb->len;
0553     skb->truesize += n_frags * efx->rx_buffer_truesize;
0554 
0555     skb_record_rx_queue(skb, channel->rx_queue.core_index);
0556 
0557     napi_gro_frags(napi);
0558 }
0559 
0560 /* RSS contexts.  We're using linked lists and crappy O(n) algorithms, because
0561  * (a) this is an infrequent control-plane operation and (b) n is small (max 64)
0562  */
0563 struct efx_rss_context *efx_siena_alloc_rss_context_entry(struct efx_nic *efx)
0564 {
0565     struct list_head *head = &efx->rss_context.list;
0566     struct efx_rss_context *ctx, *new;
0567     u32 id = 1; /* Don't use zero, that refers to the master RSS context */
0568 
0569     WARN_ON(!mutex_is_locked(&efx->rss_lock));
0570 
0571     /* Search for first gap in the numbering */
0572     list_for_each_entry(ctx, head, list) {
0573         if (ctx->user_id != id)
0574             break;
0575         id++;
0576         /* Check for wrap.  If this happens, we have nearly 2^32
0577          * allocated RSS contexts, which seems unlikely.
0578          */
0579         if (WARN_ON_ONCE(!id))
0580             return NULL;
0581     }
0582 
0583     /* Create the new entry */
0584     new = kmalloc(sizeof(*new), GFP_KERNEL);
0585     if (!new)
0586         return NULL;
0587     new->context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
0588     new->rx_hash_udp_4tuple = false;
0589 
0590     /* Insert the new entry into the gap */
0591     new->user_id = id;
0592     list_add_tail(&new->list, &ctx->list);
0593     return new;
0594 }
0595 
0596 struct efx_rss_context *efx_siena_find_rss_context_entry(struct efx_nic *efx,
0597                              u32 id)
0598 {
0599     struct list_head *head = &efx->rss_context.list;
0600     struct efx_rss_context *ctx;
0601 
0602     WARN_ON(!mutex_is_locked(&efx->rss_lock));
0603 
0604     list_for_each_entry(ctx, head, list)
0605         if (ctx->user_id == id)
0606             return ctx;
0607     return NULL;
0608 }
0609 
0610 void efx_siena_free_rss_context_entry(struct efx_rss_context *ctx)
0611 {
0612     list_del(&ctx->list);
0613     kfree(ctx);
0614 }
0615 
0616 void efx_siena_set_default_rx_indir_table(struct efx_nic *efx,
0617                       struct efx_rss_context *ctx)
0618 {
0619     size_t i;
0620 
0621     for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++)
0622         ctx->rx_indir_table[i] =
0623             ethtool_rxfh_indir_default(i, efx->rss_spread);
0624 }
0625 
0626 /**
0627  * efx_siena_filter_is_mc_recipient - test whether spec is a multicast recipient
0628  * @spec: Specification to test
0629  *
0630  * Return: %true if the specification is a non-drop RX filter that
0631  * matches a local MAC address I/G bit value of 1 or matches a local
0632  * IPv4 or IPv6 address value in the respective multicast address
0633  * range.  Otherwise %false.
0634  */
0635 bool efx_siena_filter_is_mc_recipient(const struct efx_filter_spec *spec)
0636 {
0637     if (!(spec->flags & EFX_FILTER_FLAG_RX) ||
0638         spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP)
0639         return false;
0640 
0641     if (spec->match_flags &
0642         (EFX_FILTER_MATCH_LOC_MAC | EFX_FILTER_MATCH_LOC_MAC_IG) &&
0643         is_multicast_ether_addr(spec->loc_mac))
0644         return true;
0645 
0646     if ((spec->match_flags &
0647          (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) ==
0648         (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) {
0649         if (spec->ether_type == htons(ETH_P_IP) &&
0650             ipv4_is_multicast(spec->loc_host[0]))
0651             return true;
0652         if (spec->ether_type == htons(ETH_P_IPV6) &&
0653             ((const u8 *)spec->loc_host)[0] == 0xff)
0654             return true;
0655     }
0656 
0657     return false;
0658 }
0659 
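     /* Filter specs are compared (and hashed, below) over everything from
      * outer_vid to the end of the structure.  match_flags must be identical
      * and, of the flags, only the RX/TX direction bits take part in the
      * comparison.
      */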
0660 bool efx_siena_filter_spec_equal(const struct efx_filter_spec *left,
0661                  const struct efx_filter_spec *right)
0662 {
0663     if ((left->match_flags ^ right->match_flags) |
0664         ((left->flags ^ right->flags) &
0665          (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX)))
0666         return false;
0667 
0668     return memcmp(&left->outer_vid, &right->outer_vid,
0669               sizeof(struct efx_filter_spec) -
0670               offsetof(struct efx_filter_spec, outer_vid)) == 0;
0671 }
0672 
0673 u32 efx_siena_filter_spec_hash(const struct efx_filter_spec *spec)
0674 {
0675     BUILD_BUG_ON(offsetof(struct efx_filter_spec, outer_vid) & 3);
0676     return jhash2((const u32 *)&spec->outer_vid,
0677               (sizeof(struct efx_filter_spec) -
0678                offsetof(struct efx_filter_spec, outer_vid)) / 4,
0679               0);
0680 }
0681 
0682 #ifdef CONFIG_RFS_ACCEL
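     /* Decide whether the ARFS rule backing filter @filter_idx may be expired.
      * Returns false while ARFS is still updating the entry; otherwise returns
      * true, with *force set when the entry is stale and the filter should be
      * removed regardless of whether the flow is still active.
      */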
0683 bool efx_siena_rps_check_rule(struct efx_arfs_rule *rule,
0684                   unsigned int filter_idx, bool *force)
0685 {
0686     if (rule->filter_id == EFX_ARFS_FILTER_ID_PENDING) {
0687         /* ARFS is currently updating this entry, leave it */
0688         return false;
0689     }
0690     if (rule->filter_id == EFX_ARFS_FILTER_ID_ERROR) {
0691         /* ARFS tried and failed to update this, so it's probably out
0692          * of date.  Remove the filter and the ARFS rule entry.
0693          */
0694         rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING;
0695         *force = true;
0696         return true;
0697     } else if (WARN_ON(rule->filter_id != filter_idx)) { /* can't happen */
0698         /* ARFS has moved on, so old filter is not needed.  Since we did
0699          * not mark the rule with EFX_ARFS_FILTER_ID_REMOVING, it will
0700          * not be removed by efx_siena_rps_hash_del() subsequently.
0701          */
0702         *force = true;
0703         return true;
0704     }
0705     /* Remove it iff ARFS wants to. */
0706     return true;
0707 }
0708 
0709 static
0710 struct hlist_head *efx_rps_hash_bucket(struct efx_nic *efx,
0711                        const struct efx_filter_spec *spec)
0712 {
0713     u32 hash = efx_siena_filter_spec_hash(spec);
0714 
0715     lockdep_assert_held(&efx->rps_hash_lock);
0716     if (!efx->rps_hash_table)
0717         return NULL;
0718     return &efx->rps_hash_table[hash % EFX_ARFS_HASH_TABLE_SIZE];
0719 }
0720 
0721 struct efx_arfs_rule *efx_siena_rps_hash_find(struct efx_nic *efx,
0722                     const struct efx_filter_spec *spec)
0723 {
0724     struct efx_arfs_rule *rule;
0725     struct hlist_head *head;
0726     struct hlist_node *node;
0727 
0728     head = efx_rps_hash_bucket(efx, spec);
0729     if (!head)
0730         return NULL;
0731     hlist_for_each(node, head) {
0732         rule = container_of(node, struct efx_arfs_rule, node);
0733         if (efx_siena_filter_spec_equal(spec, &rule->spec))
0734             return rule;
0735     }
0736     return NULL;
0737 }
0738 
0739 static struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx,
0740                     const struct efx_filter_spec *spec,
0741                     bool *new)
0742 {
0743     struct efx_arfs_rule *rule;
0744     struct hlist_head *head;
0745     struct hlist_node *node;
0746 
0747     head = efx_rps_hash_bucket(efx, spec);
0748     if (!head)
0749         return NULL;
0750     hlist_for_each(node, head) {
0751         rule = container_of(node, struct efx_arfs_rule, node);
0752         if (efx_siena_filter_spec_equal(spec, &rule->spec)) {
0753             *new = false;
0754             return rule;
0755         }
0756     }
0757     rule = kmalloc(sizeof(*rule), GFP_ATOMIC);
0758     *new = true;
0759     if (rule) {
0760         memcpy(&rule->spec, spec, sizeof(rule->spec));
0761         hlist_add_head(&rule->node, head);
0762     }
0763     return rule;
0764 }
0765 
0766 void efx_siena_rps_hash_del(struct efx_nic *efx,
0767                 const struct efx_filter_spec *spec)
0768 {
0769     struct efx_arfs_rule *rule;
0770     struct hlist_head *head;
0771     struct hlist_node *node;
0772 
0773     head = efx_rps_hash_bucket(efx, spec);
0774     if (WARN_ON(!head))
0775         return;
0776     hlist_for_each(node, head) {
0777         rule = container_of(node, struct efx_arfs_rule, node);
0778         if (efx_siena_filter_spec_equal(spec, &rule->spec)) {
0779             /* Someone already reused the entry.  We know that if
0780              * this check doesn't fire (i.e. filter_id == REMOVING)
0781              * then the REMOVING mark was put there by our caller,
0782              * because caller is holding a lock on filter table and
0783              * only holders of that lock set REMOVING.
0784              */
0785             if (rule->filter_id != EFX_ARFS_FILTER_ID_REMOVING)
0786                 return;
0787             hlist_del(node);
0788             kfree(rule);
0789             return;
0790         }
0791     }
0792     /* We didn't find it. */
0793     WARN_ON(1);
0794 }
0795 #endif
0796 
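     /* Create the hardware filter table.  With RFS acceleration compiled in
      * and NTUPLE offload supported, also allocate the per-channel rps_flow_id
      * arrays used to track installed ARFS filters.
      */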
0797 int efx_siena_probe_filters(struct efx_nic *efx)
0798 {
0799     int rc;
0800 
0801     mutex_lock(&efx->mac_lock);
0802     down_write(&efx->filter_sem);
0803     rc = efx->type->filter_table_probe(efx);
0804     if (rc)
0805         goto out_unlock;
0806 
0807 #ifdef CONFIG_RFS_ACCEL
0808     if (efx->type->offload_features & NETIF_F_NTUPLE) {
0809         struct efx_channel *channel;
0810         int i, success = 1;
0811 
0812         efx_for_each_channel(channel, efx) {
0813             channel->rps_flow_id =
0814                 kcalloc(efx->type->max_rx_ip_filters,
0815                     sizeof(*channel->rps_flow_id),
0816                     GFP_KERNEL);
0817             if (!channel->rps_flow_id)
0818                 success = 0;
0819             else
0820                 for (i = 0;
0821                      i < efx->type->max_rx_ip_filters;
0822                      ++i)
0823                     channel->rps_flow_id[i] =
0824                         RPS_FLOW_ID_INVALID;
0825             channel->rfs_expire_index = 0;
0826             channel->rfs_filter_count = 0;
0827         }
0828 
0829         if (!success) {
0830             efx_for_each_channel(channel, efx)
0831                 kfree(channel->rps_flow_id);
0832             efx->type->filter_table_remove(efx);
0833             rc = -ENOMEM;
0834             goto out_unlock;
0835         }
0836     }
0837 #endif
0838 out_unlock:
0839     up_write(&efx->filter_sem);
0840     mutex_unlock(&efx->mac_lock);
0841     return rc;
0842 }
0843 
0844 void efx_siena_remove_filters(struct efx_nic *efx)
0845 {
0846 #ifdef CONFIG_RFS_ACCEL
0847     struct efx_channel *channel;
0848 
0849     efx_for_each_channel(channel, efx) {
0850         cancel_delayed_work_sync(&channel->filter_work);
0851         kfree(channel->rps_flow_id);
0852         channel->rps_flow_id = NULL;
0853     }
0854 #endif
0855     down_write(&efx->filter_sem);
0856     efx->type->filter_table_remove(efx);
0857     up_write(&efx->filter_sem);
0858 }
0859 
0860 #ifdef CONFIG_RFS_ACCEL
0861 
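     /* Work item scheduled by efx_siena_filter_rfs(): insert the requested
      * filter, record the resulting filter index against the flow ID for later
      * expiry checks, and update the ARFS hash table entry (if any) with the
      * outcome.
      */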
0862 static void efx_filter_rfs_work(struct work_struct *data)
0863 {
0864     struct efx_async_filter_insertion *req = container_of(data, struct efx_async_filter_insertion,
0865                                   work);
0866     struct efx_nic *efx = netdev_priv(req->net_dev);
0867     struct efx_channel *channel = efx_get_channel(efx, req->rxq_index);
0868     int slot_idx = req - efx->rps_slot;
0869     struct efx_arfs_rule *rule;
0870     u16 arfs_id = 0;
0871     int rc;
0872 
0873     rc = efx->type->filter_insert(efx, &req->spec, true);
0874     if (rc >= 0)
0875         /* Discard 'priority' part of EF10+ filter ID (mcdi_filters) */
0876         rc %= efx->type->max_rx_ip_filters;
0877     if (efx->rps_hash_table) {
0878         spin_lock_bh(&efx->rps_hash_lock);
0879         rule = efx_siena_rps_hash_find(efx, &req->spec);
0880         /* The rule might have already gone, if someone else's request
0881          * for the same spec was already worked and then expired before
0882          * we got around to our work.  In that case we have nothing
0883          * tying us to an arfs_id, meaning that as soon as the filter
0884          * is considered for expiry it will be removed.
0885          */
0886         if (rule) {
0887             if (rc < 0)
0888                 rule->filter_id = EFX_ARFS_FILTER_ID_ERROR;
0889             else
0890                 rule->filter_id = rc;
0891             arfs_id = rule->arfs_id;
0892         }
0893         spin_unlock_bh(&efx->rps_hash_lock);
0894     }
0895     if (rc >= 0) {
0896         /* Remember this so we can check whether to expire the filter
0897          * later.
0898          */
0899         mutex_lock(&efx->rps_mutex);
0900         if (channel->rps_flow_id[rc] == RPS_FLOW_ID_INVALID)
0901             channel->rfs_filter_count++;
0902         channel->rps_flow_id[rc] = req->flow_id;
0903         mutex_unlock(&efx->rps_mutex);
0904 
0905         if (req->spec.ether_type == htons(ETH_P_IP))
0906             netif_info(efx, rx_status, efx->net_dev,
0907                    "steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d id %u]\n",
0908                    (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
0909                    req->spec.rem_host, ntohs(req->spec.rem_port),
0910                    req->spec.loc_host, ntohs(req->spec.loc_port),
0911                    req->rxq_index, req->flow_id, rc, arfs_id);
0912         else
0913             netif_info(efx, rx_status, efx->net_dev,
0914                    "steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d id %u]\n",
0915                    (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
0916                    req->spec.rem_host, ntohs(req->spec.rem_port),
0917                    req->spec.loc_host, ntohs(req->spec.loc_port),
0918                    req->rxq_index, req->flow_id, rc, arfs_id);
0919         channel->n_rfs_succeeded++;
0920     } else {
0921         if (req->spec.ether_type == htons(ETH_P_IP))
0922             netif_dbg(efx, rx_status, efx->net_dev,
0923                   "failed to steer %s %pI4:%u:%pI4:%u to queue %u [flow %u rc %d id %u]\n",
0924                   (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
0925                   req->spec.rem_host, ntohs(req->spec.rem_port),
0926                   req->spec.loc_host, ntohs(req->spec.loc_port),
0927                   req->rxq_index, req->flow_id, rc, arfs_id);
0928         else
0929             netif_dbg(efx, rx_status, efx->net_dev,
0930                   "failed to steer %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u rc %d id %u]\n",
0931                   (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
0932                   req->spec.rem_host, ntohs(req->spec.rem_port),
0933                   req->spec.loc_host, ntohs(req->spec.loc_port),
0934                   req->rxq_index, req->flow_id, rc, arfs_id);
0935         channel->n_rfs_failed++;
0936         /* We're overloading the NIC's filter tables, so let's do a
0937          * chunk of extra expiry work.
0938          */
0939         __efx_siena_filter_rfs_expire(channel,
0940                           min(channel->rfs_filter_count,
0941                           100u));
0942     }
0943 
0944     /* Release references */
0945     clear_bit(slot_idx, &efx->rps_slot_map);
0946     dev_put(req->net_dev);
0947 }
0948 
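     /* Accelerated RFS steering request (presumably wired up as the driver's
      * ndo_rx_flow_steer hook): build a filter spec from the skb's flow keys,
      * stash it in a free rps_slot and defer the actual insertion to
      * efx_filter_rfs_work().  Returns the ARFS id or a negative error.
      */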
0949 int efx_siena_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
0950              u16 rxq_index, u32 flow_id)
0951 {
0952     struct efx_nic *efx = netdev_priv(net_dev);
0953     struct efx_async_filter_insertion *req;
0954     struct efx_arfs_rule *rule;
0955     struct flow_keys fk;
0956     int slot_idx;
0957     bool new;
0958     int rc;
0959 
0960     /* find a free slot */
0961     for (slot_idx = 0; slot_idx < EFX_RPS_MAX_IN_FLIGHT; slot_idx++)
0962         if (!test_and_set_bit(slot_idx, &efx->rps_slot_map))
0963             break;
0964     if (slot_idx >= EFX_RPS_MAX_IN_FLIGHT)
0965         return -EBUSY;
0966 
0967     if (flow_id == RPS_FLOW_ID_INVALID) {
0968         rc = -EINVAL;
0969         goto out_clear;
0970     }
0971 
0972     if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) {
0973         rc = -EPROTONOSUPPORT;
0974         goto out_clear;
0975     }
0976 
0977     if (fk.basic.n_proto != htons(ETH_P_IP) && fk.basic.n_proto != htons(ETH_P_IPV6)) {
0978         rc = -EPROTONOSUPPORT;
0979         goto out_clear;
0980     }
0981     if (fk.control.flags & FLOW_DIS_IS_FRAGMENT) {
0982         rc = -EPROTONOSUPPORT;
0983         goto out_clear;
0984     }
0985 
0986     req = efx->rps_slot + slot_idx;
0987     efx_filter_init_rx(&req->spec, EFX_FILTER_PRI_HINT,
0988                efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0,
0989                rxq_index);
0990     req->spec.match_flags =
0991         EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_IP_PROTO |
0992         EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_LOC_PORT |
0993         EFX_FILTER_MATCH_REM_HOST | EFX_FILTER_MATCH_REM_PORT;
0994     req->spec.ether_type = fk.basic.n_proto;
0995     req->spec.ip_proto = fk.basic.ip_proto;
0996 
0997     if (fk.basic.n_proto == htons(ETH_P_IP)) {
0998         req->spec.rem_host[0] = fk.addrs.v4addrs.src;
0999         req->spec.loc_host[0] = fk.addrs.v4addrs.dst;
1000     } else {
1001         memcpy(req->spec.rem_host, &fk.addrs.v6addrs.src,
1002                sizeof(struct in6_addr));
1003         memcpy(req->spec.loc_host, &fk.addrs.v6addrs.dst,
1004                sizeof(struct in6_addr));
1005     }
1006 
1007     req->spec.rem_port = fk.ports.src;
1008     req->spec.loc_port = fk.ports.dst;
1009 
1010     if (efx->rps_hash_table) {
1011         /* Add it to ARFS hash table */
1012         spin_lock(&efx->rps_hash_lock);
1013         rule = efx_rps_hash_add(efx, &req->spec, &new);
1014         if (!rule) {
1015             rc = -ENOMEM;
1016             goto out_unlock;
1017         }
1018         if (new)
1019             rule->arfs_id = efx->rps_next_id++ % RPS_NO_FILTER;
1020         rc = rule->arfs_id;
1021         /* Skip if existing or pending filter already does the right thing */
1022         if (!new && rule->rxq_index == rxq_index &&
1023             rule->filter_id >= EFX_ARFS_FILTER_ID_PENDING)
1024             goto out_unlock;
1025         rule->rxq_index = rxq_index;
1026         rule->filter_id = EFX_ARFS_FILTER_ID_PENDING;
1027         spin_unlock(&efx->rps_hash_lock);
1028     } else {
1029         /* Without an ARFS hash table, we just use arfs_id 0 for all
1030          * filters.  This means if multiple flows hash to the same
1031          * flow_id, all but the most recently touched will be eligible
1032          * for expiry.
1033          */
1034         rc = 0;
1035     }
1036 
1037     /* Queue the request */
1038     dev_hold(req->net_dev = net_dev);
1039     INIT_WORK(&req->work, efx_filter_rfs_work);
1040     req->rxq_index = rxq_index;
1041     req->flow_id = flow_id;
1042     schedule_work(&req->work);
1043     return rc;
1044 out_unlock:
1045     spin_unlock(&efx->rps_hash_lock);
1046 out_clear:
1047     clear_bit(slot_idx, &efx->rps_slot_map);
1048     return rc;
1049 }
1050 
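     /* Scan up to @quota installed ARFS filters, starting at the channel's
      * rfs_expire_index, and ask the NIC-type code to expire stale entries.
      * Returns false (doing nothing) if rps_mutex is already held elsewhere.
      */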
1051 bool __efx_siena_filter_rfs_expire(struct efx_channel *channel,
1052                    unsigned int quota)
1053 {
1054     bool (*expire_one)(struct efx_nic *efx, u32 flow_id, unsigned int index);
1055     struct efx_nic *efx = channel->efx;
1056     unsigned int index, size, start;
1057     u32 flow_id;
1058 
1059     if (!mutex_trylock(&efx->rps_mutex))
1060         return false;
1061     expire_one = efx->type->filter_rfs_expire_one;
1062     index = channel->rfs_expire_index;
1063     start = index;
1064     size = efx->type->max_rx_ip_filters;
1065     while (quota) {
1066         flow_id = channel->rps_flow_id[index];
1067 
1068         if (flow_id != RPS_FLOW_ID_INVALID) {
1069             quota--;
1070             if (expire_one(efx, flow_id, index)) {
1071                 netif_info(efx, rx_status, efx->net_dev,
1072                        "expired filter %d [channel %u flow %u]\n",
1073                        index, channel->channel, flow_id);
1074                 channel->rps_flow_id[index] = RPS_FLOW_ID_INVALID;
1075                 channel->rfs_filter_count--;
1076             }
1077         }
1078         if (++index == size)
1079             index = 0;
1080         /* If we were called with a quota that exceeds the total number
1081          * of filters in the table (which shouldn't happen, but could
1082          * if two callers race), ensure that we don't loop forever -
1083          * stop when we've examined every row of the table.
1084          */
1085         if (index == start)
1086             break;
1087     }
1088 
1089     channel->rfs_expire_index = index;
1090     mutex_unlock(&efx->rps_mutex);
1091     return true;
1092 }
1093 
1094 #endif /* CONFIG_RFS_ACCEL */