Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
0002 
0003 #include <linux/bpf_trace.h>
0004 #include <linux/dma-mapping.h>
0005 #include <linux/etherdevice.h>
0006 #include <linux/filter.h>
0007 #include <linux/irq.h>
0008 #include <linux/pci.h>
0009 #include <linux/skbuff.h>
0010 #include "funeth_txrx.h"
0011 #include "funeth.h"
0012 #include "fun_queue.h"
0013 
0014 #define CREATE_TRACE_POINTS
0015 #include "funeth_trace.h"
0016 
/* Upper bound on the number of Rx buffers a single packet may span. With the
 * device's maximum supported MTU and pages of 4KB or more a packet is
 * scattered into no more than 4 buffers.
 */
#define RX_MAX_FRAGS 4

/* Headroom placed ahead of each packet when XDP is not in use. Only packets
 * that occupy a single fragment carry it.
 */
#define FUN_RX_HEADROOM (NET_IP_ALIGN + NET_SKB_PAD)

/* Pages backing Rx buffers are reused where possible. Rather than writing the
 * page refcount for every packet, EXTRA_PAGE_REFS references are taken in one
 * operation and then handed out one at a time to the packets placed in the
 * buffer. The stash is topped up once it drops below MIN_PAGE_REFS.
 */
#define EXTRA_PAGE_REFS 1000000
#define MIN_PAGE_REFS 1000
0031 
/* Bits accumulated in an Rx queue's xdp_flush while CQEs are processed. They
 * record which deferred XDP actions must be completed at the end of the pass.
 */
enum {
	FUN_XDP_FLUSH_REDIR = 1 << 0,	/* redirects pending, need xdp_do_flush() */
	FUN_XDP_FLUSH_TX    = 1 << 1,	/* XDP Tx queue doorbell must be written */
};
0036 
0037 /* See if a page is running low on refs we are holding and if so take more. */
0038 static void refresh_refs(struct funeth_rxbuf *buf)
0039 {
0040     if (unlikely(buf->pg_refs < MIN_PAGE_REFS)) {
0041         buf->pg_refs += EXTRA_PAGE_REFS;
0042         page_ref_add(buf->page, EXTRA_PAGE_REFS);
0043     }
0044 }
0045 
0046 /* Offer a buffer to the Rx buffer cache. The cache will hold the buffer if its
0047  * page is worth retaining and there's room for it. Otherwise the page is
0048  * unmapped and our references released.
0049  */
0050 static void cache_offer(struct funeth_rxq *q, const struct funeth_rxbuf *buf)
0051 {
0052     struct funeth_rx_cache *c = &q->cache;
0053 
0054     if (c->prod_cnt - c->cons_cnt <= c->mask && buf->node == numa_mem_id()) {
0055         c->bufs[c->prod_cnt & c->mask] = *buf;
0056         c->prod_cnt++;
0057     } else {
0058         dma_unmap_page_attrs(q->dma_dev, buf->dma_addr, PAGE_SIZE,
0059                      DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
0060         __page_frag_cache_drain(buf->page, buf->pg_refs);
0061     }
0062 }
0063 
0064 /* Get a page from the Rx buffer cache. We only consider the next available
0065  * page and return it if we own all its references.
0066  */
0067 static bool cache_get(struct funeth_rxq *q, struct funeth_rxbuf *rb)
0068 {
0069     struct funeth_rx_cache *c = &q->cache;
0070     struct funeth_rxbuf *buf;
0071 
0072     if (c->prod_cnt == c->cons_cnt)
0073         return false;             /* empty cache */
0074 
0075     buf = &c->bufs[c->cons_cnt & c->mask];
0076     if (page_ref_count(buf->page) == buf->pg_refs) {
0077         dma_sync_single_for_device(q->dma_dev, buf->dma_addr,
0078                        PAGE_SIZE, DMA_FROM_DEVICE);
0079         *rb = *buf;
0080         buf->page = NULL;
0081         refresh_refs(rb);
0082         c->cons_cnt++;
0083         return true;
0084     }
0085 
0086     /* Page can't be reused. If the cache is full drop this page. */
0087     if (c->prod_cnt - c->cons_cnt > c->mask) {
0088         dma_unmap_page_attrs(q->dma_dev, buf->dma_addr, PAGE_SIZE,
0089                      DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
0090         __page_frag_cache_drain(buf->page, buf->pg_refs);
0091         buf->page = NULL;
0092         c->cons_cnt++;
0093     }
0094     return false;
0095 }
0096 
0097 /* Allocate and DMA-map a page for receive. */
0098 static int funeth_alloc_page(struct funeth_rxq *q, struct funeth_rxbuf *rb,
0099                  int node, gfp_t gfp)
0100 {
0101     struct page *p;
0102 
0103     if (cache_get(q, rb))
0104         return 0;
0105 
0106     p = __alloc_pages_node(node, gfp | __GFP_NOWARN, 0);
0107     if (unlikely(!p))
0108         return -ENOMEM;
0109 
0110     rb->dma_addr = dma_map_page(q->dma_dev, p, 0, PAGE_SIZE,
0111                     DMA_FROM_DEVICE);
0112     if (unlikely(dma_mapping_error(q->dma_dev, rb->dma_addr))) {
0113         FUN_QSTAT_INC(q, rx_map_err);
0114         __free_page(p);
0115         return -ENOMEM;
0116     }
0117 
0118     FUN_QSTAT_INC(q, rx_page_alloc);
0119 
0120     rb->page = p;
0121     rb->pg_refs = 1;
0122     refresh_refs(rb);
0123     rb->node = page_is_pfmemalloc(p) ? -1 : page_to_nid(p);
0124     return 0;
0125 }
0126 
0127 static void funeth_free_page(struct funeth_rxq *q, struct funeth_rxbuf *rb)
0128 {
0129     if (rb->page) {
0130         dma_unmap_page(q->dma_dev, rb->dma_addr, PAGE_SIZE,
0131                    DMA_FROM_DEVICE);
0132         __page_frag_cache_drain(rb->page, rb->pg_refs);
0133         rb->page = NULL;
0134     }
0135 }
0136 
0137 /* Run the XDP program assigned to an Rx queue.
0138  * Return %NULL if the buffer is consumed, or the virtual address of the packet
0139  * to turn into an skb.
0140  */
0141 static void *fun_run_xdp(struct funeth_rxq *q, skb_frag_t *frags, void *buf_va,
0142              int ref_ok, struct funeth_txq *xdp_q)
0143 {
0144     struct bpf_prog *xdp_prog;
0145     struct xdp_frame *xdpf;
0146     struct xdp_buff xdp;
0147     u32 act;
0148 
0149     /* VA includes the headroom, frag size includes headroom + tailroom */
0150     xdp_init_buff(&xdp, ALIGN(skb_frag_size(frags), FUN_EPRQ_PKT_ALIGN),
0151               &q->xdp_rxq);
0152     xdp_prepare_buff(&xdp, buf_va, FUN_XDP_HEADROOM, skb_frag_size(frags) -
0153              (FUN_RX_TAILROOM + FUN_XDP_HEADROOM), false);
0154 
0155     xdp_prog = READ_ONCE(q->xdp_prog);
0156     act = bpf_prog_run_xdp(xdp_prog, &xdp);
0157 
0158     switch (act) {
0159     case XDP_PASS:
0160         /* remove headroom, which may not be FUN_XDP_HEADROOM now */
0161         skb_frag_size_set(frags, xdp.data_end - xdp.data);
0162         skb_frag_off_add(frags, xdp.data - xdp.data_hard_start);
0163         goto pass;
0164     case XDP_TX:
0165         if (unlikely(!ref_ok))
0166             goto pass;
0167 
0168         xdpf = xdp_convert_buff_to_frame(&xdp);
0169         if (!xdpf || !fun_xdp_tx(xdp_q, xdpf))
0170             goto xdp_error;
0171         FUN_QSTAT_INC(q, xdp_tx);
0172         q->xdp_flush |= FUN_XDP_FLUSH_TX;
0173         break;
0174     case XDP_REDIRECT:
0175         if (unlikely(!ref_ok))
0176             goto pass;
0177         if (unlikely(xdp_do_redirect(q->netdev, &xdp, xdp_prog)))
0178             goto xdp_error;
0179         FUN_QSTAT_INC(q, xdp_redir);
0180         q->xdp_flush |= FUN_XDP_FLUSH_REDIR;
0181         break;
0182     default:
0183         bpf_warn_invalid_xdp_action(q->netdev, xdp_prog, act);
0184         fallthrough;
0185     case XDP_ABORTED:
0186         trace_xdp_exception(q->netdev, xdp_prog, act);
0187 xdp_error:
0188         q->cur_buf->pg_refs++; /* return frags' page reference */
0189         FUN_QSTAT_INC(q, xdp_err);
0190         break;
0191     case XDP_DROP:
0192         q->cur_buf->pg_refs++;
0193         FUN_QSTAT_INC(q, xdp_drops);
0194         break;
0195     }
0196     return NULL;
0197 
0198 pass:
0199     return xdp.data;
0200 }
0201 
0202 /* A CQE contains a fixed completion structure along with optional metadata and
0203  * even packet data. Given the start address of a CQE return the start of the
0204  * contained fixed structure, which lies at the end.
0205  */
0206 static const void *cqe_to_info(const void *cqe)
0207 {
0208     return cqe + FUNETH_CQE_INFO_OFFSET;
0209 }
0210 
0211 /* The inverse of cqe_to_info(). */
0212 static const void *info_to_cqe(const void *cqe_info)
0213 {
0214     return cqe_info - FUNETH_CQE_INFO_OFFSET;
0215 }
0216 
0217 /* Return the type of hash provided by the device based on the L3 and L4
0218  * protocols it parsed for the packet.
0219  */
0220 static enum pkt_hash_types cqe_to_pkt_hash_type(u16 pkt_parse)
0221 {
0222     static const enum pkt_hash_types htype_map[] = {
0223         PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L3,
0224         PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L4,
0225         PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L3,
0226         PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L3
0227     };
0228     u16 key;
0229 
0230     /* Build the key from the TCP/UDP and IP/IPv6 bits */
0231     key = ((pkt_parse >> FUN_ETH_RX_CV_OL4_PROT_S) & 6) |
0232           ((pkt_parse >> (FUN_ETH_RX_CV_OL3_PROT_S + 1)) & 1);
0233 
0234     return htype_map[key];
0235 }
0236 
0237 /* Each received packet can be scattered across several Rx buffers or can
0238  * share a buffer with previously received packets depending on the buffer
0239  * and packet sizes and the room available in the most recently used buffer.
0240  *
0241  * The rules are:
0242  * - If the buffer at the head of an RQ has not been used it gets (part of) the
0243  *   next incoming packet.
0244  * - Otherwise, if the packet fully fits in the buffer's remaining space the
0245  *   packet is written there.
0246  * - Otherwise, the packet goes into the next Rx buffer.
0247  *
0248  * This function returns the Rx buffer for a packet or fragment thereof of the
0249  * given length. If it isn't @buf it either recycles or frees that buffer
0250  * before advancing the queue to the next buffer.
0251  *
0252  * If called repeatedly with the remaining length of a packet it will walk
0253  * through all the buffers containing the packet.
0254  */
0255 static struct funeth_rxbuf *
0256 get_buf(struct funeth_rxq *q, struct funeth_rxbuf *buf, unsigned int len)
0257 {
0258     if (q->buf_offset + len <= PAGE_SIZE || !q->buf_offset)
0259         return buf;            /* @buf holds (part of) the packet */
0260 
0261     /* The packet occupies part of the next buffer. Move there after
0262      * replenishing the current buffer slot either with the spare page or
0263      * by reusing the slot's existing page. Note that if a spare page isn't
0264      * available and the current packet occupies @buf it is a multi-frag
0265      * packet that will be dropped leaving @buf available for reuse.
0266      */
0267     if ((page_ref_count(buf->page) == buf->pg_refs &&
0268          buf->node == numa_mem_id()) || !q->spare_buf.page) {
0269         dma_sync_single_for_device(q->dma_dev, buf->dma_addr,
0270                        PAGE_SIZE, DMA_FROM_DEVICE);
0271         refresh_refs(buf);
0272     } else {
0273         cache_offer(q, buf);
0274         *buf = q->spare_buf;
0275         q->spare_buf.page = NULL;
0276         q->rqes[q->rq_cons & q->rq_mask] =
0277             FUN_EPRQ_RQBUF_INIT(buf->dma_addr);
0278     }
0279     q->buf_offset = 0;
0280     q->rq_cons++;
0281     return &q->bufs[q->rq_cons & q->rq_mask];
0282 }
0283 
0284 /* Gather the page fragments making up the first Rx packet on @q. Its total
0285  * length @tot_len includes optional head- and tail-rooms.
0286  *
0287  * Return 0 if the device retains ownership of at least some of the pages.
0288  * In this case the caller may only copy the packet.
0289  *
0290  * A non-zero return value gives the caller permission to use references to the
0291  * pages, e.g., attach them to skbs. Additionally, if the value is <0 at least
0292  * one of the pages is PF_MEMALLOC.
0293  *
0294  * Regardless of outcome the caller is granted a reference to each of the pages.
0295  */
0296 static int fun_gather_pkt(struct funeth_rxq *q, unsigned int tot_len,
0297               skb_frag_t *frags)
0298 {
0299     struct funeth_rxbuf *buf = q->cur_buf;
0300     unsigned int frag_len;
0301     int ref_ok = 1;
0302 
0303     for (;;) {
0304         buf = get_buf(q, buf, tot_len);
0305 
0306         /* We always keep the RQ full of buffers so before we can give
0307          * one of our pages to the stack we require that we can obtain
0308          * a replacement page. If we can't the packet will either be
0309          * copied or dropped so we can retain ownership of the page and
0310          * reuse it.
0311          */
0312         if (!q->spare_buf.page &&
0313             funeth_alloc_page(q, &q->spare_buf, numa_mem_id(),
0314                       GFP_ATOMIC | __GFP_MEMALLOC))
0315             ref_ok = 0;
0316 
0317         frag_len = min_t(unsigned int, tot_len,
0318                  PAGE_SIZE - q->buf_offset);
0319         dma_sync_single_for_cpu(q->dma_dev,
0320                     buf->dma_addr + q->buf_offset,
0321                     frag_len, DMA_FROM_DEVICE);
0322         buf->pg_refs--;
0323         if (ref_ok)
0324             ref_ok |= buf->node;
0325 
0326         __skb_frag_set_page(frags, buf->page);
0327         skb_frag_off_set(frags, q->buf_offset);
0328         skb_frag_size_set(frags++, frag_len);
0329 
0330         tot_len -= frag_len;
0331         if (!tot_len)
0332             break;
0333 
0334         q->buf_offset = PAGE_SIZE;
0335     }
0336     q->buf_offset = ALIGN(q->buf_offset + frag_len, FUN_EPRQ_PKT_ALIGN);
0337     q->cur_buf = buf;
0338     return ref_ok;
0339 }
0340 
0341 static bool rx_hwtstamp_enabled(const struct net_device *dev)
0342 {
0343     const struct funeth_priv *d = netdev_priv(dev);
0344 
0345     return d->hwtstamp_cfg.rx_filter == HWTSTAMP_FILTER_ALL;
0346 }
0347 
0348 /* Advance the CQ pointers and phase tag to the next CQE. */
0349 static void advance_cq(struct funeth_rxq *q)
0350 {
0351     if (unlikely(q->cq_head == q->cq_mask)) {
0352         q->cq_head = 0;
0353         q->phase ^= 1;
0354         q->next_cqe_info = cqe_to_info(q->cqes);
0355     } else {
0356         q->cq_head++;
0357         q->next_cqe_info += FUNETH_CQE_SIZE;
0358     }
0359     prefetch(q->next_cqe_info);
0360 }
0361 
0362 /* Process the packet represented by the head CQE of @q. Gather the packet's
0363  * fragments, run it through the optional XDP program, and if needed construct
0364  * an skb and pass it to the stack.
0365  */
0366 static void fun_handle_cqe_pkt(struct funeth_rxq *q, struct funeth_txq *xdp_q)
0367 {
0368     const struct fun_eth_cqe *rxreq = info_to_cqe(q->next_cqe_info);
0369     unsigned int i, tot_len, pkt_len = be32_to_cpu(rxreq->pkt_len);
0370     struct net_device *ndev = q->netdev;
0371     skb_frag_t frags[RX_MAX_FRAGS];
0372     struct skb_shared_info *si;
0373     unsigned int headroom;
0374     gro_result_t gro_res;
0375     struct sk_buff *skb;
0376     int ref_ok;
0377     void *va;
0378     u16 cv;
0379 
0380     u64_stats_update_begin(&q->syncp);
0381     q->stats.rx_pkts++;
0382     q->stats.rx_bytes += pkt_len;
0383     u64_stats_update_end(&q->syncp);
0384 
0385     advance_cq(q);
0386 
0387     /* account for head- and tail-room, present only for 1-buffer packets */
0388     tot_len = pkt_len;
0389     headroom = be16_to_cpu(rxreq->headroom);
0390     if (likely(headroom))
0391         tot_len += FUN_RX_TAILROOM + headroom;
0392 
0393     ref_ok = fun_gather_pkt(q, tot_len, frags);
0394     va = skb_frag_address(frags);
0395     if (xdp_q && headroom == FUN_XDP_HEADROOM) {
0396         va = fun_run_xdp(q, frags, va, ref_ok, xdp_q);
0397         if (!va)
0398             return;
0399         headroom = 0;   /* XDP_PASS trims it */
0400     }
0401     if (unlikely(!ref_ok))
0402         goto no_mem;
0403 
0404     if (likely(headroom)) {
0405         /* headroom is either FUN_RX_HEADROOM or FUN_XDP_HEADROOM */
0406         prefetch(va + headroom);
0407         skb = napi_build_skb(va, ALIGN(tot_len, FUN_EPRQ_PKT_ALIGN));
0408         if (unlikely(!skb))
0409             goto no_mem;
0410 
0411         skb_reserve(skb, headroom);
0412         __skb_put(skb, pkt_len);
0413         skb->protocol = eth_type_trans(skb, ndev);
0414     } else {
0415         prefetch(va);
0416         skb = napi_get_frags(q->napi);
0417         if (unlikely(!skb))
0418             goto no_mem;
0419 
0420         if (ref_ok < 0)
0421             skb->pfmemalloc = 1;
0422 
0423         si = skb_shinfo(skb);
0424         si->nr_frags = rxreq->nsgl;
0425         for (i = 0; i < si->nr_frags; i++)
0426             si->frags[i] = frags[i];
0427 
0428         skb->len = pkt_len;
0429         skb->data_len = pkt_len;
0430         skb->truesize += round_up(pkt_len, FUN_EPRQ_PKT_ALIGN);
0431     }
0432 
0433     skb_record_rx_queue(skb, q->qidx);
0434     cv = be16_to_cpu(rxreq->pkt_cv);
0435     if (likely((q->netdev->features & NETIF_F_RXHASH) && rxreq->hash))
0436         skb_set_hash(skb, be32_to_cpu(rxreq->hash),
0437                  cqe_to_pkt_hash_type(cv));
0438     if (likely((q->netdev->features & NETIF_F_RXCSUM) && rxreq->csum)) {
0439         FUN_QSTAT_INC(q, rx_cso);
0440         skb->ip_summed = CHECKSUM_UNNECESSARY;
0441         skb->csum_level = be16_to_cpu(rxreq->csum) - 1;
0442     }
0443     if (unlikely(rx_hwtstamp_enabled(q->netdev)))
0444         skb_hwtstamps(skb)->hwtstamp = be64_to_cpu(rxreq->timestamp);
0445 
0446     trace_funeth_rx(q, rxreq->nsgl, pkt_len, skb->hash, cv);
0447 
0448     gro_res = skb->data_len ? napi_gro_frags(q->napi) :
0449                   napi_gro_receive(q->napi, skb);
0450     if (gro_res == GRO_MERGED || gro_res == GRO_MERGED_FREE)
0451         FUN_QSTAT_INC(q, gro_merged);
0452     else if (gro_res == GRO_HELD)
0453         FUN_QSTAT_INC(q, gro_pkts);
0454     return;
0455 
0456 no_mem:
0457     FUN_QSTAT_INC(q, rx_mem_drops);
0458 
0459     /* Release the references we've been granted for the frag pages.
0460      * We return the ref of the last frag and free the rest.
0461      */
0462     q->cur_buf->pg_refs++;
0463     for (i = 0; i < rxreq->nsgl - 1; i++)
0464         __free_page(skb_frag_page(frags + i));
0465 }
0466 
0467 /* Return 0 if the phase tag of the CQE at the CQ's head matches expectations
0468  * indicating the CQE is new.
0469  */
0470 static u16 cqe_phase_mismatch(const struct fun_cqe_info *ci, u16 phase)
0471 {
0472     u16 sf_p = be16_to_cpu(ci->sf_p);
0473 
0474     return (sf_p & 1) ^ phase;
0475 }
0476 
0477 /* Walk through a CQ identifying and processing fresh CQEs up to the given
0478  * budget. Return the remaining budget.
0479  */
0480 static int fun_process_cqes(struct funeth_rxq *q, int budget)
0481 {
0482     struct funeth_priv *fp = netdev_priv(q->netdev);
0483     struct funeth_txq **xdpqs, *xdp_q = NULL;
0484 
0485     xdpqs = rcu_dereference_bh(fp->xdpqs);
0486     if (xdpqs)
0487         xdp_q = xdpqs[smp_processor_id()];
0488 
0489     while (budget && !cqe_phase_mismatch(q->next_cqe_info, q->phase)) {
0490         /* access other descriptor fields after the phase check */
0491         dma_rmb();
0492 
0493         fun_handle_cqe_pkt(q, xdp_q);
0494         budget--;
0495     }
0496 
0497     if (unlikely(q->xdp_flush)) {
0498         if (q->xdp_flush & FUN_XDP_FLUSH_TX)
0499             fun_txq_wr_db(xdp_q);
0500         if (q->xdp_flush & FUN_XDP_FLUSH_REDIR)
0501             xdp_do_flush();
0502         q->xdp_flush = 0;
0503     }
0504 
0505     return budget;
0506 }
0507 
0508 /* NAPI handler for Rx queues. Calls the CQE processing loop and writes RQ/CQ
0509  * doorbells as needed.
0510  */
0511 int fun_rxq_napi_poll(struct napi_struct *napi, int budget)
0512 {
0513     struct fun_irq *irq = container_of(napi, struct fun_irq, napi);
0514     struct funeth_rxq *q = irq->rxq;
0515     int work_done = budget - fun_process_cqes(q, budget);
0516     u32 cq_db_val = q->cq_head;
0517 
0518     if (unlikely(work_done >= budget))
0519         FUN_QSTAT_INC(q, rx_budget);
0520     else if (napi_complete_done(napi, work_done))
0521         cq_db_val |= q->irq_db_val;
0522 
0523     /* check whether to post new Rx buffers */
0524     if (q->rq_cons - q->rq_cons_db >= q->rq_db_thres) {
0525         u64_stats_update_begin(&q->syncp);
0526         q->stats.rx_bufs += q->rq_cons - q->rq_cons_db;
0527         u64_stats_update_end(&q->syncp);
0528         q->rq_cons_db = q->rq_cons;
0529         writel((q->rq_cons - 1) & q->rq_mask, q->rq_db);
0530     }
0531 
0532     writel(cq_db_val, q->cq_db);
0533     return work_done;
0534 }
0535 
0536 /* Free the Rx buffers of an Rx queue. */
0537 static void fun_rxq_free_bufs(struct funeth_rxq *q)
0538 {
0539     struct funeth_rxbuf *b = q->bufs;
0540     unsigned int i;
0541 
0542     for (i = 0; i <= q->rq_mask; i++, b++)
0543         funeth_free_page(q, b);
0544 
0545     funeth_free_page(q, &q->spare_buf);
0546     q->cur_buf = NULL;
0547 }
0548 
0549 /* Initially provision an Rx queue with Rx buffers. */
0550 static int fun_rxq_alloc_bufs(struct funeth_rxq *q, int node)
0551 {
0552     struct funeth_rxbuf *b = q->bufs;
0553     unsigned int i;
0554 
0555     for (i = 0; i <= q->rq_mask; i++, b++) {
0556         if (funeth_alloc_page(q, b, node, GFP_KERNEL)) {
0557             fun_rxq_free_bufs(q);
0558             return -ENOMEM;
0559         }
0560         q->rqes[i] = FUN_EPRQ_RQBUF_INIT(b->dma_addr);
0561     }
0562     q->cur_buf = q->bufs;
0563     return 0;
0564 }
0565 
0566 /* Initialize a used-buffer cache of the given depth. */
0567 static int fun_rxq_init_cache(struct funeth_rx_cache *c, unsigned int depth,
0568                   int node)
0569 {
0570     c->mask = depth - 1;
0571     c->bufs = kvzalloc_node(depth * sizeof(*c->bufs), GFP_KERNEL, node);
0572     return c->bufs ? 0 : -ENOMEM;
0573 }
0574 
0575 /* Deallocate an Rx queue's used-buffer cache and its contents. */
0576 static void fun_rxq_free_cache(struct funeth_rxq *q)
0577 {
0578     struct funeth_rxbuf *b = q->cache.bufs;
0579     unsigned int i;
0580 
0581     for (i = 0; i <= q->cache.mask; i++, b++)
0582         funeth_free_page(q, b);
0583 
0584     kvfree(q->cache.bufs);
0585     q->cache.bufs = NULL;
0586 }
0587 
0588 int fun_rxq_set_bpf(struct funeth_rxq *q, struct bpf_prog *prog)
0589 {
0590     struct funeth_priv *fp = netdev_priv(q->netdev);
0591     struct fun_admin_epcq_req cmd;
0592     u16 headroom;
0593     int err;
0594 
0595     headroom = prog ? FUN_XDP_HEADROOM : FUN_RX_HEADROOM;
0596     if (headroom != q->headroom) {
0597         cmd.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_EPCQ,
0598                             sizeof(cmd));
0599         cmd.u.modify =
0600             FUN_ADMIN_EPCQ_MODIFY_REQ_INIT(FUN_ADMIN_SUBOP_MODIFY,
0601                                0, q->hw_cqid, headroom);
0602         err = fun_submit_admin_sync_cmd(fp->fdev, &cmd.common, NULL, 0,
0603                         0);
0604         if (err)
0605             return err;
0606         q->headroom = headroom;
0607     }
0608 
0609     WRITE_ONCE(q->xdp_prog, prog);
0610     return 0;
0611 }
0612 
0613 /* Create an Rx queue, allocating the host memory it needs. */
0614 static struct funeth_rxq *fun_rxq_create_sw(struct net_device *dev,
0615                         unsigned int qidx,
0616                         unsigned int ncqe,
0617                         unsigned int nrqe,
0618                         struct fun_irq *irq)
0619 {
0620     struct funeth_priv *fp = netdev_priv(dev);
0621     struct funeth_rxq *q;
0622     int err = -ENOMEM;
0623     int numa_node;
0624 
0625     numa_node = fun_irq_node(irq);
0626     q = kzalloc_node(sizeof(*q), GFP_KERNEL, numa_node);
0627     if (!q)
0628         goto err;
0629 
0630     q->qidx = qidx;
0631     q->netdev = dev;
0632     q->cq_mask = ncqe - 1;
0633     q->rq_mask = nrqe - 1;
0634     q->numa_node = numa_node;
0635     q->rq_db_thres = nrqe / 4;
0636     u64_stats_init(&q->syncp);
0637     q->dma_dev = &fp->pdev->dev;
0638 
0639     q->rqes = fun_alloc_ring_mem(q->dma_dev, nrqe, sizeof(*q->rqes),
0640                      sizeof(*q->bufs), false, numa_node,
0641                      &q->rq_dma_addr, (void **)&q->bufs, NULL);
0642     if (!q->rqes)
0643         goto free_q;
0644 
0645     q->cqes = fun_alloc_ring_mem(q->dma_dev, ncqe, FUNETH_CQE_SIZE, 0,
0646                      false, numa_node, &q->cq_dma_addr, NULL,
0647                      NULL);
0648     if (!q->cqes)
0649         goto free_rqes;
0650 
0651     err = fun_rxq_init_cache(&q->cache, nrqe, numa_node);
0652     if (err)
0653         goto free_cqes;
0654 
0655     err = fun_rxq_alloc_bufs(q, numa_node);
0656     if (err)
0657         goto free_cache;
0658 
0659     q->stats.rx_bufs = q->rq_mask;
0660     q->init_state = FUN_QSTATE_INIT_SW;
0661     return q;
0662 
0663 free_cache:
0664     fun_rxq_free_cache(q);
0665 free_cqes:
0666     dma_free_coherent(q->dma_dev, ncqe * FUNETH_CQE_SIZE, q->cqes,
0667               q->cq_dma_addr);
0668 free_rqes:
0669     fun_free_ring_mem(q->dma_dev, nrqe, sizeof(*q->rqes), false, q->rqes,
0670               q->rq_dma_addr, q->bufs);
0671 free_q:
0672     kfree(q);
0673 err:
0674     netdev_err(dev, "Unable to allocate memory for Rx queue %u\n", qidx);
0675     return ERR_PTR(err);
0676 }
0677 
0678 static void fun_rxq_free_sw(struct funeth_rxq *q)
0679 {
0680     struct funeth_priv *fp = netdev_priv(q->netdev);
0681 
0682     fun_rxq_free_cache(q);
0683     fun_rxq_free_bufs(q);
0684     fun_free_ring_mem(q->dma_dev, q->rq_mask + 1, sizeof(*q->rqes), false,
0685               q->rqes, q->rq_dma_addr, q->bufs);
0686     dma_free_coherent(q->dma_dev, (q->cq_mask + 1) * FUNETH_CQE_SIZE,
0687               q->cqes, q->cq_dma_addr);
0688 
0689     /* Before freeing the queue transfer key counters to the device. */
0690     fp->rx_packets += q->stats.rx_pkts;
0691     fp->rx_bytes   += q->stats.rx_bytes;
0692     fp->rx_dropped += q->stats.rx_map_err + q->stats.rx_mem_drops;
0693 
0694     kfree(q);
0695 }
0696 
0697 /* Create an Rx queue's resources on the device. */
0698 int fun_rxq_create_dev(struct funeth_rxq *q, struct fun_irq *irq)
0699 {
0700     struct funeth_priv *fp = netdev_priv(q->netdev);
0701     unsigned int ncqe = q->cq_mask + 1;
0702     unsigned int nrqe = q->rq_mask + 1;
0703     int err;
0704 
0705     err = xdp_rxq_info_reg(&q->xdp_rxq, q->netdev, q->qidx,
0706                    irq->napi.napi_id);
0707     if (err)
0708         goto out;
0709 
0710     err = xdp_rxq_info_reg_mem_model(&q->xdp_rxq, MEM_TYPE_PAGE_SHARED,
0711                      NULL);
0712     if (err)
0713         goto xdp_unreg;
0714 
0715     q->phase = 1;
0716     q->irq_cnt = 0;
0717     q->cq_head = 0;
0718     q->rq_cons = 0;
0719     q->rq_cons_db = 0;
0720     q->buf_offset = 0;
0721     q->napi = &irq->napi;
0722     q->irq_db_val = fp->cq_irq_db;
0723     q->next_cqe_info = cqe_to_info(q->cqes);
0724 
0725     q->xdp_prog = fp->xdp_prog;
0726     q->headroom = fp->xdp_prog ? FUN_XDP_HEADROOM : FUN_RX_HEADROOM;
0727 
0728     err = fun_sq_create(fp->fdev, FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR |
0729                 FUN_ADMIN_EPSQ_CREATE_FLAG_RQ, 0,
0730                 FUN_HCI_ID_INVALID, 0, nrqe, q->rq_dma_addr, 0, 0,
0731                 0, 0, fp->fdev->kern_end_qid, PAGE_SHIFT,
0732                 &q->hw_sqid, &q->rq_db);
0733     if (err)
0734         goto xdp_unreg;
0735 
0736     err = fun_cq_create(fp->fdev, FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR |
0737                 FUN_ADMIN_EPCQ_CREATE_FLAG_RQ, 0,
0738                 q->hw_sqid, ilog2(FUNETH_CQE_SIZE), ncqe,
0739                 q->cq_dma_addr, q->headroom, FUN_RX_TAILROOM, 0, 0,
0740                 irq->irq_idx, 0, fp->fdev->kern_end_qid,
0741                 &q->hw_cqid, &q->cq_db);
0742     if (err)
0743         goto free_rq;
0744 
0745     irq->rxq = q;
0746     writel(q->rq_mask, q->rq_db);
0747     q->init_state = FUN_QSTATE_INIT_FULL;
0748 
0749     netif_info(fp, ifup, q->netdev,
0750            "Rx queue %u, depth %u/%u, HW qid %u/%u, IRQ idx %u, node %d, headroom %u\n",
0751            q->qidx, ncqe, nrqe, q->hw_cqid, q->hw_sqid, irq->irq_idx,
0752            q->numa_node, q->headroom);
0753     return 0;
0754 
0755 free_rq:
0756     fun_destroy_sq(fp->fdev, q->hw_sqid);
0757 xdp_unreg:
0758     xdp_rxq_info_unreg(&q->xdp_rxq);
0759 out:
0760     netdev_err(q->netdev,
0761            "Failed to create Rx queue %u on device, error %d\n",
0762            q->qidx, err);
0763     return err;
0764 }
0765 
0766 static void fun_rxq_free_dev(struct funeth_rxq *q)
0767 {
0768     struct funeth_priv *fp = netdev_priv(q->netdev);
0769     struct fun_irq *irq;
0770 
0771     if (q->init_state < FUN_QSTATE_INIT_FULL)
0772         return;
0773 
0774     irq = container_of(q->napi, struct fun_irq, napi);
0775     netif_info(fp, ifdown, q->netdev,
0776            "Freeing Rx queue %u (id %u/%u), IRQ %u\n",
0777            q->qidx, q->hw_cqid, q->hw_sqid, irq->irq_idx);
0778 
0779     irq->rxq = NULL;
0780     xdp_rxq_info_unreg(&q->xdp_rxq);
0781     fun_destroy_sq(fp->fdev, q->hw_sqid);
0782     fun_destroy_cq(fp->fdev, q->hw_cqid);
0783     q->init_state = FUN_QSTATE_INIT_SW;
0784 }
0785 
0786 /* Create or advance an Rx queue, allocating all the host and device resources
0787  * needed to reach the target state.
0788  */
0789 int funeth_rxq_create(struct net_device *dev, unsigned int qidx,
0790               unsigned int ncqe, unsigned int nrqe, struct fun_irq *irq,
0791               int state, struct funeth_rxq **qp)
0792 {
0793     struct funeth_rxq *q = *qp;
0794     int err;
0795 
0796     if (!q) {
0797         q = fun_rxq_create_sw(dev, qidx, ncqe, nrqe, irq);
0798         if (IS_ERR(q))
0799             return PTR_ERR(q);
0800     }
0801 
0802     if (q->init_state >= state)
0803         goto out;
0804 
0805     err = fun_rxq_create_dev(q, irq);
0806     if (err) {
0807         if (!*qp)
0808             fun_rxq_free_sw(q);
0809         return err;
0810     }
0811 
0812 out:
0813     *qp = q;
0814     return 0;
0815 }
0816 
0817 /* Free Rx queue resources until it reaches the target state. */
0818 struct funeth_rxq *funeth_rxq_free(struct funeth_rxq *q, int state)
0819 {
0820     if (state < FUN_QSTATE_INIT_FULL)
0821         fun_rxq_free_dev(q);
0822 
0823     if (state == FUN_QSTATE_DESTROYED) {
0824         fun_rxq_free_sw(q);
0825         q = NULL;
0826     }
0827 
0828     return q;
0829 }