/*
 * Virtual network driver for conversing with remote driver backends
 * (Xen netfront).
 */
0032 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
0033
0034 #include <linux/module.h>
0035 #include <linux/kernel.h>
0036 #include <linux/netdevice.h>
0037 #include <linux/etherdevice.h>
0038 #include <linux/skbuff.h>
0039 #include <linux/ethtool.h>
0040 #include <linux/if_ether.h>
0041 #include <net/tcp.h>
0042 #include <linux/udp.h>
0043 #include <linux/moduleparam.h>
0044 #include <linux/mm.h>
0045 #include <linux/slab.h>
0046 #include <net/ip.h>
0047 #include <linux/bpf.h>
0048 #include <net/page_pool.h>
0049 #include <linux/bpf_trace.h>
0050
0051 #include <xen/xen.h>
0052 #include <xen/xenbus.h>
0053 #include <xen/events.h>
0054 #include <xen/page.h>
0055 #include <xen/platform_pci.h>
0056 #include <xen/grant_table.h>
0057
0058 #include <xen/interface/io/netif.h>
0059 #include <xen/interface/memory.h>
0060 #include <xen/interface/grant_table.h>
0061
0062
0063 #define MAX_QUEUES_DEFAULT 8
0064 static unsigned int xennet_max_queues;
0065 module_param_named(max_queues, xennet_max_queues, uint, 0644);
0066 MODULE_PARM_DESC(max_queues,
0067 "Maximum number of queues per virtual interface");
0068
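/*
 * When the backend is not trusted (module parameter below, or the
 * per-device xenstore "trusted" node), transmitted data is bounced into
 * freshly allocated, page-aligned buffers so that pages granted to the
 * backend never contain unrelated kernel data.
 *
 * Illustrative usage (not part of this file): modprobe xen-netfront trusted=0
 */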
0069 static bool __read_mostly xennet_trusted = true;
0070 module_param_named(trusted, xennet_trusted, bool, 0644);
0071 MODULE_PARM_DESC(trusted, "Is the backend trusted");
0072
0073 #define XENNET_TIMEOUT (5 * HZ)
0074
0075 static const struct ethtool_ops xennet_ethtool_ops;
0076
0077 struct netfront_cb {
0078 int pull_to;
0079 };
0080
0081 #define NETFRONT_SKB_CB(skb) ((struct netfront_cb *)((skb)->cb))
0082
0083 #define RX_COPY_THRESHOLD 256
0084
0085 #define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, XEN_PAGE_SIZE)
0086 #define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, XEN_PAGE_SIZE)
0087
0088
0089 #define NET_RX_SLOTS_MIN (XEN_NETIF_NR_SLOTS_MIN + 1)
0090
0091
0092 #define QUEUE_NAME_SIZE (IFNAMSIZ + 6)
0093
0094
0095 #define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3)
0096
0097 static DECLARE_WAIT_QUEUE_HEAD(module_wq);
0098
0099 struct netfront_stats {
0100 u64 packets;
0101 u64 bytes;
0102 struct u64_stats_sync syncp;
0103 };
0104
0105 struct netfront_info;
0106
0107 struct netfront_queue {
0108 unsigned int id;
0109 char name[QUEUE_NAME_SIZE];
0110 struct netfront_info *info;
0111
0112 struct bpf_prog __rcu *xdp_prog;
0113
0114 struct napi_struct napi;
0115
0116
0117
0118
0119 unsigned int tx_evtchn, rx_evtchn;
0120 unsigned int tx_irq, rx_irq;
0121
0122 char tx_irq_name[IRQ_NAME_SIZE];
0123 char rx_irq_name[IRQ_NAME_SIZE];
0124
0125 spinlock_t tx_lock;
0126 struct xen_netif_tx_front_ring tx;
0127 int tx_ring_ref;
0128
0129
0130
0131
0132
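/*
 * Per-request TX bookkeeping, indexed by request id:
 * tx_skbs[] holds the skb for an in-flight id, tx_link[] either chains
 * free ids (terminated by TX_LINK_NONE) or marks an id as TX_PENDING,
 * and grant_tx_ref[]/grant_tx_page[] remember the grant reference and
 * page so they can be revoked when the response arrives or on teardown.
 */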
0133 struct sk_buff *tx_skbs[NET_TX_RING_SIZE];
0134 unsigned short tx_link[NET_TX_RING_SIZE];
0135 #define TX_LINK_NONE 0xffff
0136 #define TX_PENDING 0xfffe
0137 grant_ref_t gref_tx_head;
0138 grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
0139 struct page *grant_tx_page[NET_TX_RING_SIZE];
0140 unsigned tx_skb_freelist;
0141 unsigned int tx_pend_queue;
0142
0143 spinlock_t rx_lock ____cacheline_aligned_in_smp;
0144 struct xen_netif_rx_front_ring rx;
0145 int rx_ring_ref;
0146
0147 struct timer_list rx_refill_timer;
0148
0149 struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
0150 grant_ref_t gref_rx_head;
0151 grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
0152
0153 unsigned int rx_rsp_unconsumed;
0154 spinlock_t rx_cons_lock;
0155
0156 struct page_pool *page_pool;
0157 struct xdp_rxq_info xdp_rxq;
0158 };
0159
0160 struct netfront_info {
0161 struct list_head list;
0162 struct net_device *netdev;
0163
0164 struct xenbus_device *xbdev;
0165
0166
0167 struct netfront_queue *queues;
0168
0169
0170 struct netfront_stats __percpu *rx_stats;
0171 struct netfront_stats __percpu *tx_stats;
0172
0173
0174 bool netback_has_xdp_headroom;
0175 bool netfront_xdp_enabled;
0176
0177
0178 bool broken;
0179
0180
0181 bool bounce;
0182
0183 atomic_t rx_gso_checksum_fixup;
0184 };
0185
0186 struct netfront_rx_info {
0187 struct xen_netif_rx_response rx;
0188 struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
0189 };
0190
0191
0192
0193
0194
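/*
 * The id free list is a singly linked list threaded through tx_link[]:
 * add_id_to_list() pushes an id, get_id_from_list() pops one, and
 * TX_LINK_NONE terminates the chain.
 */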
0195 static void add_id_to_list(unsigned *head, unsigned short *list,
0196 unsigned short id)
0197 {
0198 list[id] = *head;
0199 *head = id;
0200 }
0201
0202 static unsigned short get_id_from_list(unsigned *head, unsigned short *list)
0203 {
0204 unsigned int id = *head;
0205
0206 if (id != TX_LINK_NONE) {
0207 *head = list[id];
0208 list[id] = TX_LINK_NONE;
0209 }
0210 return id;
0211 }
0212
0213 static int xennet_rxidx(RING_IDX idx)
0214 {
0215 return idx & (NET_RX_RING_SIZE - 1);
0216 }
0217
0218 static struct sk_buff *xennet_get_rx_skb(struct netfront_queue *queue,
0219 RING_IDX ri)
0220 {
0221 int i = xennet_rxidx(ri);
0222 struct sk_buff *skb = queue->rx_skbs[i];
0223 queue->rx_skbs[i] = NULL;
0224 return skb;
0225 }
0226
0227 static grant_ref_t xennet_get_rx_ref(struct netfront_queue *queue,
0228 RING_IDX ri)
0229 {
0230 int i = xennet_rxidx(ri);
0231 grant_ref_t ref = queue->grant_rx_ref[i];
0232 queue->grant_rx_ref[i] = INVALID_GRANT_REF;
0233 return ref;
0234 }
0235
0236 #ifdef CONFIG_SYSFS
0237 static const struct attribute_group xennet_dev_group;
0238 #endif
0239
0240 static bool xennet_can_sg(struct net_device *dev)
0241 {
0242 return dev->features & NETIF_F_SG;
0243 }
0244
0245
0246 static void rx_refill_timeout(struct timer_list *t)
0247 {
0248 struct netfront_queue *queue = from_timer(queue, t, rx_refill_timer);
0249 napi_schedule(&queue->napi);
0250 }
0251
0252 static int netfront_tx_slot_available(struct netfront_queue *queue)
0253 {
0254 return (queue->tx.req_prod_pvt - queue->tx.rsp_cons) <
0255 (NET_TX_RING_SIZE - XEN_NETIF_NR_SLOTS_MIN - 1);
0256 }
0257
0258 static void xennet_maybe_wake_tx(struct netfront_queue *queue)
0259 {
0260 struct net_device *dev = queue->info->netdev;
0261 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, queue->id);
0262
0263 if (unlikely(netif_tx_queue_stopped(dev_queue)) &&
0264 netfront_tx_slot_available(queue) &&
0265 likely(netif_running(dev)))
0266 netif_tx_wake_queue(netdev_get_tx_queue(dev, queue->id));
0267 }
0268
0269
0270 static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue)
0271 {
0272 struct sk_buff *skb;
0273 struct page *page;
0274
0275 skb = __netdev_alloc_skb(queue->info->netdev,
0276 RX_COPY_THRESHOLD + NET_IP_ALIGN,
0277 GFP_ATOMIC | __GFP_NOWARN);
0278 if (unlikely(!skb))
0279 return NULL;
0280
0281 page = page_pool_alloc_pages(queue->page_pool,
0282 GFP_ATOMIC | __GFP_NOWARN | __GFP_ZERO);
0283 if (unlikely(!page)) {
0284 kfree_skb(skb);
0285 return NULL;
0286 }
0287 skb_add_rx_frag(skb, 0, page, 0, 0, PAGE_SIZE);
0288
0289
0290 skb_reserve(skb, NET_IP_ALIGN);
0291 skb->dev = queue->info->netdev;
0292
0293 return skb;
0294 }
0295
0296
0297 static void xennet_alloc_rx_buffers(struct netfront_queue *queue)
0298 {
0299 RING_IDX req_prod = queue->rx.req_prod_pvt;
0300 int notify;
0301 int err = 0;
0302
0303 if (unlikely(!netif_carrier_ok(queue->info->netdev)))
0304 return;
0305
0306 for (req_prod = queue->rx.req_prod_pvt;
0307 req_prod - queue->rx.rsp_cons < NET_RX_RING_SIZE;
0308 req_prod++) {
0309 struct sk_buff *skb;
0310 unsigned short id;
0311 grant_ref_t ref;
0312 struct page *page;
0313 struct xen_netif_rx_request *req;
0314
0315 skb = xennet_alloc_one_rx_buffer(queue);
0316 if (!skb) {
0317 err = -ENOMEM;
0318 break;
0319 }
0320
0321 id = xennet_rxidx(req_prod);
0322
0323 BUG_ON(queue->rx_skbs[id]);
0324 queue->rx_skbs[id] = skb;
0325
0326 ref = gnttab_claim_grant_reference(&queue->gref_rx_head);
0327 WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)(int)ref));
0328 queue->grant_rx_ref[id] = ref;
0329
0330 page = skb_frag_page(&skb_shinfo(skb)->frags[0]);
0331
0332 req = RING_GET_REQUEST(&queue->rx, req_prod);
0333 gnttab_page_grant_foreign_access_ref_one(ref,
0334 queue->info->xbdev->otherend_id,
0335 page,
0336 0);
0337 req->id = id;
0338 req->gref = ref;
0339 }
0340
0341 queue->rx.req_prod_pvt = req_prod;
0342
0343
0344
0345
0346
0347
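/*
 * If an allocation failed, or fewer than NET_RX_SLOTS_MIN requests are
 * outstanding, retry via the refill timer instead of pushing a partial
 * batch to the backend now.
 */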
0348 if (req_prod - queue->rx.rsp_cons < NET_RX_SLOTS_MIN ||
0349 unlikely(err)) {
0350 mod_timer(&queue->rx_refill_timer, jiffies + (HZ/10));
0351 return;
0352 }
0353
0354 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->rx, notify);
0355 if (notify)
0356 notify_remote_via_irq(queue->rx_irq);
0357 }
0358
0359 static int xennet_open(struct net_device *dev)
0360 {
0361 struct netfront_info *np = netdev_priv(dev);
0362 unsigned int num_queues = dev->real_num_tx_queues;
0363 unsigned int i = 0;
0364 struct netfront_queue *queue = NULL;
0365
0366 if (!np->queues || np->broken)
0367 return -ENODEV;
0368
0369 for (i = 0; i < num_queues; ++i) {
0370 queue = &np->queues[i];
0371 napi_enable(&queue->napi);
0372
0373 spin_lock_bh(&queue->rx_lock);
0374 if (netif_carrier_ok(dev)) {
0375 xennet_alloc_rx_buffers(queue);
0376 queue->rx.sring->rsp_event = queue->rx.rsp_cons + 1;
0377 if (RING_HAS_UNCONSUMED_RESPONSES(&queue->rx))
0378 napi_schedule(&queue->napi);
0379 }
0380 spin_unlock_bh(&queue->rx_lock);
0381 }
0382
0383 netif_tx_start_all_queues(dev);
0384
0385 return 0;
0386 }
0387
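/*
 * Reclaim completed TX entries: walk the response ring, validate each id
 * and its grant, free the associated skb and grant reference, and wake
 * the TX queue if slots became available.  Any inconsistency from the
 * backend marks the device as broken.
 */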
0388 static bool xennet_tx_buf_gc(struct netfront_queue *queue)
0389 {
0390 RING_IDX cons, prod;
0391 unsigned short id;
0392 struct sk_buff *skb;
0393 bool more_to_do;
0394 bool work_done = false;
0395 const struct device *dev = &queue->info->netdev->dev;
0396
0397 BUG_ON(!netif_carrier_ok(queue->info->netdev));
0398
0399 do {
0400 prod = queue->tx.sring->rsp_prod;
0401 if (RING_RESPONSE_PROD_OVERFLOW(&queue->tx, prod)) {
0402 dev_alert(dev, "Illegal number of responses %u\n",
0403 prod - queue->tx.rsp_cons);
0404 goto err;
0405 }
0406 rmb();
0407
0408 for (cons = queue->tx.rsp_cons; cons != prod; cons++) {
0409 struct xen_netif_tx_response txrsp;
0410
0411 work_done = true;
0412
0413 RING_COPY_RESPONSE(&queue->tx, cons, &txrsp);
0414 if (txrsp.status == XEN_NETIF_RSP_NULL)
0415 continue;
0416
0417 id = txrsp.id;
0418 if (id >= RING_SIZE(&queue->tx)) {
0419 dev_alert(dev,
0420 "Response has incorrect id (%u)\n",
0421 id);
0422 goto err;
0423 }
0424 if (queue->tx_link[id] != TX_PENDING) {
0425 dev_alert(dev,
0426 "Response for inactive request\n");
0427 goto err;
0428 }
0429
0430 queue->tx_link[id] = TX_LINK_NONE;
0431 skb = queue->tx_skbs[id];
0432 queue->tx_skbs[id] = NULL;
0433 if (unlikely(!gnttab_end_foreign_access_ref(
0434 queue->grant_tx_ref[id]))) {
0435 dev_alert(dev,
0436 "Grant still in use by backend domain\n");
0437 goto err;
0438 }
0439 gnttab_release_grant_reference(
0440 &queue->gref_tx_head, queue->grant_tx_ref[id]);
0441 queue->grant_tx_ref[id] = INVALID_GRANT_REF;
0442 queue->grant_tx_page[id] = NULL;
0443 add_id_to_list(&queue->tx_skb_freelist, queue->tx_link, id);
0444 dev_kfree_skb_irq(skb);
0445 }
0446
0447 queue->tx.rsp_cons = prod;
0448
0449 RING_FINAL_CHECK_FOR_RESPONSES(&queue->tx, more_to_do);
0450 } while (more_to_do);
0451
0452 xennet_maybe_wake_tx(queue);
0453
0454 return work_done;
0455
0456 err:
0457 queue->info->broken = true;
0458 dev_alert(dev, "Disabled for further use\n");
0459
0460 return work_done;
0461 }
0462
0463 struct xennet_gnttab_make_txreq {
0464 struct netfront_queue *queue;
0465 struct sk_buff *skb;
0466 struct page *page;
0467 struct xen_netif_tx_request *tx;
0468 struct xen_netif_tx_request tx_local;
0469 unsigned int size;
0470 };
0471
0472 static void xennet_tx_setup_grant(unsigned long gfn, unsigned int offset,
0473 unsigned int len, void *data)
0474 {
0475 struct xennet_gnttab_make_txreq *info = data;
0476 unsigned int id;
0477 struct xen_netif_tx_request *tx;
0478 grant_ref_t ref;
0479
0480 struct page *page = info->page;
0481 struct netfront_queue *queue = info->queue;
0482 struct sk_buff *skb = info->skb;
0483
0484 id = get_id_from_list(&queue->tx_skb_freelist, queue->tx_link);
0485 tx = RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++);
0486 ref = gnttab_claim_grant_reference(&queue->gref_tx_head);
0487 WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)(int)ref));
0488
0489 gnttab_grant_foreign_access_ref(ref, queue->info->xbdev->otherend_id,
0490 gfn, GNTMAP_readonly);
0491
0492 queue->tx_skbs[id] = skb;
0493 queue->grant_tx_page[id] = page;
0494 queue->grant_tx_ref[id] = ref;
0495
0496 info->tx_local.id = id;
0497 info->tx_local.gref = ref;
0498 info->tx_local.offset = offset;
0499 info->tx_local.size = len;
0500 info->tx_local.flags = 0;
0501
0502 *tx = info->tx_local;
0503
0504
0505
0506
0507
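/*
 * Queue the id on tx_pend_queue for now; xennet_mark_tx_pending() flips
 * it to TX_PENDING only once all requests for the packet have been
 * written to the ring, so the response handler never accepts a response
 * for a partially written packet.
 */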
0508 add_id_to_list(&queue->tx_pend_queue, queue->tx_link, id);
0509
0510 info->tx = tx;
0511 info->size += info->tx_local.size;
0512 }
0513
0514 static struct xen_netif_tx_request *xennet_make_first_txreq(
0515 struct xennet_gnttab_make_txreq *info,
0516 unsigned int offset, unsigned int len)
0517 {
0518 info->size = 0;
0519
0520 gnttab_for_one_grant(info->page, offset, len, xennet_tx_setup_grant, info);
0521
0522 return info->tx;
0523 }
0524
0525 static void xennet_make_one_txreq(unsigned long gfn, unsigned int offset,
0526 unsigned int len, void *data)
0527 {
0528 struct xennet_gnttab_make_txreq *info = data;
0529
0530 info->tx->flags |= XEN_NETTXF_more_data;
0531 skb_get(info->skb);
0532 xennet_tx_setup_grant(gfn, offset, len, data);
0533 }
0534
0535 static void xennet_make_txreqs(
0536 struct xennet_gnttab_make_txreq *info,
0537 struct page *page,
0538 unsigned int offset, unsigned int len)
0539 {
0540
0541 page += offset >> PAGE_SHIFT;
0542 offset &= ~PAGE_MASK;
0543
0544 while (len) {
0545 info->page = page;
0546 info->size = 0;
0547
0548 gnttab_foreach_grant_in_range(page, offset, len,
0549 xennet_make_one_txreq,
0550 info);
0551
0552 page++;
0553 offset = 0;
0554 len -= info->size;
0555 }
0556 }
0557
0558
0559
0560
0561
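/*
 * Count how many ring slots are needed for this skb: one or more for the
 * linear area plus the slots needed for each (possibly compound-page)
 * fragment.
 */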
0562 static int xennet_count_skb_slots(struct sk_buff *skb)
0563 {
0564 int i, frags = skb_shinfo(skb)->nr_frags;
0565 int slots;
0566
0567 slots = gnttab_count_grant(offset_in_page(skb->data),
0568 skb_headlen(skb));
0569
0570 for (i = 0; i < frags; i++) {
0571 skb_frag_t *frag = skb_shinfo(skb)->frags + i;
0572 unsigned long size = skb_frag_size(frag);
0573 unsigned long offset = skb_frag_off(frag);
0574
0575
0576 offset &= ~PAGE_MASK;
0577
0578 slots += gnttab_count_grant(offset, size);
0579 }
0580
0581 return slots;
0582 }
0583
0584 static u16 xennet_select_queue(struct net_device *dev, struct sk_buff *skb,
0585 struct net_device *sb_dev)
0586 {
0587 unsigned int num_queues = dev->real_num_tx_queues;
0588 u32 hash;
0589 u16 queue_idx;
0590
0591
0592 if (num_queues == 1) {
0593 queue_idx = 0;
0594 } else {
0595 hash = skb_get_hash(skb);
0596 queue_idx = hash % num_queues;
0597 }
0598
0599 return queue_idx;
0600 }
0601
0602 static void xennet_mark_tx_pending(struct netfront_queue *queue)
0603 {
0604 unsigned int i;
0605
0606 while ((i = get_id_from_list(&queue->tx_pend_queue, queue->tx_link)) !=
0607 TX_LINK_NONE)
0608 queue->tx_link[i] = TX_PENDING;
0609 }
0610
0611 static int xennet_xdp_xmit_one(struct net_device *dev,
0612 struct netfront_queue *queue,
0613 struct xdp_frame *xdpf)
0614 {
0615 struct netfront_info *np = netdev_priv(dev);
0616 struct netfront_stats *tx_stats = this_cpu_ptr(np->tx_stats);
0617 struct xennet_gnttab_make_txreq info = {
0618 .queue = queue,
0619 .skb = NULL,
0620 .page = virt_to_page(xdpf->data),
0621 };
0622 int notify;
0623
0624 xennet_make_first_txreq(&info,
0625 offset_in_page(xdpf->data),
0626 xdpf->len);
0627
0628 xennet_mark_tx_pending(queue);
0629
0630 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->tx, notify);
0631 if (notify)
0632 notify_remote_via_irq(queue->tx_irq);
0633
0634 u64_stats_update_begin(&tx_stats->syncp);
0635 tx_stats->bytes += xdpf->len;
0636 tx_stats->packets++;
0637 u64_stats_update_end(&tx_stats->syncp);
0638
0639 xennet_tx_buf_gc(queue);
0640
0641 return 0;
0642 }
0643
0644 static int xennet_xdp_xmit(struct net_device *dev, int n,
0645 struct xdp_frame **frames, u32 flags)
0646 {
0647 unsigned int num_queues = dev->real_num_tx_queues;
0648 struct netfront_info *np = netdev_priv(dev);
0649 struct netfront_queue *queue = NULL;
0650 unsigned long irq_flags;
0651 int nxmit = 0;
0652 int i;
0653
0654 if (unlikely(np->broken))
0655 return -ENODEV;
0656 if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
0657 return -EINVAL;
0658
0659 queue = &np->queues[smp_processor_id() % num_queues];
0660
0661 spin_lock_irqsave(&queue->tx_lock, irq_flags);
0662 for (i = 0; i < n; i++) {
0663 struct xdp_frame *xdpf = frames[i];
0664
0665 if (!xdpf)
0666 continue;
0667 if (xennet_xdp_xmit_one(dev, queue, xdpf))
0668 break;
0669 nxmit++;
0670 }
0671 spin_unlock_irqrestore(&queue->tx_lock, irq_flags);
0672
0673 return nxmit;
0674 }
0675
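/*
 * Copy an skb into a freshly allocated, XEN_PAGE_SIZE-aligned buffer so
 * that only the bounce pages are ever granted to the backend.
 */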
static struct sk_buff *bounce_skb(const struct sk_buff *skb)
0677 {
0678 unsigned int headerlen = skb_headroom(skb);
0679
0680 unsigned int size = ALIGN(skb_end_offset(skb) + skb->data_len,
0681 XEN_PAGE_SIZE);
0682 struct sk_buff *n = alloc_skb(size, GFP_ATOMIC | __GFP_ZERO);
0683
0684 if (!n)
0685 return NULL;
0686
0687 if (!IS_ALIGNED((uintptr_t)n->head, XEN_PAGE_SIZE)) {
0688 WARN_ONCE(1, "misaligned skb allocated\n");
0689 kfree_skb(n);
0690 return NULL;
0691 }
0692
0693
0694 skb_reserve(n, headerlen);
0695
0696 skb_put(n, skb->len);
0697
0698 BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len));
0699
0700 skb_copy_header(n, skb);
0701 return n;
0702 }
0703
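/*
 * Worst-case number of XEN_PAGE_SIZE slots needed for a 64 KiB packet,
 * plus one extra slot to cover a misaligned start.
 */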
0704 #define MAX_XEN_SKB_FRAGS (65536 / XEN_PAGE_SIZE + 1)
0705
0706 static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
0707 {
0708 struct netfront_info *np = netdev_priv(dev);
0709 struct netfront_stats *tx_stats = this_cpu_ptr(np->tx_stats);
0710 struct xen_netif_tx_request *first_tx;
0711 unsigned int i;
0712 int notify;
0713 int slots;
0714 struct page *page;
0715 unsigned int offset;
0716 unsigned int len;
0717 unsigned long flags;
0718 struct netfront_queue *queue = NULL;
0719 struct xennet_gnttab_make_txreq info = { };
0720 unsigned int num_queues = dev->real_num_tx_queues;
0721 u16 queue_index;
0722 struct sk_buff *nskb;
0723
0724
0725 if (num_queues < 1)
0726 goto drop;
0727 if (unlikely(np->broken))
0728 goto drop;
0729
0730 queue_index = skb_get_queue_mapping(skb);
0731 queue = &np->queues[queue_index];
0732
0733
0734
0735
0736 if (unlikely(skb->len > XEN_NETIF_MAX_TX_SIZE)) {
0737 net_alert_ratelimited(
0738 "xennet: skb->len = %u, too big for wire format\n",
0739 skb->len);
0740 goto drop;
0741 }
0742
0743 slots = xennet_count_skb_slots(skb);
0744 if (unlikely(slots > MAX_XEN_SKB_FRAGS + 1)) {
0745 net_dbg_ratelimited("xennet: skb rides the rocket: %d slots, %d bytes\n",
0746 slots, skb->len);
0747 if (skb_linearize(skb))
0748 goto drop;
0749 }
0750
0751 page = virt_to_page(skb->data);
0752 offset = offset_in_page(skb->data);
0753
0754
0755
0756
0757
0758
0759
0760
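/*
 * Bounce the skb if the backend is untrusted, or if the linear header
 * starts so close to the end of its page that fewer than ETH_HLEN bytes
 * would fit into the first slot.
 */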
0761 if (np->bounce || unlikely(PAGE_SIZE - offset < ETH_HLEN)) {
0762 nskb = bounce_skb(skb);
0763 if (!nskb)
0764 goto drop;
0765 dev_consume_skb_any(skb);
0766 skb = nskb;
0767 page = virt_to_page(skb->data);
0768 offset = offset_in_page(skb->data);
0769 }
0770
0771 len = skb_headlen(skb);
0772
0773 spin_lock_irqsave(&queue->tx_lock, flags);
0774
0775 if (unlikely(!netif_carrier_ok(dev) ||
0776 (slots > 1 && !xennet_can_sg(dev)) ||
0777 netif_needs_gso(skb, netif_skb_features(skb)))) {
0778 spin_unlock_irqrestore(&queue->tx_lock, flags);
0779 goto drop;
0780 }
0781
0782
0783 info.queue = queue;
0784 info.skb = skb;
0785 info.page = page;
0786 first_tx = xennet_make_first_txreq(&info, offset, len);
0787 offset += info.tx_local.size;
0788 if (offset == PAGE_SIZE) {
0789 page++;
0790 offset = 0;
0791 }
0792 len -= info.tx_local.size;
0793
0794 if (skb->ip_summed == CHECKSUM_PARTIAL)
0795
0796 first_tx->flags |= XEN_NETTXF_csum_blank |
0797 XEN_NETTXF_data_validated;
0798 else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
0799
0800 first_tx->flags |= XEN_NETTXF_data_validated;
0801
0802
0803 if (skb_shinfo(skb)->gso_size) {
0804 struct xen_netif_extra_info *gso;
0805
0806 gso = (struct xen_netif_extra_info *)
0807 RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++);
0808
0809 first_tx->flags |= XEN_NETTXF_extra_info;
0810
0811 gso->u.gso.size = skb_shinfo(skb)->gso_size;
0812 gso->u.gso.type = (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) ?
0813 XEN_NETIF_GSO_TYPE_TCPV6 :
0814 XEN_NETIF_GSO_TYPE_TCPV4;
0815 gso->u.gso.pad = 0;
0816 gso->u.gso.features = 0;
0817
0818 gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
0819 gso->flags = 0;
0820 }
0821
0822
0823 xennet_make_txreqs(&info, page, offset, len);
0824
0825
0826 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
0827 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
0828 xennet_make_txreqs(&info, skb_frag_page(frag),
0829 skb_frag_off(frag),
0830 skb_frag_size(frag));
0831 }
0832
0833
0834 first_tx->size = skb->len;
0835
0836
0837 skb_tx_timestamp(skb);
0838
0839 xennet_mark_tx_pending(queue);
0840
0841 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->tx, notify);
0842 if (notify)
0843 notify_remote_via_irq(queue->tx_irq);
0844
0845 u64_stats_update_begin(&tx_stats->syncp);
0846 tx_stats->bytes += skb->len;
0847 tx_stats->packets++;
0848 u64_stats_update_end(&tx_stats->syncp);
0849
0850
0851 xennet_tx_buf_gc(queue);
0852
0853 if (!netfront_tx_slot_available(queue))
0854 netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
0855
0856 spin_unlock_irqrestore(&queue->tx_lock, flags);
0857
0858 return NETDEV_TX_OK;
0859
0860 drop:
0861 dev->stats.tx_dropped++;
0862 dev_kfree_skb_any(skb);
0863 return NETDEV_TX_OK;
0864 }
0865
0866 static int xennet_close(struct net_device *dev)
0867 {
0868 struct netfront_info *np = netdev_priv(dev);
0869 unsigned int num_queues = dev->real_num_tx_queues;
0870 unsigned int i;
0871 struct netfront_queue *queue;
0872 netif_tx_stop_all_queues(np->netdev);
0873 for (i = 0; i < num_queues; ++i) {
0874 queue = &np->queues[i];
0875 napi_disable(&queue->napi);
0876 }
0877 return 0;
0878 }
0879
0880 static void xennet_destroy_queues(struct netfront_info *info)
0881 {
0882 unsigned int i;
0883
0884 for (i = 0; i < info->netdev->real_num_tx_queues; i++) {
0885 struct netfront_queue *queue = &info->queues[i];
0886
0887 if (netif_running(info->netdev))
0888 napi_disable(&queue->napi);
0889 netif_napi_del(&queue->napi);
0890 }
0891
0892 kfree(info->queues);
0893 info->queues = NULL;
0894 }
0895
0896 static void xennet_uninit(struct net_device *dev)
0897 {
0898 struct netfront_info *np = netdev_priv(dev);
0899 xennet_destroy_queues(np);
0900 }
0901
0902 static void xennet_set_rx_rsp_cons(struct netfront_queue *queue, RING_IDX val)
0903 {
0904 unsigned long flags;
0905
0906 spin_lock_irqsave(&queue->rx_cons_lock, flags);
0907 queue->rx.rsp_cons = val;
0908 queue->rx_rsp_unconsumed = XEN_RING_NR_UNCONSUMED_RESPONSES(&queue->rx);
0909 spin_unlock_irqrestore(&queue->rx_cons_lock, flags);
0910 }
0911
0912 static void xennet_move_rx_slot(struct netfront_queue *queue, struct sk_buff *skb,
0913 grant_ref_t ref)
0914 {
0915 int new = xennet_rxidx(queue->rx.req_prod_pvt);
0916
0917 BUG_ON(queue->rx_skbs[new]);
0918 queue->rx_skbs[new] = skb;
0919 queue->grant_rx_ref[new] = ref;
0920 RING_GET_REQUEST(&queue->rx, queue->rx.req_prod_pvt)->id = new;
0921 RING_GET_REQUEST(&queue->rx, queue->rx.req_prod_pvt)->gref = ref;
0922 queue->rx.req_prod_pvt++;
0923 }
0924
0925 static int xennet_get_extras(struct netfront_queue *queue,
0926 struct xen_netif_extra_info *extras,
0927 RING_IDX rp)
0928
0929 {
0930 struct xen_netif_extra_info extra;
0931 struct device *dev = &queue->info->netdev->dev;
0932 RING_IDX cons = queue->rx.rsp_cons;
0933 int err = 0;
0934
0935 do {
0936 struct sk_buff *skb;
0937 grant_ref_t ref;
0938
0939 if (unlikely(cons + 1 == rp)) {
0940 if (net_ratelimit())
0941 dev_warn(dev, "Missing extra info\n");
0942 err = -EBADR;
0943 break;
0944 }
0945
0946 RING_COPY_RESPONSE(&queue->rx, ++cons, &extra);
0947
0948 if (unlikely(!extra.type ||
0949 extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
0950 if (net_ratelimit())
0951 dev_warn(dev, "Invalid extra type: %d\n",
0952 extra.type);
0953 err = -EINVAL;
0954 } else {
0955 extras[extra.type - 1] = extra;
0956 }
0957
0958 skb = xennet_get_rx_skb(queue, cons);
0959 ref = xennet_get_rx_ref(queue, cons);
0960 xennet_move_rx_slot(queue, skb, ref);
0961 } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
0962
0963 xennet_set_rx_rsp_cons(queue, cons);
0964 return err;
0965 }
0966
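/*
 * Run the attached XDP program on a received page.  XDP_TX retransmits
 * the frame through xennet_xdp_xmit(), XDP_REDIRECT defers the flush to
 * the caller via *need_xdp_flush; the verdict is returned so the caller
 * can drop anything that was not XDP_PASS.
 */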
0967 static u32 xennet_run_xdp(struct netfront_queue *queue, struct page *pdata,
0968 struct xen_netif_rx_response *rx, struct bpf_prog *prog,
0969 struct xdp_buff *xdp, bool *need_xdp_flush)
0970 {
0971 struct xdp_frame *xdpf;
0972 u32 len = rx->status;
0973 u32 act;
0974 int err;
0975
0976 xdp_init_buff(xdp, XEN_PAGE_SIZE - XDP_PACKET_HEADROOM,
0977 &queue->xdp_rxq);
0978 xdp_prepare_buff(xdp, page_address(pdata), XDP_PACKET_HEADROOM,
0979 len, false);
0980
0981 act = bpf_prog_run_xdp(prog, xdp);
0982 switch (act) {
0983 case XDP_TX:
0984 get_page(pdata);
0985 xdpf = xdp_convert_buff_to_frame(xdp);
0986 err = xennet_xdp_xmit(queue->info->netdev, 1, &xdpf, 0);
0987 if (unlikely(!err))
0988 xdp_return_frame_rx_napi(xdpf);
0989 else if (unlikely(err < 0))
0990 trace_xdp_exception(queue->info->netdev, prog, act);
0991 break;
0992 case XDP_REDIRECT:
0993 get_page(pdata);
0994 err = xdp_do_redirect(queue->info->netdev, xdp, prog);
0995 *need_xdp_flush = true;
0996 if (unlikely(err))
0997 trace_xdp_exception(queue->info->netdev, prog, act);
0998 break;
0999 case XDP_PASS:
1000 case XDP_DROP:
1001 break;
1002
1003 case XDP_ABORTED:
1004 trace_xdp_exception(queue->info->netdev, prog, act);
1005 break;
1006
1007 default:
1008 bpf_warn_invalid_xdp_action(queue->info->netdev, prog, act);
1009 }
1010
1011 return act;
1012 }
1013
1014 static int xennet_get_responses(struct netfront_queue *queue,
1015 struct netfront_rx_info *rinfo, RING_IDX rp,
1016 struct sk_buff_head *list,
1017 bool *need_xdp_flush)
1018 {
1019 struct xen_netif_rx_response *rx = &rinfo->rx, rx_local;
1020 int max = XEN_NETIF_NR_SLOTS_MIN + (rx->status <= RX_COPY_THRESHOLD);
1021 RING_IDX cons = queue->rx.rsp_cons;
1022 struct sk_buff *skb = xennet_get_rx_skb(queue, cons);
1023 struct xen_netif_extra_info *extras = rinfo->extras;
1024 grant_ref_t ref = xennet_get_rx_ref(queue, cons);
1025 struct device *dev = &queue->info->netdev->dev;
1026 struct bpf_prog *xdp_prog;
1027 struct xdp_buff xdp;
1028 int slots = 1;
1029 int err = 0;
1030 u32 verdict;
1031
1032 if (rx->flags & XEN_NETRXF_extra_info) {
1033 err = xennet_get_extras(queue, extras, rp);
1034 if (!err) {
1035 if (extras[XEN_NETIF_EXTRA_TYPE_XDP - 1].type) {
1036 struct xen_netif_extra_info *xdp;
1037
1038 xdp = &extras[XEN_NETIF_EXTRA_TYPE_XDP - 1];
1039 rx->offset = xdp->u.xdp.headroom;
1040 }
1041 }
1042 cons = queue->rx.rsp_cons;
1043 }
1044
1045 for (;;) {
1046
1047
1048
1049
1050
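/*
 * An invalid grant reference means the backend sent a response for a
 * slot we never filled (or have already consumed) - treat it as an
 * error and move on to the next response.
 */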
1051 if (ref == INVALID_GRANT_REF) {
1052 if (net_ratelimit())
1053 dev_warn(dev, "Bad rx response id %d.\n",
1054 rx->id);
1055 err = -EINVAL;
1056 goto next;
1057 }
1058
1059 if (unlikely(rx->status < 0 ||
1060 rx->offset + rx->status > XEN_PAGE_SIZE)) {
1061 if (net_ratelimit())
1062 dev_warn(dev, "rx->offset: %u, size: %d\n",
1063 rx->offset, rx->status);
1064 xennet_move_rx_slot(queue, skb, ref);
1065 err = -EINVAL;
1066 goto next;
1067 }
1068
1069 if (!gnttab_end_foreign_access_ref(ref)) {
1070 dev_alert(dev,
1071 "Grant still in use by backend domain\n");
1072 queue->info->broken = true;
1073 dev_alert(dev, "Disabled for further use\n");
1074 return -EINVAL;
1075 }
1076
1077 gnttab_release_grant_reference(&queue->gref_rx_head, ref);
1078
1079 rcu_read_lock();
1080 xdp_prog = rcu_dereference(queue->xdp_prog);
1081 if (xdp_prog) {
1082 if (!(rx->flags & XEN_NETRXF_more_data)) {
1083
1084 verdict = xennet_run_xdp(queue,
1085 skb_frag_page(&skb_shinfo(skb)->frags[0]),
1086 rx, xdp_prog, &xdp, need_xdp_flush);
1087 if (verdict != XDP_PASS)
1088 err = -EINVAL;
1089 } else {
1090
1091 err = -EINVAL;
1092 }
1093 }
1094 rcu_read_unlock();
1095
1096 __skb_queue_tail(list, skb);
1097
1098 next:
1099 if (!(rx->flags & XEN_NETRXF_more_data))
1100 break;
1101
1102 if (cons + slots == rp) {
1103 if (net_ratelimit())
1104 dev_warn(dev, "Need more slots\n");
1105 err = -ENOENT;
1106 break;
1107 }
1108
1109 RING_COPY_RESPONSE(&queue->rx, cons + slots, &rx_local);
1110 rx = &rx_local;
1111 skb = xennet_get_rx_skb(queue, cons + slots);
1112 ref = xennet_get_rx_ref(queue, cons + slots);
1113 slots++;
1114 }
1115
1116 if (unlikely(slots > max)) {
1117 if (net_ratelimit())
1118 dev_warn(dev, "Too many slots\n");
1119 err = -E2BIG;
1120 }
1121
1122 if (unlikely(err))
1123 xennet_set_rx_rsp_cons(queue, cons + slots);
1124
1125 return err;
1126 }
1127
1128 static int xennet_set_skb_gso(struct sk_buff *skb,
1129 struct xen_netif_extra_info *gso)
1130 {
1131 if (!gso->u.gso.size) {
1132 if (net_ratelimit())
1133 pr_warn("GSO size must not be zero\n");
1134 return -EINVAL;
1135 }
1136
1137 if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4 &&
1138 gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV6) {
1139 if (net_ratelimit())
1140 pr_warn("Bad GSO type %d\n", gso->u.gso.type);
1141 return -EINVAL;
1142 }
1143
1144 skb_shinfo(skb)->gso_size = gso->u.gso.size;
1145 skb_shinfo(skb)->gso_type =
1146 (gso->u.gso.type == XEN_NETIF_GSO_TYPE_TCPV4) ?
1147 SKB_GSO_TCPV4 :
1148 SKB_GSO_TCPV6;
1149
1150
1151 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
1152 skb_shinfo(skb)->gso_segs = 0;
1153
1154 return 0;
1155 }
1156
1157 static int xennet_fill_frags(struct netfront_queue *queue,
1158 struct sk_buff *skb,
1159 struct sk_buff_head *list)
1160 {
1161 RING_IDX cons = queue->rx.rsp_cons;
1162 struct sk_buff *nskb;
1163
1164 while ((nskb = __skb_dequeue(list))) {
1165 struct xen_netif_rx_response rx;
1166 skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0];
1167
1168 RING_COPY_RESPONSE(&queue->rx, ++cons, &rx);
1169
1170 if (skb_shinfo(skb)->nr_frags == MAX_SKB_FRAGS) {
1171 unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
1172
1173 BUG_ON(pull_to < skb_headlen(skb));
1174 __pskb_pull_tail(skb, pull_to - skb_headlen(skb));
1175 }
1176 if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) {
1177 xennet_set_rx_rsp_cons(queue,
1178 ++cons + skb_queue_len(list));
1179 kfree_skb(nskb);
1180 return -ENOENT;
1181 }
1182
1183 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
1184 skb_frag_page(nfrag),
1185 rx.offset, rx.status, PAGE_SIZE);
1186
1187 skb_shinfo(nskb)->nr_frags = 0;
1188 kfree_skb(nskb);
1189 }
1190
1191 xennet_set_rx_rsp_cons(queue, cons);
1192
1193 return 0;
1194 }
1195
1196 static int checksum_setup(struct net_device *dev, struct sk_buff *skb)
1197 {
1198 bool recalculate_partial_csum = false;
1199
1200
1201
1202
1203
1204
1205
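/*
 * A GSO skb must be CHECKSUM_PARTIAL so the stack can resegment it.
 * Some backends deliver GSO packets without csum_blank set, so force
 * CHECKSUM_PARTIAL here and let skb_checksum_setup() redo the partial
 * checksum below.
 */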
1206 if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
1207 struct netfront_info *np = netdev_priv(dev);
1208 atomic_inc(&np->rx_gso_checksum_fixup);
1209 skb->ip_summed = CHECKSUM_PARTIAL;
1210 recalculate_partial_csum = true;
1211 }
1212
1213
1214 if (skb->ip_summed != CHECKSUM_PARTIAL)
1215 return 0;
1216
1217 return skb_checksum_setup(skb, recalculate_partial_csum);
1218 }
1219
1220 static int handle_incoming_queue(struct netfront_queue *queue,
1221 struct sk_buff_head *rxq)
1222 {
1223 struct netfront_stats *rx_stats = this_cpu_ptr(queue->info->rx_stats);
1224 int packets_dropped = 0;
1225 struct sk_buff *skb;
1226
1227 while ((skb = __skb_dequeue(rxq)) != NULL) {
1228 int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
1229
1230 if (pull_to > skb_headlen(skb))
1231 __pskb_pull_tail(skb, pull_to - skb_headlen(skb));
1232
1233
1234 skb->protocol = eth_type_trans(skb, queue->info->netdev);
1235 skb_reset_network_header(skb);
1236
1237 if (checksum_setup(queue->info->netdev, skb)) {
1238 kfree_skb(skb);
1239 packets_dropped++;
1240 queue->info->netdev->stats.rx_errors++;
1241 continue;
1242 }
1243
1244 u64_stats_update_begin(&rx_stats->syncp);
1245 rx_stats->packets++;
1246 rx_stats->bytes += skb->len;
1247 u64_stats_update_end(&rx_stats->syncp);
1248
1249
1250 napi_gro_receive(&queue->napi, skb);
1251 }
1252
1253 return packets_dropped;
1254 }
1255
1256 static int xennet_poll(struct napi_struct *napi, int budget)
1257 {
1258 struct netfront_queue *queue = container_of(napi, struct netfront_queue, napi);
1259 struct net_device *dev = queue->info->netdev;
1260 struct sk_buff *skb;
1261 struct netfront_rx_info rinfo;
1262 struct xen_netif_rx_response *rx = &rinfo.rx;
1263 struct xen_netif_extra_info *extras = rinfo.extras;
1264 RING_IDX i, rp;
1265 int work_done;
1266 struct sk_buff_head rxq;
1267 struct sk_buff_head errq;
1268 struct sk_buff_head tmpq;
1269 int err;
1270 bool need_xdp_flush = false;
1271
1272 spin_lock(&queue->rx_lock);
1273
1274 skb_queue_head_init(&rxq);
1275 skb_queue_head_init(&errq);
1276 skb_queue_head_init(&tmpq);
1277
1278 rp = queue->rx.sring->rsp_prod;
1279 if (RING_RESPONSE_PROD_OVERFLOW(&queue->rx, rp)) {
1280 dev_alert(&dev->dev, "Illegal number of responses %u\n",
1281 rp - queue->rx.rsp_cons);
1282 queue->info->broken = true;
1283 spin_unlock(&queue->rx_lock);
1284 return 0;
1285 }
1286 rmb();
1287
1288 i = queue->rx.rsp_cons;
1289 work_done = 0;
1290 while ((i != rp) && (work_done < budget)) {
1291 RING_COPY_RESPONSE(&queue->rx, i, rx);
1292 memset(extras, 0, sizeof(rinfo.extras));
1293
1294 err = xennet_get_responses(queue, &rinfo, rp, &tmpq,
1295 &need_xdp_flush);
1296
1297 if (unlikely(err)) {
1298 if (queue->info->broken) {
1299 spin_unlock(&queue->rx_lock);
1300 return 0;
1301 }
1302 err:
1303 while ((skb = __skb_dequeue(&tmpq)))
1304 __skb_queue_tail(&errq, skb);
1305 dev->stats.rx_errors++;
1306 i = queue->rx.rsp_cons;
1307 continue;
1308 }
1309
1310 skb = __skb_dequeue(&tmpq);
1311
1312 if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
1313 struct xen_netif_extra_info *gso;
1314 gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
1315
1316 if (unlikely(xennet_set_skb_gso(skb, gso))) {
1317 __skb_queue_head(&tmpq, skb);
1318 xennet_set_rx_rsp_cons(queue,
1319 queue->rx.rsp_cons +
1320 skb_queue_len(&tmpq));
1321 goto err;
1322 }
1323 }
1324
1325 NETFRONT_SKB_CB(skb)->pull_to = rx->status;
1326 if (NETFRONT_SKB_CB(skb)->pull_to > RX_COPY_THRESHOLD)
1327 NETFRONT_SKB_CB(skb)->pull_to = RX_COPY_THRESHOLD;
1328
1329 skb_frag_off_set(&skb_shinfo(skb)->frags[0], rx->offset);
1330 skb_frag_size_set(&skb_shinfo(skb)->frags[0], rx->status);
1331 skb->data_len = rx->status;
1332 skb->len += rx->status;
1333
1334 if (unlikely(xennet_fill_frags(queue, skb, &tmpq)))
1335 goto err;
1336
1337 if (rx->flags & XEN_NETRXF_csum_blank)
1338 skb->ip_summed = CHECKSUM_PARTIAL;
1339 else if (rx->flags & XEN_NETRXF_data_validated)
1340 skb->ip_summed = CHECKSUM_UNNECESSARY;
1341
1342 __skb_queue_tail(&rxq, skb);
1343
1344 i = queue->rx.rsp_cons + 1;
1345 xennet_set_rx_rsp_cons(queue, i);
1346 work_done++;
1347 }
1348 if (need_xdp_flush)
1349 xdp_do_flush();
1350
1351 __skb_queue_purge(&errq);
1352
1353 work_done -= handle_incoming_queue(queue, &rxq);
1354
1355 xennet_alloc_rx_buffers(queue);
1356
1357 if (work_done < budget) {
1358 int more_to_do = 0;
1359
1360 napi_complete_done(napi, work_done);
1361
1362 RING_FINAL_CHECK_FOR_RESPONSES(&queue->rx, more_to_do);
1363 if (more_to_do)
1364 napi_schedule(napi);
1365 }
1366
1367 spin_unlock(&queue->rx_lock);
1368
1369 return work_done;
1370 }
1371
1372 static int xennet_change_mtu(struct net_device *dev, int mtu)
1373 {
1374 int max = xennet_can_sg(dev) ? XEN_NETIF_MAX_TX_SIZE : ETH_DATA_LEN;
1375
1376 if (mtu > max)
1377 return -EINVAL;
1378 dev->mtu = mtu;
1379 return 0;
1380 }
1381
1382 static void xennet_get_stats64(struct net_device *dev,
1383 struct rtnl_link_stats64 *tot)
1384 {
1385 struct netfront_info *np = netdev_priv(dev);
1386 int cpu;
1387
1388 for_each_possible_cpu(cpu) {
1389 struct netfront_stats *rx_stats = per_cpu_ptr(np->rx_stats, cpu);
1390 struct netfront_stats *tx_stats = per_cpu_ptr(np->tx_stats, cpu);
1391 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
1392 unsigned int start;
1393
1394 do {
1395 start = u64_stats_fetch_begin_irq(&tx_stats->syncp);
1396 tx_packets = tx_stats->packets;
1397 tx_bytes = tx_stats->bytes;
1398 } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start));
1399
1400 do {
1401 start = u64_stats_fetch_begin_irq(&rx_stats->syncp);
1402 rx_packets = rx_stats->packets;
1403 rx_bytes = rx_stats->bytes;
1404 } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));
1405
1406 tot->rx_packets += rx_packets;
1407 tot->tx_packets += tx_packets;
1408 tot->rx_bytes += rx_bytes;
1409 tot->tx_bytes += tx_bytes;
1410 }
1411
1412 tot->rx_errors = dev->stats.rx_errors;
1413 tot->tx_dropped = dev->stats.tx_dropped;
1414 }
1415
1416 static void xennet_release_tx_bufs(struct netfront_queue *queue)
1417 {
1418 struct sk_buff *skb;
1419 int i;
1420
1421 for (i = 0; i < NET_TX_RING_SIZE; i++) {
1422
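/* A NULL entry here is a free-list link, not an in-flight packet. */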
1423 if (!queue->tx_skbs[i])
1424 continue;
1425
1426 skb = queue->tx_skbs[i];
1427 queue->tx_skbs[i] = NULL;
1428 get_page(queue->grant_tx_page[i]);
1429 gnttab_end_foreign_access(queue->grant_tx_ref[i],
1430 queue->grant_tx_page[i]);
1431 queue->grant_tx_page[i] = NULL;
1432 queue->grant_tx_ref[i] = INVALID_GRANT_REF;
1433 add_id_to_list(&queue->tx_skb_freelist, queue->tx_link, i);
1434 dev_kfree_skb_irq(skb);
1435 }
1436 }
1437
1438 static void xennet_release_rx_bufs(struct netfront_queue *queue)
1439 {
1440 int id, ref;
1441
1442 spin_lock_bh(&queue->rx_lock);
1443
1444 for (id = 0; id < NET_RX_RING_SIZE; id++) {
1445 struct sk_buff *skb;
1446 struct page *page;
1447
1448 skb = queue->rx_skbs[id];
1449 if (!skb)
1450 continue;
1451
1452 ref = queue->grant_rx_ref[id];
1453 if (ref == INVALID_GRANT_REF)
1454 continue;
1455
1456 page = skb_frag_page(&skb_shinfo(skb)->frags[0]);
1457
1458
1459
1460
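/*
 * gnttab_end_foreign_access() consumes a reference on the page (and may
 * defer its release while the backend still has it mapped), so take an
 * extra reference here - the skb freed below still owns one.
 */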
1461 get_page(page);
1462 gnttab_end_foreign_access(ref, page);
1463 queue->grant_rx_ref[id] = INVALID_GRANT_REF;
1464
1465 kfree_skb(skb);
1466 }
1467
1468 spin_unlock_bh(&queue->rx_lock);
1469 }
1470
1471 static netdev_features_t xennet_fix_features(struct net_device *dev,
1472 netdev_features_t features)
1473 {
1474 struct netfront_info *np = netdev_priv(dev);
1475
1476 if (features & NETIF_F_SG &&
1477 !xenbus_read_unsigned(np->xbdev->otherend, "feature-sg", 0))
1478 features &= ~NETIF_F_SG;
1479
1480 if (features & NETIF_F_IPV6_CSUM &&
1481 !xenbus_read_unsigned(np->xbdev->otherend,
1482 "feature-ipv6-csum-offload", 0))
1483 features &= ~NETIF_F_IPV6_CSUM;
1484
1485 if (features & NETIF_F_TSO &&
1486 !xenbus_read_unsigned(np->xbdev->otherend, "feature-gso-tcpv4", 0))
1487 features &= ~NETIF_F_TSO;
1488
1489 if (features & NETIF_F_TSO6 &&
1490 !xenbus_read_unsigned(np->xbdev->otherend, "feature-gso-tcpv6", 0))
1491 features &= ~NETIF_F_TSO6;
1492
1493 return features;
1494 }
1495
1496 static int xennet_set_features(struct net_device *dev,
1497 netdev_features_t features)
1498 {
1499 if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN) {
netdev_info(dev, "Reducing MTU because no SG offload\n");
1501 dev->mtu = ETH_DATA_LEN;
1502 }
1503
1504 return 0;
1505 }
1506
1507 static bool xennet_handle_tx(struct netfront_queue *queue, unsigned int *eoi)
1508 {
1509 unsigned long flags;
1510
1511 if (unlikely(queue->info->broken))
1512 return false;
1513
1514 spin_lock_irqsave(&queue->tx_lock, flags);
1515 if (xennet_tx_buf_gc(queue))
1516 *eoi = 0;
1517 spin_unlock_irqrestore(&queue->tx_lock, flags);
1518
1519 return true;
1520 }
1521
1522 static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id)
1523 {
1524 unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;
1525
1526 if (likely(xennet_handle_tx(dev_id, &eoiflag)))
1527 xen_irq_lateeoi(irq, eoiflag);
1528
1529 return IRQ_HANDLED;
1530 }
1531
1532 static bool xennet_handle_rx(struct netfront_queue *queue, unsigned int *eoi)
1533 {
1534 unsigned int work_queued;
1535 unsigned long flags;
1536
1537 if (unlikely(queue->info->broken))
1538 return false;
1539
1540 spin_lock_irqsave(&queue->rx_cons_lock, flags);
1541 work_queued = XEN_RING_NR_UNCONSUMED_RESPONSES(&queue->rx);
1542 if (work_queued > queue->rx_rsp_unconsumed) {
1543 queue->rx_rsp_unconsumed = work_queued;
1544 *eoi = 0;
1545 } else if (unlikely(work_queued < queue->rx_rsp_unconsumed)) {
1546 const struct device *dev = &queue->info->netdev->dev;
1547
1548 spin_unlock_irqrestore(&queue->rx_cons_lock, flags);
1549 dev_alert(dev, "RX producer index going backwards\n");
1550 dev_alert(dev, "Disabled for further use\n");
1551 queue->info->broken = true;
1552 return false;
1553 }
1554 spin_unlock_irqrestore(&queue->rx_cons_lock, flags);
1555
1556 if (likely(netif_carrier_ok(queue->info->netdev) && work_queued))
1557 napi_schedule(&queue->napi);
1558
1559 return true;
1560 }
1561
1562 static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id)
1563 {
1564 unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;
1565
1566 if (likely(xennet_handle_rx(dev_id, &eoiflag)))
1567 xen_irq_lateeoi(irq, eoiflag);
1568
1569 return IRQ_HANDLED;
1570 }
1571
1572 static irqreturn_t xennet_interrupt(int irq, void *dev_id)
1573 {
1574 unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;
1575
1576 if (xennet_handle_tx(dev_id, &eoiflag) &&
1577 xennet_handle_rx(dev_id, &eoiflag))
1578 xen_irq_lateeoi(irq, eoiflag);
1579
1580 return IRQ_HANDLED;
1581 }
1582
1583 #ifdef CONFIG_NET_POLL_CONTROLLER
1584 static void xennet_poll_controller(struct net_device *dev)
1585 {
1586
1587 struct netfront_info *info = netdev_priv(dev);
1588 unsigned int num_queues = dev->real_num_tx_queues;
1589 unsigned int i;
1590
1591 if (info->broken)
1592 return;
1593
1594 for (i = 0; i < num_queues; ++i)
1595 xennet_interrupt(0, &info->queues[i]);
1596 }
1597 #endif
1598
1599 #define NETBACK_XDP_HEADROOM_DISABLE 0
1600 #define NETBACK_XDP_HEADROOM_ENABLE 1
1601
1602 static int talk_to_netback_xdp(struct netfront_info *np, int xdp)
1603 {
1604 int err;
1605 unsigned short headroom;
1606
1607 headroom = xdp ? XDP_PACKET_HEADROOM : 0;
1608 err = xenbus_printf(XBT_NIL, np->xbdev->nodename,
1609 "xdp-headroom", "%hu",
1610 headroom);
1611 if (err)
1612 pr_warn("Error writing xdp-headroom\n");
1613
1614 return err;
1615 }
1616
1617 static int xennet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
1618 struct netlink_ext_ack *extack)
1619 {
1620 unsigned long max_mtu = XEN_PAGE_SIZE - XDP_PACKET_HEADROOM;
1621 struct netfront_info *np = netdev_priv(dev);
1622 struct bpf_prog *old_prog;
1623 unsigned int i, err;
1624
1625 if (dev->mtu > max_mtu) {
1626 netdev_warn(dev, "XDP requires MTU less than %lu\n", max_mtu);
1627 return -EINVAL;
1628 }
1629
1630 if (!np->netback_has_xdp_headroom)
1631 return 0;
1632
1633 xenbus_switch_state(np->xbdev, XenbusStateReconfiguring);
1634
1635 err = talk_to_netback_xdp(np, prog ? NETBACK_XDP_HEADROOM_ENABLE :
1636 NETBACK_XDP_HEADROOM_DISABLE);
1637 if (err)
1638 return err;
1639
1640
1641 wait_event(module_wq,
1642 xenbus_read_driver_state(np->xbdev->otherend) ==
1643 XenbusStateReconfigured);
1644 np->netfront_xdp_enabled = true;
1645
1646 old_prog = rtnl_dereference(np->queues[0].xdp_prog);
1647
1648 if (prog)
1649 bpf_prog_add(prog, dev->real_num_tx_queues);
1650
1651 for (i = 0; i < dev->real_num_tx_queues; ++i)
1652 rcu_assign_pointer(np->queues[i].xdp_prog, prog);
1653
1654 if (old_prog)
1655 for (i = 0; i < dev->real_num_tx_queues; ++i)
1656 bpf_prog_put(old_prog);
1657
1658 xenbus_switch_state(np->xbdev, XenbusStateConnected);
1659
1660 return 0;
1661 }
1662
1663 static int xennet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
1664 {
1665 struct netfront_info *np = netdev_priv(dev);
1666
1667 if (np->broken)
1668 return -ENODEV;
1669
1670 switch (xdp->command) {
1671 case XDP_SETUP_PROG:
1672 return xennet_xdp_set(dev, xdp->prog, xdp->extack);
1673 default:
1674 return -EINVAL;
1675 }
1676 }
1677
1678 static const struct net_device_ops xennet_netdev_ops = {
1679 .ndo_uninit = xennet_uninit,
1680 .ndo_open = xennet_open,
1681 .ndo_stop = xennet_close,
1682 .ndo_start_xmit = xennet_start_xmit,
1683 .ndo_change_mtu = xennet_change_mtu,
1684 .ndo_get_stats64 = xennet_get_stats64,
1685 .ndo_set_mac_address = eth_mac_addr,
1686 .ndo_validate_addr = eth_validate_addr,
1687 .ndo_fix_features = xennet_fix_features,
1688 .ndo_set_features = xennet_set_features,
1689 .ndo_select_queue = xennet_select_queue,
1690 .ndo_bpf = xennet_xdp,
1691 .ndo_xdp_xmit = xennet_xdp_xmit,
1692 #ifdef CONFIG_NET_POLL_CONTROLLER
1693 .ndo_poll_controller = xennet_poll_controller,
1694 #endif
1695 };
1696
1697 static void xennet_free_netdev(struct net_device *netdev)
1698 {
1699 struct netfront_info *np = netdev_priv(netdev);
1700
1701 free_percpu(np->rx_stats);
1702 free_percpu(np->tx_stats);
1703 free_netdev(netdev);
1704 }
1705
1706 static struct net_device *xennet_create_dev(struct xenbus_device *dev)
1707 {
1708 int err;
1709 struct net_device *netdev;
1710 struct netfront_info *np;
1711
1712 netdev = alloc_etherdev_mq(sizeof(struct netfront_info), xennet_max_queues);
1713 if (!netdev)
1714 return ERR_PTR(-ENOMEM);
1715
1716 np = netdev_priv(netdev);
1717 np->xbdev = dev;
1718
1719 np->queues = NULL;
1720
1721 err = -ENOMEM;
1722 np->rx_stats = netdev_alloc_pcpu_stats(struct netfront_stats);
1723 if (np->rx_stats == NULL)
1724 goto exit;
1725 np->tx_stats = netdev_alloc_pcpu_stats(struct netfront_stats);
1726 if (np->tx_stats == NULL)
1727 goto exit;
1728
1729 netdev->netdev_ops = &xennet_netdev_ops;
1730
1731 netdev->features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
1732 NETIF_F_GSO_ROBUST;
1733 netdev->hw_features = NETIF_F_SG |
1734 NETIF_F_IPV6_CSUM |
1735 NETIF_F_TSO | NETIF_F_TSO6;
1736
1737
1738
1739
1740
1741
1742
1743 netdev->features |= netdev->hw_features;
1744
1745 netdev->ethtool_ops = &xennet_ethtool_ops;
1746 netdev->min_mtu = ETH_MIN_MTU;
1747 netdev->max_mtu = XEN_NETIF_MAX_TX_SIZE;
1748 SET_NETDEV_DEV(netdev, &dev->dev);
1749
1750 np->netdev = netdev;
1751 np->netfront_xdp_enabled = false;
1752
1753 netif_carrier_off(netdev);
1754
1755 do {
1756 xenbus_switch_state(dev, XenbusStateInitialising);
1757 err = wait_event_timeout(module_wq,
1758 xenbus_read_driver_state(dev->otherend) !=
1759 XenbusStateClosed &&
1760 xenbus_read_driver_state(dev->otherend) !=
1761 XenbusStateUnknown, XENNET_TIMEOUT);
1762 } while (!err);
1763
1764 return netdev;
1765
1766 exit:
1767 xennet_free_netdev(netdev);
1768 return ERR_PTR(err);
1769 }
1770
1771
1772
1773
1774
1775
1776 static int netfront_probe(struct xenbus_device *dev,
1777 const struct xenbus_device_id *id)
1778 {
1779 int err;
1780 struct net_device *netdev;
1781 struct netfront_info *info;
1782
1783 netdev = xennet_create_dev(dev);
1784 if (IS_ERR(netdev)) {
1785 err = PTR_ERR(netdev);
1786 xenbus_dev_fatal(dev, err, "creating netdev");
1787 return err;
1788 }
1789
1790 info = netdev_priv(netdev);
1791 dev_set_drvdata(&dev->dev, info);
1792 #ifdef CONFIG_SYSFS
1793 info->netdev->sysfs_groups[0] = &xennet_dev_group;
1794 #endif
1795
1796 return 0;
1797 }
1798
1799 static void xennet_end_access(int ref, void *page)
1800 {
1801
1802 if (ref != INVALID_GRANT_REF)
1803 gnttab_end_foreign_access(ref, virt_to_page(page));
1804 }
1805
1806 static void xennet_disconnect_backend(struct netfront_info *info)
1807 {
1808 unsigned int i = 0;
1809 unsigned int num_queues = info->netdev->real_num_tx_queues;
1810
1811 netif_carrier_off(info->netdev);
1812
1813 for (i = 0; i < num_queues && info->queues; ++i) {
1814 struct netfront_queue *queue = &info->queues[i];
1815
1816 del_timer_sync(&queue->rx_refill_timer);
1817
1818 if (queue->tx_irq && (queue->tx_irq == queue->rx_irq))
1819 unbind_from_irqhandler(queue->tx_irq, queue);
1820 if (queue->tx_irq && (queue->tx_irq != queue->rx_irq)) {
1821 unbind_from_irqhandler(queue->tx_irq, queue);
1822 unbind_from_irqhandler(queue->rx_irq, queue);
1823 }
1824 queue->tx_evtchn = queue->rx_evtchn = 0;
1825 queue->tx_irq = queue->rx_irq = 0;
1826
1827 if (netif_running(info->netdev))
1828 napi_synchronize(&queue->napi);
1829
1830 xennet_release_tx_bufs(queue);
1831 xennet_release_rx_bufs(queue);
1832 gnttab_free_grant_references(queue->gref_tx_head);
1833 gnttab_free_grant_references(queue->gref_rx_head);
1834
1835
1836 xennet_end_access(queue->tx_ring_ref, queue->tx.sring);
1837 xennet_end_access(queue->rx_ring_ref, queue->rx.sring);
1838
1839 queue->tx_ring_ref = INVALID_GRANT_REF;
1840 queue->rx_ring_ref = INVALID_GRANT_REF;
1841 queue->tx.sring = NULL;
1842 queue->rx.sring = NULL;
1843
1844 page_pool_destroy(queue->page_pool);
1845 }
1846 }
1847
1848
1849
1850
1851
1852
1853
1854 static int netfront_resume(struct xenbus_device *dev)
1855 {
1856 struct netfront_info *info = dev_get_drvdata(&dev->dev);
1857
1858 dev_dbg(&dev->dev, "%s\n", dev->nodename);
1859
1860 netif_tx_lock_bh(info->netdev);
1861 netif_device_detach(info->netdev);
1862 netif_tx_unlock_bh(info->netdev);
1863
1864 xennet_disconnect_backend(info);
1865 return 0;
1866 }
1867
1868 static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
1869 {
1870 char *s, *e, *macstr;
1871 int i;
1872
1873 macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
1874 if (IS_ERR(macstr))
1875 return PTR_ERR(macstr);
1876
1877 for (i = 0; i < ETH_ALEN; i++) {
1878 mac[i] = simple_strtoul(s, &e, 16);
1879 if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
1880 kfree(macstr);
1881 return -ENOENT;
1882 }
1883 s = e+1;
1884 }
1885
1886 kfree(macstr);
1887 return 0;
1888 }
1889
1890 static int setup_netfront_single(struct netfront_queue *queue)
1891 {
1892 int err;
1893
1894 err = xenbus_alloc_evtchn(queue->info->xbdev, &queue->tx_evtchn);
1895 if (err < 0)
1896 goto fail;
1897
1898 err = bind_evtchn_to_irqhandler_lateeoi(queue->tx_evtchn,
1899 xennet_interrupt, 0,
1900 queue->info->netdev->name,
1901 queue);
1902 if (err < 0)
1903 goto bind_fail;
1904 queue->rx_evtchn = queue->tx_evtchn;
1905 queue->rx_irq = queue->tx_irq = err;
1906
1907 return 0;
1908
1909 bind_fail:
1910 xenbus_free_evtchn(queue->info->xbdev, queue->tx_evtchn);
1911 queue->tx_evtchn = 0;
1912 fail:
1913 return err;
1914 }
1915
1916 static int setup_netfront_split(struct netfront_queue *queue)
1917 {
1918 int err;
1919
1920 err = xenbus_alloc_evtchn(queue->info->xbdev, &queue->tx_evtchn);
1921 if (err < 0)
1922 goto fail;
1923 err = xenbus_alloc_evtchn(queue->info->xbdev, &queue->rx_evtchn);
1924 if (err < 0)
1925 goto alloc_rx_evtchn_fail;
1926
1927 snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name),
1928 "%s-tx", queue->name);
1929 err = bind_evtchn_to_irqhandler_lateeoi(queue->tx_evtchn,
1930 xennet_tx_interrupt, 0,
1931 queue->tx_irq_name, queue);
1932 if (err < 0)
1933 goto bind_tx_fail;
1934 queue->tx_irq = err;
1935
1936 snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name),
1937 "%s-rx", queue->name);
1938 err = bind_evtchn_to_irqhandler_lateeoi(queue->rx_evtchn,
1939 xennet_rx_interrupt, 0,
1940 queue->rx_irq_name, queue);
1941 if (err < 0)
1942 goto bind_rx_fail;
1943 queue->rx_irq = err;
1944
1945 return 0;
1946
1947 bind_rx_fail:
1948 unbind_from_irqhandler(queue->tx_irq, queue);
1949 queue->tx_irq = 0;
1950 bind_tx_fail:
1951 xenbus_free_evtchn(queue->info->xbdev, queue->rx_evtchn);
1952 queue->rx_evtchn = 0;
1953 alloc_rx_evtchn_fail:
1954 xenbus_free_evtchn(queue->info->xbdev, queue->tx_evtchn);
1955 queue->tx_evtchn = 0;
1956 fail:
1957 return err;
1958 }
1959
1960 static int setup_netfront(struct xenbus_device *dev,
1961 struct netfront_queue *queue, unsigned int feature_split_evtchn)
1962 {
1963 struct xen_netif_tx_sring *txs;
1964 struct xen_netif_rx_sring *rxs;
1965 int err;
1966
1967 queue->tx_ring_ref = INVALID_GRANT_REF;
1968 queue->rx_ring_ref = INVALID_GRANT_REF;
1969 queue->rx.sring = NULL;
1970 queue->tx.sring = NULL;
1971
1972 err = xenbus_setup_ring(dev, GFP_NOIO | __GFP_HIGH, (void **)&txs,
1973 1, &queue->tx_ring_ref);
1974 if (err)
1975 goto fail;
1976
1977 XEN_FRONT_RING_INIT(&queue->tx, txs, XEN_PAGE_SIZE);
1978
1979 err = xenbus_setup_ring(dev, GFP_NOIO | __GFP_HIGH, (void **)&rxs,
1980 1, &queue->rx_ring_ref);
1981 if (err)
1982 goto fail;
1983
1984 XEN_FRONT_RING_INIT(&queue->rx, rxs, XEN_PAGE_SIZE);
1985
1986 if (feature_split_evtchn)
1987 err = setup_netfront_split(queue);
1988
1989
1990
1991
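/*
 * Fall back to a single, shared event channel if split channels are not
 * advertised by the backend or could not be set up.
 */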
1992 if (!feature_split_evtchn || err)
1993 err = setup_netfront_single(queue);
1994
1995 if (err)
1996 goto fail;
1997
1998 return 0;
1999
2000 fail:
2001 xenbus_teardown_ring((void **)&queue->rx.sring, 1, &queue->rx_ring_ref);
2002 xenbus_teardown_ring((void **)&queue->tx.sring, 1, &queue->tx_ring_ref);
2003
2004 return err;
2005 }
2006
2007
2008
2009
2010
2011 static int xennet_init_queue(struct netfront_queue *queue)
2012 {
2013 unsigned short i;
2014 int err = 0;
2015 char *devid;
2016
2017 spin_lock_init(&queue->tx_lock);
2018 spin_lock_init(&queue->rx_lock);
2019 spin_lock_init(&queue->rx_cons_lock);
2020
2021 timer_setup(&queue->rx_refill_timer, rx_refill_timeout, 0);
2022
2023 devid = strrchr(queue->info->xbdev->nodename, '/') + 1;
2024 snprintf(queue->name, sizeof(queue->name), "vif%s-q%u",
2025 devid, queue->id);
2026
2027
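/*
 * Initialise tx_skb_freelist as a free chain containing every entry,
 * terminated by TX_LINK_NONE.
 */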
2028 queue->tx_skb_freelist = 0;
2029 queue->tx_pend_queue = TX_LINK_NONE;
2030 for (i = 0; i < NET_TX_RING_SIZE; i++) {
2031 queue->tx_link[i] = i + 1;
2032 queue->grant_tx_ref[i] = INVALID_GRANT_REF;
2033 queue->grant_tx_page[i] = NULL;
2034 }
2035 queue->tx_link[NET_TX_RING_SIZE - 1] = TX_LINK_NONE;
2036
2037
2038 for (i = 0; i < NET_RX_RING_SIZE; i++) {
2039 queue->rx_skbs[i] = NULL;
2040 queue->grant_rx_ref[i] = INVALID_GRANT_REF;
2041 }
2042
2043
2044 if (gnttab_alloc_grant_references(NET_TX_RING_SIZE,
2045 &queue->gref_tx_head) < 0) {
2046 pr_alert("can't alloc tx grant refs\n");
2047 err = -ENOMEM;
2048 goto exit;
2049 }
2050
2051
2052 if (gnttab_alloc_grant_references(NET_RX_RING_SIZE,
2053 &queue->gref_rx_head) < 0) {
2054 pr_alert("can't alloc rx grant refs\n");
2055 err = -ENOMEM;
2056 goto exit_free_tx;
2057 }
2058
2059 return 0;
2060
2061 exit_free_tx:
2062 gnttab_free_grant_references(queue->gref_tx_head);
2063 exit:
2064 return err;
2065 }
2066
2067 static int write_queue_xenstore_keys(struct netfront_queue *queue,
2068 struct xenbus_transaction *xbt, int write_hierarchical)
2069 {
2070
2071
2072
2073
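/*
 * Write this queue's ring references and event channel(s) either
 * directly under the device node (single queue, old style) or under a
 * queue-N subdirectory when write_hierarchical is set.
 */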
2074 struct xenbus_device *dev = queue->info->xbdev;
2075 int err;
2076 const char *message;
2077 char *path;
2078 size_t pathsize;
2079
2080
2081 if (write_hierarchical) {
2082 pathsize = strlen(dev->nodename) + 10;
2083 path = kzalloc(pathsize, GFP_KERNEL);
2084 if (!path) {
2085 err = -ENOMEM;
2086 message = "out of memory while writing ring references";
2087 goto error;
2088 }
2089 snprintf(path, pathsize, "%s/queue-%u",
2090 dev->nodename, queue->id);
2091 } else {
2092 path = (char *)dev->nodename;
2093 }
2094
2095
2096 err = xenbus_printf(*xbt, path, "tx-ring-ref", "%u",
2097 queue->tx_ring_ref);
2098 if (err) {
2099 message = "writing tx-ring-ref";
2100 goto error;
2101 }
2102
2103 err = xenbus_printf(*xbt, path, "rx-ring-ref", "%u",
2104 queue->rx_ring_ref);
2105 if (err) {
2106 message = "writing rx-ring-ref";
2107 goto error;
2108 }
2109
2110
2111
2112
2113 if (queue->tx_evtchn == queue->rx_evtchn) {
2114
2115 err = xenbus_printf(*xbt, path,
2116 "event-channel", "%u", queue->tx_evtchn);
2117 if (err) {
2118 message = "writing event-channel";
2119 goto error;
2120 }
2121 } else {
2122
2123 err = xenbus_printf(*xbt, path,
2124 "event-channel-tx", "%u", queue->tx_evtchn);
2125 if (err) {
2126 message = "writing event-channel-tx";
2127 goto error;
2128 }
2129
2130 err = xenbus_printf(*xbt, path,
2131 "event-channel-rx", "%u", queue->rx_evtchn);
2132 if (err) {
2133 message = "writing event-channel-rx";
2134 goto error;
2135 }
2136 }
2137
2138 if (write_hierarchical)
2139 kfree(path);
2140 return 0;
2141
2142 error:
2143 if (write_hierarchical)
2144 kfree(path);
2145 xenbus_dev_fatal(dev, err, "%s", message);
2146 return err;
2147 }
2148
2149
2150
2151 static int xennet_create_page_pool(struct netfront_queue *queue)
2152 {
2153 int err;
2154 struct page_pool_params pp_params = {
2155 .order = 0,
2156 .flags = 0,
2157 .pool_size = NET_RX_RING_SIZE,
2158 .nid = NUMA_NO_NODE,
2159 .dev = &queue->info->netdev->dev,
2160 .offset = XDP_PACKET_HEADROOM,
2161 .max_len = XEN_PAGE_SIZE - XDP_PACKET_HEADROOM,
2162 };
2163
2164 queue->page_pool = page_pool_create(&pp_params);
2165 if (IS_ERR(queue->page_pool)) {
2166 err = PTR_ERR(queue->page_pool);
2167 queue->page_pool = NULL;
2168 return err;
2169 }
2170
2171 err = xdp_rxq_info_reg(&queue->xdp_rxq, queue->info->netdev,
2172 queue->id, 0);
2173 if (err) {
2174 netdev_err(queue->info->netdev, "xdp_rxq_info_reg failed\n");
2175 goto err_free_pp;
2176 }
2177
2178 err = xdp_rxq_info_reg_mem_model(&queue->xdp_rxq,
2179 MEM_TYPE_PAGE_POOL, queue->page_pool);
2180 if (err) {
2181 netdev_err(queue->info->netdev, "xdp_rxq_info_reg_mem_model failed\n");
2182 goto err_unregister_rxq;
2183 }
2184 return 0;
2185
2186 err_unregister_rxq:
2187 xdp_rxq_info_unreg(&queue->xdp_rxq);
2188 err_free_pp:
2189 page_pool_destroy(queue->page_pool);
2190 queue->page_pool = NULL;
2191 return err;
2192 }
2193
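/*
 * Allocate and initialise *num_queues queues.  If a queue fails to
 * initialise, *num_queues is trimmed to the number actually created and
 * the remainder is skipped; a page-pool allocation failure is fatal
 * instead.  The caller (talk_to_netback()) holds rtnl_lock across this
 * call.
 */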
2194 static int xennet_create_queues(struct netfront_info *info,
2195 unsigned int *num_queues)
2196 {
2197 unsigned int i;
2198 int ret;
2199
2200 info->queues = kcalloc(*num_queues, sizeof(struct netfront_queue),
2201 GFP_KERNEL);
2202 if (!info->queues)
2203 return -ENOMEM;
2204
2205 for (i = 0; i < *num_queues; i++) {
2206 struct netfront_queue *queue = &info->queues[i];
2207
2208 queue->id = i;
2209 queue->info = info;
2210
2211 ret = xennet_init_queue(queue);
2212 if (ret < 0) {
2213 dev_warn(&info->xbdev->dev,
2214 "only created %d queues\n", i);
2215 *num_queues = i;
2216 break;
2217 }
2218
2219 /* use page pool recycling instead of buddy allocator */
2220 ret = xennet_create_page_pool(queue);
2221 if (ret < 0) {
2222 dev_err(&info->xbdev->dev, "can't allocate page pool\n");
2223 *num_queues = i;
2224 return ret;
2225 }
2226
2227 netif_napi_add(queue->info->netdev, &queue->napi,
2228 xennet_poll, 64);
2229 if (netif_running(info->netdev))
2230 napi_enable(&queue->napi);
2231 }
2232
2233 netif_set_real_num_tx_queues(info->netdev, *num_queues);
2234
2235 if (*num_queues == 0) {
2236 dev_err(&info->xbdev->dev, "no queues\n");
2237 return -EINVAL;
2238 }
2239 return 0;
2240 }
2241
2242 /* Common code used when first setting up, and when resuming. */
2243 static int talk_to_netback(struct xenbus_device *dev,
2244 struct netfront_info *info)
2245 {
2246 const char *message;
2247 struct xenbus_transaction xbt;
2248 int err;
2249 unsigned int feature_split_evtchn;
2250 unsigned int i = 0;
2251 unsigned int max_queues = 0;
2252 struct netfront_queue *queue = NULL;
2253 unsigned int num_queues = 1;
2254 u8 addr[ETH_ALEN];
2255
2256 info->netdev->irq = 0;
2257
2258 /* Check if backend is trusted. */
2259 info->bounce = !xennet_trusted ||
2260 !xenbus_read_unsigned(dev->nodename, "trusted", 1);
2261
2262 /* Check if backend supports multiple queues. */
2263 max_queues = xenbus_read_unsigned(info->xbdev->otherend,
2264 "multi-queue-max-queues", 1);
2265 num_queues = min(max_queues, xennet_max_queues);
2266
2267 /* Check if backend supports split event channels. */
2268 feature_split_evtchn = xenbus_read_unsigned(info->xbdev->otherend,
2269 "feature-split-event-channels", 0);
2270
2271 /* Read mac addr. */
2272 err = xen_net_read_mac(dev, addr);
2273 if (err) {
2274 xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
2275 goto out_unlocked;
2276 }
2277 eth_hw_addr_set(info->netdev, addr);
2278
2279 info->netback_has_xdp_headroom = xenbus_read_unsigned(info->xbdev->otherend,
2280 "feature-xdp-headroom", 0);
2281 if (info->netback_has_xdp_headroom) {
2282 /* set the current xen-netfront xdp state */
2283 err = talk_to_netback_xdp(info, info->netfront_xdp_enabled ?
2284 NETBACK_XDP_HEADROOM_ENABLE :
2285 NETBACK_XDP_HEADROOM_DISABLE);
2286 if (err)
2287 goto out_unlocked;
2288 }
2289
2290 rtnl_lock();
2291 if (info->queues)
2292 xennet_destroy_queues(info);
2293
2294 /* For the case of a reconnect reset the "broken" indicator. */
2295 info->broken = false;
2296
2297 err = xennet_create_queues(info, &num_queues);
2298 if (err < 0) {
2299 xenbus_dev_fatal(dev, err, "creating queues");
2300 kfree(info->queues);
2301 info->queues = NULL;
2302 goto out;
2303 }
2304 rtnl_unlock();
2305
2306 /* Create shared ring, alloc event channel -- for each queue */
2307 for (i = 0; i < num_queues; ++i) {
2308 queue = &info->queues[i];
2309 err = setup_netfront(dev, queue, feature_split_evtchn);
2310 if (err)
2311 goto destroy_ring;
2312 }
2313
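/* All xenstore keys below are written inside a single transaction.  If
 * xenbus_transaction_end() reports -EAGAIN (another writer touched the
 * store concurrently), restart from "again" and redo every write.
 */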
2314 again:
2315 err = xenbus_transaction_start(&xbt);
2316 if (err) {
2317 xenbus_dev_fatal(dev, err, "starting transaction");
2318 goto destroy_ring;
2319 }
2320
2321 if (xenbus_exists(XBT_NIL,
2322 info->xbdev->otherend, "multi-queue-max-queues")) {
2323 /* Write the number of queues */
2324 err = xenbus_printf(xbt, dev->nodename,
2325 "multi-queue-num-queues", "%u", num_queues);
2326 if (err) {
2327 message = "writing multi-queue-num-queues";
2328 goto abort_transaction_no_dev_fatal;
2329 }
2330 }
2331
2332 if (num_queues == 1) {
2333 err = write_queue_xenstore_keys(&info->queues[0], &xbt, 0);
2334 if (err)
2335 goto abort_transaction_no_dev_fatal;
2336 } else {
2337 /* Write the keys for each queue */
2338 for (i = 0; i < num_queues; ++i) {
2339 queue = &info->queues[i];
2340 err = write_queue_xenstore_keys(queue, &xbt, 1);
2341 if (err)
2342 goto abort_transaction_no_dev_fatal;
2343 }
2344 }
2345
2346 /* The remaining keys are not queue-specific */
2347 err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u",
2348 1);
2349 if (err) {
2350 message = "writing request-rx-copy";
2351 goto abort_transaction;
2352 }
2353
2354 err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
2355 if (err) {
2356 message = "writing feature-rx-notify";
2357 goto abort_transaction;
2358 }
2359
2360 err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
2361 if (err) {
2362 message = "writing feature-sg";
2363 goto abort_transaction;
2364 }
2365
2366 err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1);
2367 if (err) {
2368 message = "writing feature-gso-tcpv4";
2369 goto abort_transaction;
2370 }
2371
2372 err = xenbus_write(xbt, dev->nodename, "feature-gso-tcpv6", "1");
2373 if (err) {
2374 message = "writing feature-gso-tcpv6";
2375 goto abort_transaction;
2376 }
2377
2378 err = xenbus_write(xbt, dev->nodename, "feature-ipv6-csum-offload",
2379 "1");
2380 if (err) {
2381 message = "writing feature-ipv6-csum-offload";
2382 goto abort_transaction;
2383 }
2384
2385 err = xenbus_transaction_end(xbt, 0);
2386 if (err) {
2387 if (err == -EAGAIN)
2388 goto again;
2389 xenbus_dev_fatal(dev, err, "completing transaction");
2390 goto destroy_ring;
2391 }
2392
2393 return 0;
2394
2395 abort_transaction:
2396 xenbus_dev_fatal(dev, err, "%s", message);
2397 abort_transaction_no_dev_fatal:
2398 xenbus_transaction_end(xbt, 1);
2399 destroy_ring:
2400 xennet_disconnect_backend(info);
2401 rtnl_lock();
2402 xennet_destroy_queues(info);
2403 out:
2404 rtnl_unlock();
2405 out_unlocked:
2406 device_unregister(&dev->dev);
2407 return err;
2408 }
2409
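/*
 * (Re)connect to the backend: require feature-rx-copy, renegotiate via
 * talk_to_netback(), register the net_device on the first connect, then
 * recompute features, kick every queue's event channel(s) and refill
 * the RX rings.
 */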
2410 static int xennet_connect(struct net_device *dev)
2411 {
2412 struct netfront_info *np = netdev_priv(dev);
2413 unsigned int num_queues = 0;
2414 int err;
2415 unsigned int j = 0;
2416 struct netfront_queue *queue = NULL;
2417
2418 if (!xenbus_read_unsigned(np->xbdev->otherend, "feature-rx-copy", 0)) {
2419 dev_info(&dev->dev,
2420 "backend does not support copying receive path\n");
2421 return -ENODEV;
2422 }
2423
2424 err = talk_to_netback(np->xbdev, np);
2425 if (err)
2426 return err;
2427 if (np->netback_has_xdp_headroom)
2428 pr_info("backend supports XDP headroom\n");
2429 if (np->bounce)
2430 dev_info(&np->xbdev->dev,
2431 "bouncing transmitted data to zeroed pages\n");
2432
2433 /* talk_to_netback() sets the correct number of queues */
2434 num_queues = dev->real_num_tx_queues;
2435
2436 if (dev->reg_state == NETREG_UNINITIALIZED) {
2437 err = register_netdev(dev);
2438 if (err) {
2439 pr_warn("%s: register_netdev err=%d\n", __func__, err);
2440 device_unregister(&np->xbdev->dev);
2441 return err;
2442 }
2443 }
2444
2445 rtnl_lock();
2446 netdev_update_features(dev);
2447 rtnl_unlock();
2448
2449 /*
2450 * All public and private state should now be sane.  Get
2451 * ready to start sending and receiving packets and give the driver
2452 * domain a kick because we've probably just requeued some
2453 * packets.
2454 */
2455 netif_tx_lock_bh(np->netdev);
2456 netif_device_attach(np->netdev);
2457 netif_tx_unlock_bh(np->netdev);
2458
2459 netif_carrier_on(np->netdev);
2460 for (j = 0; j < num_queues; ++j) {
2461 queue = &np->queues[j];
2462
2463 notify_remote_via_irq(queue->tx_irq);
2464 if (queue->tx_irq != queue->rx_irq)
2465 notify_remote_via_irq(queue->rx_irq);
2466
2467 spin_lock_bh(&queue->rx_lock);
2468 xennet_alloc_rx_buffers(queue);
2469 spin_unlock_bh(&queue->rx_lock);
2470 }
2471
2472 return 0;
2473 }
2474
2475 /**
2476 * Callback received when the backend's state changes.
2477 */
2478 static void netback_changed(struct xenbus_device *dev,
2479 enum xenbus_state backend_state)
2480 {
2481 struct netfront_info *np = dev_get_drvdata(&dev->dev);
2482 struct net_device *netdev = np->netdev;
2483
2484 dev_dbg(&dev->dev, "%s\n", xenbus_strstate(backend_state));
2485
2486 wake_up_all(&module_wq);
2487
2488 switch (backend_state) {
2489 case XenbusStateInitialising:
2490 case XenbusStateInitialised:
2491 case XenbusStateReconfiguring:
2492 case XenbusStateReconfigured:
2493 case XenbusStateUnknown:
2494 break;
2495
2496 case XenbusStateInitWait:
2497 if (dev->state != XenbusStateInitialising)
2498 break;
2499 if (xennet_connect(netdev) != 0)
2500 break;
2501 xenbus_switch_state(dev, XenbusStateConnected);
2502 break;
2503
2504 case XenbusStateConnected:
2505 netdev_notify_peers(netdev);
2506 break;
2507
2508 case XenbusStateClosed:
2509 if (dev->state == XenbusStateClosed)
2510 break;
2511 fallthrough;
2512 case XenbusStateClosing:
2513 xenbus_frontend_closed(dev);
2514 break;
2515 }
2516 }
2517
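/*
 * Extra ethtool statistics.  Each entry records the name reported by
 * "ethtool -S" and the byte offset of an atomic_t counter inside
 * struct netfront_info; xennet_get_ethtool_stats() reads the counters
 * through those offsets.
 */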
2518 static const struct xennet_stat {
2519 char name[ETH_GSTRING_LEN];
2520 u16 offset;
2521 } xennet_stats[] = {
2522 {
2523 "rx_gso_checksum_fixup",
2524 offsetof(struct netfront_info, rx_gso_checksum_fixup)
2525 },
2526 };
2527
2528 static int xennet_get_sset_count(struct net_device *dev, int string_set)
2529 {
2530 switch (string_set) {
2531 case ETH_SS_STATS:
2532 return ARRAY_SIZE(xennet_stats);
2533 default:
2534 return -EINVAL;
2535 }
2536 }
2537
2538 static void xennet_get_ethtool_stats(struct net_device *dev,
2539 struct ethtool_stats *stats, u64 * data)
2540 {
2541 void *np = netdev_priv(dev);
2542 int i;
2543
2544 for (i = 0; i < ARRAY_SIZE(xennet_stats); i++)
2545 data[i] = atomic_read((atomic_t *)(np + xennet_stats[i].offset));
2546 }
2547
2548 static void xennet_get_strings(struct net_device *dev, u32 stringset, u8 * data)
2549 {
2550 int i;
2551
2552 switch (stringset) {
2553 case ETH_SS_STATS:
2554 for (i = 0; i < ARRAY_SIZE(xennet_stats); i++)
2555 memcpy(data + i * ETH_GSTRING_LEN,
2556 xennet_stats[i].name, ETH_GSTRING_LEN);
2557 break;
2558 }
2559 }
2560
2561 static const struct ethtool_ops xennet_ethtool_ops =
2562 {
2563 .get_link = ethtool_op_get_link,
2564
2565 .get_sset_count = xennet_get_sset_count,
2566 .get_ethtool_stats = xennet_get_ethtool_stats,
2567 .get_strings = xennet_get_strings,
2568 .get_ts_info = ethtool_op_get_ts_info,
2569 };
2570
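/*
 * Legacy sysfs attributes: rxbuf_min, rxbuf_max and rxbuf_cur all report
 * the fixed RX ring size, and writes are validated but otherwise
 * ignored, since the ring size is no longer tunable.
 */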
2571 #ifdef CONFIG_SYSFS
2572 static ssize_t show_rxbuf(struct device *dev,
2573 struct device_attribute *attr, char *buf)
2574 {
2575 return sprintf(buf, "%lu\n", NET_RX_RING_SIZE);
2576 }
2577
2578 static ssize_t store_rxbuf(struct device *dev,
2579 struct device_attribute *attr,
2580 const char *buf, size_t len)
2581 {
2582 char *endp;
2583
2584 if (!capable(CAP_NET_ADMIN))
2585 return -EPERM;
2586
2587 simple_strtoul(buf, &endp, 0);
2588 if (endp == buf)
2589 return -EBADMSG;
2590
2591 /* rxbuf_min and rxbuf_max are no longer configurable. */
2592
2593 return len;
2594 }
2595
2596 static DEVICE_ATTR(rxbuf_min, 0644, show_rxbuf, store_rxbuf);
2597 static DEVICE_ATTR(rxbuf_max, 0644, show_rxbuf, store_rxbuf);
2598 static DEVICE_ATTR(rxbuf_cur, 0444, show_rxbuf, NULL);
2599
2600 static struct attribute *xennet_dev_attrs[] = {
2601 &dev_attr_rxbuf_min.attr,
2602 &dev_attr_rxbuf_max.attr,
2603 &dev_attr_rxbuf_cur.attr,
2604 NULL
2605 };
2606
2607 static const struct attribute_group xennet_dev_group = {
2608 .attrs = xennet_dev_attrs
2609 };
2610 #endif
2611
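/*
 * Tear down the xenbus connection: request XenbusStateClosing and wait,
 * retrying every XENNET_TIMEOUT, until the backend reaches Closing,
 * Closed or Unknown, then repeat the handshake for XenbusStateClosed.
 */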
2612 static void xennet_bus_close(struct xenbus_device *dev)
2613 {
2614 int ret;
2615
2616 if (xenbus_read_driver_state(dev->otherend) == XenbusStateClosed)
2617 return;
2618 do {
2619 xenbus_switch_state(dev, XenbusStateClosing);
2620 ret = wait_event_timeout(module_wq,
2621 xenbus_read_driver_state(dev->otherend) ==
2622 XenbusStateClosing ||
2623 xenbus_read_driver_state(dev->otherend) ==
2624 XenbusStateClosed ||
2625 xenbus_read_driver_state(dev->otherend) ==
2626 XenbusStateUnknown,
2627 XENNET_TIMEOUT);
2628 } while (!ret);
2629
2630 if (xenbus_read_driver_state(dev->otherend) == XenbusStateClosed)
2631 return;
2632
2633 do {
2634 xenbus_switch_state(dev, XenbusStateClosed);
2635 ret = wait_event_timeout(module_wq,
2636 xenbus_read_driver_state(dev->otherend) ==
2637 XenbusStateClosed ||
2638 xenbus_read_driver_state(dev->otherend) ==
2639 XenbusStateUnknown,
2640 XENNET_TIMEOUT);
2641 } while (!ret);
2642 }
2643
2644 static int xennet_remove(struct xenbus_device *dev)
2645 {
2646 struct netfront_info *info = dev_get_drvdata(&dev->dev);
2647
2648 xennet_bus_close(dev);
2649 xennet_disconnect_backend(info);
2650
2651 if (info->netdev->reg_state == NETREG_REGISTERED)
2652 unregister_netdev(info->netdev);
2653
2654 if (info->queues) {
2655 rtnl_lock();
2656 xennet_destroy_queues(info);
2657 rtnl_unlock();
2658 }
2659 xennet_free_netdev(info->netdev);
2660
2661 return 0;
2662 }
2663
2664 static const struct xenbus_device_id netfront_ids[] = {
2665 { "vif" },
2666 { "" }
2667 };
2668
2669 static struct xenbus_driver netfront_driver = {
2670 .ids = netfront_ids,
2671 .probe = netfront_probe,
2672 .remove = xennet_remove,
2673 .resume = netfront_resume,
2674 .otherend_changed = netback_changed,
2675 };
2676
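/*
 * Module init: bail out unless running in a Xen domain that exposes
 * paravirtual NIC devices, default xennet_max_queues to
 * min(num_online_cpus(), MAX_QUEUES_DEFAULT) when unset, and register
 * the xenbus frontend driver.
 */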
2677 static int __init netif_init(void)
2678 {
2679 if (!xen_domain())
2680 return -ENODEV;
2681
2682 if (!xen_has_pv_nic_devices())
2683 return -ENODEV;
2684
2685 pr_info("Initialising Xen virtual ethernet driver\n");
2686
2687 /* Allow as many queues as there are CPUs but max. 8 if user has not
2688 * specified a value.
2689 */
2690 if (xennet_max_queues == 0)
2691 xennet_max_queues = min_t(unsigned int, MAX_QUEUES_DEFAULT,
2692 num_online_cpus());
2693
2694 return xenbus_register_frontend(&netfront_driver);
2695 }
2696 module_init(netif_init);
2697
2698
2699 static void __exit netif_exit(void)
2700 {
2701 xenbus_unregister_driver(&netfront_driver);
2702 }
2703 module_exit(netif_exit);
2704
2705 MODULE_DESCRIPTION("Xen virtual network device frontend");
2706 MODULE_LICENSE("GPL");
2707 MODULE_ALIAS("xen:vif");
2708 MODULE_ALIAS("xennet");