0001 // SPDX-License-Identifier: GPL-2.0
0002 /* sunvnet.c: Sun LDOM Virtual Network Driver.
0003  *
0004  * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
0005  * Copyright (C) 2016-2017 Oracle. All rights reserved.
0006  */
0007 
0008 #include <linux/module.h>
0009 #include <linux/kernel.h>
0010 #include <linux/types.h>
0011 #include <linux/slab.h>
0012 #include <linux/delay.h>
0013 #include <linux/init.h>
0014 #include <linux/netdevice.h>
0015 #include <linux/ethtool.h>
0016 #include <linux/etherdevice.h>
0017 #include <linux/mutex.h>
0018 #include <linux/highmem.h>
0019 #include <linux/if_vlan.h>
0020 #define CREATE_TRACE_POINTS
0021 #include <trace/events/sunvnet.h>
0022 
0023 #if IS_ENABLED(CONFIG_IPV6)
0024 #include <linux/icmpv6.h>
0025 #endif
0026 
0027 #include <net/ip.h>
0028 #include <net/icmp.h>
0029 #include <net/route.h>
0030 
0031 #include <asm/vio.h>
0032 #include <asm/ldc.h>
0033 
0034 #include "sunvnet_common.h"
0035 
0036 /* Heuristic for the number of times to exponentially back off and
0037  * retry sending an LDC trigger when EAGAIN is encountered
0038  */
0039 #define VNET_MAX_RETRIES    10
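
/* Illustrative sketch only, not part of the driver: the exponential-backoff
 * send loop that vnet_send_ack() and __vnet_tx_trigger() below open-code.
 * The delay starts at 1us, doubles after every -EAGAIN from vio_ldc_send(),
 * is capped at 128us, and the attempt is given up once VNET_MAX_RETRIES
 * retries have been spent.
 */
static inline int vnet_ldc_send_backoff(struct vio_driver_state *vio,
                                        void *pkt, int len)
{
    int err, delay = 1, retries = 0;

    do {
        err = vio_ldc_send(vio, pkt, len);
        if (err > 0)        /* message queued to the LDC */
            break;
        udelay(delay);
        if ((delay <<= 1) > 128)
            delay = 128;
    } while (err == -EAGAIN && retries++ <= VNET_MAX_RETRIES);

    return err;
}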
0040 
0041 MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
0042 MODULE_DESCRIPTION("Sun LDOM virtual network support library");
0043 MODULE_LICENSE("GPL");
0044 MODULE_VERSION("1.1");
0045 
0046 static int __vnet_tx_trigger(struct vnet_port *port, u32 start);
0047 
0048 static inline u32 vnet_tx_dring_avail(struct vio_dring_state *dr)
0049 {
0050     return vio_dring_avail(dr, VNET_TX_RING_SIZE);
0051 }
0052 
0053 static int vnet_handle_unknown(struct vnet_port *port, void *arg)
0054 {
0055     struct vio_msg_tag *pkt = arg;
0056 
0057     pr_err("Received unknown msg [%02x:%02x:%04x:%08x]\n",
0058            pkt->type, pkt->stype, pkt->stype_env, pkt->sid);
0059     pr_err("Resetting connection\n");
0060 
0061     ldc_disconnect(port->vio.lp);
0062 
0063     return -ECONNRESET;
0064 }
0065 
0066 static int vnet_port_alloc_tx_ring(struct vnet_port *port);
0067 
0068 int sunvnet_send_attr_common(struct vio_driver_state *vio)
0069 {
0070     struct vnet_port *port = to_vnet_port(vio);
0071     struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
0072     struct vio_net_attr_info pkt;
0073     int framelen = ETH_FRAME_LEN;
0074     int i, err;
0075 
0076     err = vnet_port_alloc_tx_ring(to_vnet_port(vio));
0077     if (err)
0078         return err;
0079 
0080     memset(&pkt, 0, sizeof(pkt));
0081     pkt.tag.type = VIO_TYPE_CTRL;
0082     pkt.tag.stype = VIO_SUBTYPE_INFO;
0083     pkt.tag.stype_env = VIO_ATTR_INFO;
0084     pkt.tag.sid = vio_send_sid(vio);
0085     if (vio_version_before(vio, 1, 2))
0086         pkt.xfer_mode = VIO_DRING_MODE;
0087     else
0088         pkt.xfer_mode = VIO_NEW_DRING_MODE;
0089     pkt.addr_type = VNET_ADDR_ETHERMAC;
0090     pkt.ack_freq = 0;
0091     for (i = 0; i < 6; i++)
0092         pkt.addr |= (u64)dev->dev_addr[i] << ((5 - i) * 8);
0093     if (vio_version_after(vio, 1, 3)) {
0094         if (port->rmtu) {
0095             port->rmtu = min(VNET_MAXPACKET, port->rmtu);
0096             pkt.mtu = port->rmtu;
0097         } else {
0098             port->rmtu = VNET_MAXPACKET;
0099             pkt.mtu = port->rmtu;
0100         }
0101         if (vio_version_after_eq(vio, 1, 6))
0102             pkt.options = VIO_TX_DRING;
0103     } else if (vio_version_before(vio, 1, 3)) {
0104         pkt.mtu = framelen;
0105     } else { /* v1.3 */
0106         pkt.mtu = framelen + VLAN_HLEN;
0107     }
0108 
0109     pkt.cflags = 0;
0110     if (vio_version_after_eq(vio, 1, 7) && port->tso) {
0111         pkt.cflags |= VNET_LSO_IPV4_CAPAB;
0112         if (!port->tsolen)
0113             port->tsolen = VNET_MAXTSO;
0114         pkt.ipv4_lso_maxlen = port->tsolen;
0115     }
0116 
0117     pkt.plnk_updt = PHYSLINK_UPDATE_NONE;
0118 
0119     viodbg(HS, "SEND NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] "
0120            "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] "
0121            "cflags[0x%04x] lso_max[%u]\n",
0122            pkt.xfer_mode, pkt.addr_type,
0123            (unsigned long long)pkt.addr,
0124            pkt.ack_freq, pkt.plnk_updt, pkt.options,
0125            (unsigned long long)pkt.mtu, pkt.cflags, pkt.ipv4_lso_maxlen);
0126 
0127     return vio_ldc_send(vio, &pkt, sizeof(pkt));
0128 }
0129 EXPORT_SYMBOL_GPL(sunvnet_send_attr_common);
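
/* Illustrative sketch only, not part of the driver: the MAC packing done by
 * the loop in sunvnet_send_attr_common() above.  The six bytes of dev_addr
 * are packed big-endian into the low 48 bits of the attr addr field, so a
 * MAC address of 00:14:4f:fa:00:01 becomes 0x00144ffa0001.
 */
static inline u64 vnet_mac_to_attr_addr(const u8 *mac)
{
    u64 addr = 0;
    int i;

    for (i = 0; i < ETH_ALEN; i++)
        addr |= (u64)mac[i] << ((ETH_ALEN - 1 - i) * 8);
    return addr;
}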
0130 
0131 static int handle_attr_info(struct vio_driver_state *vio,
0132                 struct vio_net_attr_info *pkt)
0133 {
0134     struct vnet_port *port = to_vnet_port(vio);
0135     u64 localmtu;
0136     u8  xfer_mode;
0137 
0138     viodbg(HS, "GOT NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] "
0139            "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] "
0140            " (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n",
0141            pkt->xfer_mode, pkt->addr_type,
0142            (unsigned long long)pkt->addr,
0143            pkt->ack_freq, pkt->plnk_updt, pkt->options,
0144            (unsigned long long)pkt->mtu, port->rmtu, pkt->cflags,
0145            pkt->ipv4_lso_maxlen);
0146 
0147     pkt->tag.sid = vio_send_sid(vio);
0148 
0149     xfer_mode = pkt->xfer_mode;
0150     /* for version < 1.2, VIO_DRING_MODE = 0x3 and no bitmask */
0151     if (vio_version_before(vio, 1, 2) && xfer_mode == VIO_DRING_MODE)
0152         xfer_mode = VIO_NEW_DRING_MODE;
0153 
0154     /* MTU negotiation:
0155      *  < v1.3 - ETH_FRAME_LEN exactly
0156      *  > v1.3 - MIN(pkt.mtu, VNET_MAXPACKET, port->rmtu) and change
0157      *          pkt->mtu for ACK
0158      *  = v1.3 - ETH_FRAME_LEN + VLAN_HLEN exactly
0159      */
0160     if (vio_version_before(vio, 1, 3)) {
0161         localmtu = ETH_FRAME_LEN;
0162     } else if (vio_version_after(vio, 1, 3)) {
0163         localmtu = port->rmtu ? port->rmtu : VNET_MAXPACKET;
0164         localmtu = min(pkt->mtu, localmtu);
0165         pkt->mtu = localmtu;
0166     } else { /* v1.3 */
0167         localmtu = ETH_FRAME_LEN + VLAN_HLEN;
0168     }
0169     port->rmtu = localmtu;
0170 
0171     /* LSO negotiation */
0172     if (vio_version_after_eq(vio, 1, 7))
0173         port->tso &= !!(pkt->cflags & VNET_LSO_IPV4_CAPAB);
0174     else
0175         port->tso = false;
0176     if (port->tso) {
0177         if (!port->tsolen)
0178             port->tsolen = VNET_MAXTSO;
0179         port->tsolen = min(port->tsolen, pkt->ipv4_lso_maxlen);
0180         if (port->tsolen < VNET_MINTSO) {
0181             port->tso = false;
0182             port->tsolen = 0;
0183             pkt->cflags &= ~VNET_LSO_IPV4_CAPAB;
0184         }
0185         pkt->ipv4_lso_maxlen = port->tsolen;
0186     } else {
0187         pkt->cflags &= ~VNET_LSO_IPV4_CAPAB;
0188         pkt->ipv4_lso_maxlen = 0;
0189         port->tsolen = 0;
0190     }
0191 
0192     /* for version >= 1.6, ACK with the xfer mode and dring option we support */
0193     if (vio_version_after_eq(vio, 1, 6)) {
0194         pkt->xfer_mode = VIO_NEW_DRING_MODE;
0195         pkt->options = VIO_TX_DRING;
0196     }
0197 
0198     if (!(xfer_mode & VIO_NEW_DRING_MODE) ||
0199         pkt->addr_type != VNET_ADDR_ETHERMAC ||
0200         pkt->mtu != localmtu) {
0201         viodbg(HS, "SEND NET ATTR NACK\n");
0202 
0203         pkt->tag.stype = VIO_SUBTYPE_NACK;
0204 
0205         (void)vio_ldc_send(vio, pkt, sizeof(*pkt));
0206 
0207         return -ECONNRESET;
0208     }
0209 
0210     viodbg(HS, "SEND NET ATTR ACK xmode[0x%x] atype[0x%x] "
0211            "addr[%llx] ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] "
0212            "mtu[%llu] (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n",
0213            pkt->xfer_mode, pkt->addr_type,
0214            (unsigned long long)pkt->addr,
0215            pkt->ack_freq, pkt->plnk_updt, pkt->options,
0216            (unsigned long long)pkt->mtu, port->rmtu, pkt->cflags,
0217            pkt->ipv4_lso_maxlen);
0218 
0219     pkt->tag.stype = VIO_SUBTYPE_ACK;
0220 
0221     return vio_ldc_send(vio, pkt, sizeof(*pkt));
0222 }
0223 
0224 static int handle_attr_ack(struct vio_driver_state *vio,
0225                struct vio_net_attr_info *pkt)
0226 {
0227     viodbg(HS, "GOT NET ATTR ACK\n");
0228 
0229     return 0;
0230 }
0231 
0232 static int handle_attr_nack(struct vio_driver_state *vio,
0233                 struct vio_net_attr_info *pkt)
0234 {
0235     viodbg(HS, "GOT NET ATTR NACK\n");
0236 
0237     return -ECONNRESET;
0238 }
0239 
0240 int sunvnet_handle_attr_common(struct vio_driver_state *vio, void *arg)
0241 {
0242     struct vio_net_attr_info *pkt = arg;
0243 
0244     switch (pkt->tag.stype) {
0245     case VIO_SUBTYPE_INFO:
0246         return handle_attr_info(vio, pkt);
0247 
0248     case VIO_SUBTYPE_ACK:
0249         return handle_attr_ack(vio, pkt);
0250 
0251     case VIO_SUBTYPE_NACK:
0252         return handle_attr_nack(vio, pkt);
0253 
0254     default:
0255         return -ECONNRESET;
0256     }
0257 }
0258 EXPORT_SYMBOL_GPL(sunvnet_handle_attr_common);
0259 
0260 void sunvnet_handshake_complete_common(struct vio_driver_state *vio)
0261 {
0262     struct vio_dring_state *dr;
0263 
0264     dr = &vio->drings[VIO_DRIVER_RX_RING];
0265     dr->rcv_nxt = 1;
0266     dr->snd_nxt = 1;
0267 
0268     dr = &vio->drings[VIO_DRIVER_TX_RING];
0269     dr->rcv_nxt = 1;
0270     dr->snd_nxt = 1;
0271 }
0272 EXPORT_SYMBOL_GPL(sunvnet_handshake_complete_common);
0273 
0274 /* The hypervisor interface that implements copying to/from imported
0275  * memory from another domain requires that copies are done to 8-byte
0276  * aligned buffers, and that the lengths of such copies are also 8-byte
0277  * multiples.
0278  *
0279  * So we align skb->data to an 8-byte multiple and pad-out the data
0280  * area so we can round the copy length up to the next multiple of
0281  * 8 for the copy.
0282  *
0283  * The transmitter puts the actual start of the packet 6 bytes into
0284  * the buffer it sends over, so that the IP headers after the ethernet
0285  * header are aligned properly.  These 6 bytes are not in the descriptor
0286  * length, they are simply implied.  This offset is represented using
0287  * the VNET_PACKET_SKIP macro.
0288  */
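
/* Illustrative sketch only, not part of the driver: the copy-length rounding
 * used by vnet_rx_one() below.  A 60-byte frame, for example, is copied in as
 * (60 + VNET_PACKET_SKIP + 7) & ~7 = 72 bytes (the 6-byte skip plus the
 * frame, rounded up to the next 8-byte multiple); the skb is then pulled by
 * VNET_PACKET_SKIP and trimmed back to the 60-byte frame length.
 */
static inline unsigned int vnet_rx_copy_len(unsigned int len)
{
    return (len + VNET_PACKET_SKIP + 7U) & ~7U;
}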
0289 static struct sk_buff *alloc_and_align_skb(struct net_device *dev,
0290                        unsigned int len)
0291 {
0292     struct sk_buff *skb;
0293     unsigned long addr, off;
0294 
0295     skb = netdev_alloc_skb(dev, len + VNET_PACKET_SKIP + 8 + 8);
0296     if (unlikely(!skb))
0297         return NULL;
0298 
0299     addr = (unsigned long)skb->data;
0300     off = ((addr + 7UL) & ~7UL) - addr;
0301     if (off)
0302         skb_reserve(skb, off);
0303 
0304     return skb;
0305 }
0306 
0307 static inline void vnet_fullcsum_ipv4(struct sk_buff *skb)
0308 {
0309     struct iphdr *iph = ip_hdr(skb);
0310     int offset = skb_transport_offset(skb);
0311 
0312     if (skb->protocol != htons(ETH_P_IP))
0313         return;
0314     if (iph->protocol != IPPROTO_TCP &&
0315         iph->protocol != IPPROTO_UDP)
0316         return;
0317     skb->ip_summed = CHECKSUM_NONE;
0318     skb->csum_level = 1;
0319     skb->csum = 0;
0320     if (iph->protocol == IPPROTO_TCP) {
0321         struct tcphdr *ptcp = tcp_hdr(skb);
0322 
0323         ptcp->check = 0;
0324         skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
0325         ptcp->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
0326                         skb->len - offset, IPPROTO_TCP,
0327                         skb->csum);
0328     } else if (iph->protocol == IPPROTO_UDP) {
0329         struct udphdr *pudp = udp_hdr(skb);
0330 
0331         pudp->check = 0;
0332         skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
0333         pudp->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
0334                         skb->len - offset, IPPROTO_UDP,
0335                         skb->csum);
0336     }
0337 }
0338 
0339 #if IS_ENABLED(CONFIG_IPV6)
0340 static inline void vnet_fullcsum_ipv6(struct sk_buff *skb)
0341 {
0342     struct ipv6hdr *ip6h = ipv6_hdr(skb);
0343     int offset = skb_transport_offset(skb);
0344 
0345     if (skb->protocol != htons(ETH_P_IPV6))
0346         return;
0347     if (ip6h->nexthdr != IPPROTO_TCP &&
0348         ip6h->nexthdr != IPPROTO_UDP)
0349         return;
0350     skb->ip_summed = CHECKSUM_NONE;
0351     skb->csum_level = 1;
0352     skb->csum = 0;
0353     if (ip6h->nexthdr == IPPROTO_TCP) {
0354         struct tcphdr *ptcp = tcp_hdr(skb);
0355 
0356         ptcp->check = 0;
0357         skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
0358         ptcp->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
0359                           skb->len - offset, IPPROTO_TCP,
0360                           skb->csum);
0361     } else if (ip6h->nexthdr == IPPROTO_UDP) {
0362         struct udphdr *pudp = udp_hdr(skb);
0363 
0364         pudp->check = 0;
0365         skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
0366         pudp->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
0367                           skb->len - offset, IPPROTO_UDP,
0368                           skb->csum);
0369     }
0370 }
0371 #endif
0372 
0373 static int vnet_rx_one(struct vnet_port *port, struct vio_net_desc *desc)
0374 {
0375     struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
0376     unsigned int len = desc->size;
0377     unsigned int copy_len;
0378     struct sk_buff *skb;
0379     int maxlen;
0380     int err;
0381 
0382     err = -EMSGSIZE;
0383     if (port->tso && port->tsolen > port->rmtu)
0384         maxlen = port->tsolen;
0385     else
0386         maxlen = port->rmtu;
0387     if (unlikely(len < ETH_ZLEN || len > maxlen)) {
0388         dev->stats.rx_length_errors++;
0389         goto out_dropped;
0390     }
0391 
0392     skb = alloc_and_align_skb(dev, len);
0393     err = -ENOMEM;
0394     if (unlikely(!skb)) {
0395         dev->stats.rx_missed_errors++;
0396         goto out_dropped;
0397     }
0398 
0399     copy_len = (len + VNET_PACKET_SKIP + 7U) & ~7U;
0400     skb_put(skb, copy_len);
0401     err = ldc_copy(port->vio.lp, LDC_COPY_IN,
0402                skb->data, copy_len, 0,
0403                desc->cookies, desc->ncookies);
0404     if (unlikely(err < 0)) {
0405         dev->stats.rx_frame_errors++;
0406         goto out_free_skb;
0407     }
0408 
0409     skb_pull(skb, VNET_PACKET_SKIP);
0410     skb_trim(skb, len);
0411     skb->protocol = eth_type_trans(skb, dev);
0412 
0413     if (vio_version_after_eq(&port->vio, 1, 8)) {
0414         struct vio_net_dext *dext = vio_net_ext(desc);
0415 
0416         skb_reset_network_header(skb);
0417 
0418         if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM) {
0419             if (skb->protocol == htons(ETH_P_IP)) {
0420                 struct iphdr *iph = ip_hdr(skb);
0421 
0422                 iph->check = 0;
0423                 ip_send_check(iph);
0424             }
0425         }
0426         if ((dext->flags & VNET_PKT_HCK_FULLCKSUM) &&
0427             skb->ip_summed == CHECKSUM_NONE) {
0428             if (skb->protocol == htons(ETH_P_IP)) {
0429                 struct iphdr *iph = ip_hdr(skb);
0430                 int ihl = iph->ihl * 4;
0431 
0432                 skb_set_transport_header(skb, ihl);
0433                 vnet_fullcsum_ipv4(skb);
0434 #if IS_ENABLED(CONFIG_IPV6)
0435             } else if (skb->protocol == htons(ETH_P_IPV6)) {
0436                 skb_set_transport_header(skb,
0437                              sizeof(struct ipv6hdr));
0438                 vnet_fullcsum_ipv6(skb);
0439 #endif
0440             }
0441         }
0442         if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM_OK) {
0443             skb->ip_summed = CHECKSUM_PARTIAL;
0444             skb->csum_level = 0;
0445             if (dext->flags & VNET_PKT_HCK_FULLCKSUM_OK)
0446                 skb->csum_level = 1;
0447         }
0448     }
0449 
0450     skb->ip_summed = port->switch_port ? CHECKSUM_NONE : CHECKSUM_PARTIAL;
0451 
0452     if (unlikely(is_multicast_ether_addr(eth_hdr(skb)->h_dest)))
0453         dev->stats.multicast++;
0454     dev->stats.rx_packets++;
0455     dev->stats.rx_bytes += len;
0456     port->stats.rx_packets++;
0457     port->stats.rx_bytes += len;
0458     napi_gro_receive(&port->napi, skb);
0459     return 0;
0460 
0461 out_free_skb:
0462     kfree_skb(skb);
0463 
0464 out_dropped:
0465     dev->stats.rx_dropped++;
0466     return err;
0467 }
0468 
0469 static int vnet_send_ack(struct vnet_port *port, struct vio_dring_state *dr,
0470              u32 start, u32 end, u8 vio_dring_state)
0471 {
0472     struct vio_dring_data hdr = {
0473         .tag = {
0474             .type       = VIO_TYPE_DATA,
0475             .stype      = VIO_SUBTYPE_ACK,
0476             .stype_env  = VIO_DRING_DATA,
0477             .sid        = vio_send_sid(&port->vio),
0478         },
0479         .dring_ident        = dr->ident,
0480         .start_idx      = start,
0481         .end_idx        = end,
0482         .state          = vio_dring_state,
0483     };
0484     int err, delay;
0485     int retries = 0;
0486 
0487     hdr.seq = dr->snd_nxt;
0488     delay = 1;
0489     do {
0490         err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
0491         if (err > 0) {
0492             dr->snd_nxt++;
0493             break;
0494         }
0495         udelay(delay);
0496         if ((delay <<= 1) > 128)
0497             delay = 128;
0498         if (retries++ > VNET_MAX_RETRIES) {
0499             pr_info("ECONNRESET %x:%x:%x:%x:%x:%x\n",
0500                 port->raddr[0], port->raddr[1],
0501                 port->raddr[2], port->raddr[3],
0502                 port->raddr[4], port->raddr[5]);
0503             break;
0504         }
0505     } while (err == -EAGAIN);
0506 
0507     if (err <= 0 && vio_dring_state == VIO_DRING_STOPPED) {
0508         port->stop_rx_idx = end;
0509         port->stop_rx = true;
0510     } else {
0511         port->stop_rx_idx = 0;
0512         port->stop_rx = false;
0513     }
0514 
0515     return err;
0516 }
0517 
0518 static struct vio_net_desc *get_rx_desc(struct vnet_port *port,
0519                     struct vio_dring_state *dr,
0520                     u32 index)
0521 {
0522     struct vio_net_desc *desc = port->vio.desc_buf;
0523     int err;
0524 
0525     err = ldc_get_dring_entry(port->vio.lp, desc, dr->entry_size,
0526                   (index * dr->entry_size),
0527                   dr->cookies, dr->ncookies);
0528     if (err < 0)
0529         return ERR_PTR(err);
0530 
0531     return desc;
0532 }
0533 
0534 static int put_rx_desc(struct vnet_port *port,
0535                struct vio_dring_state *dr,
0536                struct vio_net_desc *desc,
0537                u32 index)
0538 {
0539     int err;
0540 
0541     err = ldc_put_dring_entry(port->vio.lp, desc, dr->entry_size,
0542                   (index * dr->entry_size),
0543                   dr->cookies, dr->ncookies);
0544     if (err < 0)
0545         return err;
0546 
0547     return 0;
0548 }
0549 
0550 static int vnet_walk_rx_one(struct vnet_port *port,
0551                 struct vio_dring_state *dr,
0552                 u32 index, int *needs_ack)
0553 {
0554     struct vio_net_desc *desc = get_rx_desc(port, dr, index);
0555     struct vio_driver_state *vio = &port->vio;
0556     int err;
0557 
0558     BUG_ON(!desc);
0559     if (IS_ERR(desc))
0560         return PTR_ERR(desc);
0561 
0562     if (desc->hdr.state != VIO_DESC_READY)
0563         return 1;
0564 
0565     dma_rmb();
0566 
0567     viodbg(DATA, "vio_walk_rx_one desc[%02x:%02x:%08x:%08x:%llx:%llx]\n",
0568            desc->hdr.state, desc->hdr.ack,
0569            desc->size, desc->ncookies,
0570            desc->cookies[0].cookie_addr,
0571            desc->cookies[0].cookie_size);
0572 
0573     err = vnet_rx_one(port, desc);
0574     if (err == -ECONNRESET)
0575         return err;
0576     trace_vnet_rx_one(port->vio._local_sid, port->vio._peer_sid,
0577               index, desc->hdr.ack);
0578     desc->hdr.state = VIO_DESC_DONE;
0579     err = put_rx_desc(port, dr, desc, index);
0580     if (err < 0)
0581         return err;
0582     *needs_ack = desc->hdr.ack;
0583     return 0;
0584 }
0585 
0586 static int vnet_walk_rx(struct vnet_port *port, struct vio_dring_state *dr,
0587             u32 start, u32 end, int *npkts, int budget)
0588 {
0589     struct vio_driver_state *vio = &port->vio;
0590     int ack_start = -1, ack_end = -1;
0591     bool send_ack = true;
0592 
0593     end = (end == (u32)-1) ? vio_dring_prev(dr, start)
0594                    : vio_dring_next(dr, end);
0595 
0596     viodbg(DATA, "vnet_walk_rx start[%08x] end[%08x]\n", start, end);
0597 
0598     while (start != end) {
0599         int ack = 0, err = vnet_walk_rx_one(port, dr, start, &ack);
0600 
0601         if (err == -ECONNRESET)
0602             return err;
0603         if (err != 0)
0604             break;
0605         (*npkts)++;
0606         if (ack_start == -1)
0607             ack_start = start;
0608         ack_end = start;
0609         start = vio_dring_next(dr, start);
0610         if (ack && start != end) {
0611             err = vnet_send_ack(port, dr, ack_start, ack_end,
0612                         VIO_DRING_ACTIVE);
0613             if (err == -ECONNRESET)
0614                 return err;
0615             ack_start = -1;
0616         }
0617         if ((*npkts) >= budget) {
0618             send_ack = false;
0619             break;
0620         }
0621     }
0622     if (unlikely(ack_start == -1)) {
0623         ack_end = vio_dring_prev(dr, start);
0624         ack_start = ack_end;
0625     }
0626     if (send_ack) {
0627         port->napi_resume = false;
0628         trace_vnet_tx_send_stopped_ack(port->vio._local_sid,
0629                            port->vio._peer_sid,
0630                            ack_end, *npkts);
0631         return vnet_send_ack(port, dr, ack_start, ack_end,
0632                      VIO_DRING_STOPPED);
0633     } else  {
0634         trace_vnet_tx_defer_stopped_ack(port->vio._local_sid,
0635                         port->vio._peer_sid,
0636                         ack_end, *npkts);
0637         port->napi_resume = true;
0638         port->napi_stop_idx = ack_end;
0639         return 1;
0640     }
0641 }
0642 
0643 static int vnet_rx(struct vnet_port *port, void *msgbuf, int *npkts,
0644            int budget)
0645 {
0646     struct vio_dring_data *pkt = msgbuf;
0647     struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_RX_RING];
0648     struct vio_driver_state *vio = &port->vio;
0649 
0650     viodbg(DATA, "vnet_rx stype_env[%04x] seq[%016llx] rcv_nxt[%016llx]\n",
0651            pkt->tag.stype_env, pkt->seq, dr->rcv_nxt);
0652 
0653     if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA))
0654         return 0;
0655     if (unlikely(pkt->seq != dr->rcv_nxt)) {
0656         pr_err("RX out of sequence seq[0x%llx] rcv_nxt[0x%llx]\n",
0657                pkt->seq, dr->rcv_nxt);
0658         return 0;
0659     }
0660 
0661     if (!port->napi_resume)
0662         dr->rcv_nxt++;
0663 
0664     /* XXX Validate pkt->start_idx and pkt->end_idx XXX */
0665 
0666     return vnet_walk_rx(port, dr, pkt->start_idx, pkt->end_idx,
0667                 npkts, budget);
0668 }
0669 
0670 static int idx_is_pending(struct vio_dring_state *dr, u32 end)
0671 {
0672     u32 idx = dr->cons;
0673     int found = 0;
0674 
0675     while (idx != dr->prod) {
0676         if (idx == end) {
0677             found = 1;
0678             break;
0679         }
0680         idx = vio_dring_next(dr, idx);
0681     }
0682     return found;
0683 }
0684 
0685 static int vnet_ack(struct vnet_port *port, void *msgbuf)
0686 {
0687     struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
0688     struct vio_dring_data *pkt = msgbuf;
0689     struct net_device *dev;
0690     u32 end;
0691     struct vio_net_desc *desc;
0692     struct netdev_queue *txq;
0693 
0694     if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA))
0695         return 0;
0696 
0697     end = pkt->end_idx;
0698     dev = VNET_PORT_TO_NET_DEVICE(port);
0699     netif_tx_lock(dev);
0700     if (unlikely(!idx_is_pending(dr, end))) {
0701         netif_tx_unlock(dev);
0702         return 0;
0703     }
0704 
0705     /* sync for race conditions with vnet_start_xmit() and tell xmit it
0706      * is time to send a trigger.
0707      */
0708     trace_vnet_rx_stopped_ack(port->vio._local_sid,
0709                   port->vio._peer_sid, end);
0710     dr->cons = vio_dring_next(dr, end);
0711     desc = vio_dring_entry(dr, dr->cons);
0712     if (desc->hdr.state == VIO_DESC_READY && !port->start_cons) {
0713         /* vnet_start_xmit() just populated this dring but missed
0714          * sending the "start" LDC message to the consumer.
0715          * Send a "start" trigger on its behalf.
0716          */
0717         if (__vnet_tx_trigger(port, dr->cons) > 0)
0718             port->start_cons = false;
0719         else
0720             port->start_cons = true;
0721     } else {
0722         port->start_cons = true;
0723     }
0724     netif_tx_unlock(dev);
0725 
0726     txq = netdev_get_tx_queue(dev, port->q_index);
0727     if (unlikely(netif_tx_queue_stopped(txq) &&
0728              vnet_tx_dring_avail(dr) >= VNET_TX_WAKEUP_THRESH(dr)))
0729         return 1;
0730 
0731     return 0;
0732 }
0733 
0734 static int vnet_nack(struct vnet_port *port, void *msgbuf)
0735 {
0736     /* XXX just reset or similar XXX */
0737     return 0;
0738 }
0739 
0740 static int handle_mcast(struct vnet_port *port, void *msgbuf)
0741 {
0742     struct vio_net_mcast_info *pkt = msgbuf;
0743     struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
0744 
0745     if (pkt->tag.stype != VIO_SUBTYPE_ACK)
0746         pr_err("%s: Got unexpected MCAST reply [%02x:%02x:%04x:%08x]\n",
0747                dev->name,
0748                pkt->tag.type,
0749                pkt->tag.stype,
0750                pkt->tag.stype_env,
0751                pkt->tag.sid);
0752 
0753     return 0;
0754 }
0755 
0756 /* If the queue is stopped, wake it up so that we'll
0757  * send out another START message at the next TX.
0758  */
0759 static void maybe_tx_wakeup(struct vnet_port *port)
0760 {
0761     struct netdev_queue *txq;
0762 
0763     txq = netdev_get_tx_queue(VNET_PORT_TO_NET_DEVICE(port),
0764                   port->q_index);
0765     __netif_tx_lock(txq, smp_processor_id());
0766     if (likely(netif_tx_queue_stopped(txq)))
0767         netif_tx_wake_queue(txq);
0768     __netif_tx_unlock(txq);
0769 }
0770 
0771 bool sunvnet_port_is_up_common(struct vnet_port *vnet)
0772 {
0773     struct vio_driver_state *vio = &vnet->vio;
0774 
0775     return !!(vio->hs_state & VIO_HS_COMPLETE);
0776 }
0777 EXPORT_SYMBOL_GPL(sunvnet_port_is_up_common);
0778 
0779 static int vnet_event_napi(struct vnet_port *port, int budget)
0780 {
0781     struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
0782     struct vio_driver_state *vio = &port->vio;
0783     int tx_wakeup, err;
0784     int npkts = 0;
0785 
0786     /* we don't expect any other bits */
0787     BUG_ON(port->rx_event & ~(LDC_EVENT_DATA_READY |
0788                   LDC_EVENT_RESET |
0789                   LDC_EVENT_UP));
0790 
0791     /* RESET takes precedence over any other event */
0792     if (port->rx_event & LDC_EVENT_RESET) {
0793         /* a link went down */
0794 
0795         if (port->vsw == 1) {
0796             netif_tx_stop_all_queues(dev);
0797             netif_carrier_off(dev);
0798         }
0799 
0800         vio_link_state_change(vio, LDC_EVENT_RESET);
0801         vnet_port_reset(port);
0802         vio_port_up(vio);
0803 
0804         /* If the device is running but its tx queue was
0805          * stopped (due to flow control), restart it.
0806          * This is necessary since vnet_port_reset()
0807          * clears the tx drings and thus we may never get
0808          * back a VIO_TYPE_DATA ACK packet - which is
0809          * the normal mechanism to restart the tx queue.
0810          */
0811         if (netif_running(dev))
0812             maybe_tx_wakeup(port);
0813 
0814         port->rx_event = 0;
0815         port->stats.event_reset++;
0816         return 0;
0817     }
0818 
0819     if (port->rx_event & LDC_EVENT_UP) {
0820         /* a link came up */
0821 
0822         if (port->vsw == 1) {
0823             netif_carrier_on(port->dev);
0824             netif_tx_start_all_queues(port->dev);
0825         }
0826 
0827         vio_link_state_change(vio, LDC_EVENT_UP);
0828         port->rx_event = 0;
0829         port->stats.event_up++;
0830         return 0;
0831     }
0832 
0833     err = 0;
0834     tx_wakeup = 0;
0835     while (1) {
0836         union {
0837             struct vio_msg_tag tag;
0838             u64 raw[8];
0839         } msgbuf;
0840 
0841         if (port->napi_resume) {
0842             struct vio_dring_data *pkt =
0843                 (struct vio_dring_data *)&msgbuf;
0844             struct vio_dring_state *dr =
0845                 &port->vio.drings[VIO_DRIVER_RX_RING];
0846 
0847             pkt->tag.type = VIO_TYPE_DATA;
0848             pkt->tag.stype = VIO_SUBTYPE_INFO;
0849             pkt->tag.stype_env = VIO_DRING_DATA;
0850             pkt->seq = dr->rcv_nxt;
0851             pkt->start_idx = vio_dring_next(dr,
0852                             port->napi_stop_idx);
0853             pkt->end_idx = -1;
0854         } else {
0855             err = ldc_read(vio->lp, &msgbuf, sizeof(msgbuf));
0856             if (unlikely(err < 0)) {
0857                 if (err == -ECONNRESET)
0858                     vio_conn_reset(vio);
0859                 break;
0860             }
0861             if (err == 0)
0862                 break;
0863             viodbg(DATA, "TAG [%02x:%02x:%04x:%08x]\n",
0864                    msgbuf.tag.type,
0865                    msgbuf.tag.stype,
0866                    msgbuf.tag.stype_env,
0867                    msgbuf.tag.sid);
0868             err = vio_validate_sid(vio, &msgbuf.tag);
0869             if (err < 0)
0870                 break;
0871         }
0872 
0873         if (likely(msgbuf.tag.type == VIO_TYPE_DATA)) {
0874             if (msgbuf.tag.stype == VIO_SUBTYPE_INFO) {
0875                 if (!sunvnet_port_is_up_common(port)) {
0876                     /* failures like handshake_failure()
0877                      * may have cleaned up dring, but
0878                      * NAPI polling may bring us here.
0879                      */
0880                     err = -ECONNRESET;
0881                     break;
0882                 }
0883                 err = vnet_rx(port, &msgbuf, &npkts, budget);
0884                 if (npkts >= budget)
0885                     break;
0886                 if (npkts == 0)
0887                     break;
0888             } else if (msgbuf.tag.stype == VIO_SUBTYPE_ACK) {
0889                 err = vnet_ack(port, &msgbuf);
0890                 if (err > 0)
0891                     tx_wakeup |= err;
0892             } else if (msgbuf.tag.stype == VIO_SUBTYPE_NACK) {
0893                 err = vnet_nack(port, &msgbuf);
0894             }
0895         } else if (msgbuf.tag.type == VIO_TYPE_CTRL) {
0896             if (msgbuf.tag.stype_env == VNET_MCAST_INFO)
0897                 err = handle_mcast(port, &msgbuf);
0898             else
0899                 err = vio_control_pkt_engine(vio, &msgbuf);
0900             if (err)
0901                 break;
0902         } else {
0903             err = vnet_handle_unknown(port, &msgbuf);
0904         }
0905         if (err == -ECONNRESET)
0906             break;
0907     }
0908     if (unlikely(tx_wakeup && err != -ECONNRESET))
0909         maybe_tx_wakeup(port);
0910     return npkts;
0911 }
0912 
0913 int sunvnet_poll_common(struct napi_struct *napi, int budget)
0914 {
0915     struct vnet_port *port = container_of(napi, struct vnet_port, napi);
0916     struct vio_driver_state *vio = &port->vio;
0917     int processed = vnet_event_napi(port, budget);
0918 
0919     if (processed < budget) {
0920         napi_complete_done(napi, processed);
0921         port->rx_event &= ~LDC_EVENT_DATA_READY;
0922         vio_set_intr(vio->vdev->rx_ino, HV_INTR_ENABLED);
0923     }
0924     return processed;
0925 }
0926 EXPORT_SYMBOL_GPL(sunvnet_poll_common);
0927 
0928 void sunvnet_event_common(void *arg, int event)
0929 {
0930     struct vnet_port *port = arg;
0931     struct vio_driver_state *vio = &port->vio;
0932 
0933     port->rx_event |= event;
0934     vio_set_intr(vio->vdev->rx_ino, HV_INTR_DISABLED);
0935     napi_schedule(&port->napi);
0936 }
0937 EXPORT_SYMBOL_GPL(sunvnet_event_common);
0938 
0939 static int __vnet_tx_trigger(struct vnet_port *port, u32 start)
0940 {
0941     struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
0942     struct vio_dring_data hdr = {
0943         .tag = {
0944             .type       = VIO_TYPE_DATA,
0945             .stype      = VIO_SUBTYPE_INFO,
0946             .stype_env  = VIO_DRING_DATA,
0947             .sid        = vio_send_sid(&port->vio),
0948         },
0949         .dring_ident        = dr->ident,
0950         .start_idx      = start,
0951         .end_idx        = (u32)-1,
0952     };
0953     int err, delay;
0954     int retries = 0;
0955 
0956     if (port->stop_rx) {
0957         trace_vnet_tx_pending_stopped_ack(port->vio._local_sid,
0958                           port->vio._peer_sid,
0959                           port->stop_rx_idx, -1);
0960         err = vnet_send_ack(port,
0961                     &port->vio.drings[VIO_DRIVER_RX_RING],
0962                     port->stop_rx_idx, -1,
0963                     VIO_DRING_STOPPED);
0964         if (err <= 0)
0965             return err;
0966     }
0967 
0968     hdr.seq = dr->snd_nxt;
0969     delay = 1;
0970     do {
0971         err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
0972         if (err > 0) {
0973             dr->snd_nxt++;
0974             break;
0975         }
0976         udelay(delay);
0977         if ((delay <<= 1) > 128)
0978             delay = 128;
0979         if (retries++ > VNET_MAX_RETRIES)
0980             break;
0981     } while (err == -EAGAIN);
0982     trace_vnet_tx_trigger(port->vio._local_sid,
0983                   port->vio._peer_sid, start, err);
0984 
0985     return err;
0986 }
0987 
0988 static struct sk_buff *vnet_clean_tx_ring(struct vnet_port *port,
0989                       unsigned *pending)
0990 {
0991     struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
0992     struct sk_buff *skb = NULL;
0993     int i, txi;
0994 
0995     *pending = 0;
0996 
0997     txi = dr->prod;
0998     for (i = 0; i < VNET_TX_RING_SIZE; ++i) {
0999         struct vio_net_desc *d;
1000 
1001         --txi;
1002         if (txi < 0)
1003             txi = VNET_TX_RING_SIZE - 1;
1004 
1005         d = vio_dring_entry(dr, txi);
1006 
1007         if (d->hdr.state == VIO_DESC_READY) {
1008             (*pending)++;
1009             continue;
1010         }
1011         if (port->tx_bufs[txi].skb) {
1012             if (d->hdr.state != VIO_DESC_DONE)
1013                 pr_notice("invalid ring buffer state %d\n",
1014                       d->hdr.state);
1015             BUG_ON(port->tx_bufs[txi].skb->next);
1016 
1017             port->tx_bufs[txi].skb->next = skb;
1018             skb = port->tx_bufs[txi].skb;
1019             port->tx_bufs[txi].skb = NULL;
1020 
1021             ldc_unmap(port->vio.lp,
1022                   port->tx_bufs[txi].cookies,
1023                   port->tx_bufs[txi].ncookies);
1024         } else if (d->hdr.state == VIO_DESC_FREE) {
1025             break;
1026         }
1027         d->hdr.state = VIO_DESC_FREE;
1028     }
1029     return skb;
1030 }
1031 
1032 static inline void vnet_free_skbs(struct sk_buff *skb)
1033 {
1034     struct sk_buff *next;
1035 
1036     while (skb) {
1037         next = skb->next;
1038         skb->next = NULL;
1039         dev_kfree_skb(skb);
1040         skb = next;
1041     }
1042 }
1043 
1044 void sunvnet_clean_timer_expire_common(struct timer_list *t)
1045 {
1046     struct vnet_port *port = from_timer(port, t, clean_timer);
1047     struct sk_buff *freeskbs;
1048     unsigned pending;
1049 
1050     netif_tx_lock(VNET_PORT_TO_NET_DEVICE(port));
1051     freeskbs = vnet_clean_tx_ring(port, &pending);
1052     netif_tx_unlock(VNET_PORT_TO_NET_DEVICE(port));
1053 
1054     vnet_free_skbs(freeskbs);
1055 
1056     if (pending)
1057         (void)mod_timer(&port->clean_timer,
1058                 jiffies + VNET_CLEAN_TIMEOUT);
1059      else
1060         del_timer(&port->clean_timer);
1061 }
1062 EXPORT_SYMBOL_GPL(sunvnet_clean_timer_expire_common);
1063 
1064 static inline int vnet_skb_map(struct ldc_channel *lp, struct sk_buff *skb,
1065                    struct ldc_trans_cookie *cookies, int ncookies,
1066                    unsigned int map_perm)
1067 {
1068     int i, nc, err, blen;
1069 
1070     /* header */
1071     blen = skb_headlen(skb);
1072     if (blen < ETH_ZLEN)
1073         blen = ETH_ZLEN;
1074     blen += VNET_PACKET_SKIP;
1075     blen += 8 - (blen & 7);
1076 
1077     err = ldc_map_single(lp, skb->data - VNET_PACKET_SKIP, blen, cookies,
1078                  ncookies, map_perm);
1079     if (err < 0)
1080         return err;
1081     nc = err;
1082 
1083     for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1084         skb_frag_t *f = &skb_shinfo(skb)->frags[i];
1085         u8 *vaddr;
1086 
1087         if (nc < ncookies) {
1088             vaddr = kmap_atomic(skb_frag_page(f));
1089             blen = skb_frag_size(f);
1090             blen += 8 - (blen & 7);
1091             err = ldc_map_single(lp, vaddr + skb_frag_off(f),
1092                          blen, cookies + nc, ncookies - nc,
1093                          map_perm);
1094             kunmap_atomic(vaddr);
1095         } else {
1096             err = -EMSGSIZE;
1097         }
1098 
1099         if (err < 0) {
1100             ldc_unmap(lp, cookies, nc);
1101             return err;
1102         }
1103         nc += err;
1104     }
1105     return nc;
1106 }
1107 
1108 static inline struct sk_buff *vnet_skb_shape(struct sk_buff *skb, int ncookies)
1109 {
1110     struct sk_buff *nskb;
1111     int i, len, pad, docopy;
1112 
1113     len = skb->len;
1114     pad = 0;
1115     if (len < ETH_ZLEN) {
1116         pad += ETH_ZLEN - skb->len;
1117         len += pad;
1118     }
1119     len += VNET_PACKET_SKIP;
1120     pad += 8 - (len & 7);
1121 
1122     /* make sure we have enough cookies and alignment in every frag */
1123     docopy = skb_shinfo(skb)->nr_frags >= ncookies;
1124     for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1125         skb_frag_t *f = &skb_shinfo(skb)->frags[i];
1126 
1127         docopy |= skb_frag_off(f) & 7;
1128     }
1129     if (((unsigned long)skb->data & 7) != VNET_PACKET_SKIP ||
1130         skb_tailroom(skb) < pad ||
1131         skb_headroom(skb) < VNET_PACKET_SKIP || docopy) {
1132         int start = 0, offset;
1133         __wsum csum;
1134 
1135         len = skb->len > ETH_ZLEN ? skb->len : ETH_ZLEN;
1136         nskb = alloc_and_align_skb(skb->dev, len);
1137         if (!nskb) {
1138             dev_kfree_skb(skb);
1139             return NULL;
1140         }
1141         skb_reserve(nskb, VNET_PACKET_SKIP);
1142 
1143         nskb->protocol = skb->protocol;
1144         offset = skb_mac_header(skb) - skb->data;
1145         skb_set_mac_header(nskb, offset);
1146         offset = skb_network_header(skb) - skb->data;
1147         skb_set_network_header(nskb, offset);
1148         offset = skb_transport_header(skb) - skb->data;
1149         skb_set_transport_header(nskb, offset);
1150 
1151         offset = 0;
1152         nskb->csum_offset = skb->csum_offset;
1153         nskb->ip_summed = skb->ip_summed;
1154 
1155         if (skb->ip_summed == CHECKSUM_PARTIAL)
1156             start = skb_checksum_start_offset(skb);
1157         if (start) {
1158             int offset = start + nskb->csum_offset;
1159 
1160             /* copy the headers, no csum here */
1161             if (skb_copy_bits(skb, 0, nskb->data, start)) {
1162                 dev_kfree_skb(nskb);
1163                 dev_kfree_skb(skb);
1164                 return NULL;
1165             }
1166 
1167             /* copy the rest, with csum calculation */
1168             *(__sum16 *)(skb->data + offset) = 0;
1169             csum = skb_copy_and_csum_bits(skb, start,
1170                               nskb->data + start,
1171                               skb->len - start);
1172 
1173             /* add in the header checksums */
1174             if (skb->protocol == htons(ETH_P_IP)) {
1175                 struct iphdr *iph = ip_hdr(nskb);
1176 
1177                 if (iph->protocol == IPPROTO_TCP ||
1178                     iph->protocol == IPPROTO_UDP) {
1179                     csum = csum_tcpudp_magic(iph->saddr,
1180                                  iph->daddr,
1181                                  skb->len - start,
1182                                  iph->protocol,
1183                                  csum);
1184                 }
1185             } else if (skb->protocol == htons(ETH_P_IPV6)) {
1186                 struct ipv6hdr *ip6h = ipv6_hdr(nskb);
1187 
1188                 if (ip6h->nexthdr == IPPROTO_TCP ||
1189                     ip6h->nexthdr == IPPROTO_UDP) {
1190                     csum = csum_ipv6_magic(&ip6h->saddr,
1191                                    &ip6h->daddr,
1192                                    skb->len - start,
1193                                    ip6h->nexthdr,
1194                                    csum);
1195                 }
1196             }
1197 
1198             /* save the final result */
1199             *(__sum16 *)(nskb->data + offset) = csum;
1200 
1201             nskb->ip_summed = CHECKSUM_NONE;
1202         } else if (skb_copy_bits(skb, 0, nskb->data, skb->len)) {
1203             dev_kfree_skb(nskb);
1204             dev_kfree_skb(skb);
1205             return NULL;
1206         }
1207         (void)skb_put(nskb, skb->len);
1208         if (skb_is_gso(skb)) {
1209             skb_shinfo(nskb)->gso_size = skb_shinfo(skb)->gso_size;
1210             skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type;
1211         }
1212         nskb->queue_mapping = skb->queue_mapping;
1213         dev_kfree_skb(skb);
1214         skb = nskb;
1215     }
1216     return skb;
1217 }
1218 
1219 static netdev_tx_t
1220 vnet_handle_offloads(struct vnet_port *port, struct sk_buff *skb,
1221              struct vnet_port *(*vnet_tx_port)
1222              (struct sk_buff *, struct net_device *))
1223 {
1224     struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
1225     struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
1226     struct sk_buff *segs, *curr, *next;
1227     int maclen, datalen;
1228     int status;
1229     int gso_size, gso_type, gso_segs;
1230     int hlen = skb_transport_header(skb) - skb_mac_header(skb);
1231     int proto = IPPROTO_IP;
1232 
1233     if (skb->protocol == htons(ETH_P_IP))
1234         proto = ip_hdr(skb)->protocol;
1235     else if (skb->protocol == htons(ETH_P_IPV6))
1236         proto = ipv6_hdr(skb)->nexthdr;
1237 
1238     if (proto == IPPROTO_TCP) {
1239         hlen += tcp_hdr(skb)->doff * 4;
1240     } else if (proto == IPPROTO_UDP) {
1241         hlen += sizeof(struct udphdr);
1242     } else {
1243         pr_err("vnet_handle_offloads GSO with unknown transport "
1244                "protocol %d tproto %d\n", skb->protocol, proto);
1245         hlen = 128; /* XXX */
1246     }
1247     datalen = port->tsolen - hlen;
1248 
1249     gso_size = skb_shinfo(skb)->gso_size;
1250     gso_type = skb_shinfo(skb)->gso_type;
1251     gso_segs = skb_shinfo(skb)->gso_segs;
1252 
1253     if (port->tso && gso_size < datalen)
1254         gso_segs = DIV_ROUND_UP(skb->len - hlen, datalen);
1255 
1256     if (unlikely(vnet_tx_dring_avail(dr) < gso_segs)) {
1257         struct netdev_queue *txq;
1258 
1259         txq  = netdev_get_tx_queue(dev, port->q_index);
1260         netif_tx_stop_queue(txq);
1261         if (vnet_tx_dring_avail(dr) < skb_shinfo(skb)->gso_segs)
1262             return NETDEV_TX_BUSY;
1263         netif_tx_wake_queue(txq);
1264     }
1265 
1266     maclen = skb_network_header(skb) - skb_mac_header(skb);
1267     skb_pull(skb, maclen);
1268 
1269     if (port->tso && gso_size < datalen) {
1270         if (skb_unclone(skb, GFP_ATOMIC))
1271             goto out_dropped;
1272 
1273         /* segment to TSO size */
1274         skb_shinfo(skb)->gso_size = datalen;
1275         skb_shinfo(skb)->gso_segs = gso_segs;
1276     }
1277     segs = skb_gso_segment(skb, dev->features & ~NETIF_F_TSO);
1278     if (IS_ERR(segs))
1279         goto out_dropped;
1280 
1281     skb_push(skb, maclen);
1282     skb_reset_mac_header(skb);
1283 
1284     status = 0;
1285     skb_list_walk_safe(segs, curr, next) {
1286         skb_mark_not_on_list(curr);
1287         if (port->tso && curr->len > dev->mtu) {
1288             skb_shinfo(curr)->gso_size = gso_size;
1289             skb_shinfo(curr)->gso_type = gso_type;
1290             skb_shinfo(curr)->gso_segs =
1291                 DIV_ROUND_UP(curr->len - hlen, gso_size);
1292         } else {
1293             skb_shinfo(curr)->gso_size = 0;
1294         }
1295 
1296         skb_push(curr, maclen);
1297         skb_reset_mac_header(curr);
1298         memcpy(skb_mac_header(curr), skb_mac_header(skb),
1299                maclen);
1300         curr->csum_start = skb_transport_header(curr) - curr->head;
1301         if (ip_hdr(curr)->protocol == IPPROTO_TCP)
1302             curr->csum_offset = offsetof(struct tcphdr, check);
1303         else if (ip_hdr(curr)->protocol == IPPROTO_UDP)
1304             curr->csum_offset = offsetof(struct udphdr, check);
1305 
1306         if (!(status & NETDEV_TX_MASK))
1307             status = sunvnet_start_xmit_common(curr, dev,
1308                                vnet_tx_port);
1309         if (status & NETDEV_TX_MASK)
1310             dev_kfree_skb_any(curr);
1311     }
1312 
1313     if (!(status & NETDEV_TX_MASK))
1314         dev_kfree_skb_any(skb);
1315     return status;
1316 out_dropped:
1317     dev->stats.tx_dropped++;
1318     dev_kfree_skb_any(skb);
1319     return NETDEV_TX_OK;
1320 }
1321 
1322 netdev_tx_t
1323 sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev,
1324               struct vnet_port *(*vnet_tx_port)
1325               (struct sk_buff *, struct net_device *))
1326 {
1327     struct vnet_port *port = NULL;
1328     struct vio_dring_state *dr;
1329     struct vio_net_desc *d;
1330     unsigned int len;
1331     struct sk_buff *freeskbs = NULL;
1332     int i, err, txi;
1333     unsigned pending = 0;
1334     struct netdev_queue *txq;
1335 
1336     rcu_read_lock();
1337     port = vnet_tx_port(skb, dev);
1338     if (unlikely(!port))
1339         goto out_dropped;
1340 
1341     if (skb_is_gso(skb) && skb->len > port->tsolen) {
1342         err = vnet_handle_offloads(port, skb, vnet_tx_port);
1343         rcu_read_unlock();
1344         return err;
1345     }
1346 
1347     if (!skb_is_gso(skb) && skb->len > port->rmtu) {
1348         unsigned long localmtu = port->rmtu - ETH_HLEN;
1349 
1350         if (vio_version_after_eq(&port->vio, 1, 3))
1351             localmtu -= VLAN_HLEN;
1352 
1353         if (skb->protocol == htons(ETH_P_IP))
1354             icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
1355                       htonl(localmtu));
1356 #if IS_ENABLED(CONFIG_IPV6)
1357         else if (skb->protocol == htons(ETH_P_IPV6))
1358             icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, localmtu);
1359 #endif
1360         goto out_dropped;
1361     }
1362 
1363     skb = vnet_skb_shape(skb, 2);
1364 
1365     if (unlikely(!skb))
1366         goto out_dropped;
1367 
1368     if (skb->ip_summed == CHECKSUM_PARTIAL) {
1369         if (skb->protocol == htons(ETH_P_IP))
1370             vnet_fullcsum_ipv4(skb);
1371 #if IS_ENABLED(CONFIG_IPV6)
1372         else if (skb->protocol == htons(ETH_P_IPV6))
1373             vnet_fullcsum_ipv6(skb);
1374 #endif
1375     }
1376 
1377     dr = &port->vio.drings[VIO_DRIVER_TX_RING];
1378     i = skb_get_queue_mapping(skb);
1379     txq = netdev_get_tx_queue(dev, i);
1380     if (unlikely(vnet_tx_dring_avail(dr) < 1)) {
1381         if (!netif_tx_queue_stopped(txq)) {
1382             netif_tx_stop_queue(txq);
1383 
1384             /* This is a hard error, log it. */
1385             netdev_err(dev, "BUG! Tx Ring full when queue awake!\n");
1386             dev->stats.tx_errors++;
1387         }
1388         rcu_read_unlock();
1389         return NETDEV_TX_BUSY;
1390     }
1391 
1392     d = vio_dring_cur(dr);
1393 
1394     txi = dr->prod;
1395 
1396     freeskbs = vnet_clean_tx_ring(port, &pending);
1397 
1398     BUG_ON(port->tx_bufs[txi].skb);
1399 
1400     len = skb->len;
1401     if (len < ETH_ZLEN)
1402         len = ETH_ZLEN;
1403 
1404     err = vnet_skb_map(port->vio.lp, skb, port->tx_bufs[txi].cookies, 2,
1405                (LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_RW));
1406     if (err < 0) {
1407         netdev_info(dev, "tx buffer map error %d\n", err);
1408         goto out_dropped;
1409     }
1410 
1411     port->tx_bufs[txi].skb = skb;
1412     skb = NULL;
1413     port->tx_bufs[txi].ncookies = err;
1414 
1415     /* We don't rely on the ACKs to free the skb in vnet_start_xmit(),
1416      * thus it is safe to not set VIO_ACK_ENABLE for each transmission:
1417      * the protocol itself does not require it as long as the peer
1418      * sends a VIO_SUBTYPE_ACK for VIO_DRING_STOPPED.
1419      *
1420      * An ACK for every packet in the ring is expensive as the
1421      * sending of LDC messages is slow and affects performance.
1422      */
1423     d->hdr.ack = VIO_ACK_DISABLE;
1424     d->size = len;
1425     d->ncookies = port->tx_bufs[txi].ncookies;
1426     for (i = 0; i < d->ncookies; i++)
1427         d->cookies[i] = port->tx_bufs[txi].cookies[i];
1428     if (vio_version_after_eq(&port->vio, 1, 7)) {
1429         struct vio_net_dext *dext = vio_net_ext(d);
1430 
1431         memset(dext, 0, sizeof(*dext));
1432         if (skb_is_gso(port->tx_bufs[txi].skb)) {
1433             dext->ipv4_lso_mss = skb_shinfo(port->tx_bufs[txi].skb)
1434                          ->gso_size;
1435             dext->flags |= VNET_PKT_IPV4_LSO;
1436         }
1437         if (vio_version_after_eq(&port->vio, 1, 8) &&
1438             !port->switch_port) {
1439             dext->flags |= VNET_PKT_HCK_IPV4_HDRCKSUM_OK;
1440             dext->flags |= VNET_PKT_HCK_FULLCKSUM_OK;
1441         }
1442     }
1443 
1444     /* This has to be a non-SMP write barrier because we are writing
1445      * to memory which is shared with the peer LDOM.
1446      */
1447     dma_wmb();
1448 
1449     d->hdr.state = VIO_DESC_READY;
1450 
1451     /* Exactly one ldc "start" trigger (for dr->cons) needs to be sent
1452      * to notify the consumer that some descriptors are READY.
1453      * After that "start" trigger, no additional triggers are needed until
1454      * a DRING_STOPPED is received from the consumer. The dr->cons field
1455      * (set up by vnet_ack()) has the value of the next dring index
1456      * that has not yet been ack-ed. We send a "start" trigger here
1457      * if, and only if, start_cons is true (reset it afterward). Conversely,
1458      * vnet_ack() should check if the dring corresponding to cons
1459      * is marked READY, but start_cons was false.
1460      * If so, vnet_ack() should send out the missed "start" trigger.
1461      *
1462      * Note that the dma_wmb() above makes sure the cookies et al. are
1463      * not globally visible before the VIO_DESC_READY, and that the
1464      * stores are ordered correctly by the compiler. The consumer will
1465      * not proceed until the VIO_DESC_READY is visible assuring that
1466      * the consumer does not observe anything related to descriptors
1467      * out of order. The HV trap from the LDC start trigger is the
1468      * producer to consumer announcement that work is available to the
1469      * consumer
1470      */
1471     if (!port->start_cons) { /* previous trigger suffices */
1472         trace_vnet_skip_tx_trigger(port->vio._local_sid,
1473                        port->vio._peer_sid, dr->cons);
1474         goto ldc_start_done;
1475     }
1476 
1477     err = __vnet_tx_trigger(port, dr->cons);
1478     if (unlikely(err < 0)) {
1479         netdev_info(dev, "TX trigger error %d\n", err);
1480         d->hdr.state = VIO_DESC_FREE;
1481         skb = port->tx_bufs[txi].skb;
1482         port->tx_bufs[txi].skb = NULL;
1483         dev->stats.tx_carrier_errors++;
1484         goto out_dropped;
1485     }
1486 
1487 ldc_start_done:
1488     port->start_cons = false;
1489 
1490     dev->stats.tx_packets++;
1491     dev->stats.tx_bytes += port->tx_bufs[txi].skb->len;
1492     port->stats.tx_packets++;
1493     port->stats.tx_bytes += port->tx_bufs[txi].skb->len;
1494 
1495     dr->prod = (dr->prod + 1) & (VNET_TX_RING_SIZE - 1);
1496     if (unlikely(vnet_tx_dring_avail(dr) < 1)) {
1497         netif_tx_stop_queue(txq);
1498         smp_rmb();
1499         if (vnet_tx_dring_avail(dr) > VNET_TX_WAKEUP_THRESH(dr))
1500             netif_tx_wake_queue(txq);
1501     }
1502 
1503     (void)mod_timer(&port->clean_timer, jiffies + VNET_CLEAN_TIMEOUT);
1504     rcu_read_unlock();
1505 
1506     vnet_free_skbs(freeskbs);
1507 
1508     return NETDEV_TX_OK;
1509 
1510 out_dropped:
1511     if (pending)
1512         (void)mod_timer(&port->clean_timer,
1513                 jiffies + VNET_CLEAN_TIMEOUT);
1514     else if (port)
1515         del_timer(&port->clean_timer);
1516     rcu_read_unlock();
1517     dev_kfree_skb(skb);
1518     vnet_free_skbs(freeskbs);
1519     dev->stats.tx_dropped++;
1520     return NETDEV_TX_OK;
1521 }
1522 EXPORT_SYMBOL_GPL(sunvnet_start_xmit_common);
1523 
1524 void sunvnet_tx_timeout_common(struct net_device *dev, unsigned int txqueue)
1525 {
1526     /* XXX Implement me XXX */
1527 }
1528 EXPORT_SYMBOL_GPL(sunvnet_tx_timeout_common);
1529 
1530 int sunvnet_open_common(struct net_device *dev)
1531 {
1532     netif_carrier_on(dev);
1533     netif_tx_start_all_queues(dev);
1534 
1535     return 0;
1536 }
1537 EXPORT_SYMBOL_GPL(sunvnet_open_common);
1538 
1539 int sunvnet_close_common(struct net_device *dev)
1540 {
1541     netif_tx_stop_all_queues(dev);
1542     netif_carrier_off(dev);
1543 
1544     return 0;
1545 }
1546 EXPORT_SYMBOL_GPL(sunvnet_close_common);
1547 
1548 static struct vnet_mcast_entry *__vnet_mc_find(struct vnet *vp, u8 *addr)
1549 {
1550     struct vnet_mcast_entry *m;
1551 
1552     for (m = vp->mcast_list; m; m = m->next) {
1553         if (ether_addr_equal(m->addr, addr))
1554             return m;
1555     }
1556     return NULL;
1557 }
1558 
1559 static void __update_mc_list(struct vnet *vp, struct net_device *dev)
1560 {
1561     struct netdev_hw_addr *ha;
1562 
1563     netdev_for_each_mc_addr(ha, dev) {
1564         struct vnet_mcast_entry *m;
1565 
1566         m = __vnet_mc_find(vp, ha->addr);
1567         if (m) {
1568             m->hit = 1;
1569             continue;
1570         }
1571 
1572         if (!m) {
1573             m = kzalloc(sizeof(*m), GFP_ATOMIC);
1574             if (!m)
1575                 continue;
1576             memcpy(m->addr, ha->addr, ETH_ALEN);
1577             m->hit = 1;
1578 
1579             m->next = vp->mcast_list;
1580             vp->mcast_list = m;
1581         }
1582     }
1583 }
1584 
1585 static void __send_mc_list(struct vnet *vp, struct vnet_port *port)
1586 {
1587     struct vio_net_mcast_info info;
1588     struct vnet_mcast_entry *m, **pp;
1589     int n_addrs;
1590 
1591     memset(&info, 0, sizeof(info));
1592 
1593     info.tag.type = VIO_TYPE_CTRL;
1594     info.tag.stype = VIO_SUBTYPE_INFO;
1595     info.tag.stype_env = VNET_MCAST_INFO;
1596     info.tag.sid = vio_send_sid(&port->vio);
1597     info.set = 1;
1598 
1599     n_addrs = 0;
1600     for (m = vp->mcast_list; m; m = m->next) {
1601         if (m->sent)
1602             continue;
1603         m->sent = 1;
1604         memcpy(&info.mcast_addr[n_addrs * ETH_ALEN],
1605                m->addr, ETH_ALEN);
1606         if (++n_addrs == VNET_NUM_MCAST) {
1607             info.count = n_addrs;
1608 
1609             (void)vio_ldc_send(&port->vio, &info,
1610                        sizeof(info));
1611             n_addrs = 0;
1612         }
1613     }
1614     if (n_addrs) {
1615         info.count = n_addrs;
1616         (void)vio_ldc_send(&port->vio, &info, sizeof(info));
1617     }
1618 
1619     info.set = 0;
1620 
1621     n_addrs = 0;
1622     pp = &vp->mcast_list;
1623     while ((m = *pp) != NULL) {
1624         if (m->hit) {
1625             m->hit = 0;
1626             pp = &m->next;
1627             continue;
1628         }
1629 
1630         memcpy(&info.mcast_addr[n_addrs * ETH_ALEN],
1631                m->addr, ETH_ALEN);
1632         if (++n_addrs == VNET_NUM_MCAST) {
1633             info.count = n_addrs;
1634             (void)vio_ldc_send(&port->vio, &info,
1635                        sizeof(info));
1636             n_addrs = 0;
1637         }
1638 
1639         *pp = m->next;
1640         kfree(m);
1641     }
1642     if (n_addrs) {
1643         info.count = n_addrs;
1644         (void)vio_ldc_send(&port->vio, &info, sizeof(info));
1645     }
1646 }
1647 
1648 void sunvnet_set_rx_mode_common(struct net_device *dev, struct vnet *vp)
1649 {
1650     struct vnet_port *port;
1651 
1652     rcu_read_lock();
1653     list_for_each_entry_rcu(port, &vp->port_list, list) {
1654         if (port->switch_port) {
1655             __update_mc_list(vp, dev);
1656             __send_mc_list(vp, port);
1657             break;
1658         }
1659     }
1660     rcu_read_unlock();
1661 }
1662 EXPORT_SYMBOL_GPL(sunvnet_set_rx_mode_common);
1663 
1664 int sunvnet_set_mac_addr_common(struct net_device *dev, void *p)
1665 {
1666     return -EINVAL;
1667 }
1668 EXPORT_SYMBOL_GPL(sunvnet_set_mac_addr_common);
1669 
1670 void sunvnet_port_free_tx_bufs_common(struct vnet_port *port)
1671 {
1672     struct vio_dring_state *dr;
1673     int i;
1674 
1675     dr = &port->vio.drings[VIO_DRIVER_TX_RING];
1676 
1677     if (!dr->base)
1678         return;
1679 
1680     for (i = 0; i < VNET_TX_RING_SIZE; i++) {
1681         struct vio_net_desc *d;
1682         void *skb = port->tx_bufs[i].skb;
1683 
1684         if (!skb)
1685             continue;
1686 
1687         d = vio_dring_entry(dr, i);
1688 
1689         ldc_unmap(port->vio.lp,
1690               port->tx_bufs[i].cookies,
1691               port->tx_bufs[i].ncookies);
1692         dev_kfree_skb(skb);
1693         port->tx_bufs[i].skb = NULL;
1694         d->hdr.state = VIO_DESC_FREE;
1695     }
1696     ldc_free_exp_dring(port->vio.lp, dr->base,
1697                (dr->entry_size * dr->num_entries),
1698                dr->cookies, dr->ncookies);
1699     dr->base = NULL;
1700     dr->entry_size = 0;
1701     dr->num_entries = 0;
1702     dr->pending = 0;
1703     dr->ncookies = 0;
1704 }
1705 EXPORT_SYMBOL_GPL(sunvnet_port_free_tx_bufs_common);
1706 
1707 void vnet_port_reset(struct vnet_port *port)
1708 {
1709     del_timer(&port->clean_timer);
1710     sunvnet_port_free_tx_bufs_common(port);
1711     port->rmtu = 0;
1712     port->tso = (port->vsw == 0);  /* no tso in vsw, misbehaves in bridge */
1713     port->tsolen = 0;
1714 }
1715 EXPORT_SYMBOL_GPL(vnet_port_reset);
1716 
1717 static int vnet_port_alloc_tx_ring(struct vnet_port *port)
1718 {
1719     struct vio_dring_state *dr;
1720     unsigned long len, elen;
1721     int i, err, ncookies;
1722     void *dring;
1723 
1724     dr = &port->vio.drings[VIO_DRIVER_TX_RING];
1725 
1726     elen = sizeof(struct vio_net_desc) +
1727            sizeof(struct ldc_trans_cookie) * 2;
1728     if (vio_version_after_eq(&port->vio, 1, 7))
1729         elen += sizeof(struct vio_net_dext);
1730     len = VNET_TX_RING_SIZE * elen;
1731 
1732     ncookies = VIO_MAX_RING_COOKIES;
1733     dring = ldc_alloc_exp_dring(port->vio.lp, len,
1734                     dr->cookies, &ncookies,
1735                     (LDC_MAP_SHADOW |
1736                      LDC_MAP_DIRECT |
1737                      LDC_MAP_RW));
1738     if (IS_ERR(dring)) {
1739         err = PTR_ERR(dring);
1740         goto err_out;
1741     }
1742 
1743     dr->base = dring;
1744     dr->entry_size = elen;
1745     dr->num_entries = VNET_TX_RING_SIZE;
1746     dr->prod = 0;
1747     dr->cons = 0;
1748     port->start_cons  = true; /* need an initial trigger */
1749     dr->pending = VNET_TX_RING_SIZE;
1750     dr->ncookies = ncookies;
1751 
1752     for (i = 0; i < VNET_TX_RING_SIZE; ++i) {
1753         struct vio_net_desc *d;
1754 
1755         d = vio_dring_entry(dr, i);
1756         d->hdr.state = VIO_DESC_FREE;
1757     }
1758     return 0;
1759 
1760 err_out:
1761     sunvnet_port_free_tx_bufs_common(port);
1762 
1763     return err;
1764 }
1765 
1766 #ifdef CONFIG_NET_POLL_CONTROLLER
1767 void sunvnet_poll_controller_common(struct net_device *dev, struct vnet *vp)
1768 {
1769     struct vnet_port *port;
1770     unsigned long flags;
1771 
1772     spin_lock_irqsave(&vp->lock, flags);
1773     if (!list_empty(&vp->port_list)) {
1774         port = list_entry(vp->port_list.next, struct vnet_port, list);
1775         napi_schedule(&port->napi);
1776     }
1777     spin_unlock_irqrestore(&vp->lock, flags);
1778 }
1779 EXPORT_SYMBOL_GPL(sunvnet_poll_controller_common);
1780 #endif
1781 
1782 void sunvnet_port_add_txq_common(struct vnet_port *port)
1783 {
1784     struct vnet *vp = port->vp;
1785     int smallest = 0;
1786     int i;
1787 
1788     /* find the first least-used q
1789      * When there are more ldoms than q's, we start to
1790      * double up on ports per queue.
1791      */
1792     for (i = 0; i < VNET_MAX_TXQS; i++) {
1793         if (vp->q_used[i] == 0) {
1794             smallest = i;
1795             break;
1796         }
1797         if (vp->q_used[i] < vp->q_used[smallest])
1798             smallest = i;
1799     }
1800 
1801     vp->nports++;
1802     vp->q_used[smallest]++;
1803     port->q_index = smallest;
1804 }
1805 EXPORT_SYMBOL_GPL(sunvnet_port_add_txq_common);
1806 
1807 void sunvnet_port_rm_txq_common(struct vnet_port *port)
1808 {
1809     port->vp->nports--;
1810     port->vp->q_used[port->q_index]--;
1811     port->q_index = 0;
1812 }
1813 EXPORT_SYMBOL_GPL(sunvnet_port_rm_txq_common);
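
/* Illustrative sketch only, not part of this file: one way a front-end driver
 * (sunvnet.c and ldmvsw.c are the in-tree users) might wire the exported
 * *_common routines into its net_device_ops.  The example_* names are
 * hypothetical; a real front end returns an actual peer port from its
 * port-lookup callback instead of the NULL stub below, and typically also
 * adds rx-mode, MAC-address, select-queue and poll-controller wrappers.
 */
static struct vnet_port *example_tx_port_find(struct sk_buff *skb,
                                              struct net_device *dev)
{
    return NULL;    /* a real front end selects the peer vnet_port here */
}

static netdev_tx_t example_start_xmit(struct sk_buff *skb,
                                      struct net_device *dev)
{
    return sunvnet_start_xmit_common(skb, dev, example_tx_port_find);
}

static const struct net_device_ops example_vnet_ops __maybe_unused = {
    .ndo_open       = sunvnet_open_common,
    .ndo_stop       = sunvnet_close_common,
    .ndo_start_xmit = example_start_xmit,
    .ndo_tx_timeout = sunvnet_tx_timeout_common,
};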