0001
0002
0003
0004
0005
0006 #include <linux/uaccess.h>
0007 #include <linux/netdevice.h>
0008 #include <linux/etherdevice.h>
0009 #include <linux/if_ether.h>
0010 #include <linux/if_vlan.h>
0011 #include <net/llc_pdu.h>
0012 #include <linux/kernel.h>
0013 #include <linux/jhash.h>
0014 #include <linux/jiffies.h>
0015 #include <linux/llc.h>
0016 #include <linux/module.h>
0017 #include <linux/in.h>
0018 #include <linux/rcupdate.h>
0019 #include <linux/cpumask.h>
0020 #include <linux/if_arp.h>
0021 #include <linux/ip.h>
0022 #include <linux/ipv6.h>
0023 #include <linux/mpls.h>
0024 #include <linux/sctp.h>
0025 #include <linux/smp.h>
0026 #include <linux/tcp.h>
0027 #include <linux/udp.h>
0028 #include <linux/icmp.h>
0029 #include <linux/icmpv6.h>
0030 #include <linux/rculist.h>
0031 #include <net/ip.h>
0032 #include <net/ip_tunnels.h>
0033 #include <net/ipv6.h>
0034 #include <net/mpls.h>
0035 #include <net/ndisc.h>
0036 #include <net/nsh.h>
0037 #include <net/pkt_cls.h>
0038 #include <net/netfilter/nf_conntrack_zones.h>
0039
0040 #include "conntrack.h"
0041 #include "datapath.h"
0042 #include "flow.h"
0043 #include "flow_netlink.h"
0044 #include "vport.h"
0045
0046 u64 ovs_flow_used_time(unsigned long flow_jiffies)
0047 {
0048 struct timespec64 cur_ts;
0049 u64 cur_ms, idle_ms;
0050
0051 ktime_get_ts64(&cur_ts);
0052 idle_ms = jiffies_to_msecs(jiffies - flow_jiffies);
0053 cur_ms = (u64)(u32)cur_ts.tv_sec * MSEC_PER_SEC +
0054 cur_ts.tv_nsec / NSEC_PER_MSEC;
0055
0056 return cur_ms - idle_ms;
0057 }
0058
/*
 * Yield the first 16 bits of tcp_flag_word(tp), kept in network byte order
 * and masked with 0x0FFF.
 */
#define TCP_FLAGS_BE16(tp) \
	(*(__be16 *)&tcp_flag_word(tp) & htons(0x0FFF))
0060
0061 void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
0062 const struct sk_buff *skb)
0063 {
0064 struct sw_flow_stats *stats;
0065 unsigned int cpu = smp_processor_id();
0066 int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
0067
0068 stats = rcu_dereference(flow->stats[cpu]);
0069
0070
0071 if (likely(stats)) {
0072 spin_lock(&stats->lock);
0073
0074 if (cpu == 0 && unlikely(flow->stats_last_writer != cpu))
0075 flow->stats_last_writer = cpu;
0076 } else {
0077 stats = rcu_dereference(flow->stats[0]);
0078 spin_lock(&stats->lock);
0079
0080
0081
0082
0083 if (unlikely(flow->stats_last_writer != cpu)) {
0084
0085
0086
0087
0088
0089 if (likely(flow->stats_last_writer != -1) &&
0090 likely(!rcu_access_pointer(flow->stats[cpu]))) {
0091
0092 struct sw_flow_stats *new_stats;
0093
0094 new_stats =
0095 kmem_cache_alloc_node(flow_stats_cache,
0096 GFP_NOWAIT |
0097 __GFP_THISNODE |
0098 __GFP_NOWARN |
0099 __GFP_NOMEMALLOC,
0100 numa_node_id());
0101 if (likely(new_stats)) {
0102 new_stats->used = jiffies;
0103 new_stats->packet_count = 1;
0104 new_stats->byte_count = len;
0105 new_stats->tcp_flags = tcp_flags;
0106 spin_lock_init(&new_stats->lock);
0107
0108 rcu_assign_pointer(flow->stats[cpu],
0109 new_stats);
0110 cpumask_set_cpu(cpu, &flow->cpu_used_mask);
0111 goto unlock;
0112 }
0113 }
0114 flow->stats_last_writer = cpu;
0115 }
0116 }
0117
0118 stats->used = jiffies;
0119 stats->packet_count++;
0120 stats->byte_count += len;
0121 stats->tcp_flags |= tcp_flags;
0122 unlock:
0123 spin_unlock(&stats->lock);
0124 }
0125
0126
0127 void ovs_flow_stats_get(const struct sw_flow *flow,
0128 struct ovs_flow_stats *ovs_stats,
0129 unsigned long *used, __be16 *tcp_flags)
0130 {
0131 int cpu;
0132
0133 *used = 0;
0134 *tcp_flags = 0;
0135 memset(ovs_stats, 0, sizeof(*ovs_stats));
0136
0137
0138 for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
0139 struct sw_flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
0140
0141 if (stats) {
0142
0143
0144
0145 spin_lock_bh(&stats->lock);
0146 if (!*used || time_after(stats->used, *used))
0147 *used = stats->used;
0148 *tcp_flags |= stats->tcp_flags;
0149 ovs_stats->n_packets += stats->packet_count;
0150 ovs_stats->n_bytes += stats->byte_count;
0151 spin_unlock_bh(&stats->lock);
0152 }
0153 }
0154 }
0155
0156
0157 void ovs_flow_stats_clear(struct sw_flow *flow)
0158 {
0159 int cpu;
0160
0161
0162 for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
0163 struct sw_flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
0164
0165 if (stats) {
0166 spin_lock_bh(&stats->lock);
0167 stats->used = 0;
0168 stats->packet_count = 0;
0169 stats->byte_count = 0;
0170 stats->tcp_flags = 0;
0171 spin_unlock_bh(&stats->lock);
0172 }
0173 }
0174 }
0175
0176 static int check_header(struct sk_buff *skb, int len)
0177 {
0178 if (unlikely(skb->len < len))
0179 return -EINVAL;
0180 if (unlikely(!pskb_may_pull(skb, len)))
0181 return -ENOMEM;
0182 return 0;
0183 }
0184
0185 static bool arphdr_ok(struct sk_buff *skb)
0186 {
0187 return pskb_may_pull(skb, skb_network_offset(skb) +
0188 sizeof(struct arp_eth_header));
0189 }
0190
0191 static int check_iphdr(struct sk_buff *skb)
0192 {
0193 unsigned int nh_ofs = skb_network_offset(skb);
0194 unsigned int ip_len;
0195 int err;
0196
0197 err = check_header(skb, nh_ofs + sizeof(struct iphdr));
0198 if (unlikely(err))
0199 return err;
0200
0201 ip_len = ip_hdrlen(skb);
0202 if (unlikely(ip_len < sizeof(struct iphdr) ||
0203 skb->len < nh_ofs + ip_len))
0204 return -EINVAL;
0205
0206 skb_set_transport_header(skb, nh_ofs + ip_len);
0207 return 0;
0208 }
0209
0210 static bool tcphdr_ok(struct sk_buff *skb)
0211 {
0212 int th_ofs = skb_transport_offset(skb);
0213 int tcp_len;
0214
0215 if (unlikely(!pskb_may_pull(skb, th_ofs + sizeof(struct tcphdr))))
0216 return false;
0217
0218 tcp_len = tcp_hdrlen(skb);
0219 if (unlikely(tcp_len < sizeof(struct tcphdr) ||
0220 skb->len < th_ofs + tcp_len))
0221 return false;
0222
0223 return true;
0224 }
0225
0226 static bool udphdr_ok(struct sk_buff *skb)
0227 {
0228 return pskb_may_pull(skb, skb_transport_offset(skb) +
0229 sizeof(struct udphdr));
0230 }
0231
0232 static bool sctphdr_ok(struct sk_buff *skb)
0233 {
0234 return pskb_may_pull(skb, skb_transport_offset(skb) +
0235 sizeof(struct sctphdr));
0236 }
0237
0238 static bool icmphdr_ok(struct sk_buff *skb)
0239 {
0240 return pskb_may_pull(skb, skb_transport_offset(skb) +
0241 sizeof(struct icmphdr));
0242 }
0243
0244
0245
0246
0247
0248
0249
0250
0251
0252
0253
0254
0255
0256
0257
0258
0259
0260
0261
0262
0263
0264
0265
0266
0267
0268
0269
0270 static void get_ipv6_ext_hdrs(struct sk_buff *skb, struct ipv6hdr *nh,
0271 u16 *ext_hdrs)
0272 {
0273 u8 next_type = nh->nexthdr;
0274 unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
0275 int dest_options_header_count = 0;
0276
0277 *ext_hdrs = 0;
0278
0279 while (ipv6_ext_hdr(next_type)) {
0280 struct ipv6_opt_hdr _hdr, *hp;
0281
0282 switch (next_type) {
0283 case IPPROTO_NONE:
0284 *ext_hdrs |= OFPIEH12_NONEXT;
0285
0286 return;
0287
0288 case IPPROTO_ESP:
0289 if (*ext_hdrs & OFPIEH12_ESP)
0290 *ext_hdrs |= OFPIEH12_UNREP;
0291 if ((*ext_hdrs & ~(OFPIEH12_HOP | OFPIEH12_DEST |
0292 OFPIEH12_ROUTER | IPPROTO_FRAGMENT |
0293 OFPIEH12_AUTH | OFPIEH12_UNREP)) ||
0294 dest_options_header_count >= 2) {
0295 *ext_hdrs |= OFPIEH12_UNSEQ;
0296 }
0297 *ext_hdrs |= OFPIEH12_ESP;
0298 break;
0299
0300 case IPPROTO_AH:
0301 if (*ext_hdrs & OFPIEH12_AUTH)
0302 *ext_hdrs |= OFPIEH12_UNREP;
0303 if ((*ext_hdrs &
0304 ~(OFPIEH12_HOP | OFPIEH12_DEST | OFPIEH12_ROUTER |
0305 IPPROTO_FRAGMENT | OFPIEH12_UNREP)) ||
0306 dest_options_header_count >= 2) {
0307 *ext_hdrs |= OFPIEH12_UNSEQ;
0308 }
0309 *ext_hdrs |= OFPIEH12_AUTH;
0310 break;
0311
0312 case IPPROTO_DSTOPTS:
0313 if (dest_options_header_count == 0) {
0314 if (*ext_hdrs &
0315 ~(OFPIEH12_HOP | OFPIEH12_UNREP))
0316 *ext_hdrs |= OFPIEH12_UNSEQ;
0317 *ext_hdrs |= OFPIEH12_DEST;
0318 } else if (dest_options_header_count == 1) {
0319 if (*ext_hdrs &
0320 ~(OFPIEH12_HOP | OFPIEH12_DEST |
0321 OFPIEH12_ROUTER | OFPIEH12_FRAG |
0322 OFPIEH12_AUTH | OFPIEH12_ESP |
0323 OFPIEH12_UNREP)) {
0324 *ext_hdrs |= OFPIEH12_UNSEQ;
0325 }
0326 } else {
0327 *ext_hdrs |= OFPIEH12_UNREP;
0328 }
0329 dest_options_header_count++;
0330 break;
0331
0332 case IPPROTO_FRAGMENT:
0333 if (*ext_hdrs & OFPIEH12_FRAG)
0334 *ext_hdrs |= OFPIEH12_UNREP;
0335 if ((*ext_hdrs & ~(OFPIEH12_HOP |
0336 OFPIEH12_DEST |
0337 OFPIEH12_ROUTER |
0338 OFPIEH12_UNREP)) ||
0339 dest_options_header_count >= 2) {
0340 *ext_hdrs |= OFPIEH12_UNSEQ;
0341 }
0342 *ext_hdrs |= OFPIEH12_FRAG;
0343 break;
0344
0345 case IPPROTO_ROUTING:
0346 if (*ext_hdrs & OFPIEH12_ROUTER)
0347 *ext_hdrs |= OFPIEH12_UNREP;
0348 if ((*ext_hdrs & ~(OFPIEH12_HOP |
0349 OFPIEH12_DEST |
0350 OFPIEH12_UNREP)) ||
0351 dest_options_header_count >= 2) {
0352 *ext_hdrs |= OFPIEH12_UNSEQ;
0353 }
0354 *ext_hdrs |= OFPIEH12_ROUTER;
0355 break;
0356
0357 case IPPROTO_HOPOPTS:
0358 if (*ext_hdrs & OFPIEH12_HOP)
0359 *ext_hdrs |= OFPIEH12_UNREP;
0360
0361
0362
0363
0364 if (*ext_hdrs == 0)
0365 *ext_hdrs |= OFPIEH12_HOP;
0366 else
0367 *ext_hdrs |= OFPIEH12_UNSEQ;
0368 break;
0369
0370 default:
0371 return;
0372 }
0373
0374 hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
0375 if (!hp)
0376 break;
0377 next_type = hp->nexthdr;
0378 start += ipv6_optlen(hp);
0379 }
0380 }
0381
0382 static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
0383 {
0384 unsigned short frag_off;
0385 unsigned int payload_ofs = 0;
0386 unsigned int nh_ofs = skb_network_offset(skb);
0387 unsigned int nh_len;
0388 struct ipv6hdr *nh;
0389 int err, nexthdr, flags = 0;
0390
0391 err = check_header(skb, nh_ofs + sizeof(*nh));
0392 if (unlikely(err))
0393 return err;
0394
0395 nh = ipv6_hdr(skb);
0396
0397 get_ipv6_ext_hdrs(skb, nh, &key->ipv6.exthdrs);
0398
0399 key->ip.proto = NEXTHDR_NONE;
0400 key->ip.tos = ipv6_get_dsfield(nh);
0401 key->ip.ttl = nh->hop_limit;
0402 key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
0403 key->ipv6.addr.src = nh->saddr;
0404 key->ipv6.addr.dst = nh->daddr;
0405
0406 nexthdr = ipv6_find_hdr(skb, &payload_ofs, -1, &frag_off, &flags);
0407 if (flags & IP6_FH_F_FRAG) {
0408 if (frag_off) {
0409 key->ip.frag = OVS_FRAG_TYPE_LATER;
0410 key->ip.proto = NEXTHDR_FRAGMENT;
0411 return 0;
0412 }
0413 key->ip.frag = OVS_FRAG_TYPE_FIRST;
0414 } else {
0415 key->ip.frag = OVS_FRAG_TYPE_NONE;
0416 }
0417
0418
0419
0420
0421
0422 if (unlikely(nexthdr < 0))
0423 return -EPROTO;
0424
0425 nh_len = payload_ofs - nh_ofs;
0426 skb_set_transport_header(skb, nh_ofs + nh_len);
0427 key->ip.proto = nexthdr;
0428 return nh_len;
0429 }
0430
0431 static bool icmp6hdr_ok(struct sk_buff *skb)
0432 {
0433 return pskb_may_pull(skb, skb_transport_offset(skb) +
0434 sizeof(struct icmp6hdr));
0435 }
0436
0437
0438
0439
0440
0441
0442
0443
0444
0445
0446
0447 static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh,
0448 bool untag_vlan)
0449 {
0450 struct vlan_head *vh = (struct vlan_head *)skb->data;
0451
0452 if (likely(!eth_type_vlan(vh->tpid)))
0453 return 0;
0454
0455 if (unlikely(skb->len < sizeof(struct vlan_head) + sizeof(__be16)))
0456 return 0;
0457
0458 if (unlikely(!pskb_may_pull(skb, sizeof(struct vlan_head) +
0459 sizeof(__be16))))
0460 return -ENOMEM;
0461
0462 vh = (struct vlan_head *)skb->data;
0463 key_vh->tci = vh->tci | htons(VLAN_CFI_MASK);
0464 key_vh->tpid = vh->tpid;
0465
0466 if (unlikely(untag_vlan)) {
0467 int offset = skb->data - skb_mac_header(skb);
0468 u16 tci;
0469 int err;
0470
0471 __skb_push(skb, offset);
0472 err = __skb_vlan_pop(skb, &tci);
0473 __skb_pull(skb, offset);
0474 if (err)
0475 return err;
0476 __vlan_hwaccel_put_tag(skb, key_vh->tpid, tci);
0477 } else {
0478 __skb_pull(skb, sizeof(struct vlan_head));
0479 }
0480 return 1;
0481 }
0482
0483 static void clear_vlan(struct sw_flow_key *key)
0484 {
0485 key->eth.vlan.tci = 0;
0486 key->eth.vlan.tpid = 0;
0487 key->eth.cvlan.tci = 0;
0488 key->eth.cvlan.tpid = 0;
0489 }
0490
0491 static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
0492 {
0493 int res;
0494
0495 if (skb_vlan_tag_present(skb)) {
0496 key->eth.vlan.tci = htons(skb->vlan_tci) | htons(VLAN_CFI_MASK);
0497 key->eth.vlan.tpid = skb->vlan_proto;
0498 } else {
0499
0500 res = parse_vlan_tag(skb, &key->eth.vlan, true);
0501 if (res <= 0)
0502 return res;
0503 }
0504
0505
0506 res = parse_vlan_tag(skb, &key->eth.cvlan, false);
0507 if (res <= 0)
0508 return res;
0509
0510 return 0;
0511 }
0512
0513 static __be16 parse_ethertype(struct sk_buff *skb)
0514 {
0515 struct llc_snap_hdr {
0516 u8 dsap;
0517 u8 ssap;
0518 u8 ctrl;
0519 u8 oui[3];
0520 __be16 ethertype;
0521 };
0522 struct llc_snap_hdr *llc;
0523 __be16 proto;
0524
0525 proto = *(__be16 *) skb->data;
0526 __skb_pull(skb, sizeof(__be16));
0527
0528 if (eth_proto_is_802_3(proto))
0529 return proto;
0530
0531 if (skb->len < sizeof(struct llc_snap_hdr))
0532 return htons(ETH_P_802_2);
0533
0534 if (unlikely(!pskb_may_pull(skb, sizeof(struct llc_snap_hdr))))
0535 return htons(0);
0536
0537 llc = (struct llc_snap_hdr *) skb->data;
0538 if (llc->dsap != LLC_SAP_SNAP ||
0539 llc->ssap != LLC_SAP_SNAP ||
0540 (llc->oui[0] | llc->oui[1] | llc->oui[2]) != 0)
0541 return htons(ETH_P_802_2);
0542
0543 __skb_pull(skb, sizeof(struct llc_snap_hdr));
0544
0545 if (eth_proto_is_802_3(llc->ethertype))
0546 return llc->ethertype;
0547
0548 return htons(ETH_P_802_2);
0549 }
0550
0551 static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
0552 int nh_len)
0553 {
0554 struct icmp6hdr *icmp = icmp6_hdr(skb);
0555
0556
0557
0558
0559 key->tp.src = htons(icmp->icmp6_type);
0560 key->tp.dst = htons(icmp->icmp6_code);
0561 memset(&key->ipv6.nd, 0, sizeof(key->ipv6.nd));
0562
0563 if (icmp->icmp6_code == 0 &&
0564 (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
0565 icmp->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT)) {
0566 int icmp_len = skb->len - skb_transport_offset(skb);
0567 struct nd_msg *nd;
0568 int offset;
0569
0570
0571
0572
0573 if (unlikely(icmp_len < sizeof(*nd)))
0574 return 0;
0575
0576 if (unlikely(skb_linearize(skb)))
0577 return -ENOMEM;
0578
0579 nd = (struct nd_msg *)skb_transport_header(skb);
0580 key->ipv6.nd.target = nd->target;
0581
0582 icmp_len -= sizeof(*nd);
0583 offset = 0;
0584 while (icmp_len >= 8) {
0585 struct nd_opt_hdr *nd_opt =
0586 (struct nd_opt_hdr *)(nd->opt + offset);
0587 int opt_len = nd_opt->nd_opt_len * 8;
0588
0589 if (unlikely(!opt_len || opt_len > icmp_len))
0590 return 0;
0591
0592
0593
0594
0595
0596 if (nd_opt->nd_opt_type == ND_OPT_SOURCE_LL_ADDR
0597 && opt_len == 8) {
0598 if (unlikely(!is_zero_ether_addr(key->ipv6.nd.sll)))
0599 goto invalid;
0600 ether_addr_copy(key->ipv6.nd.sll,
0601 &nd->opt[offset+sizeof(*nd_opt)]);
0602 } else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR
0603 && opt_len == 8) {
0604 if (unlikely(!is_zero_ether_addr(key->ipv6.nd.tll)))
0605 goto invalid;
0606 ether_addr_copy(key->ipv6.nd.tll,
0607 &nd->opt[offset+sizeof(*nd_opt)]);
0608 }
0609
0610 icmp_len -= opt_len;
0611 offset += opt_len;
0612 }
0613 }
0614
0615 return 0;
0616
0617 invalid:
0618 memset(&key->ipv6.nd.target, 0, sizeof(key->ipv6.nd.target));
0619 memset(key->ipv6.nd.sll, 0, sizeof(key->ipv6.nd.sll));
0620 memset(key->ipv6.nd.tll, 0, sizeof(key->ipv6.nd.tll));
0621
0622 return 0;
0623 }
0624
0625 static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key)
0626 {
0627 struct nshhdr *nh;
0628 unsigned int nh_ofs = skb_network_offset(skb);
0629 u8 version, length;
0630 int err;
0631
0632 err = check_header(skb, nh_ofs + NSH_BASE_HDR_LEN);
0633 if (unlikely(err))
0634 return err;
0635
0636 nh = nsh_hdr(skb);
0637 version = nsh_get_ver(nh);
0638 length = nsh_hdr_len(nh);
0639
0640 if (version != 0)
0641 return -EINVAL;
0642
0643 err = check_header(skb, nh_ofs + length);
0644 if (unlikely(err))
0645 return err;
0646
0647 nh = nsh_hdr(skb);
0648 key->nsh.base.flags = nsh_get_flags(nh);
0649 key->nsh.base.ttl = nsh_get_ttl(nh);
0650 key->nsh.base.mdtype = nh->mdtype;
0651 key->nsh.base.np = nh->np;
0652 key->nsh.base.path_hdr = nh->path_hdr;
0653 switch (key->nsh.base.mdtype) {
0654 case NSH_M_TYPE1:
0655 if (length != NSH_M_TYPE1_LEN)
0656 return -EINVAL;
0657 memcpy(key->nsh.context, nh->md1.context,
0658 sizeof(nh->md1));
0659 break;
0660 case NSH_M_TYPE2:
0661 memset(key->nsh.context, 0,
0662 sizeof(nh->md1));
0663 break;
0664 default:
0665 return -EINVAL;
0666 }
0667
0668 return 0;
0669 }
0670
0671
0672
0673
0674
0675
0676
0677
0678
0679 static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
0680 {
0681 int error;
0682
0683
0684 if (key->eth.type == htons(ETH_P_IP)) {
0685 struct iphdr *nh;
0686 __be16 offset;
0687
0688 error = check_iphdr(skb);
0689 if (unlikely(error)) {
0690 memset(&key->ip, 0, sizeof(key->ip));
0691 memset(&key->ipv4, 0, sizeof(key->ipv4));
0692 if (error == -EINVAL) {
0693 skb->transport_header = skb->network_header;
0694 error = 0;
0695 }
0696 return error;
0697 }
0698
0699 nh = ip_hdr(skb);
0700 key->ipv4.addr.src = nh->saddr;
0701 key->ipv4.addr.dst = nh->daddr;
0702
0703 key->ip.proto = nh->protocol;
0704 key->ip.tos = nh->tos;
0705 key->ip.ttl = nh->ttl;
0706
0707 offset = nh->frag_off & htons(IP_OFFSET);
0708 if (offset) {
0709 key->ip.frag = OVS_FRAG_TYPE_LATER;
0710 memset(&key->tp, 0, sizeof(key->tp));
0711 return 0;
0712 }
0713 if (nh->frag_off & htons(IP_MF) ||
0714 skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
0715 key->ip.frag = OVS_FRAG_TYPE_FIRST;
0716 else
0717 key->ip.frag = OVS_FRAG_TYPE_NONE;
0718
0719
0720 if (key->ip.proto == IPPROTO_TCP) {
0721 if (tcphdr_ok(skb)) {
0722 struct tcphdr *tcp = tcp_hdr(skb);
0723 key->tp.src = tcp->source;
0724 key->tp.dst = tcp->dest;
0725 key->tp.flags = TCP_FLAGS_BE16(tcp);
0726 } else {
0727 memset(&key->tp, 0, sizeof(key->tp));
0728 }
0729
0730 } else if (key->ip.proto == IPPROTO_UDP) {
0731 if (udphdr_ok(skb)) {
0732 struct udphdr *udp = udp_hdr(skb);
0733 key->tp.src = udp->source;
0734 key->tp.dst = udp->dest;
0735 } else {
0736 memset(&key->tp, 0, sizeof(key->tp));
0737 }
0738 } else if (key->ip.proto == IPPROTO_SCTP) {
0739 if (sctphdr_ok(skb)) {
0740 struct sctphdr *sctp = sctp_hdr(skb);
0741 key->tp.src = sctp->source;
0742 key->tp.dst = sctp->dest;
0743 } else {
0744 memset(&key->tp, 0, sizeof(key->tp));
0745 }
0746 } else if (key->ip.proto == IPPROTO_ICMP) {
0747 if (icmphdr_ok(skb)) {
0748 struct icmphdr *icmp = icmp_hdr(skb);
0749
0750
0751
0752 key->tp.src = htons(icmp->type);
0753 key->tp.dst = htons(icmp->code);
0754 } else {
0755 memset(&key->tp, 0, sizeof(key->tp));
0756 }
0757 }
0758
0759 } else if (key->eth.type == htons(ETH_P_ARP) ||
0760 key->eth.type == htons(ETH_P_RARP)) {
0761 struct arp_eth_header *arp;
0762 bool arp_available = arphdr_ok(skb);
0763
0764 arp = (struct arp_eth_header *)skb_network_header(skb);
0765
0766 if (arp_available &&
0767 arp->ar_hrd == htons(ARPHRD_ETHER) &&
0768 arp->ar_pro == htons(ETH_P_IP) &&
0769 arp->ar_hln == ETH_ALEN &&
0770 arp->ar_pln == 4) {
0771
0772
0773 if (ntohs(arp->ar_op) <= 0xff)
0774 key->ip.proto = ntohs(arp->ar_op);
0775 else
0776 key->ip.proto = 0;
0777
0778 memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src));
0779 memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
0780 ether_addr_copy(key->ipv4.arp.sha, arp->ar_sha);
0781 ether_addr_copy(key->ipv4.arp.tha, arp->ar_tha);
0782 } else {
0783 memset(&key->ip, 0, sizeof(key->ip));
0784 memset(&key->ipv4, 0, sizeof(key->ipv4));
0785 }
0786 } else if (eth_p_mpls(key->eth.type)) {
0787 u8 label_count = 1;
0788
0789 memset(&key->mpls, 0, sizeof(key->mpls));
0790 skb_set_inner_network_header(skb, skb->mac_len);
0791 while (1) {
0792 __be32 lse;
0793
0794 error = check_header(skb, skb->mac_len +
0795 label_count * MPLS_HLEN);
0796 if (unlikely(error))
0797 return 0;
0798
0799 memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN);
0800
0801 if (label_count <= MPLS_LABEL_DEPTH)
0802 memcpy(&key->mpls.lse[label_count - 1], &lse,
0803 MPLS_HLEN);
0804
0805 skb_set_inner_network_header(skb, skb->mac_len +
0806 label_count * MPLS_HLEN);
0807 if (lse & htonl(MPLS_LS_S_MASK))
0808 break;
0809
0810 label_count++;
0811 }
0812 if (label_count > MPLS_LABEL_DEPTH)
0813 label_count = MPLS_LABEL_DEPTH;
0814
0815 key->mpls.num_labels_mask = GENMASK(label_count - 1, 0);
0816 } else if (key->eth.type == htons(ETH_P_IPV6)) {
0817 int nh_len;
0818
0819 nh_len = parse_ipv6hdr(skb, key);
0820 if (unlikely(nh_len < 0)) {
0821 switch (nh_len) {
0822 case -EINVAL:
0823 memset(&key->ip, 0, sizeof(key->ip));
0824 memset(&key->ipv6.addr, 0, sizeof(key->ipv6.addr));
0825 fallthrough;
0826 case -EPROTO:
0827 skb->transport_header = skb->network_header;
0828 error = 0;
0829 break;
0830 default:
0831 error = nh_len;
0832 }
0833 return error;
0834 }
0835
0836 if (key->ip.frag == OVS_FRAG_TYPE_LATER) {
0837 memset(&key->tp, 0, sizeof(key->tp));
0838 return 0;
0839 }
0840 if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
0841 key->ip.frag = OVS_FRAG_TYPE_FIRST;
0842
0843
0844 if (key->ip.proto == NEXTHDR_TCP) {
0845 if (tcphdr_ok(skb)) {
0846 struct tcphdr *tcp = tcp_hdr(skb);
0847 key->tp.src = tcp->source;
0848 key->tp.dst = tcp->dest;
0849 key->tp.flags = TCP_FLAGS_BE16(tcp);
0850 } else {
0851 memset(&key->tp, 0, sizeof(key->tp));
0852 }
0853 } else if (key->ip.proto == NEXTHDR_UDP) {
0854 if (udphdr_ok(skb)) {
0855 struct udphdr *udp = udp_hdr(skb);
0856 key->tp.src = udp->source;
0857 key->tp.dst = udp->dest;
0858 } else {
0859 memset(&key->tp, 0, sizeof(key->tp));
0860 }
0861 } else if (key->ip.proto == NEXTHDR_SCTP) {
0862 if (sctphdr_ok(skb)) {
0863 struct sctphdr *sctp = sctp_hdr(skb);
0864 key->tp.src = sctp->source;
0865 key->tp.dst = sctp->dest;
0866 } else {
0867 memset(&key->tp, 0, sizeof(key->tp));
0868 }
0869 } else if (key->ip.proto == NEXTHDR_ICMP) {
0870 if (icmp6hdr_ok(skb)) {
0871 error = parse_icmpv6(skb, key, nh_len);
0872 if (error)
0873 return error;
0874 } else {
0875 memset(&key->tp, 0, sizeof(key->tp));
0876 }
0877 }
0878 } else if (key->eth.type == htons(ETH_P_NSH)) {
0879 error = parse_nsh(skb, key);
0880 if (error)
0881 return error;
0882 }
0883 return 0;
0884 }
0885
0886
0887
0888
0889
0890
0891
0892
0893
0894
0895
0896
0897
0898
0899
0900
0901
0902
0903
0904
0905
0906
0907
0908
0909
0910
0911 static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
0912 {
0913 struct ethhdr *eth;
0914
0915
0916 key->tp.flags = 0;
0917
0918 skb_reset_mac_header(skb);
0919
0920
0921 clear_vlan(key);
0922 if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) {
0923 if (unlikely(eth_type_vlan(skb->protocol)))
0924 return -EINVAL;
0925
0926 skb_reset_network_header(skb);
0927 key->eth.type = skb->protocol;
0928 } else {
0929 eth = eth_hdr(skb);
0930 ether_addr_copy(key->eth.src, eth->h_source);
0931 ether_addr_copy(key->eth.dst, eth->h_dest);
0932
0933 __skb_pull(skb, 2 * ETH_ALEN);
0934
0935
0936
0937
0938 if (unlikely(parse_vlan(skb, key)))
0939 return -ENOMEM;
0940
0941 key->eth.type = parse_ethertype(skb);
0942 if (unlikely(key->eth.type == htons(0)))
0943 return -ENOMEM;
0944
0945
0946
0947
0948
0949 if (key->eth.cvlan.tci & htons(VLAN_CFI_MASK))
0950 skb->protocol = key->eth.cvlan.tpid;
0951 else
0952 skb->protocol = key->eth.type;
0953
0954 skb_reset_network_header(skb);
0955 __skb_push(skb, skb->data - skb_mac_header(skb));
0956 }
0957
0958 skb_reset_mac_len(skb);
0959
0960
0961 return key_extract_l3l4(skb, key);
0962 }
0963
0964
0965
0966
/*
 * Re-extract only the network and transport layer part of @key from @skb.
 * Thin exported wrapper around key_extract_l3l4(); returns its result.
 */
int ovs_flow_key_update_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
{
	int err = key_extract_l3l4(skb, key);

	return err;
}
0971
0972 int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key)
0973 {
0974 int res;
0975
0976 res = key_extract(skb, key);
0977 if (!res)
0978 key->mac_proto &= ~SW_FLOW_KEY_INVALID;
0979
0980 return res;
0981 }
0982
0983 static int key_extract_mac_proto(struct sk_buff *skb)
0984 {
0985 switch (skb->dev->type) {
0986 case ARPHRD_ETHER:
0987 return MAC_PROTO_ETHERNET;
0988 case ARPHRD_NONE:
0989 if (skb->protocol == htons(ETH_P_TEB))
0990 return MAC_PROTO_ETHERNET;
0991 return MAC_PROTO_NONE;
0992 }
0993 WARN_ON_ONCE(1);
0994 return -EINVAL;
0995 }
0996
0997 int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
0998 struct sk_buff *skb, struct sw_flow_key *key)
0999 {
1000 #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
1001 struct tc_skb_ext *tc_ext;
1002 #endif
1003 bool post_ct = false, post_ct_snat = false, post_ct_dnat = false;
1004 int res, err;
1005 u16 zone = 0;
1006
1007
1008 if (tun_info) {
1009 key->tun_proto = ip_tunnel_info_af(tun_info);
1010 memcpy(&key->tun_key, &tun_info->key, sizeof(key->tun_key));
1011
1012 if (tun_info->options_len) {
1013 BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) *
1014 8)) - 1
1015 > sizeof(key->tun_opts));
1016
1017 ip_tunnel_info_opts_get(TUN_METADATA_OPTS(key, tun_info->options_len),
1018 tun_info);
1019 key->tun_opts_len = tun_info->options_len;
1020 } else {
1021 key->tun_opts_len = 0;
1022 }
1023 } else {
1024 key->tun_proto = 0;
1025 key->tun_opts_len = 0;
1026 memset(&key->tun_key, 0, sizeof(key->tun_key));
1027 }
1028
1029 key->phy.priority = skb->priority;
1030 key->phy.in_port = OVS_CB(skb)->input_vport->port_no;
1031 key->phy.skb_mark = skb->mark;
1032 key->ovs_flow_hash = 0;
1033 res = key_extract_mac_proto(skb);
1034 if (res < 0)
1035 return res;
1036 key->mac_proto = res;
1037
1038 #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
1039 if (tc_skb_ext_tc_enabled()) {
1040 tc_ext = skb_ext_find(skb, TC_SKB_EXT);
1041 key->recirc_id = tc_ext ? tc_ext->chain : 0;
1042 OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0;
1043 post_ct = tc_ext ? tc_ext->post_ct : false;
1044 post_ct_snat = post_ct ? tc_ext->post_ct_snat : false;
1045 post_ct_dnat = post_ct ? tc_ext->post_ct_dnat : false;
1046 zone = post_ct ? tc_ext->zone : 0;
1047 } else {
1048 key->recirc_id = 0;
1049 }
1050 #else
1051 key->recirc_id = 0;
1052 #endif
1053
1054 err = key_extract(skb, key);
1055 if (!err) {
1056 ovs_ct_fill_key(skb, key, post_ct);
1057 if (post_ct) {
1058 if (!skb_get_nfct(skb)) {
1059 key->ct_zone = zone;
1060 } else {
1061 if (!post_ct_dnat)
1062 key->ct_state &= ~OVS_CS_F_DST_NAT;
1063 if (!post_ct_snat)
1064 key->ct_state &= ~OVS_CS_F_SRC_NAT;
1065 }
1066 }
1067 }
1068 return err;
1069 }
1070
1071 int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr,
1072 struct sk_buff *skb,
1073 struct sw_flow_key *key, bool log)
1074 {
1075 const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
1076 u64 attrs = 0;
1077 int err;
1078
1079 err = parse_flow_nlattrs(attr, a, &attrs, log);
1080 if (err)
1081 return -EINVAL;
1082
1083
1084 err = ovs_nla_get_flow_metadata(net, a, attrs, key, log);
1085 if (err)
1086 return err;
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097 skb->protocol = key->eth.type;
1098 err = key_extract(skb, key);
1099 if (err)
1100 return err;
1101
1102
1103
1104
1105
1106 if (attrs & (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4) &&
1107 key->eth.type != htons(ETH_P_IP))
1108 return -EINVAL;
1109 if (attrs & (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6) &&
1110 (key->eth.type != htons(ETH_P_IPV6) ||
1111 sw_flow_key_is_nd(key)))
1112 return -EINVAL;
1113
1114 return 0;
1115 }