// SPDX-License-Identifier: GPL-2.0+
/*
 *  IPv6 IOAM Lightweight Tunnel implementation
 *
 *  Author:
 *  Justin Iurman <justin.iurman@uliege.be>
 */

#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/net.h>
#include <linux/in6.h>
#include <linux/ioam6.h>
#include <linux/ioam6_iptunnel.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/lwtunnel.h>
#include <net/ioam6.h>
#include <net/netlink.h>
#include <net/ipv6.h>
#include <net/dst_cache.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>

#define IOAM6_MASK_SHORT_FIELDS 0xff100000
#define IOAM6_MASK_WIDE_FIELDS 0xe00000

struct ioam6_lwt_encap {
	struct ipv6_hopopt_hdr eh;
	u8 pad[2];			/* 2-octet padding for 4n-alignment */
	struct ioam6_hdr ioamh;
	struct ioam6_trace_hdr traceh;
} __packed;

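/* Insertion frequency "k over n": IOAM data is inserted in only k out of
 * every n packets that traverse the tunnel (see the counter check in
 * ioam6_output()).
 */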
struct ioam6_lwt_freq {
	u32 k;
	u32 n;
};

struct ioam6_lwt {
	struct dst_cache cache;
	struct ioam6_lwt_freq freq;
	atomic_t pkt_cnt;
	u8 mode;
	struct in6_addr tundst;
	struct ioam6_lwt_encap tuninfo;
};

static struct netlink_range_validation freq_range = {
	.min = IOAM6_IPTUNNEL_FREQ_MIN,
	.max = IOAM6_IPTUNNEL_FREQ_MAX,
};

static struct ioam6_lwt *ioam6_lwt_state(struct lwtunnel_state *lwt)
{
	return (struct ioam6_lwt *)lwt->data;
}

static struct ioam6_lwt_encap *ioam6_lwt_info(struct lwtunnel_state *lwt)
{
	return &ioam6_lwt_state(lwt)->tuninfo;
}

static struct ioam6_trace_hdr *ioam6_lwt_trace(struct lwtunnel_state *lwt)
{
	return &(ioam6_lwt_state(lwt)->tuninfo.traceh);
}

static const struct nla_policy ioam6_iptunnel_policy[IOAM6_IPTUNNEL_MAX + 1] = {
	[IOAM6_IPTUNNEL_FREQ_K] = NLA_POLICY_FULL_RANGE(NLA_U32, &freq_range),
	[IOAM6_IPTUNNEL_FREQ_N] = NLA_POLICY_FULL_RANGE(NLA_U32, &freq_range),
	[IOAM6_IPTUNNEL_MODE]	= NLA_POLICY_RANGE(NLA_U8,
						   IOAM6_IPTUNNEL_MODE_MIN,
						   IOAM6_IPTUNNEL_MODE_MAX),
	[IOAM6_IPTUNNEL_DST]	= NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
	[IOAM6_IPTUNNEL_TRACE]	= NLA_POLICY_EXACT_LEN(sizeof(struct ioam6_trace_hdr)),
};

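/* Reject a trace header with an empty type or an invalid remaining length,
 * as well as any of the unsupported trace-type bits 12-21. On success,
 * pre-compute "nodelen": the per-node data length, in 4-octet units, derived
 * from the short (4-octet) and wide (8-octet) fields enabled in the type.
 */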
static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace)
{
	u32 fields;

	if (!trace->type_be32 || !trace->remlen ||
	    trace->remlen > IOAM6_TRACE_DATA_SIZE_MAX / 4 ||
	    trace->type.bit12 | trace->type.bit13 | trace->type.bit14 |
	    trace->type.bit15 | trace->type.bit16 | trace->type.bit17 |
	    trace->type.bit18 | trace->type.bit19 | trace->type.bit20 |
	    trace->type.bit21)
		return false;

	trace->nodelen = 0;
	fields = be32_to_cpu(trace->type_be32);

	trace->nodelen += hweight32(fields & IOAM6_MASK_SHORT_FIELDS)
				* (sizeof(__be32) / 4);
	trace->nodelen += hweight32(fields & IOAM6_MASK_WIDE_FIELDS)
				* (sizeof(__be64) / 4);

	return true;
}

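/* Build the lwtunnel state from netlink attributes: parse and validate the
 * insertion frequency, mode, tunnel destination and trace, then pre-build
 * the encapsulation (Hop-by-Hop header, PadN, IOAM option and trace) so the
 * output path only has to copy it into each packet.
 */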
static int ioam6_build_state(struct net *net, struct nlattr *nla,
			     unsigned int family, const void *cfg,
			     struct lwtunnel_state **ts,
			     struct netlink_ext_ack *extack)
{
	struct nlattr *tb[IOAM6_IPTUNNEL_MAX + 1];
	struct ioam6_lwt_encap *tuninfo;
	struct ioam6_trace_hdr *trace;
	struct lwtunnel_state *lwt;
	struct ioam6_lwt *ilwt;
	int len_aligned, err;
	u32 freq_k, freq_n;
	u8 mode;

	if (family != AF_INET6)
		return -EINVAL;

	err = nla_parse_nested(tb, IOAM6_IPTUNNEL_MAX, nla,
			       ioam6_iptunnel_policy, extack);
	if (err < 0)
		return err;

	if ((!tb[IOAM6_IPTUNNEL_FREQ_K] && tb[IOAM6_IPTUNNEL_FREQ_N]) ||
	    (tb[IOAM6_IPTUNNEL_FREQ_K] && !tb[IOAM6_IPTUNNEL_FREQ_N])) {
		NL_SET_ERR_MSG(extack, "freq: missing parameter");
		return -EINVAL;
	} else if (!tb[IOAM6_IPTUNNEL_FREQ_K] && !tb[IOAM6_IPTUNNEL_FREQ_N]) {
		freq_k = IOAM6_IPTUNNEL_FREQ_MIN;
		freq_n = IOAM6_IPTUNNEL_FREQ_MIN;
	} else {
		freq_k = nla_get_u32(tb[IOAM6_IPTUNNEL_FREQ_K]);
		freq_n = nla_get_u32(tb[IOAM6_IPTUNNEL_FREQ_N]);

		if (freq_k > freq_n) {
			NL_SET_ERR_MSG(extack, "freq: k > n is forbidden");
			return -EINVAL;
		}
	}

	if (!tb[IOAM6_IPTUNNEL_MODE])
		mode = IOAM6_IPTUNNEL_MODE_INLINE;
	else
		mode = nla_get_u8(tb[IOAM6_IPTUNNEL_MODE]);

	if (!tb[IOAM6_IPTUNNEL_DST] && mode != IOAM6_IPTUNNEL_MODE_INLINE) {
		NL_SET_ERR_MSG(extack, "this mode needs a tunnel destination");
		return -EINVAL;
	}

	if (!tb[IOAM6_IPTUNNEL_TRACE]) {
		NL_SET_ERR_MSG(extack, "missing trace");
		return -EINVAL;
	}

	trace = nla_data(tb[IOAM6_IPTUNNEL_TRACE]);
	if (!ioam6_validate_trace_hdr(trace)) {
		NL_SET_ERR_MSG_ATTR(extack, tb[IOAM6_IPTUNNEL_TRACE],
				    "invalid trace validation");
		return -EINVAL;
	}

	/* The trace data is padded to keep the option 8-octet aligned */
	len_aligned = ALIGN(trace->remlen * 4, 8);
	lwt = lwtunnel_state_alloc(sizeof(*ilwt) + len_aligned);
	if (!lwt)
		return -ENOMEM;

	ilwt = ioam6_lwt_state(lwt);
	err = dst_cache_init(&ilwt->cache, GFP_ATOMIC);
	if (err) {
		kfree(lwt);
		return err;
	}

	atomic_set(&ilwt->pkt_cnt, 0);
	ilwt->freq.k = freq_k;
	ilwt->freq.n = freq_n;

	ilwt->mode = mode;
	if (tb[IOAM6_IPTUNNEL_DST])
		ilwt->tundst = nla_get_in6_addr(tb[IOAM6_IPTUNNEL_DST]);

	tuninfo = ioam6_lwt_info(lwt);
	/* Hop-by-Hop hdrlen is in 8-octet units, excluding the first 8 octets */
	tuninfo->eh.hdrlen = ((sizeof(*tuninfo) + len_aligned) >> 3) - 1;
	tuninfo->pad[0] = IPV6_TLV_PADN;
	tuninfo->ioamh.type = IOAM6_TYPE_PREALLOC;
	tuninfo->ioamh.opt_type = IPV6_TLV_IOAM;
	tuninfo->ioamh.opt_len = sizeof(tuninfo->ioamh) - 2 + sizeof(*trace)
					+ trace->remlen * 4;

	memcpy(&tuninfo->traceh, trace, sizeof(*trace));

	/* Fill the 8-octet alignment gap, if any, with a PadN option */
	if (len_aligned - trace->remlen * 4) {
		tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PADN;
		tuninfo->traceh.data[trace->remlen * 4 + 1] = 2;
	}

	lwt->type = LWTUNNEL_ENCAP_IOAM6;
	lwt->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;

	*ts = lwt;

	return 0;
}

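/* Fill the pre-allocated trace with node data, if its IOAM namespace is
 * known on this node.
 */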
static int ioam6_do_fill(struct net *net, struct sk_buff *skb)
{
	struct ioam6_trace_hdr *trace;
	struct ioam6_namespace *ns;

	/* Skip the Hop-by-Hop header, its 2-octet PadN and the IOAM option
	 * header to reach the pre-allocated trace header.
	 */
	trace = (struct ioam6_trace_hdr *)(skb_transport_header(skb)
					   + sizeof(struct ipv6_hopopt_hdr) + 2
					   + sizeof(struct ioam6_hdr));

	ns = ioam6_namespace(net, trace->namespace_id);
	if (ns)
		ioam6_fill_trace_data(skb, ns, trace, false);

	return 0;
}

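/* Inline insertion: grow the skb headroom and slide the original IPv6 header
 * forward to make room for the pre-allocated Hop-by-Hop option, which is
 * then chained in via NEXTHDR_HOP.
 */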
static int ioam6_do_inline(struct net *net, struct sk_buff *skb,
			   struct ioam6_lwt_encap *tuninfo)
{
	struct ipv6hdr *oldhdr, *hdr;
	int hdrlen, err;

	hdrlen = (tuninfo->eh.hdrlen + 1) << 3;

	err = skb_cow_head(skb, hdrlen + skb->mac_len);
	if (unlikely(err))
		return err;

	oldhdr = ipv6_hdr(skb);
	skb_pull(skb, sizeof(*oldhdr));
	skb_postpull_rcsum(skb, skb_network_header(skb), sizeof(*oldhdr));

	skb_push(skb, sizeof(*oldhdr) + hdrlen);
	skb_reset_network_header(skb);
	skb_mac_header_rebuild(skb);

	hdr = ipv6_hdr(skb);
	memmove(hdr, oldhdr, sizeof(*oldhdr));
	tuninfo->eh.nexthdr = hdr->nexthdr;

	skb_set_transport_header(skb, sizeof(*hdr));
	skb_postpush_rcsum(skb, hdr, sizeof(*hdr) + hdrlen);

	memcpy(skb_transport_header(skb), (u8 *)tuninfo, hdrlen);

	hdr->nexthdr = NEXTHDR_HOP;
	hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr));

	return ioam6_do_fill(net, skb);
}

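/* Encapsulation: prepend an outer IPv6 header followed by the pre-allocated
 * Hop-by-Hop option. The outer destination is the tunnel destination; the
 * outer source is selected from the egress device.
 */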
static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
			  struct ioam6_lwt_encap *tuninfo,
			  struct in6_addr *tundst)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr, *inner_hdr;
	int hdrlen, len, err;

	hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
	len = sizeof(*hdr) + hdrlen;

	err = skb_cow_head(skb, len + skb->mac_len);
	if (unlikely(err))
		return err;

	inner_hdr = ipv6_hdr(skb);

	skb_push(skb, len);
	skb_reset_network_header(skb);
	skb_mac_header_rebuild(skb);
	skb_set_transport_header(skb, sizeof(*hdr));

	tuninfo->eh.nexthdr = NEXTHDR_IPV6;
	memcpy(skb_transport_header(skb), (u8 *)tuninfo, hdrlen);

	hdr = ipv6_hdr(skb);
	memcpy(hdr, inner_hdr, sizeof(*hdr));

	hdr->nexthdr = NEXTHDR_HOP;
	hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr));
	hdr->daddr = *tundst;
	ipv6_dev_get_saddr(net, dst->dev, &hdr->daddr,
			   IPV6_PREFER_SRC_PUBLIC, &hdr->saddr);

	skb_postpush_rcsum(skb, hdr, len);

	return ioam6_do_fill(net, skb);
}

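/* Output path: apply the "k over n" insertion frequency, run the configured
 * mode and, if encapsulation changed the destination address, re-route the
 * packet through the per-state dst cache.
 */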
static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct in6_addr orig_daddr;
	struct ioam6_lwt *ilwt;
	int err = -EINVAL;
	u32 pkt_cnt;

	if (skb->protocol != htons(ETH_P_IPV6))
		goto drop;

	ilwt = ioam6_lwt_state(dst->lwtstate);

	/* Check for insertion frequency (i.e., "k over n" insertions) */
	pkt_cnt = atomic_fetch_inc(&ilwt->pkt_cnt);
	if (pkt_cnt % ilwt->freq.n >= ilwt->freq.k)
		goto out;

	orig_daddr = ipv6_hdr(skb)->daddr;

	switch (ilwt->mode) {
	case IOAM6_IPTUNNEL_MODE_INLINE:
do_inline:
		/* Direct insertion - if there is no Hop-by-Hop yet */
		if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP)
			goto out;

		err = ioam6_do_inline(net, skb, &ilwt->tuninfo);
		if (unlikely(err))
			goto drop;

		break;
	case IOAM6_IPTUNNEL_MODE_ENCAP:
do_encap:
		/* Encapsulation (ip6ip6) */
		err = ioam6_do_encap(net, skb, &ilwt->tuninfo, &ilwt->tundst);
		if (unlikely(err))
			goto drop;

		break;
	case IOAM6_IPTUNNEL_MODE_AUTO:
		/* Automatic (RFC 8200 compliant):
		 *  - local packets -> INLINE mode
		 *  - in-transit packets -> ENCAP mode
		 */
		if (!skb->dev)
			goto do_inline;

		goto do_encap;
	default:
		goto drop;
	}

	err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
	if (unlikely(err))
		goto drop;

	if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) {
		preempt_disable();
		dst = dst_cache_get(&ilwt->cache);
		preempt_enable();

		if (unlikely(!dst)) {
			struct ipv6hdr *hdr = ipv6_hdr(skb);
			struct flowi6 fl6;

			memset(&fl6, 0, sizeof(fl6));
			fl6.daddr = hdr->daddr;
			fl6.saddr = hdr->saddr;
			fl6.flowlabel = ip6_flowinfo(hdr);
			fl6.flowi6_mark = skb->mark;
			fl6.flowi6_proto = hdr->nexthdr;

			dst = ip6_route_output(net, NULL, &fl6);
			if (dst->error) {
				err = dst->error;
				dst_release(dst);
				goto drop;
			}

			preempt_disable();
			dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr);
			preempt_enable();
		}

		skb_dst_drop(skb);
		skb_dst_set(skb, dst);

		return dst_output(net, sk, skb);
	}
out:
	return dst->lwtstate->orig_output(net, sk, skb);
drop:
	kfree_skb(skb);
	return err;
}

static void ioam6_destroy_state(struct lwtunnel_state *lwt)
{
	dst_cache_destroy(&ioam6_lwt_state(lwt)->cache);
}

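/* Dump the tunnel configuration (frequency, mode, destination, trace) back
 * to user space via netlink.
 */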
static int ioam6_fill_encap_info(struct sk_buff *skb,
				 struct lwtunnel_state *lwtstate)
{
	struct ioam6_lwt *ilwt = ioam6_lwt_state(lwtstate);
	int err;

	err = nla_put_u32(skb, IOAM6_IPTUNNEL_FREQ_K, ilwt->freq.k);
	if (err)
		goto ret;

	err = nla_put_u32(skb, IOAM6_IPTUNNEL_FREQ_N, ilwt->freq.n);
	if (err)
		goto ret;

	err = nla_put_u8(skb, IOAM6_IPTUNNEL_MODE, ilwt->mode);
	if (err)
		goto ret;

	if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE) {
		err = nla_put_in6_addr(skb, IOAM6_IPTUNNEL_DST, &ilwt->tundst);
		if (err)
			goto ret;
	}

	err = nla_put(skb, IOAM6_IPTUNNEL_TRACE, sizeof(ilwt->tuninfo.traceh),
		      &ilwt->tuninfo.traceh);
ret:
	return err;
}

static int ioam6_encap_nlsize(struct lwtunnel_state *lwtstate)
{
	struct ioam6_lwt *ilwt = ioam6_lwt_state(lwtstate);
	int nlsize;

	nlsize = nla_total_size(sizeof(ilwt->freq.k)) +
		 nla_total_size(sizeof(ilwt->freq.n)) +
		 nla_total_size(sizeof(ilwt->mode)) +
		 nla_total_size(sizeof(ilwt->tuninfo.traceh));

	if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE)
		nlsize += nla_total_size(sizeof(ilwt->tundst));

	return nlsize;
}

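/* Two lwtunnel states compare equal (return 0) only if frequency, mode,
 * tunnel destination (when relevant) and IOAM namespace all match.
 */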
static int ioam6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
{
	struct ioam6_trace_hdr *trace_a = ioam6_lwt_trace(a);
	struct ioam6_trace_hdr *trace_b = ioam6_lwt_trace(b);
	struct ioam6_lwt *ilwt_a = ioam6_lwt_state(a);
	struct ioam6_lwt *ilwt_b = ioam6_lwt_state(b);

	return (ilwt_a->freq.k != ilwt_b->freq.k ||
		ilwt_a->freq.n != ilwt_b->freq.n ||
		ilwt_a->mode != ilwt_b->mode ||
		(ilwt_a->mode != IOAM6_IPTUNNEL_MODE_INLINE &&
		 !ipv6_addr_equal(&ilwt_a->tundst, &ilwt_b->tundst)) ||
		trace_a->namespace_id != trace_b->namespace_id);
}

static const struct lwtunnel_encap_ops ioam6_iptun_ops = {
	.build_state		= ioam6_build_state,
	.destroy_state		= ioam6_destroy_state,
	.output			= ioam6_output,
	.fill_encap		= ioam6_fill_encap_info,
	.get_encap_size		= ioam6_encap_nlsize,
	.cmp_encap		= ioam6_encap_cmp,
	.owner			= THIS_MODULE,
};

int __init ioam6_iptunnel_init(void)
{
	return lwtunnel_encap_add_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6);
}

void ioam6_iptunnel_exit(void)
{
	lwtunnel_encap_del_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6);
}