// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * xfrm_policy.c
 *
 * XFRM policy (IPsec Security Policy Database) handling.
 */
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/cache.h>
#include <linux/cpu.h>
#include <linux/audit.h>
#include <linux/rhashtable.h>
#include <linux/if_tunnel.h>
#include <net/dst.h>
#include <net/flow.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/ip.h>
#include <net/gre.h>
#if IS_ENABLED(CONFIG_IPV6_MIP6)
#include <net/mip6.h>
#endif
#ifdef CONFIG_XFRM_STATISTICS
#include <net/snmp.h>
#endif
#ifdef CONFIG_XFRM_ESPINTCP
#include <net/espintcp.h>
#endif

#include "xfrm_hash.h"

#define XFRM_QUEUE_TMO_MIN	((unsigned)(HZ/10))
#define XFRM_QUEUE_TMO_MAX	((unsigned)(60*HZ))
#define XFRM_MAX_QUEUE_LEN	100

struct xfrm_flo {
	struct dst_entry *dst_orig;
	u8 flags;
};

/* prefixes smaller than this are stored in lists, not trees. */
#define INEXACT_PREFIXLEN_IPV4	16
#define INEXACT_PREFIXLEN_IPV6	48

struct xfrm_pol_inexact_node {
	struct rb_node node;
	union {
		xfrm_address_t addr;
		struct rcu_head rcu;
	};
	u8 prefixlen;

	struct rb_root root;

	/* the policies matching this node, can be empty list */
	struct hlist_head hhead;
};

/* xfrm inexact policy search tree:
 *
 * Policies whose selectors are too coarse for the exact-match hash
 * tables are kept in per-(net, dir, type, family, if_id) bins.  Within
 * a bin, a policy sits on exactly one candidate list:
 *
 *  - bin->hhead:	both saddr and daddr are wildcards (or their
 *			prefixes are below INEXACT_PREFIXLEN_*)
 *  - a node in bin->root_d:	daddr is specific, saddr is a wildcard
 *  - a node in bin->root_s:	saddr is specific, daddr is a wildcard
 *  - a node in a root_d node's own subtree (node->root): both specific
 *
 * A lookup therefore collects at most four candidate lists
 * (see xfrm_policy_find_inexact_candidates) and picks the best match
 * by priority and insertion order (pol->pos).
 */

struct xfrm_pol_inexact_key {
	possible_net_t net;
	u32 if_id;
	u16 family;
	u8 dir, type;
};

struct xfrm_pol_inexact_bin {
	struct xfrm_pol_inexact_key k;
	struct rhash_head head;
	/* list containing '*:*' policies */
	struct hlist_head hhead;

	seqcount_spinlock_t count;
	/* tree sorted by daddr/prefix */
	struct rb_root root_d;

	/* tree sorted by saddr/prefix */
	struct rb_root root_s;

	/* slow path below */
	struct list_head inexact_bins;
	struct rcu_head rcu;
};

enum xfrm_pol_inexact_candidate_type {
	XFRM_POL_CAND_BOTH,
	XFRM_POL_CAND_SADDR,
	XFRM_POL_CAND_DADDR,
	XFRM_POL_CAND_ANY,

	XFRM_POL_CAND_MAX,
};

struct xfrm_pol_inexact_candidates {
	struct hlist_head *res[XFRM_POL_CAND_MAX];
};

static DEFINE_SPINLOCK(xfrm_if_cb_lock);
static struct xfrm_if_cb const __rcu *xfrm_if_cb __read_mostly;

static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
						__read_mostly;

static struct kmem_cache *xfrm_dst_cache __ro_after_init;

static struct rhashtable xfrm_policy_inexact_table;
static const struct rhashtable_params xfrm_pol_inexact_params;

static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr);
static int stale_bundle(struct dst_entry *dst);
static int xfrm_bundle_ok(struct xfrm_dst *xdst);
static void xfrm_policy_queue_process(struct timer_list *t);

static void __xfrm_policy_link(struct xfrm_policy *pol, int dir);
static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
						int dir);

static struct xfrm_pol_inexact_bin *
xfrm_policy_inexact_lookup(struct net *net, u8 type, u16 family, u8 dir,
			   u32 if_id);

static struct xfrm_pol_inexact_bin *
xfrm_policy_inexact_lookup_rcu(struct net *net,
			       u8 type, u16 family, u8 dir, u32 if_id);
static struct xfrm_policy *
xfrm_policy_insert_list(struct hlist_head *chain, struct xfrm_policy *policy,
			bool excl);
static void xfrm_policy_insert_inexact_list(struct hlist_head *chain,
					    struct xfrm_policy *policy);

static bool
xfrm_policy_find_inexact_candidates(struct xfrm_pol_inexact_candidates *cand,
				    struct xfrm_pol_inexact_bin *b,
				    const xfrm_address_t *saddr,
				    const xfrm_address_t *daddr);
static inline bool xfrm_pol_hold_rcu(struct xfrm_policy *policy)
{
	return refcount_inc_not_zero(&policy->refcnt);
}

static inline bool
__xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
{
	const struct flowi4 *fl4 = &fl->u.ip4;

	return  addr4_match(fl4->daddr, sel->daddr.a4, sel->prefixlen_d) &&
		addr4_match(fl4->saddr, sel->saddr.a4, sel->prefixlen_s) &&
		!((xfrm_flowi_dport(fl, &fl4->uli) ^ sel->dport) & sel->dport_mask) &&
		!((xfrm_flowi_sport(fl, &fl4->uli) ^ sel->sport) & sel->sport_mask) &&
		(fl4->flowi4_proto == sel->proto || !sel->proto) &&
		(fl4->flowi4_oif == sel->ifindex || !sel->ifindex);
}

static inline bool
__xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
{
	const struct flowi6 *fl6 = &fl->u.ip6;

	return  addr_match(&fl6->daddr, &sel->daddr, sel->prefixlen_d) &&
		addr_match(&fl6->saddr, &sel->saddr, sel->prefixlen_s) &&
		!((xfrm_flowi_dport(fl, &fl6->uli) ^ sel->dport) & sel->dport_mask) &&
		!((xfrm_flowi_sport(fl, &fl6->uli) ^ sel->sport) & sel->sport_mask) &&
		(fl6->flowi6_proto == sel->proto || !sel->proto) &&
		(fl6->flowi6_oif == sel->ifindex || !sel->ifindex);
}

bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl,
			 unsigned short family)
{
	switch (family) {
	case AF_INET:
		return __xfrm4_selector_match(sel, fl);
	case AF_INET6:
		return __xfrm6_selector_match(sel, fl);
	}
	return false;
}

static const struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
{
	const struct xfrm_policy_afinfo *afinfo;

	if (unlikely(family >= ARRAY_SIZE(xfrm_policy_afinfo)))
		return NULL;
	rcu_read_lock();
	afinfo = rcu_dereference(xfrm_policy_afinfo[family]);
	if (unlikely(!afinfo))
		rcu_read_unlock();
	return afinfo;
}

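/* Called with rcu_read_lock(). */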
static const struct xfrm_if_cb *xfrm_if_get_cb(void)
{
	return rcu_dereference(xfrm_if_cb);
}

struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif,
				    const xfrm_address_t *saddr,
				    const xfrm_address_t *daddr,
				    int family, u32 mark)
{
	const struct xfrm_policy_afinfo *afinfo;
	struct dst_entry *dst;

	afinfo = xfrm_policy_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return ERR_PTR(-EAFNOSUPPORT);

	dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr, mark);

	rcu_read_unlock();

	return dst;
}
EXPORT_SYMBOL(__xfrm_dst_lookup);

static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x,
						int tos, int oif,
						xfrm_address_t *prev_saddr,
						xfrm_address_t *prev_daddr,
						int family, u32 mark)
{
	struct net *net = xs_net(x);
	xfrm_address_t *saddr = &x->props.saddr;
	xfrm_address_t *daddr = &x->id.daddr;
	struct dst_entry *dst;

	if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) {
		saddr = x->coaddr;
		daddr = prev_daddr;
	}
	if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) {
		saddr = prev_saddr;
		daddr = x->coaddr;
	}

	dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family, mark);

	if (!IS_ERR(dst)) {
		if (prev_saddr != saddr)
			memcpy(prev_saddr, saddr, sizeof(*prev_saddr));
		if (prev_daddr != daddr)
			memcpy(prev_daddr, daddr, sizeof(*prev_daddr));
	}

	return dst;
}

static inline unsigned long make_jiffies(long secs)
{
	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
		return MAX_SCHEDULE_TIMEOUT-1;
	else
		return secs*HZ;
}

static void xfrm_policy_timer(struct timer_list *t)
{
	struct xfrm_policy *xp = from_timer(xp, t, timer);
	time64_t now = ktime_get_real_seconds();
	time64_t next = TIME64_MAX;
	int warn = 0;
	int dir;

	read_lock(&xp->lock);

	if (unlikely(xp->walk.dead))
		goto out;

	dir = xfrm_policy_id2dir(xp->index);

	if (xp->lft.hard_add_expires_seconds) {
		time64_t tmo = xp->lft.hard_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.hard_use_expires_seconds) {
		time64_t tmo = xp->lft.hard_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_add_expires_seconds) {
		time64_t tmo = xp->lft.soft_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_use_expires_seconds) {
		time64_t tmo = xp->lft.soft_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}

	if (warn)
		km_policy_expired(xp, dir, 0, 0);
	if (next != TIME64_MAX &&
	    !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
		xfrm_pol_hold(xp);

out:
	read_unlock(&xp->lock);
	xfrm_pol_put(xp);
	return;

expired:
	read_unlock(&xp->lock);
	if (!xfrm_policy_delete(xp, dir))
		km_policy_expired(xp, dir, 1, 0);
	xfrm_pol_put(xp);
}

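/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
 * SPD calls.
 */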
struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
{
	struct xfrm_policy *policy;

	policy = kzalloc(sizeof(struct xfrm_policy), gfp);

	if (policy) {
		write_pnet(&policy->xp_net, net);
		INIT_LIST_HEAD(&policy->walk.all);
		INIT_HLIST_NODE(&policy->bydst_inexact_list);
		INIT_HLIST_NODE(&policy->bydst);
		INIT_HLIST_NODE(&policy->byidx);
		rwlock_init(&policy->lock);
		refcount_set(&policy->refcnt, 1);
		skb_queue_head_init(&policy->polq.hold_queue);
		timer_setup(&policy->timer, xfrm_policy_timer, 0);
		timer_setup(&policy->polq.hold_timer,
			    xfrm_policy_queue_process, 0);
	}
	return policy;
}
EXPORT_SYMBOL(xfrm_policy_alloc);

static void xfrm_policy_destroy_rcu(struct rcu_head *head)
{
	struct xfrm_policy *policy = container_of(head, struct xfrm_policy, rcu);

	security_xfrm_policy_free(policy->security);
	kfree(policy);
}

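/* Destroy xfrm_policy: descendant resources must be released to this moment. */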
void xfrm_policy_destroy(struct xfrm_policy *policy)
{
	BUG_ON(!policy->walk.dead);

	if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer))
		BUG();

	call_rcu(&policy->rcu, xfrm_policy_destroy_rcu);
}
EXPORT_SYMBOL(xfrm_policy_destroy);

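/* Rule must be locked. Release descendant resources, announce
 * entry dead. The rule must be unlinked from lists to the moment.
 */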
static void xfrm_policy_kill(struct xfrm_policy *policy)
{
	write_lock_bh(&policy->lock);
	policy->walk.dead = 1;
	write_unlock_bh(&policy->lock);

	atomic_inc(&policy->genid);

	if (del_timer(&policy->polq.hold_timer))
		xfrm_pol_put(policy);
	skb_queue_purge(&policy->polq.hold_queue);

	if (del_timer(&policy->timer))
		xfrm_pol_put(policy);

	xfrm_pol_put(policy);
}

static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;

static inline unsigned int idx_hash(struct net *net, u32 index)
{
	return __idx_hash(index, net->xfrm.policy_idx_hmask);
}

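/* calculate policy hash thresholds */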
static void __get_hash_thresh(struct net *net,
			      unsigned short family, int dir,
			      u8 *dbits, u8 *sbits)
{
	switch (family) {
	case AF_INET:
		*dbits = net->xfrm.policy_bydst[dir].dbits4;
		*sbits = net->xfrm.policy_bydst[dir].sbits4;
		break;

	case AF_INET6:
		*dbits = net->xfrm.policy_bydst[dir].dbits6;
		*sbits = net->xfrm.policy_bydst[dir].sbits6;
		break;

	default:
		*dbits = 0;
		*sbits = 0;
	}
}

static struct hlist_head *policy_hash_bysel(struct net *net,
					    const struct xfrm_selector *sel,
					    unsigned short family, int dir)
{
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
	unsigned int hash;
	u8 dbits;
	u8 sbits;

	__get_hash_thresh(net, family, dir, &dbits, &sbits);
	hash = __sel_hash(sel, family, hmask, dbits, sbits);

	if (hash == hmask + 1)
		return NULL;

	return rcu_dereference_check(net->xfrm.policy_bydst[dir].table,
		     lockdep_is_held(&net->xfrm.xfrm_policy_lock)) + hash;
}

static struct hlist_head *policy_hash_direct(struct net *net,
					     const xfrm_address_t *daddr,
					     const xfrm_address_t *saddr,
					     unsigned short family, int dir)
{
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
	unsigned int hash;
	u8 dbits;
	u8 sbits;

	__get_hash_thresh(net, family, dir, &dbits, &sbits);
	hash = __addr_hash(daddr, saddr, family, hmask, dbits, sbits);

	return rcu_dereference_check(net->xfrm.policy_bydst[dir].table,
		     lockdep_is_held(&net->xfrm.xfrm_policy_lock)) + hash;
}

static void xfrm_dst_hash_transfer(struct net *net,
				   struct hlist_head *list,
				   struct hlist_head *ndsttable,
				   unsigned int nhashmask,
				   int dir)
{
	struct hlist_node *tmp, *entry0 = NULL;
	struct xfrm_policy *pol;
	unsigned int h0 = 0;
	u8 dbits;
	u8 sbits;

redo:
	hlist_for_each_entry_safe(pol, tmp, list, bydst) {
		unsigned int h;

		__get_hash_thresh(net, pol->family, dir, &dbits, &sbits);
		h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
				pol->family, nhashmask, dbits, sbits);
		if (!entry0) {
			hlist_del_rcu(&pol->bydst);
			hlist_add_head_rcu(&pol->bydst, ndsttable + h);
			h0 = h;
		} else {
			if (h != h0)
				continue;
			hlist_del_rcu(&pol->bydst);
			hlist_add_behind_rcu(&pol->bydst, entry0);
		}
		entry0 = &pol->bydst;
	}
	if (!hlist_empty(list)) {
		entry0 = NULL;
		goto redo;
	}
}

static void xfrm_idx_hash_transfer(struct hlist_head *list,
				   struct hlist_head *nidxtable,
				   unsigned int nhashmask)
{
	struct hlist_node *tmp;
	struct xfrm_policy *pol;

	hlist_for_each_entry_safe(pol, tmp, list, byidx) {
		unsigned int h;

		h = __idx_hash(pol->index, nhashmask);
		hlist_add_head(&pol->byidx, nidxtable+h);
	}
}

static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
{
	return ((old_hmask + 1) << 1) - 1;
}

static void xfrm_bydst_resize(struct net *net, int dir)
{
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
	struct hlist_head *ndst = xfrm_hash_alloc(nsize);
	struct hlist_head *odst;
	int i;

	if (!ndst)
		return;

	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
	write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation);

	odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table,
				lockdep_is_held(&net->xfrm.xfrm_policy_lock));

	for (i = hmask; i >= 0; i--)
		xfrm_dst_hash_transfer(net, odst + i, ndst, nhashmask, dir);

	rcu_assign_pointer(net->xfrm.policy_bydst[dir].table, ndst);
	net->xfrm.policy_bydst[dir].hmask = nhashmask;

	write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation);
	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);

	synchronize_rcu();

	xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
}

static void xfrm_byidx_resize(struct net *net, int total)
{
	unsigned int hmask = net->xfrm.policy_idx_hmask;
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
	struct hlist_head *oidx = net->xfrm.policy_byidx;
	struct hlist_head *nidx = xfrm_hash_alloc(nsize);
	int i;

	if (!nidx)
		return;

	spin_lock_bh(&net->xfrm.xfrm_policy_lock);

	for (i = hmask; i >= 0; i--)
		xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);

	net->xfrm.policy_byidx = nidx;
	net->xfrm.policy_idx_hmask = nhashmask;

	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);

	xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
}

static inline int xfrm_bydst_should_resize(struct net *net, int dir, int *total)
{
	unsigned int cnt = net->xfrm.policy_count[dir];
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;

	if (total)
		*total += cnt;

	if ((hmask + 1) < xfrm_policy_hashmax &&
	    cnt > hmask)
		return 1;

	return 0;
}

static inline int xfrm_byidx_should_resize(struct net *net, int total)
{
	unsigned int hmask = net->xfrm.policy_idx_hmask;

	if ((hmask + 1) < xfrm_policy_hashmax &&
	    total > hmask)
		return 1;

	return 0;
}

void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si)
{
	si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN];
	si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT];
	si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD];
	si->inscnt = net->xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
	si->outscnt = net->xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
	si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
	si->spdhcnt = net->xfrm.policy_idx_hmask;
	si->spdhmcnt = xfrm_policy_hashmax;
}
EXPORT_SYMBOL(xfrm_spd_getinfo);

static DEFINE_MUTEX(hash_resize_mutex);
static void xfrm_hash_resize(struct work_struct *work)
{
	struct net *net = container_of(work, struct net, xfrm.policy_hash_work);
	int dir, total;

	mutex_lock(&hash_resize_mutex);

	total = 0;
	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		if (xfrm_bydst_should_resize(net, dir, &total))
			xfrm_bydst_resize(net, dir);
	}
	if (xfrm_byidx_should_resize(net, total))
		xfrm_byidx_resize(net, total);

	mutex_unlock(&hash_resize_mutex);
}

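/* Find the inexact bin for this policy's (net, dir, type, family, if_id)
 * key, allocating and inserting a new one if none exists yet.
 */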
static struct xfrm_pol_inexact_bin *
xfrm_policy_inexact_alloc_bin(const struct xfrm_policy *pol, u8 dir)
{
	struct xfrm_pol_inexact_bin *bin, *prev;
	struct xfrm_pol_inexact_key k = {
		.family = pol->family,
		.type = pol->type,
		.dir = dir,
		.if_id = pol->if_id,
	};
	struct net *net = xp_net(pol);

	lockdep_assert_held(&net->xfrm.xfrm_policy_lock);

	write_pnet(&k.net, net);
	bin = rhashtable_lookup_fast(&xfrm_policy_inexact_table, &k,
				     xfrm_pol_inexact_params);
	if (bin)
		return bin;

	bin = kzalloc(sizeof(*bin), GFP_ATOMIC);
	if (!bin)
		return NULL;

	bin->k = k;
	INIT_HLIST_HEAD(&bin->hhead);
	bin->root_d = RB_ROOT;
	bin->root_s = RB_ROOT;
	seqcount_spinlock_init(&bin->count, &net->xfrm.xfrm_policy_lock);

	prev = rhashtable_lookup_get_insert_key(&xfrm_policy_inexact_table,
						&bin->k, &bin->head,
						xfrm_pol_inexact_params);
	if (!prev) {
		list_add(&bin->inexact_bins, &net->xfrm.inexact_bins);
		return bin;
	}

	kfree(bin);

	return IS_ERR(prev) ? NULL : prev;
}

static bool xfrm_pol_inexact_addr_use_any_list(const xfrm_address_t *addr,
					       int family, u8 prefixlen)
{
	if (xfrm_addr_any(addr, family))
		return true;

	if (family == AF_INET6 && prefixlen < INEXACT_PREFIXLEN_IPV6)
		return true;

	if (family == AF_INET && prefixlen < INEXACT_PREFIXLEN_IPV4)
		return true;

	return false;
}

static bool
xfrm_policy_inexact_insert_use_any_list(const struct xfrm_policy *policy)
{
	const xfrm_address_t *addr;
	bool saddr_any, daddr_any;
	u8 prefixlen;

	addr = &policy->selector.saddr;
	prefixlen = policy->selector.prefixlen_s;

	saddr_any = xfrm_pol_inexact_addr_use_any_list(addr,
						       policy->family,
						       prefixlen);
	addr = &policy->selector.daddr;
	prefixlen = policy->selector.prefixlen_d;
	daddr_any = xfrm_pol_inexact_addr_use_any_list(addr,
						       policy->family,
						       prefixlen);
	return saddr_any && daddr_any;
}

static void xfrm_pol_inexact_node_init(struct xfrm_pol_inexact_node *node,
				       const xfrm_address_t *addr, u8 prefixlen)
{
	node->addr = *addr;
	node->prefixlen = prefixlen;
}

static struct xfrm_pol_inexact_node *
xfrm_pol_inexact_node_alloc(const xfrm_address_t *addr, u8 prefixlen)
{
	struct xfrm_pol_inexact_node *node;

	node = kzalloc(sizeof(*node), GFP_ATOMIC);
	if (node)
		xfrm_pol_inexact_node_init(node, addr, prefixlen);

	return node;
}

static int xfrm_policy_addr_delta(const xfrm_address_t *a,
				  const xfrm_address_t *b,
				  u8 prefixlen, u16 family)
{
	u32 ma, mb, mask;
	unsigned int pdw, pbi;
	int delta = 0;

	switch (family) {
	case AF_INET:
		if (prefixlen == 0)
			return 0;
		mask = ~0U << (32 - prefixlen);
		ma = ntohl(a->a4) & mask;
		mb = ntohl(b->a4) & mask;
		if (ma < mb)
			delta = -1;
		else if (ma > mb)
			delta = 1;
		break;
	case AF_INET6:
		pdw = prefixlen >> 5;
		pbi = prefixlen & 0x1f;

		if (pdw) {
			delta = memcmp(a->a6, b->a6, pdw << 2);
			if (delta)
				return delta;
		}
		if (pbi) {
			mask = ~0U << (32 - pbi);
			ma = ntohl(a->a6[pdw]) & mask;
			mb = ntohl(b->a6[pdw]) & mask;
			if (ma < mb)
				delta = -1;
			else if (ma > mb)
				delta = 1;
		}
		break;
	default:
		break;
	}

	return delta;
}

static void xfrm_policy_inexact_list_reinsert(struct net *net,
					      struct xfrm_pol_inexact_node *n,
					      u16 family)
{
	unsigned int matched_s, matched_d;
	struct xfrm_policy *policy, *p;

	matched_s = 0;
	matched_d = 0;

	list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
		struct hlist_node *newpos = NULL;
		bool matches_s, matches_d;

		if (!policy->bydst_reinsert)
			continue;

		WARN_ON_ONCE(policy->family != family);

		policy->bydst_reinsert = false;
		hlist_for_each_entry(p, &n->hhead, bydst) {
			if (policy->priority > p->priority)
				newpos = &p->bydst;
			else if (policy->priority == p->priority &&
				 policy->pos > p->pos)
				newpos = &p->bydst;
			else
				break;
		}

		if (newpos)
			hlist_add_behind_rcu(&policy->bydst, newpos);
		else
			hlist_add_head_rcu(&policy->bydst, &n->hhead);

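		/* paranoia checks follow.
		 * Check that the reinserted policy matches at least
		 * saddr or daddr for current node prefix.
		 *
		 * Matching both is fine, matching saddr in one policy
		 * (but not daddr) and then matching only daddr in another
		 * is a bug.
		 */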
		matches_s = xfrm_policy_addr_delta(&policy->selector.saddr,
						   &n->addr,
						   n->prefixlen,
						   family) == 0;
		matches_d = xfrm_policy_addr_delta(&policy->selector.daddr,
						   &n->addr,
						   n->prefixlen,
						   family) == 0;
		if (matches_s && matches_d)
			continue;

		WARN_ON_ONCE(!matches_s && !matches_d);
		if (matches_s)
			matched_s++;
		if (matches_d)
			matched_d++;
		WARN_ON_ONCE(matched_s && matched_d);
	}
}

static void xfrm_policy_inexact_node_reinsert(struct net *net,
					      struct xfrm_pol_inexact_node *n,
					      struct rb_root *new,
					      u16 family)
{
	struct xfrm_pol_inexact_node *node;
	struct rb_node **p, *parent;

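	/* we should not have another subtree here */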
	WARN_ON_ONCE(!RB_EMPTY_ROOT(&n->root));
restart:
	parent = NULL;
	p = &new->rb_node;
	while (*p) {
		u8 prefixlen;
		int delta;

		parent = *p;
		node = rb_entry(*p, struct xfrm_pol_inexact_node, node);

		prefixlen = min(node->prefixlen, n->prefixlen);

		delta = xfrm_policy_addr_delta(&n->addr, &node->addr,
					       prefixlen, family);
		if (delta < 0) {
			p = &parent->rb_left;
		} else if (delta > 0) {
			p = &parent->rb_right;
		} else {
			bool same_prefixlen = node->prefixlen == n->prefixlen;
			struct xfrm_policy *tmp;

			hlist_for_each_entry(tmp, &n->hhead, bydst) {
				tmp->bydst_reinsert = true;
				hlist_del_rcu(&tmp->bydst);
			}

			node->prefixlen = prefixlen;

			xfrm_policy_inexact_list_reinsert(net, node, family);

			if (same_prefixlen) {
				kfree_rcu(n, rcu);
				return;
			}

			rb_erase(*p, new);
			kfree_rcu(n, rcu);
			n = node;
			goto restart;
		}
	}

	rb_link_node_rcu(&n->node, parent, p);
	rb_insert_color(&n->node, new);
}

static void xfrm_policy_inexact_node_merge(struct net *net,
					   struct xfrm_pol_inexact_node *v,
					   struct xfrm_pol_inexact_node *n,
					   u16 family)
{
	struct xfrm_pol_inexact_node *node;
	struct xfrm_policy *tmp;
	struct rb_node *rnode;

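	/* Move the subtrees and policies of @v over to @n; the caller
	 * is responsible for freeing @v afterwards.
	 */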
	while ((rnode = rb_first(&v->root)) != NULL) {
		node = rb_entry(rnode, struct xfrm_pol_inexact_node, node);
		rb_erase(&node->node, &v->root);
		xfrm_policy_inexact_node_reinsert(net, node, &n->root,
						  family);
	}

	hlist_for_each_entry(tmp, &v->hhead, bydst) {
		tmp->bydst_reinsert = true;
		hlist_del_rcu(&tmp->bydst);
	}

	xfrm_policy_inexact_list_reinsert(net, n, family);
}

static struct xfrm_pol_inexact_node *
xfrm_policy_inexact_insert_node(struct net *net,
				struct rb_root *root,
				xfrm_address_t *addr,
				u16 family, u8 prefixlen, u8 dir)
{
	struct xfrm_pol_inexact_node *cached = NULL;
	struct rb_node **p, *parent = NULL;
	struct xfrm_pol_inexact_node *node;

	p = &root->rb_node;
	while (*p) {
		int delta;

		parent = *p;
		node = rb_entry(*p, struct xfrm_pol_inexact_node, node);

		delta = xfrm_policy_addr_delta(addr, &node->addr,
					       node->prefixlen,
					       family);
		if (delta == 0 && prefixlen >= node->prefixlen) {
			WARN_ON_ONCE(cached);
			return node;
		}

		if (delta < 0)
			p = &parent->rb_left;
		else
			p = &parent->rb_right;

		if (prefixlen < node->prefixlen) {
			delta = xfrm_policy_addr_delta(addr, &node->addr,
						       prefixlen,
						       family);
			if (delta)
				continue;

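			/* This node is a subnet of the new prefix. It needs
			 * to be removed and re-inserted with the smaller
			 * prefix and all nodes that are now also covered
			 * by the reduced prefixlen.
			 */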
			rb_erase(&node->node, root);

			if (!cached) {
				xfrm_pol_inexact_node_init(node, addr,
							   prefixlen);
				cached = node;
			} else {
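				/* This node also falls within the new
				 * prefixlen. Merge the to-be-reinserted
				 * nodes and this node.
				 */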
				xfrm_policy_inexact_node_merge(net, node,
							       cached, family);
				kfree_rcu(node, rcu);
			}

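			/* restart */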
			p = &root->rb_node;
			parent = NULL;
		}
	}

	node = cached;
	if (!node) {
		node = xfrm_pol_inexact_node_alloc(addr, prefixlen);
		if (!node)
			return NULL;
	}

	rb_link_node_rcu(&node->node, parent, p);
	rb_insert_color(&node->node, root);

	return node;
}

static void xfrm_policy_inexact_gc_tree(struct rb_root *r, bool rm)
{
	struct xfrm_pol_inexact_node *node;
	struct rb_node *rn = rb_first(r);

	while (rn) {
		node = rb_entry(rn, struct xfrm_pol_inexact_node, node);

		xfrm_policy_inexact_gc_tree(&node->root, rm);
		rn = rb_next(rn);

		if (!hlist_empty(&node->hhead) || !RB_EMPTY_ROOT(&node->root)) {
			WARN_ON_ONCE(rm);
			continue;
		}

		rb_erase(&node->node, r);
		kfree_rcu(node, rcu);
	}
}

static void __xfrm_policy_inexact_prune_bin(struct xfrm_pol_inexact_bin *b, bool net_exit)
{
	write_seqcount_begin(&b->count);
	xfrm_policy_inexact_gc_tree(&b->root_d, net_exit);
	xfrm_policy_inexact_gc_tree(&b->root_s, net_exit);
	write_seqcount_end(&b->count);

	if (!RB_EMPTY_ROOT(&b->root_d) || !RB_EMPTY_ROOT(&b->root_s) ||
	    !hlist_empty(&b->hhead)) {
		WARN_ON_ONCE(net_exit);
		return;
	}

	if (rhashtable_remove_fast(&xfrm_policy_inexact_table, &b->head,
				   xfrm_pol_inexact_params) == 0) {
		list_del(&b->inexact_bins);
		kfree_rcu(b, rcu);
	}
}

static void xfrm_policy_inexact_prune_bin(struct xfrm_pol_inexact_bin *b)
{
	struct net *net = read_pnet(&b->k.net);

	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
	__xfrm_policy_inexact_prune_bin(b, false);
	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
}

static void __xfrm_policy_inexact_flush(struct net *net)
{
	struct xfrm_pol_inexact_bin *bin, *t;

	lockdep_assert_held(&net->xfrm.xfrm_policy_lock);

	list_for_each_entry_safe(bin, t, &net->xfrm.inexact_bins, inexact_bins)
		__xfrm_policy_inexact_prune_bin(bin, false);
}

static struct hlist_head *
xfrm_policy_inexact_alloc_chain(struct xfrm_pol_inexact_bin *bin,
				struct xfrm_policy *policy, u8 dir)
{
	struct xfrm_pol_inexact_node *n;
	struct net *net;

	net = xp_net(policy);
	lockdep_assert_held(&net->xfrm.xfrm_policy_lock);

	if (xfrm_policy_inexact_insert_use_any_list(policy))
		return &bin->hhead;

	if (xfrm_pol_inexact_addr_use_any_list(&policy->selector.daddr,
					       policy->family,
					       policy->selector.prefixlen_d)) {
		write_seqcount_begin(&bin->count);
		n = xfrm_policy_inexact_insert_node(net,
						    &bin->root_s,
						    &policy->selector.saddr,
						    policy->family,
						    policy->selector.prefixlen_s,
						    dir);
		write_seqcount_end(&bin->count);
		if (!n)
			return NULL;

		return &n->hhead;
	}

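	/* daddr is fixed */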
	write_seqcount_begin(&bin->count);
	n = xfrm_policy_inexact_insert_node(net,
					    &bin->root_d,
					    &policy->selector.daddr,
					    policy->family,
					    policy->selector.prefixlen_d, dir);
	write_seqcount_end(&bin->count);
	if (!n)
		return NULL;

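	/* saddr is wildcard */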
	if (xfrm_pol_inexact_addr_use_any_list(&policy->selector.saddr,
					       policy->family,
					       policy->selector.prefixlen_s))
		return &n->hhead;

	write_seqcount_begin(&bin->count);
	n = xfrm_policy_inexact_insert_node(net,
					    &n->root,
					    &policy->selector.saddr,
					    policy->family,
					    policy->selector.prefixlen_s, dir);
	write_seqcount_end(&bin->count);
	if (!n)
		return NULL;

	return &n->hhead;
}

static struct xfrm_policy *
xfrm_policy_inexact_insert(struct xfrm_policy *policy, u8 dir, int excl)
{
	struct xfrm_pol_inexact_bin *bin;
	struct xfrm_policy *delpol;
	struct hlist_head *chain;
	struct net *net;

	bin = xfrm_policy_inexact_alloc_bin(policy, dir);
	if (!bin)
		return ERR_PTR(-ENOMEM);

	net = xp_net(policy);
	lockdep_assert_held(&net->xfrm.xfrm_policy_lock);

	chain = xfrm_policy_inexact_alloc_chain(bin, policy, dir);
	if (!chain) {
		__xfrm_policy_inexact_prune_bin(bin, false);
		return ERR_PTR(-ENOMEM);
	}

	delpol = xfrm_policy_insert_list(chain, policy, excl);
	if (delpol && excl) {
		__xfrm_policy_inexact_prune_bin(bin, false);
		return ERR_PTR(-EEXIST);
	}

	chain = &net->xfrm.policy_inexact[dir];
	xfrm_policy_insert_inexact_list(chain, policy);

	if (delpol)
		__xfrm_policy_inexact_prune_bin(bin, false);

	return delpol;
}

static void xfrm_hash_rebuild(struct work_struct *work)
{
	struct net *net = container_of(work, struct net,
				       xfrm.policy_hthresh.work);
	unsigned int hmask;
	struct xfrm_policy *pol;
	struct xfrm_policy *policy;
	struct hlist_head *chain;
	struct hlist_head *odst;
	struct hlist_node *newpos;
	int i;
	int dir;
	unsigned seq;
	u8 lbits4, rbits4, lbits6, rbits6;

	mutex_lock(&hash_resize_mutex);

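	/* read selector prefixlen thresholds */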
	do {
		seq = read_seqbegin(&net->xfrm.policy_hthresh.lock);

		lbits4 = net->xfrm.policy_hthresh.lbits4;
		rbits4 = net->xfrm.policy_hthresh.rbits4;
		lbits6 = net->xfrm.policy_hthresh.lbits6;
		rbits6 = net->xfrm.policy_hthresh.rbits6;
	} while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq));

	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
	write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation);

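	/* make sure that we can insert the indirect policies again before
	 * we start with destructive action.
	 */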
	list_for_each_entry(policy, &net->xfrm.policy_all, walk.all) {
		struct xfrm_pol_inexact_bin *bin;
		u8 dbits, sbits;

		dir = xfrm_policy_id2dir(policy->index);
		if (policy->walk.dead || dir >= XFRM_POLICY_MAX)
			continue;

		if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
			if (policy->family == AF_INET) {
				dbits = rbits4;
				sbits = lbits4;
			} else {
				dbits = rbits6;
				sbits = lbits6;
			}
		} else {
			if (policy->family == AF_INET) {
				dbits = lbits4;
				sbits = rbits4;
			} else {
				dbits = lbits6;
				sbits = rbits6;
			}
		}

		if (policy->selector.prefixlen_d < dbits ||
		    policy->selector.prefixlen_s < sbits)
			continue;

		bin = xfrm_policy_inexact_alloc_bin(policy, dir);
		if (!bin)
			goto out_unlock;

		if (!xfrm_policy_inexact_alloc_chain(bin, policy, dir))
			goto out_unlock;
	}

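	/* reset the bydst and inexact table in all directions */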
	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct hlist_node *n;

		hlist_for_each_entry_safe(policy, n,
					  &net->xfrm.policy_inexact[dir],
					  bydst_inexact_list) {
			hlist_del_rcu(&policy->bydst);
			hlist_del_init(&policy->bydst_inexact_list);
		}

		hmask = net->xfrm.policy_bydst[dir].hmask;
		odst = net->xfrm.policy_bydst[dir].table;
		for (i = hmask; i >= 0; i--) {
			hlist_for_each_entry_safe(policy, n, odst + i, bydst)
				hlist_del_rcu(&policy->bydst);
		}
		if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
			/* dir out => dst = remote, src = local */
			net->xfrm.policy_bydst[dir].dbits4 = rbits4;
			net->xfrm.policy_bydst[dir].sbits4 = lbits4;
			net->xfrm.policy_bydst[dir].dbits6 = rbits6;
			net->xfrm.policy_bydst[dir].sbits6 = lbits6;
		} else {
			/* dir in/fwd => dst = local, src = remote */
			net->xfrm.policy_bydst[dir].dbits4 = lbits4;
			net->xfrm.policy_bydst[dir].sbits4 = rbits4;
			net->xfrm.policy_bydst[dir].dbits6 = lbits6;
			net->xfrm.policy_bydst[dir].sbits6 = rbits6;
		}
	}

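	/* re-insert all policies by order of creation */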
	list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
		if (policy->walk.dead)
			continue;
		dir = xfrm_policy_id2dir(policy->index);
		if (dir >= XFRM_POLICY_MAX) {
			/* skip socket policies */
			continue;
		}
		newpos = NULL;
		chain = policy_hash_bysel(net, &policy->selector,
					  policy->family, dir);

		if (!chain) {
			void *p = xfrm_policy_inexact_insert(policy, dir, 0);

			WARN_ONCE(IS_ERR(p), "reinsert: %ld\n", PTR_ERR(p));
			continue;
		}

		hlist_for_each_entry(pol, chain, bydst) {
			if (policy->priority >= pol->priority)
				newpos = &pol->bydst;
			else
				break;
		}
		if (newpos)
			hlist_add_behind_rcu(&policy->bydst, newpos);
		else
			hlist_add_head_rcu(&policy->bydst, chain);
	}

out_unlock:
	__xfrm_policy_inexact_flush(net);
	write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation);
	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);

	mutex_unlock(&hash_resize_mutex);
}

void xfrm_policy_hash_rebuild(struct net *net)
{
	schedule_work(&net->xfrm.policy_hthresh.work);
}
EXPORT_SYMBOL(xfrm_policy_hash_rebuild);

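/* Generate new index... But we are not guaranteed to succeed. */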
static u32 xfrm_gen_index(struct net *net, int dir, u32 index)
{
	static u32 idx_generator;

	for (;;) {
		struct hlist_head *list;
		struct xfrm_policy *p;
		u32 idx;
		int found;

		if (!index) {
			idx = (idx_generator | dir);
			idx_generator += 8;
		} else {
			idx = index;
			index = 0;
		}

		if (idx == 0)
			idx = 8;
		list = net->xfrm.policy_byidx + idx_hash(net, idx);
		found = 0;
		hlist_for_each_entry(p, list, byidx) {
			if (p->index == idx) {
				found = 1;
				break;
			}
		}
		if (!found)
			return idx;
	}
}

static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
{
	u32 *p1 = (u32 *) s1;
	u32 *p2 = (u32 *) s2;
	int len = sizeof(struct xfrm_selector) / sizeof(u32);
	int i;

	for (i = 0; i < len; i++) {
		if (p1[i] != p2[i])
			return 1;
	}

	return 0;
}

static void xfrm_policy_requeue(struct xfrm_policy *old,
				struct xfrm_policy *new)
{
	struct xfrm_policy_queue *pq = &old->polq;
	struct sk_buff_head list;

	if (skb_queue_empty(&pq->hold_queue))
		return;

	__skb_queue_head_init(&list);

	spin_lock_bh(&pq->hold_queue.lock);
	skb_queue_splice_init(&pq->hold_queue, &list);
	if (del_timer(&pq->hold_timer))
		xfrm_pol_put(old);
	spin_unlock_bh(&pq->hold_queue.lock);

	pq = &new->polq;

	spin_lock_bh(&pq->hold_queue.lock);
	skb_queue_splice(&list, &pq->hold_queue);
	pq->timeout = XFRM_QUEUE_TMO_MIN;
	if (!mod_timer(&pq->hold_timer, jiffies))
		xfrm_pol_hold(new);
	spin_unlock_bh(&pq->hold_queue.lock);
}

static inline bool xfrm_policy_mark_match(const struct xfrm_mark *mark,
					  struct xfrm_policy *pol)
{
	return mark->v == pol->mark.v && mark->m == pol->mark.m;
}

static u32 xfrm_pol_bin_key(const void *data, u32 len, u32 seed)
{
	const struct xfrm_pol_inexact_key *k = data;
	u32 a = k->type << 24 | k->dir << 16 | k->family;

	return jhash_3words(a, k->if_id, net_hash_mix(read_pnet(&k->net)),
			    seed);
}

static u32 xfrm_pol_bin_obj(const void *data, u32 len, u32 seed)
{
	const struct xfrm_pol_inexact_bin *b = data;

	return xfrm_pol_bin_key(&b->k, 0, seed);
}

static int xfrm_pol_bin_cmp(struct rhashtable_compare_arg *arg,
			    const void *ptr)
{
	const struct xfrm_pol_inexact_key *key = arg->key;
	const struct xfrm_pol_inexact_bin *b = ptr;
	int ret;

	if (!net_eq(read_pnet(&b->k.net), read_pnet(&key->net)))
		return -1;

	ret = b->k.dir ^ key->dir;
	if (ret)
		return ret;

	ret = b->k.type ^ key->type;
	if (ret)
		return ret;

	ret = b->k.family ^ key->family;
	if (ret)
		return ret;

	return b->k.if_id ^ key->if_id;
}

static const struct rhashtable_params xfrm_pol_inexact_params = {
	.head_offset		= offsetof(struct xfrm_pol_inexact_bin, head),
	.hashfn			= xfrm_pol_bin_key,
	.obj_hashfn		= xfrm_pol_bin_obj,
	.obj_cmpfn		= xfrm_pol_bin_cmp,
	.automatic_shrinking	= true,
};

static void xfrm_policy_insert_inexact_list(struct hlist_head *chain,
					    struct xfrm_policy *policy)
{
	struct xfrm_policy *pol, *delpol = NULL;
	struct hlist_node *newpos = NULL;
	int i = 0;

	hlist_for_each_entry(pol, chain, bydst_inexact_list) {
		if (pol->type == policy->type &&
		    pol->if_id == policy->if_id &&
		    !selector_cmp(&pol->selector, &policy->selector) &&
		    xfrm_policy_mark_match(&policy->mark, pol) &&
		    xfrm_sec_ctx_match(pol->security, policy->security) &&
		    !WARN_ON(delpol)) {
			delpol = pol;
			if (policy->priority > pol->priority)
				continue;
		} else if (policy->priority >= pol->priority) {
			newpos = &pol->bydst_inexact_list;
			continue;
		}
		if (delpol)
			break;
	}

	if (newpos)
		hlist_add_behind_rcu(&policy->bydst_inexact_list, newpos);
	else
		hlist_add_head_rcu(&policy->bydst_inexact_list, chain);

	hlist_for_each_entry(pol, chain, bydst_inexact_list) {
		pol->pos = i;
		i++;
	}
}

static struct xfrm_policy *xfrm_policy_insert_list(struct hlist_head *chain,
						   struct xfrm_policy *policy,
						   bool excl)
{
	struct xfrm_policy *pol, *newpos = NULL, *delpol = NULL;

	hlist_for_each_entry(pol, chain, bydst) {
		if (pol->type == policy->type &&
		    pol->if_id == policy->if_id &&
		    !selector_cmp(&pol->selector, &policy->selector) &&
		    xfrm_policy_mark_match(&policy->mark, pol) &&
		    xfrm_sec_ctx_match(pol->security, policy->security) &&
		    !WARN_ON(delpol)) {
			if (excl)
				return ERR_PTR(-EEXIST);
			delpol = pol;
			if (policy->priority > pol->priority)
				continue;
		} else if (policy->priority >= pol->priority) {
			newpos = pol;
			continue;
		}
		if (delpol)
			break;
	}

	if (newpos)
		hlist_add_behind_rcu(&policy->bydst, &newpos->bydst);
	else
		hlist_add_head_rcu(&policy->bydst, chain);

	return delpol;
}

int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
	struct net *net = xp_net(policy);
	struct xfrm_policy *delpol;
	struct hlist_head *chain;

	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
	chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
	if (chain)
		delpol = xfrm_policy_insert_list(chain, policy, excl);
	else
		delpol = xfrm_policy_inexact_insert(policy, dir, excl);

	if (IS_ERR(delpol)) {
		spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
		return PTR_ERR(delpol);
	}

	__xfrm_policy_link(policy, dir);

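	/* After previous checking, family can either be AF_INET or AF_INET6 */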
	if (policy->family == AF_INET)
		rt_genid_bump_ipv4(net);
	else
		rt_genid_bump_ipv6(net);

	if (delpol) {
		xfrm_policy_requeue(delpol, policy);
		__xfrm_policy_unlink(delpol, dir);
	}
	policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir, policy->index);
	hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index));
	policy->curlft.add_time = ktime_get_real_seconds();
	policy->curlft.use_time = 0;
	if (!mod_timer(&policy->timer, jiffies + HZ))
		xfrm_pol_hold(policy);
	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);

	if (delpol)
		xfrm_policy_kill(delpol);
	else if (xfrm_bydst_should_resize(net, dir, NULL))
		schedule_work(&net->xfrm.policy_hash_work);

	return 0;
}
EXPORT_SYMBOL(xfrm_policy_insert);

static struct xfrm_policy *
__xfrm_policy_bysel_ctx(struct hlist_head *chain, const struct xfrm_mark *mark,
			u32 if_id, u8 type, int dir, struct xfrm_selector *sel,
			struct xfrm_sec_ctx *ctx)
{
	struct xfrm_policy *pol;

	if (!chain)
		return NULL;

	hlist_for_each_entry(pol, chain, bydst) {
		if (pol->type == type &&
		    pol->if_id == if_id &&
		    xfrm_policy_mark_match(mark, pol) &&
		    !selector_cmp(sel, &pol->selector) &&
		    xfrm_sec_ctx_match(ctx, pol->security))
			return pol;
	}

	return NULL;
}

struct xfrm_policy *
xfrm_policy_bysel_ctx(struct net *net, const struct xfrm_mark *mark, u32 if_id,
		      u8 type, int dir, struct xfrm_selector *sel,
		      struct xfrm_sec_ctx *ctx, int delete, int *err)
{
	struct xfrm_pol_inexact_bin *bin = NULL;
	struct xfrm_policy *pol, *ret = NULL;
	struct hlist_head *chain;

	*err = 0;
	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
	chain = policy_hash_bysel(net, sel, sel->family, dir);
	if (!chain) {
		struct xfrm_pol_inexact_candidates cand;
		int i;

		bin = xfrm_policy_inexact_lookup(net, type,
						 sel->family, dir, if_id);
		if (!bin) {
			spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
			return NULL;
		}

		if (!xfrm_policy_find_inexact_candidates(&cand, bin,
							 &sel->saddr,
							 &sel->daddr)) {
			spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
			return NULL;
		}

		pol = NULL;
		for (i = 0; i < ARRAY_SIZE(cand.res); i++) {
			struct xfrm_policy *tmp;

			tmp = __xfrm_policy_bysel_ctx(cand.res[i], mark,
						      if_id, type, dir,
						      sel, ctx);
			if (!tmp)
				continue;

			if (!pol || tmp->pos < pol->pos)
				pol = tmp;
		}
	} else {
		pol = __xfrm_policy_bysel_ctx(chain, mark, if_id, type, dir,
					      sel, ctx);
	}

	if (pol) {
		xfrm_pol_hold(pol);
		if (delete) {
			*err = security_xfrm_policy_delete(pol->security);
			if (*err) {
				spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
				return pol;
			}
			__xfrm_policy_unlink(pol, dir);
		}
		ret = pol;
	}
	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);

	if (ret && delete)
		xfrm_policy_kill(ret);
	if (bin && delete)
		xfrm_policy_inexact_prune_bin(bin);
	return ret;
}
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);

struct xfrm_policy *
xfrm_policy_byid(struct net *net, const struct xfrm_mark *mark, u32 if_id,
		 u8 type, int dir, u32 id, int delete, int *err)
{
	struct xfrm_policy *pol, *ret;
	struct hlist_head *chain;

	*err = -ENOENT;
	if (xfrm_policy_id2dir(id) != dir)
		return NULL;

	*err = 0;
	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
	chain = net->xfrm.policy_byidx + idx_hash(net, id);
	ret = NULL;
	hlist_for_each_entry(pol, chain, byidx) {
		if (pol->type == type && pol->index == id &&
		    pol->if_id == if_id && xfrm_policy_mark_match(mark, pol)) {
			xfrm_pol_hold(pol);
			if (delete) {
				*err = security_xfrm_policy_delete(
								pol->security);
				if (*err) {
					spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
					return pol;
				}
				__xfrm_policy_unlink(pol, dir);
			}
			ret = pol;
			break;
		}
	}
	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);

	if (ret && delete)
		xfrm_policy_kill(ret);
	return ret;
}
EXPORT_SYMBOL(xfrm_policy_byid);

#ifdef CONFIG_SECURITY_NETWORK_XFRM
static inline int
xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
{
	struct xfrm_policy *pol;
	int err = 0;

	list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
		if (pol->walk.dead ||
		    xfrm_policy_id2dir(pol->index) >= XFRM_POLICY_MAX ||
		    pol->type != type)
			continue;

		err = security_xfrm_policy_delete(pol->security);
		if (err) {
			xfrm_audit_policy_delete(pol, 0, task_valid);
			return err;
		}
	}
	return err;
}
#else
static inline int
xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
{
	return 0;
}
#endif

int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
{
	int dir, err = 0, cnt = 0;
	struct xfrm_policy *pol;

	spin_lock_bh(&net->xfrm.xfrm_policy_lock);

	err = xfrm_policy_flush_secctx_check(net, type, task_valid);
	if (err)
		goto out;

again:
	list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
		dir = xfrm_policy_id2dir(pol->index);
		if (pol->walk.dead ||
		    dir >= XFRM_POLICY_MAX ||
		    pol->type != type)
			continue;

		__xfrm_policy_unlink(pol, dir);
		spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
		cnt++;
		xfrm_audit_policy_delete(pol, 1, task_valid);
		xfrm_policy_kill(pol);
		spin_lock_bh(&net->xfrm.xfrm_policy_lock);
		goto again;
	}
	if (cnt)
		__xfrm_policy_inexact_flush(net);
	else
		err = -ESRCH;
out:
	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_policy_flush);

int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
		     int (*func)(struct xfrm_policy *, int, int, void*),
		     void *data)
{
	struct xfrm_policy *pol;
	struct xfrm_policy_walk_entry *x;
	int error = 0;

	if (walk->type >= XFRM_POLICY_TYPE_MAX &&
	    walk->type != XFRM_POLICY_TYPE_ANY)
		return -EINVAL;

	if (list_empty(&walk->walk.all) && walk->seq != 0)
		return 0;

	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
	if (list_empty(&walk->walk.all))
		x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all);
	else
		x = list_first_entry(&walk->walk.all,
				     struct xfrm_policy_walk_entry, all);

	list_for_each_entry_from(x, &net->xfrm.policy_all, all) {
		if (x->dead)
			continue;
		pol = container_of(x, struct xfrm_policy, walk);
		if (walk->type != XFRM_POLICY_TYPE_ANY &&
		    walk->type != pol->type)
			continue;
		error = func(pol, xfrm_policy_id2dir(pol->index),
			     walk->seq, data);
		if (error) {
			list_move_tail(&walk->walk.all, &x->all);
			goto out;
		}
		walk->seq++;
	}
	if (walk->seq == 0) {
		error = -ENOENT;
		goto out;
	}
	list_del_init(&walk->walk.all);
out:
	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
	return error;
}
EXPORT_SYMBOL(xfrm_policy_walk);

void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type)
{
	INIT_LIST_HEAD(&walk->walk.all);
	walk->walk.dead = 1;
	walk->type = type;
	walk->seq = 0;
}
EXPORT_SYMBOL(xfrm_policy_walk_init);

void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net)
{
	if (list_empty(&walk->walk.all))
		return;

	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
	list_del(&walk->walk.all);
	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
}
EXPORT_SYMBOL(xfrm_policy_walk_done);

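/*
 * Find policy to apply to this flow.
 *
 * Returns 0 if policy found, else an -errno.
 */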
static int xfrm_policy_match(const struct xfrm_policy *pol,
			     const struct flowi *fl,
			     u8 type, u16 family, int dir, u32 if_id)
{
	const struct xfrm_selector *sel = &pol->selector;
	int ret = -ESRCH;
	bool match;

	if (pol->family != family ||
	    pol->if_id != if_id ||
	    (fl->flowi_mark & pol->mark.m) != pol->mark.v ||
	    pol->type != type)
		return ret;

	match = xfrm_selector_match(sel, fl, family);
	if (match)
		ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid);
	return ret;
}

static struct xfrm_pol_inexact_node *
xfrm_policy_lookup_inexact_addr(const struct rb_root *r,
				seqcount_spinlock_t *count,
				const xfrm_address_t *addr, u16 family)
{
	const struct rb_node *parent;
	int seq;

again:
	seq = read_seqcount_begin(count);

	parent = rcu_dereference_raw(r->rb_node);
	while (parent) {
		struct xfrm_pol_inexact_node *node;
		int delta;

		node = rb_entry(parent, struct xfrm_pol_inexact_node, node);

		delta = xfrm_policy_addr_delta(addr, &node->addr,
					       node->prefixlen, family);
		if (delta < 0) {
			parent = rcu_dereference_raw(parent->rb_left);
			continue;
		} else if (delta > 0) {
			parent = rcu_dereference_raw(parent->rb_right);
			continue;
		}

		return node;
	}

	if (read_seqcount_retry(count, seq))
		goto again;

	return NULL;
}

static bool
xfrm_policy_find_inexact_candidates(struct xfrm_pol_inexact_candidates *cand,
				    struct xfrm_pol_inexact_bin *b,
				    const xfrm_address_t *saddr,
				    const xfrm_address_t *daddr)
{
	struct xfrm_pol_inexact_node *n;
	u16 family;

	if (!b)
		return false;

	family = b->k.family;
	memset(cand, 0, sizeof(*cand));
	cand->res[XFRM_POL_CAND_ANY] = &b->hhead;

	n = xfrm_policy_lookup_inexact_addr(&b->root_d, &b->count, daddr,
					    family);
	if (n) {
		cand->res[XFRM_POL_CAND_DADDR] = &n->hhead;
		n = xfrm_policy_lookup_inexact_addr(&n->root, &b->count, saddr,
						    family);
		if (n)
			cand->res[XFRM_POL_CAND_BOTH] = &n->hhead;
	}

	n = xfrm_policy_lookup_inexact_addr(&b->root_s, &b->count, saddr,
					    family);
	if (n)
		cand->res[XFRM_POL_CAND_SADDR] = &n->hhead;

	return true;
}

static struct xfrm_pol_inexact_bin *
xfrm_policy_inexact_lookup_rcu(struct net *net, u8 type, u16 family,
			       u8 dir, u32 if_id)
{
	struct xfrm_pol_inexact_key k = {
		.family = family,
		.type = type,
		.dir = dir,
		.if_id = if_id,
	};

	write_pnet(&k.net, net);

	return rhashtable_lookup(&xfrm_policy_inexact_table, &k,
				 xfrm_pol_inexact_params);
}

static struct xfrm_pol_inexact_bin *
xfrm_policy_inexact_lookup(struct net *net, u8 type, u16 family,
			   u8 dir, u32 if_id)
{
	struct xfrm_pol_inexact_bin *bin;

	lockdep_assert_held(&net->xfrm.xfrm_policy_lock);

	rcu_read_lock();
	bin = xfrm_policy_inexact_lookup_rcu(net, type, family, dir, if_id);
	rcu_read_unlock();

	return bin;
}

static struct xfrm_policy *
__xfrm_policy_eval_candidates(struct hlist_head *chain,
			      struct xfrm_policy *prefer,
			      const struct flowi *fl,
			      u8 type, u16 family, int dir, u32 if_id)
{
	u32 priority = prefer ? prefer->priority : ~0u;
	struct xfrm_policy *pol;

	if (!chain)
		return NULL;

	hlist_for_each_entry_rcu(pol, chain, bydst) {
		int err;

		if (pol->priority > priority)
			break;

		err = xfrm_policy_match(pol, fl, type, family, dir, if_id);
		if (err) {
			if (err != -ESRCH)
				return ERR_PTR(err);

			continue;
		}

		if (prefer) {
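			/* matches.  Is it older than *prefer? */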
2041 if (pol->priority == priority &&
2042 prefer->pos < pol->pos)
2043 return prefer;
2044 }
2045
2046 return pol;
2047 }
2048
2049 return NULL;
2050 }
2051
2052 static struct xfrm_policy *
2053 xfrm_policy_eval_candidates(struct xfrm_pol_inexact_candidates *cand,
2054 struct xfrm_policy *prefer,
2055 const struct flowi *fl,
2056 u8 type, u16 family, int dir, u32 if_id)
2057 {
2058 struct xfrm_policy *tmp;
2059 int i;
2060
2061 for (i = 0; i < ARRAY_SIZE(cand->res); i++) {
2062 tmp = __xfrm_policy_eval_candidates(cand->res[i],
2063 prefer,
2064 fl, type, family, dir,
2065 if_id);
2066 if (!tmp)
2067 continue;
2068
2069 if (IS_ERR(tmp))
2070 return tmp;
2071 prefer = tmp;
2072 }
2073
2074 return prefer;
2075 }
2076
2077 static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
2078 const struct flowi *fl,
2079 u16 family, u8 dir,
2080 u32 if_id)
2081 {
2082 struct xfrm_pol_inexact_candidates cand;
2083 const xfrm_address_t *daddr, *saddr;
2084 struct xfrm_pol_inexact_bin *bin;
2085 struct xfrm_policy *pol, *ret;
2086 struct hlist_head *chain;
2087 unsigned int sequence;
2088 int err;
2089
2090 daddr = xfrm_flowi_daddr(fl, family);
2091 saddr = xfrm_flowi_saddr(fl, family);
2092 if (unlikely(!daddr || !saddr))
2093 return NULL;
2094
2095 rcu_read_lock();
2096 retry:
2097 do {
2098 sequence = read_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation);
2099 chain = policy_hash_direct(net, daddr, saddr, family, dir);
2100 } while (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence));
2101
2102 ret = NULL;
2103 hlist_for_each_entry_rcu(pol, chain, bydst) {
2104 err = xfrm_policy_match(pol, fl, type, family, dir, if_id);
2105 if (err) {
2106 if (err == -ESRCH)
2107 continue;
2108 else {
2109 ret = ERR_PTR(err);
2110 goto fail;
2111 }
2112 } else {
2113 ret = pol;
2114 break;
2115 }
2116 }
2117 bin = xfrm_policy_inexact_lookup_rcu(net, type, family, dir, if_id);
2118 if (!bin || !xfrm_policy_find_inexact_candidates(&cand, bin, saddr,
2119 daddr))
2120 goto skip_inexact;
2121
2122 pol = xfrm_policy_eval_candidates(&cand, ret, fl, type,
2123 family, dir, if_id);
2124 if (pol) {
2125 ret = pol;
2126 if (IS_ERR(pol))
2127 goto fail;
2128 }
2129
2130 skip_inexact:
2131 if (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence))
2132 goto retry;
2133
2134 if (ret && !xfrm_pol_hold_rcu(ret))
2135 goto retry;
2136 fail:
2137 rcu_read_unlock();
2138
2139 return ret;
2140 }
2141
2142 static struct xfrm_policy *xfrm_policy_lookup(struct net *net,
2143 const struct flowi *fl,
2144 u16 family, u8 dir, u32 if_id)
2145 {
2146 #ifdef CONFIG_XFRM_SUB_POLICY
2147 struct xfrm_policy *pol;
2148
2149 pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family,
2150 dir, if_id);
2151 if (pol != NULL)
2152 return pol;
2153 #endif
2154 return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family,
2155 dir, if_id);
2156 }
2157
2158 static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
2159 const struct flowi *fl,
2160 u16 family, u32 if_id)
2161 {
2162 struct xfrm_policy *pol;
2163
2164 rcu_read_lock();
2165 again:
2166 pol = rcu_dereference(sk->sk_policy[dir]);
2167 if (pol != NULL) {
2168 bool match;
2169 int err = 0;
2170
2171 if (pol->family != family) {
2172 pol = NULL;
2173 goto out;
2174 }
2175
2176 match = xfrm_selector_match(&pol->selector, fl, family);
2177 if (match) {
2178 if ((sk->sk_mark & pol->mark.m) != pol->mark.v ||
2179 pol->if_id != if_id) {
2180 pol = NULL;
2181 goto out;
2182 }
2183 err = security_xfrm_policy_lookup(pol->security,
2184 fl->flowi_secid);
2185 if (!err) {
2186 if (!xfrm_pol_hold_rcu(pol))
2187 goto again;
2188 } else if (err == -ESRCH) {
2189 pol = NULL;
2190 } else {
2191 pol = ERR_PTR(err);
2192 }
2193 } else
2194 pol = NULL;
2195 }
2196 out:
2197 rcu_read_unlock();
2198 return pol;
2199 }
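/* Illustrative restatement (not from the original file) of the mark
 * test in xfrm_sk_policy_lookup() above: a policy applies only when
 * the socket mark, masked with the policy's mask, equals the
 * policy's value.
 */
static inline bool xfrm_mark_matches_model(__u32 mark, __u32 mask, __u32 val)
{
	return (mark & mask) == val;
}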
2200
2201 static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
2202 {
2203 struct net *net = xp_net(pol);
2204
2205 list_add(&pol->walk.all, &net->xfrm.policy_all);
2206 net->xfrm.policy_count[dir]++;
2207 xfrm_pol_hold(pol);
2208 }
2209
2210 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
2211 int dir)
2212 {
2213 struct net *net = xp_net(pol);
2214
2215 if (list_empty(&pol->walk.all))
2216 return NULL;
2217
2218 /* Socket policies are not hashed. */
2219 if (!hlist_unhashed(&pol->bydst)) {
2220 hlist_del_rcu(&pol->bydst);
2221 hlist_del_init(&pol->bydst_inexact_list);
2222 hlist_del(&pol->byidx);
2223 }
2224
2225 list_del_init(&pol->walk.all);
2226 net->xfrm.policy_count[dir]--;
2227
2228 return pol;
2229 }
2230
2231 static void xfrm_sk_policy_link(struct xfrm_policy *pol, int dir)
2232 {
2233 __xfrm_policy_link(pol, XFRM_POLICY_MAX + dir);
2234 }
2235
2236 static void xfrm_sk_policy_unlink(struct xfrm_policy *pol, int dir)
2237 {
2238 __xfrm_policy_unlink(pol, XFRM_POLICY_MAX + dir);
2239 }
2240
2241 int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
2242 {
2243 struct net *net = xp_net(pol);
2244
2245 spin_lock_bh(&net->xfrm.xfrm_policy_lock);
2246 pol = __xfrm_policy_unlink(pol, dir);
2247 spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
2248 if (pol) {
2249 xfrm_policy_kill(pol);
2250 return 0;
2251 }
2252 return -ENOENT;
2253 }
2254 EXPORT_SYMBOL(xfrm_policy_delete);
2255
2256 int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
2257 {
2258 struct net *net = sock_net(sk);
2259 struct xfrm_policy *old_pol;
2260
2261 #ifdef CONFIG_XFRM_SUB_POLICY
2262 if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
2263 return -EINVAL;
2264 #endif
2265
2266 spin_lock_bh(&net->xfrm.xfrm_policy_lock);
2267 old_pol = rcu_dereference_protected(sk->sk_policy[dir],
2268 lockdep_is_held(&net->xfrm.xfrm_policy_lock));
2269 if (pol) {
2270 pol->curlft.add_time = ktime_get_real_seconds();
2271 pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0);
2272 xfrm_sk_policy_link(pol, dir);
2273 }
2274 rcu_assign_pointer(sk->sk_policy[dir], pol);
2275 if (old_pol) {
2276 if (pol)
2277 xfrm_policy_requeue(old_pol, pol);
2278
2279 /* Unlinking succeeds always. This is the only function
2280  * allowed to delete or replace socket policy.
2281  */
2282 xfrm_sk_policy_unlink(old_pol, dir);
2283 }
2284 spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
2285
2286 if (old_pol) {
2287 xfrm_policy_kill(old_pol);
2288 }
2289 return 0;
2290 }
2291
2292 static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
2293 {
2294 struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC);
2295 struct net *net = xp_net(old);
2296
2297 if (newp) {
2298 newp->selector = old->selector;
2299 if (security_xfrm_policy_clone(old->security,
2300 &newp->security)) {
2301 kfree(newp);
2302 return NULL;
2303 }
2304 newp->lft = old->lft;
2305 newp->curlft = old->curlft;
2306 newp->mark = old->mark;
2307 newp->if_id = old->if_id;
2308 newp->action = old->action;
2309 newp->flags = old->flags;
2310 newp->xfrm_nr = old->xfrm_nr;
2311 newp->index = old->index;
2312 newp->type = old->type;
2313 newp->family = old->family;
2314 memcpy(newp->xfrm_vec, old->xfrm_vec,
2315 newp->xfrm_nr*sizeof(struct xfrm_tmpl));
2316 spin_lock_bh(&net->xfrm.xfrm_policy_lock);
2317 xfrm_sk_policy_link(newp, dir);
2318 spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
2319 xfrm_pol_put(newp);
2320 }
2321 return newp;
2322 }
2323
2324 int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk)
2325 {
2326 const struct xfrm_policy *p;
2327 struct xfrm_policy *np;
2328 int i, ret = 0;
2329
2330 rcu_read_lock();
2331 for (i = 0; i < 2; i++) {
2332 p = rcu_dereference(osk->sk_policy[i]);
2333 if (p) {
2334 np = clone_policy(p, i);
2335 if (unlikely(!np)) {
2336 ret = -ENOMEM;
2337 break;
2338 }
2339 rcu_assign_pointer(sk->sk_policy[i], np);
2340 }
2341 }
2342 rcu_read_unlock();
2343 return ret;
2344 }
2345
2346 static int
2347 xfrm_get_saddr(struct net *net, int oif, xfrm_address_t *local,
2348 xfrm_address_t *remote, unsigned short family, u32 mark)
2349 {
2350 int err;
2351 const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
2352
2353 if (unlikely(afinfo == NULL))
2354 return -EINVAL;
2355 err = afinfo->get_saddr(net, oif, local, remote, mark);
2356 rcu_read_unlock();
2357 return err;
2358 }
2359
2360
2361 /* Resolve list of templates for the flow, given policy. */
2362 static int
2363 xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
2364 struct xfrm_state **xfrm, unsigned short family)
2365 {
2366 struct net *net = xp_net(policy);
2367 int nx;
2368 int i, error;
2369 xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
2370 xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
2371 xfrm_address_t tmp;
2372
2373 for (nx = 0, i = 0; i < policy->xfrm_nr; i++) {
2374 struct xfrm_state *x;
2375 xfrm_address_t *remote = daddr;
2376 xfrm_address_t *local = saddr;
2377 struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
2378
2379 if (tmpl->mode == XFRM_MODE_TUNNEL ||
2380 tmpl->mode == XFRM_MODE_BEET) {
2381 remote = &tmpl->id.daddr;
2382 local = &tmpl->saddr;
2383 if (xfrm_addr_any(local, tmpl->encap_family)) {
2384 error = xfrm_get_saddr(net, fl->flowi_oif,
2385 &tmp, remote,
2386 tmpl->encap_family, 0);
2387 if (error)
2388 goto fail;
2389 local = &tmp;
2390 }
2391 }
2392
2393 x = xfrm_state_find(remote, local, fl, tmpl, policy, &error,
2394 family, policy->if_id);
2395
2396 if (x && x->km.state == XFRM_STATE_VALID) {
2397 xfrm[nx++] = x;
2398 daddr = remote;
2399 saddr = local;
2400 continue;
2401 }
2402 if (x) {
2403 error = (x->km.state == XFRM_STATE_ERROR ?
2404 -EINVAL : -EAGAIN);
2405 xfrm_state_put(x);
2406 } else if (error == -ESRCH) {
2407 error = -EAGAIN;
2408 }
2409
2410 if (!tmpl->optional)
2411 goto fail;
2412 }
2413 return nx;
2414
2415 fail:
2416 for (nx--; nx >= 0; nx--)
2417 xfrm_state_put(xfrm[nx]);
2418 return error;
2419 }
2420
2421 static int
2422 xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl,
2423 struct xfrm_state **xfrm, unsigned short family)
2424 {
2425 struct xfrm_state *tp[XFRM_MAX_DEPTH];
2426 struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
2427 int cnx = 0;
2428 int error;
2429 int ret;
2430 int i;
2431
2432 for (i = 0; i < npols; i++) {
2433 if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
2434 error = -ENOBUFS;
2435 goto fail;
2436 }
2437
2438 ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
2439 if (ret < 0) {
2440 error = ret;
2441 goto fail;
2442 } else
2443 cnx += ret;
2444 }
2445
2446 /* found states are sorted for outbound processing */
2447 if (npols > 1)
2448 xfrm_state_sort(xfrm, tpp, cnx, family);
2449
2450 return cnx;
2451
2452 fail:
2453 for (cnx--; cnx >= 0; cnx--)
2454 xfrm_state_put(tpp[cnx]);
2455 return error;
2456
2457 }
2458
2459 static int xfrm_get_tos(const struct flowi *fl, int family)
2460 {
2461 if (family == AF_INET)
2462 return IPTOS_RT_MASK & fl->u.ip4.flowi4_tos;
2463
2464 return 0;
2465 }
2466
2467 static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
2468 {
2469 const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
2470 struct dst_ops *dst_ops;
2471 struct xfrm_dst *xdst;
2472
2473 if (!afinfo)
2474 return ERR_PTR(-EINVAL);
2475
2476 switch (family) {
2477 case AF_INET:
2478 dst_ops = &net->xfrm.xfrm4_dst_ops;
2479 break;
2480 #if IS_ENABLED(CONFIG_IPV6)
2481 case AF_INET6:
2482 dst_ops = &net->xfrm.xfrm6_dst_ops;
2483 break;
2484 #endif
2485 default:
2486 BUG();
2487 }
2488 xdst = dst_alloc(dst_ops, NULL, 1, DST_OBSOLETE_NONE, 0);
2489
2490 if (likely(xdst)) {
2491 memset_after(xdst, 0, u.dst);
2492 } else
2493 xdst = ERR_PTR(-ENOBUFS);
2494
2495 rcu_read_unlock();
2496
2497 return xdst;
2498 }
2499
2500 static void xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
2501 int nfheader_len)
2502 {
2503 if (dst->ops->family == AF_INET6) {
2504 struct rt6_info *rt = (struct rt6_info *)dst;
2505 path->path_cookie = rt6_get_cookie(rt);
2506 path->u.rt6.rt6i_nfheader_len = nfheader_len;
2507 }
2508 }
2509
2510 static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
2511 const struct flowi *fl)
2512 {
2513 const struct xfrm_policy_afinfo *afinfo =
2514 xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
2515 int err;
2516
2517 if (!afinfo)
2518 return -EINVAL;
2519
2520 err = afinfo->fill_dst(xdst, dev, fl);
2521
2522 rcu_read_unlock();
2523
2524 return err;
2525 }
2526
2527
2528
2529 /* Allocate chain of dst_entry's, attach known xfrm's, calculate
2530  * all the metrics... Shortly, bundle a bundle.
2531  */
2532 static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
2533 struct xfrm_state **xfrm,
2534 struct xfrm_dst **bundle,
2535 int nx,
2536 const struct flowi *fl,
2537 struct dst_entry *dst)
2538 {
2539 const struct xfrm_state_afinfo *afinfo;
2540 const struct xfrm_mode *inner_mode;
2541 struct net *net = xp_net(policy);
2542 unsigned long now = jiffies;
2543 struct net_device *dev;
2544 struct xfrm_dst *xdst_prev = NULL;
2545 struct xfrm_dst *xdst0 = NULL;
2546 int i = 0;
2547 int err;
2548 int header_len = 0;
2549 int nfheader_len = 0;
2550 int trailer_len = 0;
2551 int tos;
2552 int family = policy->selector.family;
2553 xfrm_address_t saddr, daddr;
2554
2555 xfrm_flowi_addr_get(fl, &saddr, &daddr, family);
2556
2557 tos = xfrm_get_tos(fl, family);
2558
2559 dst_hold(dst);
2560
2561 for (; i < nx; i++) {
2562 struct xfrm_dst *xdst = xfrm_alloc_dst(net, family);
2563 struct dst_entry *dst1 = &xdst->u.dst;
2564
2565 err = PTR_ERR(xdst);
2566 if (IS_ERR(xdst)) {
2567 dst_release(dst);
2568 goto put_states;
2569 }
2570
2571 bundle[i] = xdst;
2572 if (!xdst_prev)
2573 xdst0 = xdst;
2574 else
2575 /* Ref count is taken from the copy of
2576  * the dst_entry in the route table
2577  */
2578 xfrm_dst_set_child(xdst_prev, &xdst->u.dst);
2579
2580 if (xfrm[i]->sel.family == AF_UNSPEC) {
2581 inner_mode = xfrm_ip2inner_mode(xfrm[i],
2582 xfrm_af2proto(family));
2583 if (!inner_mode) {
2584 err = -EAFNOSUPPORT;
2585 dst_release(dst);
2586 goto put_states;
2587 }
2588 } else
2589 inner_mode = &xfrm[i]->inner_mode;
2590
2591 xdst->route = dst;
2592 dst_copy_metrics(dst1, dst);
2593
2594 if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
2595 __u32 mark = 0;
2596 int oif;
2597
2598 if (xfrm[i]->props.smark.v || xfrm[i]->props.smark.m)
2599 mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]);
2600
2601 family = xfrm[i]->props.family;
2602 oif = fl->flowi_oif ? : fl->flowi_l3mdev;
2603 dst = xfrm_dst_lookup(xfrm[i], tos, oif,
2604 &saddr, &daddr, family, mark);
2605 err = PTR_ERR(dst);
2606 if (IS_ERR(dst))
2607 goto put_states;
2608 } else
2609 dst_hold(dst);
2610
2611 dst1->xfrm = xfrm[i];
2612 xdst->xfrm_genid = xfrm[i]->genid;
2613
2614 dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
2615 dst1->lastuse = now;
2616
2617 dst1->input = dst_discard;
2618
2619 rcu_read_lock();
2620 afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family);
2621 if (likely(afinfo))
2622 dst1->output = afinfo->output;
2623 else
2624 dst1->output = dst_discard_out;
2625 rcu_read_unlock();
2626
2627 xdst_prev = xdst;
2628
2629 header_len += xfrm[i]->props.header_len;
2630 if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
2631 nfheader_len += xfrm[i]->props.header_len;
2632 trailer_len += xfrm[i]->props.trailer_len;
2633 }
2634
2635 xfrm_dst_set_child(xdst_prev, dst);
2636 xdst0->path = dst;
2637
2638 err = -ENODEV;
2639 dev = dst->dev;
2640 if (!dev)
2641 goto free_dst;
2642
2643 xfrm_init_path(xdst0, dst, nfheader_len);
2644 xfrm_init_pmtu(bundle, nx);
2645
2646 for (xdst_prev = xdst0; xdst_prev != (struct xfrm_dst *)dst;
2647 xdst_prev = (struct xfrm_dst *) xfrm_dst_child(&xdst_prev->u.dst)) {
2648 err = xfrm_fill_dst(xdst_prev, dev, fl);
2649 if (err)
2650 goto free_dst;
2651
2652 xdst_prev->u.dst.header_len = header_len;
2653 xdst_prev->u.dst.trailer_len = trailer_len;
2654 header_len -= xdst_prev->u.dst.xfrm->props.header_len;
2655 trailer_len -= xdst_prev->u.dst.xfrm->props.trailer_len;
2656 }
2657
2658 return &xdst0->u.dst;
2659
2660 put_states:
2661 for (; i < nx; i++)
2662 xfrm_state_put(xfrm[i]);
2663 free_dst:
2664 if (xdst0)
2665 dst_release_immediate(&xdst0->u.dst);
2666
2667 return ERR_PTR(err);
2668 }
2669
2670 static int xfrm_expand_policies(const struct flowi *fl, u16 family,
2671 struct xfrm_policy **pols,
2672 int *num_pols, int *num_xfrms)
2673 {
2674 int i;
2675
2676 if (*num_pols == 0 || !pols[0]) {
2677 *num_pols = 0;
2678 *num_xfrms = 0;
2679 return 0;
2680 }
2681 if (IS_ERR(pols[0])) {
2682 *num_pols = 0;
2683 return PTR_ERR(pols[0]);
2684 }
2685
2686 *num_xfrms = pols[0]->xfrm_nr;
2687
2688 #ifdef CONFIG_XFRM_SUB_POLICY
2689 if (pols[0]->action == XFRM_POLICY_ALLOW &&
2690 pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
2691 pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]),
2692 XFRM_POLICY_TYPE_MAIN,
2693 fl, family,
2694 XFRM_POLICY_OUT,
2695 pols[0]->if_id);
2696 if (pols[1]) {
2697 if (IS_ERR(pols[1])) {
2698 xfrm_pols_put(pols, *num_pols);
2699 *num_pols = 0;
2700 return PTR_ERR(pols[1]);
2701 }
2702 (*num_pols)++;
2703 (*num_xfrms) += pols[1]->xfrm_nr;
2704 }
2705 }
2706 #endif
2707 for (i = 0; i < *num_pols; i++) {
2708 if (pols[i]->action != XFRM_POLICY_ALLOW) {
2709 *num_xfrms = -1;
2710 break;
2711 }
2712 }
2713
2714 return 0;
2715
2716 }
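/* Illustrative decode (not part of the original file) of the
 * *num_xfrms convention established by xfrm_expand_policies() above:
 * negative means some matching policy blocks the flow, zero means
 * allow without transformation, positive is the number of templates
 * left to resolve.
 */
static inline const char *xfrm_num_xfrms_meaning_model(int num_xfrms)
{
	if (num_xfrms < 0)
		return "blocked by policy";
	return num_xfrms ? "transform required" : "allow untransformed";
}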
2717
2718 static struct xfrm_dst *
2719 xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
2720 const struct flowi *fl, u16 family,
2721 struct dst_entry *dst_orig)
2722 {
2723 struct net *net = xp_net(pols[0]);
2724 struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
2725 struct xfrm_dst *bundle[XFRM_MAX_DEPTH];
2726 struct xfrm_dst *xdst;
2727 struct dst_entry *dst;
2728 int err;
2729
2730 /* Try to instantiate a bundle */
2731 err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
2732 if (err <= 0) {
2733 if (err == 0)
2734 return NULL;
2735
2736 if (err != -EAGAIN)
2737 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
2738 return ERR_PTR(err);
2739 }
2740
2741 dst = xfrm_bundle_create(pols[0], xfrm, bundle, err, fl, dst_orig);
2742 if (IS_ERR(dst)) {
2743 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
2744 return ERR_CAST(dst);
2745 }
2746
2747 xdst = (struct xfrm_dst *)dst;
2748 xdst->num_xfrms = err;
2749 xdst->num_pols = num_pols;
2750 memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
2751 xdst->policy_genid = atomic_read(&pols[0]->genid);
2752
2753 return xdst;
2754 }
2755
2756 static void xfrm_policy_queue_process(struct timer_list *t)
2757 {
2758 struct sk_buff *skb;
2759 struct sock *sk;
2760 struct dst_entry *dst;
2761 struct xfrm_policy *pol = from_timer(pol, t, polq.hold_timer);
2762 struct net *net = xp_net(pol);
2763 struct xfrm_policy_queue *pq = &pol->polq;
2764 struct flowi fl;
2765 struct sk_buff_head list;
2766 __u32 skb_mark;
2767
2768 spin_lock(&pq->hold_queue.lock);
2769 skb = skb_peek(&pq->hold_queue);
2770 if (!skb) {
2771 spin_unlock(&pq->hold_queue.lock);
2772 goto out;
2773 }
2774 dst = skb_dst(skb);
2775 sk = skb->sk;
2776
2777 /* Fixup the mark to support VTI. */
2778 skb_mark = skb->mark;
2779 skb->mark = pol->mark.v;
2780 xfrm_decode_session(skb, &fl, dst->ops->family);
2781 skb->mark = skb_mark;
2782 spin_unlock(&pq->hold_queue.lock);
2783
2784 dst_hold(xfrm_dst_path(dst));
2785 dst = xfrm_lookup(net, xfrm_dst_path(dst), &fl, sk, XFRM_LOOKUP_QUEUE);
2786 if (IS_ERR(dst))
2787 goto purge_queue;
2788
2789 if (dst->flags & DST_XFRM_QUEUE) {
2790 dst_release(dst);
2791
2792 if (pq->timeout >= XFRM_QUEUE_TMO_MAX)
2793 goto purge_queue;
2794
2795 pq->timeout = pq->timeout << 1;
2796 if (!mod_timer(&pq->hold_timer, jiffies + pq->timeout))
2797 xfrm_pol_hold(pol);
2798 goto out;
2799 }
2800
2801 dst_release(dst);
2802
2803 __skb_queue_head_init(&list);
2804
2805 spin_lock(&pq->hold_queue.lock);
2806 pq->timeout = 0;
2807 skb_queue_splice_init(&pq->hold_queue, &list);
2808 spin_unlock(&pq->hold_queue.lock);
2809
2810 while (!skb_queue_empty(&list)) {
2811 skb = __skb_dequeue(&list);
2812
2813 /* Fixup the mark to support VTI. */
2814 skb_mark = skb->mark;
2815 skb->mark = pol->mark.v;
2816 xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family);
2817 skb->mark = skb_mark;
2818
2819 dst_hold(xfrm_dst_path(skb_dst(skb)));
2820 dst = xfrm_lookup(net, xfrm_dst_path(skb_dst(skb)), &fl, skb->sk, 0);
2821 if (IS_ERR(dst)) {
2822 kfree_skb(skb);
2823 continue;
2824 }
2825
2826 nf_reset_ct(skb);
2827 skb_dst_drop(skb);
2828 skb_dst_set(skb, dst);
2829
2830 dst_output(net, skb->sk, skb);
2831 }
2832
2833 out:
2834 xfrm_pol_put(pol);
2835 return;
2836
2837 purge_queue:
2838 pq->timeout = 0;
2839 skb_queue_purge(&pq->hold_queue);
2840 xfrm_pol_put(pol);
2841 }
2842
2843 static int xdst_queue_output(struct net *net, struct sock *sk, struct sk_buff *skb)
2844 {
2845 unsigned long sched_next;
2846 struct dst_entry *dst = skb_dst(skb);
2847 struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
2848 struct xfrm_policy *pol = xdst->pols[0];
2849 struct xfrm_policy_queue *pq = &pol->polq;
2850
2851 if (unlikely(skb_fclone_busy(sk, skb))) {
2852 kfree_skb(skb);
2853 return 0;
2854 }
2855
2856 if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) {
2857 kfree_skb(skb);
2858 return -EAGAIN;
2859 }
2860
2861 skb_dst_force(skb);
2862
2863 spin_lock_bh(&pq->hold_queue.lock);
2864
2865 if (!pq->timeout)
2866 pq->timeout = XFRM_QUEUE_TMO_MIN;
2867
2868 sched_next = jiffies + pq->timeout;
2869
2870 if (del_timer(&pq->hold_timer)) {
2871 if (time_before(pq->hold_timer.expires, sched_next))
2872 sched_next = pq->hold_timer.expires;
2873 xfrm_pol_put(pol);
2874 }
2875
2876 __skb_queue_tail(&pq->hold_queue, skb);
2877 if (!mod_timer(&pq->hold_timer, sched_next))
2878 xfrm_pol_hold(pol);
2879
2880 spin_unlock_bh(&pq->hold_queue.lock);
2881
2882 return 0;
2883 }
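/* Illustrative model (not part of the original file) of the hold-queue
 * timer schedule: xdst_queue_output() arms the timer at
 * XFRM_QUEUE_TMO_MIN, and every pass of xfrm_policy_queue_process()
 * that still finds a dummy bundle doubles the timeout; once it
 * reaches XFRM_QUEUE_TMO_MAX the queue is purged instead.
 */
static inline unsigned long xfrm_queue_tmo_model(unsigned int passes)
{
	unsigned long tmo = XFRM_QUEUE_TMO_MIN;

	while (passes-- && tmo < XFRM_QUEUE_TMO_MAX)
		tmo <<= 1;	/* mirrors pq->timeout = pq->timeout << 1 */
	return tmo;
}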
2884
2885 static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
2886 struct xfrm_flo *xflo,
2887 const struct flowi *fl,
2888 int num_xfrms,
2889 u16 family)
2890 {
2891 int err;
2892 struct net_device *dev;
2893 struct dst_entry *dst;
2894 struct dst_entry *dst1;
2895 struct xfrm_dst *xdst;
2896
2897 xdst = xfrm_alloc_dst(net, family);
2898 if (IS_ERR(xdst))
2899 return xdst;
2900
2901 if (!(xflo->flags & XFRM_LOOKUP_QUEUE) ||
2902 net->xfrm.sysctl_larval_drop ||
2903 num_xfrms <= 0)
2904 return xdst;
2905
2906 dst = xflo->dst_orig;
2907 dst1 = &xdst->u.dst;
2908 dst_hold(dst);
2909 xdst->route = dst;
2910
2911 dst_copy_metrics(dst1, dst);
2912
2913 dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
2914 dst1->flags |= DST_XFRM_QUEUE;
2915 dst1->lastuse = jiffies;
2916
2917 dst1->input = dst_discard;
2918 dst1->output = xdst_queue_output;
2919
2920 dst_hold(dst);
2921 xfrm_dst_set_child(xdst, dst);
2922 xdst->path = dst;
2923
2924 xfrm_init_path((struct xfrm_dst *)dst1, dst, 0);
2925
2926 err = -ENODEV;
2927 dev = dst->dev;
2928 if (!dev)
2929 goto free_dst;
2930
2931 err = xfrm_fill_dst(xdst, dev, fl);
2932 if (err)
2933 goto free_dst;
2934
2935 out:
2936 return xdst;
2937
2938 free_dst:
2939 dst_release(dst1);
2940 xdst = ERR_PTR(err);
2941 goto out;
2942 }
2943
2944 static struct xfrm_dst *xfrm_bundle_lookup(struct net *net,
2945 const struct flowi *fl,
2946 u16 family, u8 dir,
2947 struct xfrm_flo *xflo, u32 if_id)
2948 {
2949 struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
2950 int num_pols = 0, num_xfrms = 0, err;
2951 struct xfrm_dst *xdst;
2952
2953 /* Resolve policies to use if we couldn't get them from
2954  * previous cache entry */
2955 num_pols = 1;
2956 pols[0] = xfrm_policy_lookup(net, fl, family, dir, if_id);
2957 err = xfrm_expand_policies(fl, family, pols,
2958 &num_pols, &num_xfrms);
2959 if (err < 0)
2960 goto inc_error;
2961 if (num_pols == 0)
2962 return NULL;
2963 if (num_xfrms <= 0)
2964 goto make_dummy_bundle;
2965
2966 xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
2967 xflo->dst_orig);
2968 if (IS_ERR(xdst)) {
2969 err = PTR_ERR(xdst);
2970 if (err == -EREMOTE) {
2971 xfrm_pols_put(pols, num_pols);
2972 return NULL;
2973 }
2974
2975 if (err != -EAGAIN)
2976 goto error;
2977 goto make_dummy_bundle;
2978 } else if (xdst == NULL) {
2979 num_xfrms = 0;
2980 goto make_dummy_bundle;
2981 }
2982
2983 return xdst;
2984
2985 make_dummy_bundle:
2986 /* We found policies, but there's no bundle to instantiate:
2987  * the needed xfrm_state's may not be resolved yet. Create a
2988  * dummy bundle that queues packets until the SAs show up. */
2989 xdst = xfrm_create_dummy_bundle(net, xflo, fl, num_xfrms, family);
2990 if (IS_ERR(xdst)) {
2991 xfrm_pols_put(pols, num_pols);
2992 return ERR_CAST(xdst);
2993 }
2994 xdst->num_pols = num_pols;
2995 xdst->num_xfrms = num_xfrms;
2996 memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
2997
2998 return xdst;
2999
3000 inc_error:
3001 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
3002 error:
3003 xfrm_pols_put(pols, num_pols);
3004 return ERR_PTR(err);
3005 }
3006
3007 static struct dst_entry *make_blackhole(struct net *net, u16 family,
3008 struct dst_entry *dst_orig)
3009 {
3010 const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
3011 struct dst_entry *ret;
3012
3013 if (!afinfo) {
3014 dst_release(dst_orig);
3015 return ERR_PTR(-EINVAL);
3016 } else {
3017 ret = afinfo->blackhole_route(net, dst_orig);
3018 }
3019 rcu_read_unlock();
3020
3021 return ret;
3022 }
3023
3024
3025
3026 /* Main function: finds/creates a bundle for given flow.
3027  *
3028  * At the moment we eat a raw IP route. Mostly it's not clear,
3029  * how it should be optimized further, so it does the same as
3030  * called function... what a pity...
3031  */
3032 struct dst_entry *xfrm_lookup_with_ifid(struct net *net,
3033 struct dst_entry *dst_orig,
3034 const struct flowi *fl,
3035 const struct sock *sk,
3036 int flags, u32 if_id)
3037 {
3038 struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
3039 struct xfrm_dst *xdst;
3040 struct dst_entry *dst, *route;
3041 u16 family = dst_orig->ops->family;
3042 u8 dir = XFRM_POLICY_OUT;
3043 int i, err, num_pols, num_xfrms = 0, drop_pols = 0;
3044
3045 dst = NULL;
3046 xdst = NULL;
3047 route = NULL;
3048
3049 sk = sk_const_to_full_sk(sk);
3050 if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
3051 num_pols = 1;
3052 pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, family,
3053 if_id);
3054 err = xfrm_expand_policies(fl, family, pols,
3055 &num_pols, &num_xfrms);
3056 if (err < 0)
3057 goto dropdst;
3058
3059 if (num_pols) {
3060 if (num_xfrms <= 0) {
3061 drop_pols = num_pols;
3062 goto no_transform;
3063 }
3064
3065 xdst = xfrm_resolve_and_create_bundle(
3066 pols, num_pols, fl,
3067 family, dst_orig);
3068
3069 if (IS_ERR(xdst)) {
3070 xfrm_pols_put(pols, num_pols);
3071 err = PTR_ERR(xdst);
3072 if (err == -EREMOTE)
3073 goto nopol;
3074
3075 goto dropdst;
3076 } else if (xdst == NULL) {
3077 num_xfrms = 0;
3078 drop_pols = num_pols;
3079 goto no_transform;
3080 }
3081
3082 route = xdst->route;
3083 }
3084 }
3085
3086 if (xdst == NULL) {
3087 struct xfrm_flo xflo;
3088
3089 xflo.dst_orig = dst_orig;
3090 xflo.flags = flags;
3091
3092 /* To accelerate a bit...  */
3093 if (!if_id && ((dst_orig->flags & DST_NOXFRM) ||
3094 !net->xfrm.policy_count[XFRM_POLICY_OUT]))
3095 goto nopol;
3096
3097 xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo, if_id);
3098 if (xdst == NULL)
3099 goto nopol;
3100 if (IS_ERR(xdst)) {
3101 err = PTR_ERR(xdst);
3102 goto dropdst;
3103 }
3104
3105 num_pols = xdst->num_pols;
3106 num_xfrms = xdst->num_xfrms;
3107 memcpy(pols, xdst->pols, sizeof(struct xfrm_policy *) * num_pols);
3108 route = xdst->route;
3109 }
3110
3111 dst = &xdst->u.dst;
3112 if (route == NULL && num_xfrms > 0) {
3113 /* The only case when xfrm_bundle_lookup() returns a
3114  * bundle with null route, is when the template could
3115  * not be resolved. It means policies are there, but
3116  * bundle could not be created, since we don't yet
3117  * have the xfrm_state's. We need to wait for KM to
3118  * negotiate new SA's or bail out with error. */
3119 if (net->xfrm.sysctl_larval_drop) {
3120 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
3121 err = -EREMOTE;
3122 goto error;
3123 }
3124
3125 err = -EAGAIN;
3126
3127 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
3128 goto error;
3129 }
3130
3131 no_transform:
3132 if (num_pols == 0)
3133 goto nopol;
3134
3135 if ((flags & XFRM_LOOKUP_ICMP) &&
3136 !(pols[0]->flags & XFRM_POLICY_ICMP)) {
3137 err = -ENOENT;
3138 goto error;
3139 }
3140
3141 for (i = 0; i < num_pols; i++)
3142 pols[i]->curlft.use_time = ktime_get_real_seconds();
3143
3144 if (num_xfrms < 0) {
3145 /* Prohibit the flow */
3146 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
3147 err = -EPERM;
3148 goto error;
3149 } else if (num_xfrms > 0) {
3150 /* Flow transformed */
3151 dst_release(dst_orig);
3152 } else {
3153 /* Flow passes untransformed */
3154 dst_release(dst);
3155 dst = dst_orig;
3156 }
3157 ok:
3158 xfrm_pols_put(pols, drop_pols);
3159 if (dst && dst->xfrm &&
3160 dst->xfrm->props.mode == XFRM_MODE_TUNNEL)
3161 dst->flags |= DST_XFRM_TUNNEL;
3162 return dst;
3163
3164 nopol:
3165 if ((!dst_orig->dev || !(dst_orig->dev->flags & IFF_LOOPBACK)) &&
3166 net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) {
3167 err = -EPERM;
3168 goto error;
3169 }
3170 if (!(flags & XFRM_LOOKUP_ICMP)) {
3171 dst = dst_orig;
3172 goto ok;
3173 }
3174 err = -ENOENT;
3175 error:
3176 dst_release(dst);
3177 dropdst:
3178 if (!(flags & XFRM_LOOKUP_KEEP_DST_REF))
3179 dst_release(dst_orig);
3180 xfrm_pols_put(pols, drop_pols);
3181 return ERR_PTR(err);
3182 }
3183 EXPORT_SYMBOL(xfrm_lookup_with_ifid);
3184
3185
3186
3187 /* Compatibility wrapper around xfrm_lookup_with_ifid() for callers
3188  * that do not carry an interface id (if_id == 0).
3189  */
3190 struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
3191 const struct flowi *fl, const struct sock *sk,
3192 int flags)
3193 {
3194 return xfrm_lookup_with_ifid(net, dst_orig, fl, sk, flags, 0);
3195 }
3196 EXPORT_SYMBOL(xfrm_lookup);
3197
3198 /* Like xfrm_lookup(), but on -EREMOTE (larval states with
3199  * sysctl_larval_drop enabled) a blackhole route is returned
3200  * instead of an error, so callers always get a usable dst. */
3201 struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
3202 const struct flowi *fl,
3203 const struct sock *sk, int flags)
3204 {
3205 struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk,
3206 flags | XFRM_LOOKUP_QUEUE |
3207 XFRM_LOOKUP_KEEP_DST_REF);
3208
3209 if (PTR_ERR(dst) == -EREMOTE)
3210 return make_blackhole(net, dst_orig->ops->family, dst_orig);
3211
3212 if (IS_ERR(dst))
3213 dst_release(dst_orig);
3214
3215 return dst;
3216 }
3217 EXPORT_SYMBOL(xfrm_lookup_route);
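/* Hypothetical caller sketch (not from this file): an output path
 * hands its routed dst to xfrm_lookup_route() and uses whatever comes
 * back; on -EREMOTE a blackhole route was already substituted above,
 * so only genuine errors reach the IS_ERR() branch.
 */
static inline struct dst_entry *
xfrm_output_route_example(struct net *net, struct dst_entry *rt_dst,
			  struct flowi *fl, const struct sock *sk)
{
	struct dst_entry *dst = xfrm_lookup_route(net, rt_dst, fl, sk, 0);

	if (IS_ERR(dst))
		return NULL;	/* dst_orig already released on error */
	return dst;
}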
3218
3219 static inline int
3220 xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl)
3221 {
3222 struct sec_path *sp = skb_sec_path(skb);
3223 struct xfrm_state *x;
3224
3225 if (!sp || idx < 0 || idx >= sp->len)
3226 return 0;
3227 x = sp->xvec[idx];
3228 if (!x->type->reject)
3229 return 0;
3230 return x->type->reject(x, skb, fl);
3231 }
3232
3233
3234
3235 /* Check that an incoming state satisfies a template: proto, spi,
3236  * reqid, mode, algorithms and, outside transport mode, the endpoint
3237  * addresses must all agree. Kernel-internal states (xfrm_state_kern)
3238  * only ever satisfy optional templates. */
3239 static inline int
3240 xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
3241 unsigned short family)
3242 {
3243 if (xfrm_state_kern(x))
3244 return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
3245 return x->id.proto == tmpl->id.proto &&
3246 (x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
3247 (x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
3248 x->props.mode == tmpl->mode &&
3249 (tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) ||
3250 !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
3251 !(x->props.mode != XFRM_MODE_TRANSPORT &&
3252 xfrm_state_addr_cmp(tmpl, x, family));
3253 }
3254
3255 /*
3256  * 0 or more than 0 is returned when validation is succeeded (either
3257  * policy is bypassed because of optional transport mode, or next index
3258  * of the matched secpath state with the template.
3259  * -1 is returned when no matching template is found.
3260  * Otherwise "-2 - errored_index" is returned.
3261  */
3262 static inline int
3263 xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start,
3264 unsigned short family)
3265 {
3266 int idx = start;
3267
3268 if (tmpl->optional) {
3269 if (tmpl->mode == XFRM_MODE_TRANSPORT)
3270 return start;
3271 } else
3272 start = -1;
3273 for (; idx < sp->len; idx++) {
3274 if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
3275 return ++idx;
3276 if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
3277 if (start == -1)
3278 start = -2-idx;
3279 break;
3280 }
3281 }
3282 return start;
3283 }
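/* Illustrative decode (not part of the original file) of
 * xfrm_policy_ok()'s return convention: >= 0 is the next secpath
 * index to match, -1 means no matching template, and anything below
 * -1 encodes "-2 - errored_index", recovered exactly as
 * __xfrm_policy_check() does below.
 */
static inline int xfrm_policy_ok_errored_index_model(int ret)
{
	return ret < -1 ? -(2 + ret) : -1;	/* -1: nothing to reject */
}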
3284
3285 static void
3286 decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse)
3287 {
3288 const struct iphdr *iph = ip_hdr(skb);
3289 int ihl = iph->ihl;
3290 u8 *xprth = skb_network_header(skb) + ihl * 4;
3291 struct flowi4 *fl4 = &fl->u.ip4;
3292 int oif = 0;
3293
3294 if (skb_dst(skb) && skb_dst(skb)->dev)
3295 oif = skb_dst(skb)->dev->ifindex;
3296
3297 memset(fl4, 0, sizeof(struct flowi4));
3298 fl4->flowi4_mark = skb->mark;
3299 fl4->flowi4_oif = reverse ? skb->skb_iif : oif;
3300
3301 fl4->flowi4_proto = iph->protocol;
3302 fl4->daddr = reverse ? iph->saddr : iph->daddr;
3303 fl4->saddr = reverse ? iph->daddr : iph->saddr;
3304 fl4->flowi4_tos = iph->tos & ~INET_ECN_MASK;
3305
3306 if (!ip_is_fragment(iph)) {
3307 switch (iph->protocol) {
3308 case IPPROTO_UDP:
3309 case IPPROTO_UDPLITE:
3310 case IPPROTO_TCP:
3311 case IPPROTO_SCTP:
3312 case IPPROTO_DCCP:
3313 if (xprth + 4 < skb->data ||
3314 pskb_may_pull(skb, xprth + 4 - skb->data)) {
3315 __be16 *ports;
3316
3317 xprth = skb_network_header(skb) + ihl * 4;
3318 ports = (__be16 *)xprth;
3319
3320 fl4->fl4_sport = ports[!!reverse];
3321 fl4->fl4_dport = ports[!reverse];
3322 }
3323 break;
3324 case IPPROTO_ICMP:
3325 if (xprth + 2 < skb->data ||
3326 pskb_may_pull(skb, xprth + 2 - skb->data)) {
3327 u8 *icmp;
3328
3329 xprth = skb_network_header(skb) + ihl * 4;
3330 icmp = xprth;
3331
3332 fl4->fl4_icmp_type = icmp[0];
3333 fl4->fl4_icmp_code = icmp[1];
3334 }
3335 break;
3336 case IPPROTO_GRE:
3337 if (xprth + 12 < skb->data ||
3338 pskb_may_pull(skb, xprth + 12 - skb->data)) {
3339 __be16 *greflags;
3340 __be32 *gre_hdr;
3341
3342 xprth = skb_network_header(skb) + ihl * 4;
3343 greflags = (__be16 *)xprth;
3344 gre_hdr = (__be32 *)xprth;
3345
3346 if (greflags[0] & GRE_KEY) {
3347 if (greflags[0] & GRE_CSUM)
3348 gre_hdr++;
3349 fl4->fl4_gre_key = gre_hdr[1];
3350 }
3351 }
3352 break;
3353 default:
3354 break;
3355 }
3356 }
3357 }
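/* Illustrative restatement (not from the original file) of the
 * ports[!!reverse] / ports[!reverse] indexing above: the transport
 * header carries the source port first and the destination port
 * second, so decoding the reverse flow simply swaps the two.
 */
static inline void xfrm_decode_ports_model(const __be16 ports[2],
					   bool reverse,
					   __be16 *sport, __be16 *dport)
{
	*sport = ports[!!reverse];	/* header dport when reversed */
	*dport = ports[!reverse];	/* header sport when reversed */
}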
3358
3359 #if IS_ENABLED(CONFIG_IPV6)
3360 static void
3361 decode_session6(struct sk_buff *skb, struct flowi *fl, bool reverse)
3362 {
3363 struct flowi6 *fl6 = &fl->u.ip6;
3364 int onlyproto = 0;
3365 const struct ipv6hdr *hdr = ipv6_hdr(skb);
3366 u32 offset = sizeof(*hdr);
3367 struct ipv6_opt_hdr *exthdr;
3368 const unsigned char *nh = skb_network_header(skb);
3369 u16 nhoff = IP6CB(skb)->nhoff;
3370 int oif = 0;
3371 u8 nexthdr;
3372
3373 if (!nhoff)
3374 nhoff = offsetof(struct ipv6hdr, nexthdr);
3375
3376 nexthdr = nh[nhoff];
3377
3378 if (skb_dst(skb) && skb_dst(skb)->dev)
3379 oif = skb_dst(skb)->dev->ifindex;
3380
3381 memset(fl6, 0, sizeof(struct flowi6));
3382 fl6->flowi6_mark = skb->mark;
3383 fl6->flowi6_oif = reverse ? skb->skb_iif : oif;
3384
3385 fl6->daddr = reverse ? hdr->saddr : hdr->daddr;
3386 fl6->saddr = reverse ? hdr->daddr : hdr->saddr;
3387
3388 while (nh + offset + sizeof(*exthdr) < skb->data ||
3389 pskb_may_pull(skb, nh + offset + sizeof(*exthdr) - skb->data)) {
3390 nh = skb_network_header(skb);
3391 exthdr = (struct ipv6_opt_hdr *)(nh + offset);
3392
3393 switch (nexthdr) {
3394 case NEXTHDR_FRAGMENT:
3395 onlyproto = 1;
3396 fallthrough;
3397 case NEXTHDR_ROUTING:
3398 case NEXTHDR_HOP:
3399 case NEXTHDR_DEST:
3400 offset += ipv6_optlen(exthdr);
3401 nexthdr = exthdr->nexthdr;
3402 break;
3403 case IPPROTO_UDP:
3404 case IPPROTO_UDPLITE:
3405 case IPPROTO_TCP:
3406 case IPPROTO_SCTP:
3407 case IPPROTO_DCCP:
3408 if (!onlyproto && (nh + offset + 4 < skb->data ||
3409 pskb_may_pull(skb, nh + offset + 4 - skb->data))) {
3410 __be16 *ports;
3411
3412 nh = skb_network_header(skb);
3413 ports = (__be16 *)(nh + offset);
3414 fl6->fl6_sport = ports[!!reverse];
3415 fl6->fl6_dport = ports[!reverse];
3416 }
3417 fl6->flowi6_proto = nexthdr;
3418 return;
3419 case IPPROTO_ICMPV6:
3420 if (!onlyproto && (nh + offset + 2 < skb->data ||
3421 pskb_may_pull(skb, nh + offset + 2 - skb->data))) {
3422 u8 *icmp;
3423
3424 nh = skb_network_header(skb);
3425 icmp = (u8 *)(nh + offset);
3426 fl6->fl6_icmp_type = icmp[0];
3427 fl6->fl6_icmp_code = icmp[1];
3428 }
3429 fl6->flowi6_proto = nexthdr;
3430 return;
3431 case IPPROTO_GRE:
3432 if (!onlyproto &&
3433 (nh + offset + 12 < skb->data ||
3434 pskb_may_pull(skb, nh + offset + 12 - skb->data))) {
3435 struct gre_base_hdr *gre_hdr;
3436 __be32 *gre_key;
3437
3438 nh = skb_network_header(skb);
3439 gre_hdr = (struct gre_base_hdr *)(nh + offset);
3440 gre_key = (__be32 *)(gre_hdr + 1);
3441
3442 if (gre_hdr->flags & GRE_KEY) {
3443 if (gre_hdr->flags & GRE_CSUM)
3444 gre_key++;
3445 fl6->fl6_gre_key = *gre_key;
3446 }
3447 }
3448 fl6->flowi6_proto = nexthdr;
3449 return;
3450
3451 #if IS_ENABLED(CONFIG_IPV6_MIP6)
3452 case IPPROTO_MH:
3453 offset += ipv6_optlen(exthdr);
3454 if (!onlyproto && (nh + offset + 3 < skb->data ||
3455 pskb_may_pull(skb, nh + offset + 3 - skb->data))) {
3456 struct ip6_mh *mh;
3457
3458 nh = skb_network_header(skb);
3459 mh = (struct ip6_mh *)(nh + offset);
3460 fl6->fl6_mh_type = mh->ip6mh_type;
3461 }
3462 fl6->flowi6_proto = nexthdr;
3463 return;
3464 #endif
3465 default:
3466 fl6->flowi6_proto = nexthdr;
3467 return;
3468 }
3469 }
3470 }
3471 #endif
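/* Illustrative helper (not part of the original file) capturing the
 * GRE key extraction both decoders perform above: the key word
 * follows the base header directly unless GRE_CSUM is set, in which
 * case a checksum/reserved word sits in between.
 */
static inline const __be32 *xfrm_gre_key_ptr_model(const struct gre_base_hdr *gre)
{
	const __be32 *p = (const __be32 *)(gre + 1);

	if (!(gre->flags & GRE_KEY))
		return NULL;
	if (gre->flags & GRE_CSUM)
		p++;	/* skip the checksum/reserved word */
	return p;
}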
3472
3473 int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
3474 unsigned int family, int reverse)
3475 {
3476 switch (family) {
3477 case AF_INET:
3478 decode_session4(skb, fl, reverse);
3479 break;
3480 #if IS_ENABLED(CONFIG_IPV6)
3481 case AF_INET6:
3482 decode_session6(skb, fl, reverse);
3483 break;
3484 #endif
3485 default:
3486 return -EAFNOSUPPORT;
3487 }
3488
3489 return security_xfrm_decode_session(skb, &fl->flowi_secid);
3490 }
3491 EXPORT_SYMBOL(__xfrm_decode_session);
3492
3493 static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int *idxp)
3494 {
3495 for (; k < sp->len; k++) {
3496 if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
3497 *idxp = k;
3498 return 1;
3499 }
3500 }
3501
3502 return 0;
3503 }
3504
3505 int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
3506 unsigned short family)
3507 {
3508 struct net *net = dev_net(skb->dev);
3509 struct xfrm_policy *pol;
3510 struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
3511 int npols = 0;
3512 int xfrm_nr;
3513 int pi;
3514 int reverse;
3515 struct flowi fl;
3516 int xerr_idx = -1;
3517 const struct xfrm_if_cb *ifcb;
3518 struct sec_path *sp;
3519 struct xfrm_if *xi;
3520 u32 if_id = 0;
3521
3522 rcu_read_lock();
3523 ifcb = xfrm_if_get_cb();
3524
3525 if (ifcb) {
3526 xi = ifcb->decode_session(skb, family);
3527 if (xi) {
3528 if_id = xi->p.if_id;
3529 net = xi->net;
3530 }
3531 }
3532 rcu_read_unlock();
3533
3534 reverse = dir & ~XFRM_POLICY_MASK;
3535 dir &= XFRM_POLICY_MASK;
3536
3537 if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
3538 XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
3539 return 0;
3540 }
3541
3542 nf_nat_decode_session(skb, &fl, family);
3543
3544 /* First, check used SA against their selectors. */
3545 sp = skb_sec_path(skb);
3546 if (sp) {
3547 int i;
3548
3549 for (i = sp->len - 1; i >= 0; i--) {
3550 struct xfrm_state *x = sp->xvec[i];
3551 if (!xfrm_selector_match(&x->sel, &fl, family)) {
3552 XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
3553 return 0;
3554 }
3555 }
3556 }
3557
3558 pol = NULL;
3559 sk = sk_to_full_sk(sk);
3560 if (sk && sk->sk_policy[dir]) {
3561 pol = xfrm_sk_policy_lookup(sk, dir, &fl, family, if_id);
3562 if (IS_ERR(pol)) {
3563 XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
3564 return 0;
3565 }
3566 }
3567
3568 if (!pol)
3569 pol = xfrm_policy_lookup(net, &fl, family, dir, if_id);
3570
3571 if (IS_ERR(pol)) {
3572 XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
3573 return 0;
3574 }
3575
3576 if (!pol) {
3577 if (net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) {
3578 XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
3579 return 0;
3580 }
3581
3582 if (sp && secpath_has_nontransport(sp, 0, &xerr_idx)) {
3583 xfrm_secpath_reject(xerr_idx, skb, &fl);
3584 XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
3585 return 0;
3586 }
3587 return 1;
3588 }
3589
3590 pol->curlft.use_time = ktime_get_real_seconds();
3591
3592 pols[0] = pol;
3593 npols++;
3594 #ifdef CONFIG_XFRM_SUB_POLICY
3595 if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
3596 pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN,
3597 &fl, family,
3598 XFRM_POLICY_IN, if_id);
3599 if (pols[1]) {
3600 if (IS_ERR(pols[1])) {
3601 XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
3602 xfrm_pol_put(pols[0]);
3603 return 0;
3604 }
3605 pols[1]->curlft.use_time = ktime_get_real_seconds();
3606 npols++;
3607 }
3608 }
3609 #endif
3610
3611 if (pol->action == XFRM_POLICY_ALLOW) {
3612 static struct sec_path dummy;
3613 struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
3614 struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
3615 struct xfrm_tmpl **tpp = tp;
3616 int ti = 0;
3617 int i, k;
3618
3619 sp = skb_sec_path(skb);
3620 if (!sp)
3621 sp = &dummy;
3622
3623 for (pi = 0; pi < npols; pi++) {
3624 if (pols[pi] != pol &&
3625 pols[pi]->action != XFRM_POLICY_ALLOW) {
3626 XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
3627 goto reject;
3628 }
3629 if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
3630 XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
3631 goto reject_error;
3632 }
3633 for (i = 0; i < pols[pi]->xfrm_nr; i++)
3634 tpp[ti++] = &pols[pi]->xfrm_vec[i];
3635 }
3636 xfrm_nr = ti;
3637
3638 if (net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK &&
3639 !xfrm_nr) {
3640 XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
3641 goto reject;
3642 }
3643
3644 if (npols > 1) {
3645 xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
3646 tpp = stp;
3647 }
3648
3649 /* For each tunnel xfrm, find the first matching tmpl.
3650  * For each tmpl before that, find corresponding xfrm.
3651  * Order is _important_. Later we will implement some
3652  * barriers, but at the moment barriers
3653  * are implied between each two transformations.
3654  */
3655 for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
3656 k = xfrm_policy_ok(tpp[i], sp, k, family);
3657 if (k < 0) {
3658 if (k < -1)
3659 /* "-2 - errored_index" returned */
3660 xerr_idx = -(2+k);
3661 XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
3662 goto reject;
3663 }
3664 }
3665
3666 if (secpath_has_nontransport(sp, k, &xerr_idx)) {
3667 XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
3668 goto reject;
3669 }
3670
3671 xfrm_pols_put(pols, npols);
3672 return 1;
3673 }
3674 XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
3675
3676 reject:
3677 xfrm_secpath_reject(xerr_idx, skb, &fl);
3678 reject_error:
3679 xfrm_pols_put(pols, npols);
3680 return 0;
3681 }
3682 EXPORT_SYMBOL(__xfrm_policy_check);
3683
3684 int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
3685 {
3686 struct net *net = dev_net(skb->dev);
3687 struct flowi fl;
3688 struct dst_entry *dst;
3689 int res = 1;
3690
3691 if (xfrm_decode_session(skb, &fl, family) < 0) {
3692 XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR);
3693 return 0;
3694 }
3695
3696 skb_dst_force(skb);
3697 if (!skb_dst(skb)) {
3698 XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR);
3699 return 0;
3700 }
3701
3702 dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, XFRM_LOOKUP_QUEUE);
3703 if (IS_ERR(dst)) {
3704 res = 0;
3705 dst = NULL;
3706 }
3707 skb_dst_set(skb, dst);
3708 return res;
3709 }
3710 EXPORT_SYMBOL(__xfrm_route_forward);
3711
3712
3713
3714 static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
3715 {
3716 /* Bundles are created with dst->obsolete == DST_OBSOLETE_FORCE_CHK
3717  * (see xfrm_bundle_create()), which makes every user revalidate a
3718  * cached entry through dst_ops->check before reusing it. A bundle
3719  * stays usable only while that marker is intact and the bundle is
3720  * not stale: all states valid, generation ids current, routes and
3721  * cached MTUs unchanged (xfrm_bundle_ok()). Otherwise return NULL
3722  * so the caller drops the stale entry and does a fresh lookup.
3723  */
3737 if (dst->obsolete < 0 && !stale_bundle(dst))
3738 return dst;
3739
3740 return NULL;
3741 }
3742
3743 static int stale_bundle(struct dst_entry *dst)
3744 {
3745 return !xfrm_bundle_ok((struct xfrm_dst *)dst);
3746 }
3747
3748 void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
3749 {
3750 while ((dst = xfrm_dst_child(dst)) && dst->xfrm && dst->dev == dev) {
3751 dst->dev = blackhole_netdev;
3752 dev_hold(dst->dev);
3753 dev_put(dev);
3754 }
3755 }
3756 EXPORT_SYMBOL(xfrm_dst_ifdown);
3757
3758 static void xfrm_link_failure(struct sk_buff *skb)
3759 {
3760 /* Impossible. Such dst must be popped before reaches point of failure. */
3761 }
3762
3763 static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
3764 {
3765 if (dst) {
3766 if (dst->obsolete) {
3767 dst_release(dst);
3768 dst = NULL;
3769 }
3770 }
3771 return dst;
3772 }
3773
3774 static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr)
3775 {
3776 while (nr--) {
3777 struct xfrm_dst *xdst = bundle[nr];
3778 u32 pmtu, route_mtu_cached;
3779 struct dst_entry *dst;
3780
3781 dst = &xdst->u.dst;
3782 pmtu = dst_mtu(xfrm_dst_child(dst));
3783 xdst->child_mtu_cached = pmtu;
3784
3785 pmtu = xfrm_state_mtu(dst->xfrm, pmtu);
3786
3787 route_mtu_cached = dst_mtu(xdst->route);
3788 xdst->route_mtu_cached = route_mtu_cached;
3789
3790 if (pmtu > route_mtu_cached)
3791 pmtu = route_mtu_cached;
3792
3793 dst_metric_set(dst, RTAX_MTU, pmtu);
3794 }
3795 }
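/* Illustrative model (not from the original file) of the per-hop MTU
 * computed above: the advertised MTU is the transformed child MTU
 * clamped to the cached route MTU. The fixed overhead parameter is an
 * assumption standing in for what xfrm_state_mtu() subtracts.
 */
static inline u32 xfrm_hop_mtu_model(u32 child_mtu, u32 state_overhead,
				     u32 route_mtu)
{
	u32 pmtu = child_mtu - state_overhead;

	return pmtu > route_mtu ? route_mtu : pmtu;
}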
3796
3797
3798 /* Check that the bundle accepts the flow and its components are
3799  * still valid.
3800  */
3801 static int xfrm_bundle_ok(struct xfrm_dst *first)
3802 {
3803 struct xfrm_dst *bundle[XFRM_MAX_DEPTH];
3804 struct dst_entry *dst = &first->u.dst;
3805 struct xfrm_dst *xdst;
3806 int start_from, nr;
3807 u32 mtu;
3808
3809 if (!dst_check(xfrm_dst_path(dst), ((struct xfrm_dst *)dst)->path_cookie) ||
3810 (dst->dev && !netif_running(dst->dev)))
3811 return 0;
3812
3813 if (dst->flags & DST_XFRM_QUEUE)
3814 return 1;
3815
3816 start_from = nr = 0;
3817 do {
3818 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
3819
3820 if (dst->xfrm->km.state != XFRM_STATE_VALID)
3821 return 0;
3822 if (xdst->xfrm_genid != dst->xfrm->genid)
3823 return 0;
3824 if (xdst->num_pols > 0 &&
3825 xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
3826 return 0;
3827
3828 bundle[nr++] = xdst;
3829
3830 mtu = dst_mtu(xfrm_dst_child(dst));
3831 if (xdst->child_mtu_cached != mtu) {
3832 start_from = nr;
3833 xdst->child_mtu_cached = mtu;
3834 }
3835
3836 if (!dst_check(xdst->route, xdst->route_cookie))
3837 return 0;
3838 mtu = dst_mtu(xdst->route);
3839 if (xdst->route_mtu_cached != mtu) {
3840 start_from = nr;
3841 xdst->route_mtu_cached = mtu;
3842 }
3843
3844 dst = xfrm_dst_child(dst);
3845 } while (dst->xfrm);
3846
3847 if (likely(!start_from))
3848 return 1;
3849
3850 xdst = bundle[start_from - 1];
3851 mtu = xdst->child_mtu_cached;
3852 while (start_from--) {
3853 dst = &xdst->u.dst;
3854
3855 mtu = xfrm_state_mtu(dst->xfrm, mtu);
3856 if (mtu > xdst->route_mtu_cached)
3857 mtu = xdst->route_mtu_cached;
3858 dst_metric_set(dst, RTAX_MTU, mtu);
3859 if (!start_from)
3860 break;
3861
3862 xdst = bundle[start_from - 1];
3863 xdst->child_mtu_cached = mtu;
3864 }
3865
3866 return 1;
3867 }
3868
3869 static unsigned int xfrm_default_advmss(const struct dst_entry *dst)
3870 {
3871 return dst_metric_advmss(xfrm_dst_path(dst));
3872 }
3873
3874 static unsigned int xfrm_mtu(const struct dst_entry *dst)
3875 {
3876 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
3877
3878 return mtu ? : dst_mtu(xfrm_dst_path(dst));
3879 }
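/* The "mtu ? : dst_mtu(...)" above is GCC's two-operand conditional:
 * use the cached RTAX_MTU metric when non-zero, otherwise fall back
 * to the path MTU. Spelled-out equivalent (illustrative only):
 */
static inline unsigned int xfrm_mtu_model(unsigned int cached,
					  unsigned int path_mtu)
{
	return cached ? cached : path_mtu;
}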
3880
3881 static const void *xfrm_get_dst_nexthop(const struct dst_entry *dst,
3882 const void *daddr)
3883 {
3884 while (dst->xfrm) {
3885 const struct xfrm_state *xfrm = dst->xfrm;
3886
3887 dst = xfrm_dst_child(dst);
3888
3889 if (xfrm->props.mode == XFRM_MODE_TRANSPORT)
3890 continue;
3891 if (xfrm->type->flags & XFRM_TYPE_REMOTE_COADDR)
3892 daddr = xfrm->coaddr;
3893 else if (!(xfrm->type->flags & XFRM_TYPE_LOCAL_COADDR))
3894 daddr = &xfrm->id.daddr;
3895 }
3896 return daddr;
3897 }
3898
3899 static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst,
3900 struct sk_buff *skb,
3901 const void *daddr)
3902 {
3903 const struct dst_entry *path = xfrm_dst_path(dst);
3904
3905 if (!skb)
3906 daddr = xfrm_get_dst_nexthop(dst, daddr);
3907 return path->ops->neigh_lookup(path, skb, daddr);
3908 }
3909
3910 static void xfrm_confirm_neigh(const struct dst_entry *dst, const void *daddr)
3911 {
3912 const struct dst_entry *path = xfrm_dst_path(dst);
3913
3914 daddr = xfrm_get_dst_nexthop(dst, daddr);
3915 path->ops->confirm_neigh(path, daddr);
3916 }
3917
3918 int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int family)
3919 {
3920 int err = 0;
3921
3922 if (WARN_ON(family >= ARRAY_SIZE(xfrm_policy_afinfo)))
3923 return -EAFNOSUPPORT;
3924
3925 spin_lock(&xfrm_policy_afinfo_lock);
3926 if (unlikely(xfrm_policy_afinfo[family] != NULL))
3927 err = -EEXIST;
3928 else {
3929 struct dst_ops *dst_ops = afinfo->dst_ops;
3930 if (likely(dst_ops->kmem_cachep == NULL))
3931 dst_ops->kmem_cachep = xfrm_dst_cache;
3932 if (likely(dst_ops->check == NULL))
3933 dst_ops->check = xfrm_dst_check;
3934 if (likely(dst_ops->default_advmss == NULL))
3935 dst_ops->default_advmss = xfrm_default_advmss;
3936 if (likely(dst_ops->mtu == NULL))
3937 dst_ops->mtu = xfrm_mtu;
3938 if (likely(dst_ops->negative_advice == NULL))
3939 dst_ops->negative_advice = xfrm_negative_advice;
3940 if (likely(dst_ops->link_failure == NULL))
3941 dst_ops->link_failure = xfrm_link_failure;
3942 if (likely(dst_ops->neigh_lookup == NULL))
3943 dst_ops->neigh_lookup = xfrm_neigh_lookup;
3944 if (likely(!dst_ops->confirm_neigh))
3945 dst_ops->confirm_neigh = xfrm_confirm_neigh;
3946 rcu_assign_pointer(xfrm_policy_afinfo[family], afinfo);
3947 }
3948 spin_unlock(&xfrm_policy_afinfo_lock);
3949
3950 return err;
3951 }
3952 EXPORT_SYMBOL(xfrm_policy_register_afinfo);
3953
3954 void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo)
3955 {
3956 struct dst_ops *dst_ops = afinfo->dst_ops;
3957 int i;
3958
3959 for (i = 0; i < ARRAY_SIZE(xfrm_policy_afinfo); i++) {
3960 if (xfrm_policy_afinfo[i] != afinfo)
3961 continue;
3962 RCU_INIT_POINTER(xfrm_policy_afinfo[i], NULL);
3963 break;
3964 }
3965
3966 synchronize_rcu();
3967
3968 dst_ops->kmem_cachep = NULL;
3969 dst_ops->check = NULL;
3970 dst_ops->negative_advice = NULL;
3971 dst_ops->link_failure = NULL;
3972 }
3973 EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
3974
3975 void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb)
3976 {
3977 spin_lock(&xfrm_if_cb_lock);
3978 rcu_assign_pointer(xfrm_if_cb, ifcb);
3979 spin_unlock(&xfrm_if_cb_lock);
3980 }
3981 EXPORT_SYMBOL(xfrm_if_register_cb);
3982
3983 void xfrm_if_unregister_cb(void)
3984 {
3985 RCU_INIT_POINTER(xfrm_if_cb, NULL);
3986 synchronize_rcu();
3987 }
3988 EXPORT_SYMBOL(xfrm_if_unregister_cb);
3989
3990 #ifdef CONFIG_XFRM_STATISTICS
3991 static int __net_init xfrm_statistics_init(struct net *net)
3992 {
3993 int rv;
3994 net->mib.xfrm_statistics = alloc_percpu(struct linux_xfrm_mib);
3995 if (!net->mib.xfrm_statistics)
3996 return -ENOMEM;
3997 rv = xfrm_proc_init(net);
3998 if (rv < 0)
3999 free_percpu(net->mib.xfrm_statistics);
4000 return rv;
4001 }
4002
4003 static void xfrm_statistics_fini(struct net *net)
4004 {
4005 xfrm_proc_fini(net);
4006 free_percpu(net->mib.xfrm_statistics);
4007 }
4008 #else
4009 static int __net_init xfrm_statistics_init(struct net *net)
4010 {
4011 return 0;
4012 }
4013
4014 static void xfrm_statistics_fini(struct net *net)
4015 {
4016 }
4017 #endif
4018
4019 static int __net_init xfrm_policy_init(struct net *net)
4020 {
4021 unsigned int hmask, sz;
4022 int dir, err;
4023
4024 if (net_eq(net, &init_net)) {
4025 xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
4026 sizeof(struct xfrm_dst),
4027 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
4028 NULL);
4029 err = rhashtable_init(&xfrm_policy_inexact_table,
4030 &xfrm_pol_inexact_params);
4031 BUG_ON(err);
4032 }
4033
4034 hmask = 8 - 1;
4035 sz = (hmask+1) * sizeof(struct hlist_head);
4036
4037 net->xfrm.policy_byidx = xfrm_hash_alloc(sz);
4038 if (!net->xfrm.policy_byidx)
4039 goto out_byidx;
4040 net->xfrm.policy_idx_hmask = hmask;
4041
4042 for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
4043 struct xfrm_policy_hash *htab;
4044
4045 net->xfrm.policy_count[dir] = 0;
4046 net->xfrm.policy_count[XFRM_POLICY_MAX + dir] = 0;
4047 INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
4048
4049 htab = &net->xfrm.policy_bydst[dir];
4050 htab->table = xfrm_hash_alloc(sz);
4051 if (!htab->table)
4052 goto out_bydst;
4053 htab->hmask = hmask;
4054 htab->dbits4 = 32;
4055 htab->sbits4 = 32;
4056 htab->dbits6 = 128;
4057 htab->sbits6 = 128;
4058 }
4059 net->xfrm.policy_hthresh.lbits4 = 32;
4060 net->xfrm.policy_hthresh.rbits4 = 32;
4061 net->xfrm.policy_hthresh.lbits6 = 128;
4062 net->xfrm.policy_hthresh.rbits6 = 128;
4063
4064 seqlock_init(&net->xfrm.policy_hthresh.lock);
4065
4066 INIT_LIST_HEAD(&net->xfrm.policy_all);
4067 INIT_LIST_HEAD(&net->xfrm.inexact_bins);
4068 INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
4069 INIT_WORK(&net->xfrm.policy_hthresh.work, xfrm_hash_rebuild);
4070 return 0;
4071
4072 out_bydst:
4073 for (dir--; dir >= 0; dir--) {
4074 struct xfrm_policy_hash *htab;
4075
4076 htab = &net->xfrm.policy_bydst[dir];
4077 xfrm_hash_free(htab->table, sz);
4078 }
4079 xfrm_hash_free(net->xfrm.policy_byidx, sz);
4080 out_byidx:
4081 return -ENOMEM;
4082 }
4083
4084 static void xfrm_policy_fini(struct net *net)
4085 {
4086 struct xfrm_pol_inexact_bin *b, *t;
4087 unsigned int sz;
4088 int dir;
4089
4090 flush_work(&net->xfrm.policy_hash_work);
4091 #ifdef CONFIG_XFRM_SUB_POLICY
4092 xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false);
4093 #endif
4094 xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, false);
4095
4096 WARN_ON(!list_empty(&net->xfrm.policy_all));
4097
4098 for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
4099 struct xfrm_policy_hash *htab;
4100
4101 WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir]));
4102
4103 htab = &net->xfrm.policy_bydst[dir];
4104 sz = (htab->hmask + 1) * sizeof(struct hlist_head);
4105 WARN_ON(!hlist_empty(htab->table));
4106 xfrm_hash_free(htab->table, sz);
4107 }
4108
4109 sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head);
4110 WARN_ON(!hlist_empty(net->xfrm.policy_byidx));
4111 xfrm_hash_free(net->xfrm.policy_byidx, sz);
4112
4113 spin_lock_bh(&net->xfrm.xfrm_policy_lock);
4114 list_for_each_entry_safe(b, t, &net->xfrm.inexact_bins, inexact_bins)
4115 __xfrm_policy_inexact_prune_bin(b, true);
4116 spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
4117 }
4118
4119 static int __net_init xfrm_net_init(struct net *net)
4120 {
4121 int rv;
4122
4123 /* Initialize the per-net locks here */
4124 spin_lock_init(&net->xfrm.xfrm_state_lock);
4125 spin_lock_init(&net->xfrm.xfrm_policy_lock);
4126 seqcount_spinlock_init(&net->xfrm.xfrm_policy_hash_generation, &net->xfrm.xfrm_policy_lock);
4127 mutex_init(&net->xfrm.xfrm_cfg_mutex);
4128 net->xfrm.policy_default[XFRM_POLICY_IN] = XFRM_USERPOLICY_ACCEPT;
4129 net->xfrm.policy_default[XFRM_POLICY_FWD] = XFRM_USERPOLICY_ACCEPT;
4130 net->xfrm.policy_default[XFRM_POLICY_OUT] = XFRM_USERPOLICY_ACCEPT;
4131
4132 rv = xfrm_statistics_init(net);
4133 if (rv < 0)
4134 goto out_statistics;
4135 rv = xfrm_state_init(net);
4136 if (rv < 0)
4137 goto out_state;
4138 rv = xfrm_policy_init(net);
4139 if (rv < 0)
4140 goto out_policy;
4141 rv = xfrm_sysctl_init(net);
4142 if (rv < 0)
4143 goto out_sysctl;
4144
4145 return 0;
4146
4147 out_sysctl:
4148 xfrm_policy_fini(net);
4149 out_policy:
4150 xfrm_state_fini(net);
4151 out_state:
4152 xfrm_statistics_fini(net);
4153 out_statistics:
4154 return rv;
4155 }
4156
4157 static void __net_exit xfrm_net_exit(struct net *net)
4158 {
4159 xfrm_sysctl_fini(net);
4160 xfrm_policy_fini(net);
4161 xfrm_state_fini(net);
4162 xfrm_statistics_fini(net);
4163 }
4164
4165 static struct pernet_operations __net_initdata xfrm_net_ops = {
4166 .init = xfrm_net_init,
4167 .exit = xfrm_net_exit,
4168 };
4169
4170 void __init xfrm_init(void)
4171 {
4172 register_pernet_subsys(&xfrm_net_ops);
4173 xfrm_dev_init();
4174 xfrm_input_init();
4175
4176 #ifdef CONFIG_XFRM_ESPINTCP
4177 espintcp_init();
4178 #endif
4179 }
4180
4181 #ifdef CONFIG_AUDITSYSCALL
4182 static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
4183 struct audit_buffer *audit_buf)
4184 {
4185 struct xfrm_sec_ctx *ctx = xp->security;
4186 struct xfrm_selector *sel = &xp->selector;
4187
4188 if (ctx)
4189 audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
4190 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
4191
4192 switch (sel->family) {
4193 case AF_INET:
4194 audit_log_format(audit_buf, " src=%pI4", &sel->saddr.a4);
4195 if (sel->prefixlen_s != 32)
4196 audit_log_format(audit_buf, " src_prefixlen=%d",
4197 sel->prefixlen_s);
4198 audit_log_format(audit_buf, " dst=%pI4", &sel->daddr.a4);
4199 if (sel->prefixlen_d != 32)
4200 audit_log_format(audit_buf, " dst_prefixlen=%d",
4201 sel->prefixlen_d);
4202 break;
4203 case AF_INET6:
4204 audit_log_format(audit_buf, " src=%pI6", sel->saddr.a6);
4205 if (sel->prefixlen_s != 128)
4206 audit_log_format(audit_buf, " src_prefixlen=%d",
4207 sel->prefixlen_s);
4208 audit_log_format(audit_buf, " dst=%pI6", sel->daddr.a6);
4209 if (sel->prefixlen_d != 128)
4210 audit_log_format(audit_buf, " dst_prefixlen=%d",
4211 sel->prefixlen_d);
4212 break;
4213 }
4214 }
4215
4216 void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, bool task_valid)
4217 {
4218 struct audit_buffer *audit_buf;
4219
4220 audit_buf = xfrm_audit_start("SPD-add");
4221 if (audit_buf == NULL)
4222 return;
4223 xfrm_audit_helper_usrinfo(task_valid, audit_buf);
4224 audit_log_format(audit_buf, " res=%u", result);
4225 xfrm_audit_common_policyinfo(xp, audit_buf);
4226 audit_log_end(audit_buf);
4227 }
4228 EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);
4229
4230 void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
4231 bool task_valid)
4232 {
4233 struct audit_buffer *audit_buf;
4234
4235 audit_buf = xfrm_audit_start("SPD-delete");
4236 if (audit_buf == NULL)
4237 return;
4238 xfrm_audit_helper_usrinfo(task_valid, audit_buf);
4239 audit_log_format(audit_buf, " res=%u", result);
4240 xfrm_audit_common_policyinfo(xp, audit_buf);
4241 audit_log_end(audit_buf);
4242 }
4243 EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
4244 #endif
4245
4246 #ifdef CONFIG_XFRM_MIGRATE
4247 static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp,
4248 const struct xfrm_selector *sel_tgt)
4249 {
4250 if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
4251 if (sel_tgt->family == sel_cmp->family &&
4252 xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr,
4253 sel_cmp->family) &&
4254 xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr,
4255 sel_cmp->family) &&
4256 sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
4257 sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
4258 return true;
4259 }
4260 } else {
4261 if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
4262 return true;
4263 }
4264 }
4265 return false;
4266 }
4267
4268 static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel,
4269 u8 dir, u8 type, struct net *net, u32 if_id)
4270 {
4271 struct xfrm_policy *pol, *ret = NULL;
4272 struct hlist_head *chain;
4273 u32 priority = ~0U;
4274
4275 spin_lock_bh(&net->xfrm.xfrm_policy_lock);
4276 chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir);
4277 hlist_for_each_entry(pol, chain, bydst) {
4278 if ((if_id == 0 || pol->if_id == if_id) &&
4279 xfrm_migrate_selector_match(sel, &pol->selector) &&
4280 pol->type == type) {
4281 ret = pol;
4282 priority = ret->priority;
4283 break;
4284 }
4285 }
4286 chain = &net->xfrm.policy_inexact[dir];
4287 hlist_for_each_entry(pol, chain, bydst_inexact_list) {
4288 if ((pol->priority >= priority) && ret)
4289 break;
4290
4291 if ((if_id == 0 || pol->if_id == if_id) &&
4292 xfrm_migrate_selector_match(sel, &pol->selector) &&
4293 pol->type == type) {
4294 ret = pol;
4295 break;
4296 }
4297 }
4298
4299 xfrm_pol_hold(ret);
4300
4301 spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
4302
4303 return ret;
4304 }
4305
4306 static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t)
4307 {
4308 int match = 0;
4309
4310 if (t->mode == m->mode && t->id.proto == m->proto &&
4311 (m->reqid == 0 || t->reqid == m->reqid)) {
4312 switch (t->mode) {
4313 case XFRM_MODE_TUNNEL:
4314 case XFRM_MODE_BEET:
4315 if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr,
4316 m->old_family) &&
4317 xfrm_addr_equal(&t->saddr, &m->old_saddr,
4318 m->old_family)) {
4319 match = 1;
4320 }
4321 break;
4322 case XFRM_MODE_TRANSPORT:
4323 /* in case of transport mode, template does not store
4324  * any IP addresses, hence we just compare mode and
4325  * protocol */
4326 match = 1;
4327 break;
4328 default:
4329 break;
4330 }
4331 }
4332 return match;
4333 }
4334
4335 /* update endpoint address(es) of template(s) */
4336 static int xfrm_policy_migrate(struct xfrm_policy *pol,
4337 struct xfrm_migrate *m, int num_migrate)
4338 {
4339 struct xfrm_migrate *mp;
4340 int i, j, n = 0;
4341
4342 write_lock_bh(&pol->lock);
4343 if (unlikely(pol->walk.dead)) {
4344 /* target policy has been deleted */
4345 write_unlock_bh(&pol->lock);
4346 return -ENOENT;
4347 }
4348
4349 for (i = 0; i < pol->xfrm_nr; i++) {
4350 for (j = 0, mp = m; j < num_migrate; j++, mp++) {
4351 if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
4352 continue;
4353 n++;
4354 if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
4355 pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
4356 continue;
4357
4358 memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
4359 sizeof(pol->xfrm_vec[i].id.daddr));
4360 memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
4361 sizeof(pol->xfrm_vec[i].saddr));
4362 pol->xfrm_vec[i].encap_family = mp->new_family;
4363
4364 atomic_inc(&pol->genid);
4365 }
4366 }
4367
4368 write_unlock_bh(&pol->lock);
4369
4370 if (!n)
4371 return -ENODATA;
4372
4373 return 0;
4374 }
4375
4376 static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate)
4377 {
4378 int i, j;
4379
4380 if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
4381 return -EINVAL;
4382
4383 for (i = 0; i < num_migrate; i++) {
4384 if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
4385 xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
4386 return -EINVAL;
4387
4388 /* check if there is any duplicated entry */
4389 for (j = i + 1; j < num_migrate; j++) {
4390 if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
4391 sizeof(m[i].old_daddr)) &&
4392 !memcmp(&m[i].old_saddr, &m[j].old_saddr,
4393 sizeof(m[i].old_saddr)) &&
4394 m[i].proto == m[j].proto &&
4395 m[i].mode == m[j].mode &&
4396 m[i].reqid == m[j].reqid &&
4397 m[i].old_family == m[j].old_family)
4398 return -EINVAL;
4399 }
4400 }
4401
4402 return 0;
4403 }
4404
4405 int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
4406 struct xfrm_migrate *m, int num_migrate,
4407 struct xfrm_kmaddress *k, struct net *net,
4408 struct xfrm_encap_tmpl *encap, u32 if_id)
4409 {
4410 int i, err, nx_cur = 0, nx_new = 0;
4411 struct xfrm_policy *pol = NULL;
4412 struct xfrm_state *x, *xc;
4413 struct xfrm_state *x_cur[XFRM_MAX_DEPTH];
4414 struct xfrm_state *x_new[XFRM_MAX_DEPTH];
4415 struct xfrm_migrate *mp;
4416
4417 /* Stage 0 - sanity checks */
4418 if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
4419 goto out;
4420
4421 if (dir >= XFRM_POLICY_MAX) {
4422 err = -EINVAL;
4423 goto out;
4424 }
4425
4426 /* Stage 1 - find policy */
4427 if ((pol = xfrm_migrate_policy_find(sel, dir, type, net, if_id)) == NULL) {
4428 err = -ENOENT;
4429 goto out;
4430 }
4431
4432 /* Stage 2 - find and update state(s) */
4433 for (i = 0, mp = m; i < num_migrate; i++, mp++) {
4434 if ((x = xfrm_migrate_state_find(mp, net, if_id))) {
4435 x_cur[nx_cur] = x;
4436 nx_cur++;
4437 xc = xfrm_state_migrate(x, mp, encap);
4438 if (xc) {
4439 x_new[nx_new] = xc;
4440 nx_new++;
4441 } else {
4442 err = -ENODATA;
4443 goto restore_state;
4444 }
4445 }
4446 }
4447
4448 /* Stage 3 - update policy */
4449 if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0)
4450 goto restore_state;
4451
4452 /* Stage 4 - delete old state(s) */
4453 if (nx_cur) {
4454 xfrm_states_put(x_cur, nx_cur);
4455 xfrm_states_delete(x_cur, nx_cur);
4456 }
4457
4458 /* Stage 5 - announce */
4459 km_migrate(sel, dir, type, m, num_migrate, k, encap);
4460
4461 xfrm_pol_put(pol);
4462
4463 return 0;
4464 out:
4465 return err;
4466
4467 restore_state:
4468 if (pol)
4469 xfrm_pol_put(pol);
4470 if (nx_cur)
4471 xfrm_states_put(x_cur, nx_cur);
4472 if (nx_new)
4473 xfrm_states_delete(x_new, nx_new);
4474
4475 return err;
4476 }
4477 EXPORT_SYMBOL(xfrm_migrate);
4478 #endif