#include "flow.h"
#include "datapath.h"
#include "flow_netlink.h"
#include <linux/uaccess.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <net/llc_pdu.h>
#include <linux/kernel.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/llc.h>
#include <linux/module.h>
#include <linux/in.h>
#include <linux/rcupdate.h>
#include <linux/cpumask.h>
#include <linux/if_arp.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/sctp.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/icmp.h>
#include <linux/icmpv6.h>
#include <linux/rculist.h>
#include <linux/sort.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ndisc.h>

#define TBL_MIN_BUCKETS		1024
#define MASK_ARRAY_SIZE_MIN	16
#define REHASH_INTERVAL		(10 * 60 * HZ)

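/*
 * Mask cache geometry: each CPU keeps MC_DEFAULT_HASH_ENTRIES cache entries
 * mapping a recent skb hash to the mask-array index that matched last time.
 * A lookup probes MC_HASH_SEGS slots, taking successive MC_HASH_SHIFT-bit
 * segments of the 32-bit skb hash as indexes, so with the defaults below
 * (256 entries, 8-bit segments) up to four cache slots are tried before
 * falling back to a full mask scan.
 */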
#define MC_DEFAULT_HASH_ENTRIES	256
#define MC_HASH_SHIFT		8
#define MC_HASH_SEGS		((sizeof(uint32_t) * 8) / MC_HASH_SHIFT)

static struct kmem_cache *flow_cache;
struct kmem_cache *flow_stats_cache __read_mostly;

static u16 range_n_bytes(const struct sw_flow_key_range *range)
{
	return range->end - range->start;
}

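/*
 * ovs_flow_mask_key() below applies 'mask' to 'src' over the byte range
 * [range.start, range.end) and writes the result into 'dst'.  As a rough
 * illustration (not code from this file), masking an IPv4 destination with
 * a /24 mask behaves like:
 *
 *	dst->ipv4.addr.dst = src->ipv4.addr.dst & htonl(0xffffff00);
 *
 * except that it is done a long word at a time across the whole masked
 * range.  When 'full' is false, bytes outside the mask's range are left
 * untouched, so callers must only rely on data within that range.
 */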
void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
		       bool full, const struct sw_flow_mask *mask)
{
	int start = full ? 0 : mask->range.start;
	int len = full ? sizeof *dst : range_n_bytes(&mask->range);
	const long *m = (const long *)((const u8 *)&mask->key + start);
	const long *s = (const long *)((const u8 *)src + start);
	long *d = (long *)((u8 *)dst + start);
	int i;

	for (i = 0; i < len; i += sizeof(long))
		*d++ = *s++ & *m++;
}

struct sw_flow *ovs_flow_alloc(void)
{
	struct sw_flow *flow;
	struct sw_flow_stats *stats;

	flow = kmem_cache_zalloc(flow_cache, GFP_KERNEL);
	if (!flow)
		return ERR_PTR(-ENOMEM);

	flow->stats_last_writer = -1;

	/* Initialize the default stat node. */
	stats = kmem_cache_alloc_node(flow_stats_cache,
				      GFP_KERNEL | __GFP_ZERO,
				      node_online(0) ? 0 : NUMA_NO_NODE);
	if (!stats)
		goto err;

	spin_lock_init(&stats->lock);

	RCU_INIT_POINTER(flow->stats[0], stats);

	cpumask_set_cpu(0, &flow->cpu_used_mask);

	return flow;
err:
	kmem_cache_free(flow_cache, flow);
	return ERR_PTR(-ENOMEM);
}

int ovs_flow_tbl_count(const struct flow_table *table)
{
	return table->count;
}

static void flow_free(struct sw_flow *flow)
{
	int cpu;

	if (ovs_identifier_is_key(&flow->id))
		kfree(flow->id.unmasked_key);
	if (flow->sf_acts)
		ovs_nla_free_flow_actions((struct sw_flow_actions __force *)
					  flow->sf_acts);

	/* The loop is open coded so that CPU 0 is always considered. */
	for (cpu = 0; cpu < nr_cpu_ids;
	     cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
		if (flow->stats[cpu])
			kmem_cache_free(flow_stats_cache,
					(struct sw_flow_stats __force *)flow->stats[cpu]);
	}

	kmem_cache_free(flow_cache, flow);
}

static void rcu_free_flow_callback(struct rcu_head *rcu)
{
	struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);

	flow_free(flow);
}

void ovs_flow_free(struct sw_flow *flow, bool deferred)
{
	if (!flow)
		return;

	if (deferred)
		call_rcu(&flow->rcu, rcu_free_flow_callback);
	else
		flow_free(flow);
}

static void __table_instance_destroy(struct table_instance *ti)
{
	kvfree(ti->buckets);
	kfree(ti);
}

static struct table_instance *table_instance_alloc(int new_size)
{
	struct table_instance *ti = kmalloc(sizeof(*ti), GFP_KERNEL);
	int i;

	if (!ti)
		return NULL;

	ti->buckets = kvmalloc_array(new_size, sizeof(struct hlist_head),
				     GFP_KERNEL);
	if (!ti->buckets) {
		kfree(ti);
		return NULL;
	}

	for (i = 0; i < new_size; i++)
		INIT_HLIST_HEAD(&ti->buckets[i]);

	ti->n_buckets = new_size;
	ti->node_ver = 0;
	get_random_bytes(&ti->hash_seed, sizeof(u32));

	return ti;
}

static void __mask_array_destroy(struct mask_array *ma)
{
	free_percpu(ma->masks_usage_stats);
	kfree(ma);
}

static void mask_array_rcu_cb(struct rcu_head *rcu)
{
	struct mask_array *ma = container_of(rcu, struct mask_array, rcu);

	__mask_array_destroy(ma);
}

static void tbl_mask_array_reset_counters(struct mask_array *ma)
{
	int i, cpu;

	/* The per-CPU usage counters cannot safely be cleared from another
	 * CPU, so instead of resetting them we snapshot the current totals
	 * into masks_usage_zero_cntr[] and treat that as the new zero point.
	 */
	for (i = 0; i < ma->max; i++) {
		ma->masks_usage_zero_cntr[i] = 0;

		for_each_possible_cpu(cpu) {
			struct mask_array_stats *stats;
			unsigned int start;
			u64 counter;

			stats = per_cpu_ptr(ma->masks_usage_stats, cpu);
			do {
				start = u64_stats_fetch_begin_irq(&stats->syncp);
				counter = stats->usage_cntrs[i];
			} while (u64_stats_fetch_retry_irq(&stats->syncp, start));

			ma->masks_usage_zero_cntr[i] += counter;
		}
	}
}

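/*
 * Memory layout produced by tbl_mask_array_alloc() below: one kzalloc'd
 * block holding struct mask_array, immediately followed by the masks[]
 * pointer array ('size' entries) and then the u64 masks_usage_zero_cntr[]
 * array, plus a separate per-CPU allocation of struct mask_array_stats
 * carrying 'size' usage counters per CPU.
 */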
static struct mask_array *tbl_mask_array_alloc(int size)
{
	struct mask_array *new;

	size = max(MASK_ARRAY_SIZE_MIN, size);
	new = kzalloc(sizeof(struct mask_array) +
		      sizeof(struct sw_flow_mask *) * size +
		      sizeof(u64) * size, GFP_KERNEL);
	if (!new)
		return NULL;

	new->masks_usage_zero_cntr = (u64 *)((u8 *)new +
					     sizeof(struct mask_array) +
					     sizeof(struct sw_flow_mask *) *
					     size);

	new->masks_usage_stats = __alloc_percpu(sizeof(struct mask_array_stats) +
						sizeof(u64) * size,
						__alignof__(u64));
	if (!new->masks_usage_stats) {
		kfree(new);
		return NULL;
	}

	new->count = 0;
	new->max = size;

	return new;
}

static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
{
	struct mask_array *old;
	struct mask_array *new;

	new = tbl_mask_array_alloc(size);
	if (!new)
		return -ENOMEM;

	old = ovsl_dereference(tbl->mask_array);
	if (old) {
		int i;

		for (i = 0; i < old->max; i++) {
			if (ovsl_dereference(old->masks[i]))
				new->masks[new->count++] = old->masks[i];
		}
		call_rcu(&old->rcu, mask_array_rcu_cb);
	}

	rcu_assign_pointer(tbl->mask_array, new);

	return 0;
}

static int tbl_mask_array_add_mask(struct flow_table *tbl,
				   struct sw_flow_mask *new)
{
	struct mask_array *ma = ovsl_dereference(tbl->mask_array);
	int err, ma_count = READ_ONCE(ma->count);

	if (ma_count >= ma->max) {
		err = tbl_mask_array_realloc(tbl, ma->max +
						  MASK_ARRAY_SIZE_MIN);
		if (err)
			return err;

		ma = ovsl_dereference(tbl->mask_array);
	} else {
		/* On every add or delete the counters are reset so that
		 * every new mask gets a fair chance of being prioritized.
		 */
		tbl_mask_array_reset_counters(ma);
	}

	BUG_ON(ovsl_dereference(ma->masks[ma_count]));

	rcu_assign_pointer(ma->masks[ma_count], new);
	WRITE_ONCE(ma->count, ma_count + 1);

	return 0;
}

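/*
 * tbl_mask_array_del_mask() below removes a mask by overwriting its slot
 * with the last used slot and clearing the tail, which keeps masks[]
 * densely packed so lookups can stop at the first NULL entry.  If the
 * array has grown well past its occupancy it is shrunk; otherwise the
 * usage counters are re-zeroed, as on insertion.
 */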
static void tbl_mask_array_del_mask(struct flow_table *tbl,
				    struct sw_flow_mask *mask)
{
	struct mask_array *ma = ovsl_dereference(tbl->mask_array);
	int i, ma_count = READ_ONCE(ma->count);

	/* Find the deleted mask in the array. */
	for (i = 0; i < ma_count; i++) {
		if (mask == ovsl_dereference(ma->masks[i]))
			goto found;
	}

	BUG();
	return;

found:
	WRITE_ONCE(ma->count, ma_count - 1);

	rcu_assign_pointer(ma->masks[i], ma->masks[ma_count - 1]);
	RCU_INIT_POINTER(ma->masks[ma_count - 1], NULL);

	kfree_rcu(mask, rcu);

	/* Shrink the mask array if necessary. */
	if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) &&
	    ma_count <= (ma->max / 3))
		tbl_mask_array_realloc(tbl, ma->max / 2);
	else
		tbl_mask_array_reset_counters(ma);
}

/* Remove 'mask' from the mask list if it is no longer referenced. */
static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
{
	if (mask) {
		/* ovs-lock is required to protect mask-refcount and
		 * the mask list.
		 */
		ASSERT_OVSL();
		BUG_ON(!mask->ref_count);
		mask->ref_count--;

		if (!mask->ref_count)
			tbl_mask_array_del_mask(tbl, mask);
	}
}

static void __mask_cache_destroy(struct mask_cache *mc)
{
	free_percpu(mc->mask_cache);
	kfree(mc);
}

static void mask_cache_rcu_cb(struct rcu_head *rcu)
{
	struct mask_cache *mc = container_of(rcu, struct mask_cache, rcu);

	__mask_cache_destroy(mc);
}

static struct mask_cache *tbl_mask_cache_alloc(u32 size)
{
	struct mask_cache_entry __percpu *cache = NULL;
	struct mask_cache *new;

	/* Only allow the cache to be disabled (size 0) or sized to a power
	 * of two that fits within a per-CPU allocation unit.
	 */
	if ((!is_power_of_2(size) && size != 0) ||
	    (size * sizeof(struct mask_cache_entry)) > PCPU_MIN_UNIT_SIZE)
		return NULL;

	new = kzalloc(sizeof(*new), GFP_KERNEL);
	if (!new)
		return NULL;

	new->cache_size = size;
	if (new->cache_size > 0) {
		cache = __alloc_percpu(array_size(sizeof(struct mask_cache_entry),
						  new->cache_size),
				       __alignof__(struct mask_cache_entry));
		if (!cache) {
			kfree(new);
			return NULL;
		}
	}

	new->mask_cache = cache;
	return new;
}

int ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size)
{
	struct mask_cache *mc = rcu_dereference_ovsl(table->mask_cache);
	struct mask_cache *new;

	if (size == mc->cache_size)
		return 0;

	if ((!is_power_of_2(size) && size != 0) ||
	    (size * sizeof(struct mask_cache_entry)) > PCPU_MIN_UNIT_SIZE)
		return -EINVAL;

	new = tbl_mask_cache_alloc(size);
	if (!new)
		return -ENOMEM;

	rcu_assign_pointer(table->mask_cache, new);
	call_rcu(&mc->rcu, mask_cache_rcu_cb);

	return 0;
}

int ovs_flow_tbl_init(struct flow_table *table)
{
	struct table_instance *ti, *ufid_ti;
	struct mask_cache *mc;
	struct mask_array *ma;

	mc = tbl_mask_cache_alloc(MC_DEFAULT_HASH_ENTRIES);
	if (!mc)
		return -ENOMEM;

	ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN);
	if (!ma)
		goto free_mask_cache;

	ti = table_instance_alloc(TBL_MIN_BUCKETS);
	if (!ti)
		goto free_mask_array;

	ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS);
	if (!ufid_ti)
		goto free_ti;

	rcu_assign_pointer(table->ti, ti);
	rcu_assign_pointer(table->ufid_ti, ufid_ti);
	rcu_assign_pointer(table->mask_array, ma);
	rcu_assign_pointer(table->mask_cache, mc);
	table->last_rehash = jiffies;
	table->count = 0;
	table->ufid_count = 0;
	return 0;

free_ti:
	__table_instance_destroy(ti);
free_mask_array:
	__mask_array_destroy(ma);
free_mask_cache:
	__mask_cache_destroy(mc);
	return -ENOMEM;
}

static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
{
	struct table_instance *ti;

	ti = container_of(rcu, struct table_instance, rcu);
	__table_instance_destroy(ti);
}

static void table_instance_flow_free(struct flow_table *table,
				     struct table_instance *ti,
				     struct table_instance *ufid_ti,
				     struct sw_flow *flow)
{
	hlist_del_rcu(&flow->flow_table.node[ti->node_ver]);
	table->count--;

	if (ovs_identifier_is_ufid(&flow->id)) {
		hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]);
		table->ufid_count--;
	}

	flow_mask_remove(table, flow->mask);
}

/* Must be called with OVS mutex held. */
void table_instance_flow_flush(struct flow_table *table,
			       struct table_instance *ti,
			       struct table_instance *ufid_ti)
{
	int i;

	for (i = 0; i < ti->n_buckets; i++) {
		struct hlist_head *head = &ti->buckets[i];
		struct hlist_node *n;
		struct sw_flow *flow;

		hlist_for_each_entry_safe(flow, n, head,
					  flow_table.node[ti->node_ver]) {
			table_instance_flow_free(table, ti, ufid_ti,
						 flow);
			ovs_flow_free(flow, true);
		}
	}

	if (WARN_ON(table->count != 0 ||
		    table->ufid_count != 0)) {
		table->count = 0;
		table->ufid_count = 0;
	}
}

static void table_instance_destroy(struct table_instance *ti,
				   struct table_instance *ufid_ti)
{
	call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
	call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb);
}

/* No locking is needed: this is only called on table teardown (error path
 * or datapath destruction), when no other users remain.
 */
void ovs_flow_tbl_destroy(struct flow_table *table)
{
	struct table_instance *ti = rcu_dereference_raw(table->ti);
	struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti);
	struct mask_cache *mc = rcu_dereference_raw(table->mask_cache);
	struct mask_array *ma = rcu_dereference_raw(table->mask_array);

	call_rcu(&mc->rcu, mask_cache_rcu_cb);
	call_rcu(&ma->rcu, mask_array_rcu_cb);
	table_instance_destroy(ti, ufid_ti);
}

struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
				       u32 *bucket, u32 *last)
{
	struct sw_flow *flow;
	struct hlist_head *head;
	int ver;
	int i;

	ver = ti->node_ver;
	while (*bucket < ti->n_buckets) {
		i = 0;
		head = &ti->buckets[*bucket];
		hlist_for_each_entry_rcu(flow, head, flow_table.node[ver]) {
			if (i < *last) {
				i++;
				continue;
			}
			*last = i + 1;
			return flow;
		}
		(*bucket)++;
		*last = 0;
	}

	return NULL;
}

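/*
 * Buckets are selected by re-hashing the flow hash with a per-instance
 * random seed and masking with (n_buckets - 1), so table instances must
 * always be sized to a power of two; TBL_MIN_BUCKETS and every expansion
 * (which doubles the size) preserve this.
 */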
static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash)
{
	hash = jhash_1word(hash, ti->hash_seed);
	return &ti->buckets[hash & (ti->n_buckets - 1)];
}

static void table_instance_insert(struct table_instance *ti,
				  struct sw_flow *flow)
{
	struct hlist_head *head;

	head = find_bucket(ti, flow->flow_table.hash);
	hlist_add_head_rcu(&flow->flow_table.node[ti->node_ver], head);
}

static void ufid_table_instance_insert(struct table_instance *ti,
				       struct sw_flow *flow)
{
	struct hlist_head *head;

	head = find_bucket(ti, flow->ufid_table.hash);
	hlist_add_head_rcu(&flow->ufid_table.node[ti->node_ver], head);
}

static void flow_table_copy_flows(struct table_instance *old,
				  struct table_instance *new, bool ufid)
{
	int old_ver;
	int i;

	old_ver = old->node_ver;
	new->node_ver = !old_ver;

	/* Insert in new table. */
	for (i = 0; i < old->n_buckets; i++) {
		struct sw_flow *flow;
		struct hlist_head *head = &old->buckets[i];

		if (ufid)
			hlist_for_each_entry_rcu(flow, head,
						 ufid_table.node[old_ver],
						 lockdep_ovsl_is_held())
				ufid_table_instance_insert(new, flow);
		else
			hlist_for_each_entry_rcu(flow, head,
						 flow_table.node[old_ver],
						 lockdep_ovsl_is_held())
				table_instance_insert(new, flow);
	}
}

static struct table_instance *table_instance_rehash(struct table_instance *ti,
						    int n_buckets, bool ufid)
{
	struct table_instance *new_ti;

	new_ti = table_instance_alloc(n_buckets);
	if (!new_ti)
		return NULL;

	flow_table_copy_flows(ti, new_ti, ufid);

	return new_ti;
}

int ovs_flow_tbl_flush(struct flow_table *flow_table)
{
	struct table_instance *old_ti, *new_ti;
	struct table_instance *old_ufid_ti, *new_ufid_ti;

	new_ti = table_instance_alloc(TBL_MIN_BUCKETS);
	if (!new_ti)
		return -ENOMEM;
	new_ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS);
	if (!new_ufid_ti)
		goto err_free_ti;

	old_ti = ovsl_dereference(flow_table->ti);
	old_ufid_ti = ovsl_dereference(flow_table->ufid_ti);

	rcu_assign_pointer(flow_table->ti, new_ti);
	rcu_assign_pointer(flow_table->ufid_ti, new_ufid_ti);
	flow_table->last_rehash = jiffies;

	table_instance_flow_flush(flow_table, old_ti, old_ufid_ti);
	table_instance_destroy(old_ti, old_ufid_ti);
	return 0;

err_free_ti:
	__table_instance_destroy(new_ti);
	return -ENOMEM;
}

static u32 flow_hash(const struct sw_flow_key *key,
		     const struct sw_flow_key_range *range)
{
	const u32 *hash_key = (const u32 *)((const u8 *)key + range->start);

	/* The number of hashed bytes must be a multiple of sizeof(u32). */
	int hash_u32s = range_n_bytes(range) >> 2;

	return jhash2(hash_key, hash_u32s, 0);
}

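/*
 * flow_key_start() below returns the byte offset at which key comparison
 * and masking should begin.  When a tunnel key is present (tun_proto set)
 * the whole key, including the tunnel metadata placed at the start of
 * struct sw_flow_key, is significant; otherwise comparison can start at
 * the 'phy' member, rounded down to long-word alignment so the long-wise
 * compare and mask loops stay aligned.
 */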
static int flow_key_start(const struct sw_flow_key *key)
{
	if (key->tun_proto)
		return 0;
	else
		return rounddown(offsetof(struct sw_flow_key, phy),
				 sizeof(long));
}

static bool cmp_key(const struct sw_flow_key *key1,
		    const struct sw_flow_key *key2,
		    int key_start, int key_end)
{
	const long *cp1 = (const long *)((const u8 *)key1 + key_start);
	const long *cp2 = (const long *)((const u8 *)key2 + key_start);
	int i;

	for (i = key_start; i < key_end; i += sizeof(long))
		if (*cp1++ ^ *cp2++)
			return false;

	return true;
}

static bool flow_cmp_masked_key(const struct sw_flow *flow,
				const struct sw_flow_key *key,
				const struct sw_flow_key_range *range)
{
	return cmp_key(&flow->key, key, range->start, range->end);
}

static bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
				      const struct sw_flow_match *match)
{
	struct sw_flow_key *key = match->key;
	int key_start = flow_key_start(key);
	int key_end = match->range.end;

	BUG_ON(ovs_identifier_is_ufid(&flow->id));
	return cmp_key(flow->id.unmasked_key, key, key_start, key_end);
}

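/*
 * masked_flow_lookup() below applies one mask to the packet key, hashes
 * only the masked byte range and walks the matching hash bucket; a flow
 * matches when it was installed with the same mask, has the same hash and
 * its (already masked) key equals the freshly masked packet key.
 * *n_mask_hit is incremented once per mask tried, letting callers account
 * for how many masks were probed per lookup.
 */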
static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
					  const struct sw_flow_key *unmasked,
					  const struct sw_flow_mask *mask,
					  u32 *n_mask_hit)
{
	struct sw_flow *flow;
	struct hlist_head *head;
	u32 hash;
	struct sw_flow_key masked_key;

	ovs_flow_mask_key(&masked_key, unmasked, false, mask);
	hash = flow_hash(&masked_key, &mask->range);
	head = find_bucket(ti, hash);
	(*n_mask_hit)++;

	hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver],
				 lockdep_ovsl_is_held()) {
		if (flow->mask == mask && flow->flow_table.hash == hash &&
		    flow_cmp_masked_key(flow, &masked_key, &mask->range))
			return flow;
	}
	return NULL;
}

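/*
 * Full flow lookup over the mask array.  The mask at *index (typically a
 * cached guess) is tried first; on a miss every other mask is tried in
 * array order and *index is updated to the one that matched.  Because the
 * per-mask usage counters live in per-CPU data, this must be called with
 * BH disabled.
 */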
static struct sw_flow *flow_lookup(struct flow_table *tbl,
				   struct table_instance *ti,
				   struct mask_array *ma,
				   const struct sw_flow_key *key,
				   u32 *n_mask_hit,
				   u32 *n_cache_hit,
				   u32 *index)
{
	struct mask_array_stats *stats = this_cpu_ptr(ma->masks_usage_stats);
	struct sw_flow *flow;
	struct sw_flow_mask *mask;
	int i;

	if (likely(*index < ma->max)) {
		mask = rcu_dereference_ovsl(ma->masks[*index]);
		if (mask) {
			flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
			if (flow) {
				u64_stats_update_begin(&stats->syncp);
				stats->usage_cntrs[*index]++;
				u64_stats_update_end(&stats->syncp);
				(*n_cache_hit)++;
				return flow;
			}
		}
	}

	for (i = 0; i < ma->max; i++) {
		/* Skip the mask that was already tried above. */
		if (i == *index)
			continue;

		mask = rcu_dereference_ovsl(ma->masks[i]);
		if (unlikely(!mask))
			break;

		flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
		if (flow) {
			*index = i;
			u64_stats_update_begin(&stats->syncp);
			stats->usage_cntrs[*index]++;
			u64_stats_update_end(&stats->syncp);
			return flow;
		}
	}

	return NULL;
}

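/*
 * Fast-path lookup: the per-CPU mask cache maps a packet's skb hash to the
 * mask-array index that matched a similar packet recently.  The cache is
 * only a hint and may be stale; a stale or empty entry simply degrades to
 * the full mask scan in flow_lookup(), after which the entry is refreshed.
 */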
struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
					  const struct sw_flow_key *key,
					  u32 skb_hash,
					  u32 *n_mask_hit,
					  u32 *n_cache_hit)
{
	struct mask_cache *mc = rcu_dereference(tbl->mask_cache);
	struct mask_array *ma = rcu_dereference(tbl->mask_array);
	struct table_instance *ti = rcu_dereference(tbl->ti);
	struct mask_cache_entry *entries, *ce;
	struct sw_flow *flow;
	u32 hash;
	int seg;

	*n_mask_hit = 0;
	*n_cache_hit = 0;
	if (unlikely(!skb_hash || mc->cache_size == 0)) {
		u32 mask_index = 0;
		u32 cache = 0;

		return flow_lookup(tbl, ti, ma, key, n_mask_hit, &cache,
				   &mask_index);
	}

	/* Pre- and post-recirculation packets usually carry the same
	 * skb_hash; fold in the recirc_id so they do not collide on the
	 * same cache entry.
	 */
	if (key->recirc_id)
		skb_hash = jhash_1word(skb_hash, key->recirc_id);

	ce = NULL;
	hash = skb_hash;
	entries = this_cpu_ptr(mc->mask_cache);

	/* Find the cache entry 'ce' to operate on. */
	for (seg = 0; seg < MC_HASH_SEGS; seg++) {
		int index = hash & (mc->cache_size - 1);
		struct mask_cache_entry *e;

		e = &entries[index];
		if (e->skb_hash == skb_hash) {
			flow = flow_lookup(tbl, ti, ma, key, n_mask_hit,
					   n_cache_hit, &e->mask_index);
			if (!flow)
				e->skb_hash = 0;
			return flow;
		}

		/* Track a candidate entry to replace on a cache miss. */
		if (!ce || e->skb_hash < ce->skb_hash)
			ce = e;

		hash >>= MC_HASH_SHIFT;
	}

	/* Cache miss, do full lookup and refresh the chosen entry. */
	flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, n_cache_hit,
			   &ce->mask_index);
	if (flow)
		ce->skb_hash = skb_hash;

	/* This was a cache miss, so don't report it as a cache hit. */
	*n_cache_hit = 0;
	return flow;
}

struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
				    const struct sw_flow_key *key)
{
	struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
	struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);
	u32 __always_unused n_mask_hit;
	u32 __always_unused n_cache_hit;
	struct sw_flow *flow;
	u32 index = 0;

	/* This function gets called through the netlink interface and
	 * therefore is preemptible.  However, flow_lookup() needs to be
	 * called with BH disabled due to CPU specific variables.
	 */
	local_bh_disable();
	flow = flow_lookup(tbl, ti, ma, key, &n_mask_hit, &n_cache_hit, &index);
	local_bh_enable();
	return flow;
}

struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
					  const struct sw_flow_match *match)
{
	struct mask_array *ma = ovsl_dereference(tbl->mask_array);
	int i;

	/* Always called under ovs-mutex. */
	for (i = 0; i < ma->max; i++) {
		struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
		u32 __always_unused n_mask_hit;
		struct sw_flow_mask *mask;
		struct sw_flow *flow;

		mask = ovsl_dereference(ma->masks[i]);
		if (!mask)
			continue;

		flow = masked_flow_lookup(ti, match->key, mask, &n_mask_hit);
		if (flow && ovs_identifier_is_key(&flow->id) &&
		    ovs_flow_cmp_unmasked_key(flow, match)) {
			return flow;
		}
	}

	return NULL;
}

static u32 ufid_hash(const struct sw_flow_id *sfid)
{
	return jhash(sfid->ufid, sfid->ufid_len, 0);
}

static bool ovs_flow_cmp_ufid(const struct sw_flow *flow,
			      const struct sw_flow_id *sfid)
{
	if (flow->id.ufid_len != sfid->ufid_len)
		return false;

	return !memcmp(flow->id.ufid, sfid->ufid, sfid->ufid_len);
}

bool ovs_flow_cmp(const struct sw_flow *flow,
		  const struct sw_flow_match *match)
{
	if (ovs_identifier_is_ufid(&flow->id))
		return flow_cmp_masked_key(flow, match->key, &match->range);

	return ovs_flow_cmp_unmasked_key(flow, match);
}

struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
					 const struct sw_flow_id *ufid)
{
	struct table_instance *ti = rcu_dereference_ovsl(tbl->ufid_ti);
	struct sw_flow *flow;
	struct hlist_head *head;
	u32 hash;

	hash = ufid_hash(ufid);
	head = find_bucket(ti, hash);
	hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver],
				 lockdep_ovsl_is_held()) {
		if (flow->ufid_table.hash == hash &&
		    ovs_flow_cmp_ufid(flow, ufid))
			return flow;
	}
	return NULL;
}

int ovs_flow_tbl_num_masks(const struct flow_table *table)
{
	struct mask_array *ma = rcu_dereference_ovsl(table->mask_array);

	return READ_ONCE(ma->count);
}

u32 ovs_flow_tbl_masks_cache_size(const struct flow_table *table)
{
	struct mask_cache *mc = rcu_dereference_ovsl(table->mask_cache);

	return READ_ONCE(mc->cache_size);
}

static struct table_instance *table_instance_expand(struct table_instance *ti,
						    bool ufid)
{
	return table_instance_rehash(ti, ti->n_buckets * 2, ufid);
}

/* Must be called with OVS mutex held. */
void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
{
	struct table_instance *ti = ovsl_dereference(table->ti);
	struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti);

	BUG_ON(table->count == 0);
	table_instance_flow_free(table, ti, ufid_ti, flow);
}

static struct sw_flow_mask *mask_alloc(void)
{
	struct sw_flow_mask *mask;

	mask = kmalloc(sizeof(*mask), GFP_KERNEL);
	if (mask)
		mask->ref_count = 1;

	return mask;
}

static bool mask_equal(const struct sw_flow_mask *a,
		       const struct sw_flow_mask *b)
{
	const u8 *a_ = (const u8 *)&a->key + a->range.start;
	const u8 *b_ = (const u8 *)&b->key + b->range.start;

	return (a->range.end == b->range.end)
	       && (a->range.start == b->range.start)
	       && (memcmp(a_, b_, range_n_bytes(&a->range)) == 0);
}

static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
					   const struct sw_flow_mask *mask)
{
	struct mask_array *ma;
	int i;

	ma = ovsl_dereference(tbl->mask_array);
	for (i = 0; i < ma->max; i++) {
		struct sw_flow_mask *t;

		t = ovsl_dereference(ma->masks[i]);
		if (t && mask_equal(mask, t))
			return t;
	}

	return NULL;
}

/* Add 'mask' into the mask list, if it is not yet on the list. */
static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
			    const struct sw_flow_mask *new)
{
	struct sw_flow_mask *mask;

	mask = flow_mask_find(tbl, new);
	if (!mask) {
		/* Allocate a new mask if none exists. */
		mask = mask_alloc();
		if (!mask)
			return -ENOMEM;
		mask->key = new->key;
		mask->range = new->range;

		/* Add mask to mask-list. */
		if (tbl_mask_array_add_mask(tbl, mask)) {
			kfree(mask);
			return -ENOMEM;
		}
	} else {
		BUG_ON(!mask->ref_count);
		mask->ref_count++;
	}

	flow->mask = mask;
	return 0;
}

/* Must be called with OVS mutex held. */
static void flow_key_insert(struct flow_table *table, struct sw_flow *flow)
{
	struct table_instance *new_ti = NULL;
	struct table_instance *ti;

	flow->flow_table.hash = flow_hash(&flow->key, &flow->mask->range);
	ti = ovsl_dereference(table->ti);
	table_instance_insert(ti, flow);
	table->count++;

	/* Expand table, if necessary, to make room. */
	if (table->count > ti->n_buckets)
		new_ti = table_instance_expand(ti, false);
	else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL))
		new_ti = table_instance_rehash(ti, ti->n_buckets, false);

	if (new_ti) {
		rcu_assign_pointer(table->ti, new_ti);
		call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
		table->last_rehash = jiffies;
	}
}

/* Must be called with OVS mutex held. */
static void flow_ufid_insert(struct flow_table *table, struct sw_flow *flow)
{
	struct table_instance *ti;

	flow->ufid_table.hash = ufid_hash(&flow->id);
	ti = ovsl_dereference(table->ufid_ti);
	ufid_table_instance_insert(ti, flow);
	table->ufid_count++;

	/* Expand table, if necessary, to make room. */
	if (table->ufid_count > ti->n_buckets) {
		struct table_instance *new_ti;

		new_ti = table_instance_expand(ti, true);
		if (new_ti) {
			rcu_assign_pointer(table->ufid_ti, new_ti);
			call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
		}
	}
}

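/*
 * ovs_flow_tbl_insert() below ties the pieces together: the flow first
 * takes a reference on a (possibly shared) mask, is then hashed into the
 * key table (which may expand or periodically rehash), and finally, if it
 * was installed with a unique flow ID, into the UFID table as well.
 * Callers hold ovs_mutex.
 */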
int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
			const struct sw_flow_mask *mask)
{
	int err;

	err = flow_mask_insert(table, flow, mask);
	if (err)
		return err;
	flow_key_insert(table, flow);
	if (ovs_identifier_is_ufid(&flow->id))
		flow_ufid_insert(table, flow);

	return 0;
}

static int compare_mask_and_count(const void *a, const void *b)
{
	const struct mask_count *mc_a = a;
	const struct mask_count *mc_b = b;

	return (s64)mc_b->counter - (s64)mc_a->counter;
}

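/*
 * Periodically rebalance the mask array: accumulate the per-CPU usage
 * counters gathered since the last pass (relative to the
 * masks_usage_zero_cntr[] baseline), sort the masks by that recent usage
 * and, if the order changed, publish a reordered copy of the array via RCU
 * so that the hottest masks are probed first by flow_lookup().
 */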
void ovs_flow_masks_rebalance(struct flow_table *table)
{
	struct mask_array *ma = rcu_dereference_ovsl(table->mask_array);
	struct mask_count *masks_and_count;
	struct mask_array *new;
	int masks_entries = 0;
	int i;

	/* Build an array of all current entries with their use counters. */
	masks_and_count = kmalloc_array(ma->max, sizeof(*masks_and_count),
					GFP_KERNEL);
	if (!masks_and_count)
		return;

	for (i = 0; i < ma->max; i++) {
		struct sw_flow_mask *mask;
		int cpu;

		mask = rcu_dereference_ovsl(ma->masks[i]);
		if (unlikely(!mask))
			break;

		masks_and_count[i].index = i;
		masks_and_count[i].counter = 0;

		for_each_possible_cpu(cpu) {
			struct mask_array_stats *stats;
			unsigned int start;
			u64 counter;

			stats = per_cpu_ptr(ma->masks_usage_stats, cpu);
			do {
				start = u64_stats_fetch_begin_irq(&stats->syncp);
				counter = stats->usage_cntrs[i];
			} while (u64_stats_fetch_retry_irq(&stats->syncp,
							   start));

			masks_and_count[i].counter += counter;
		}

		/* Subtract the zero count value. */
		masks_and_count[i].counter -= ma->masks_usage_zero_cntr[i];

		/* Rather than calling tbl_mask_array_reset_counters()
		 * below when no change is needed, do it inline here.
		 */
		ma->masks_usage_zero_cntr[i] += masks_and_count[i].counter;
	}

	if (i == 0)
		goto free_mask_entries;

	/* Sort the entries, most used first. */
	masks_entries = i;
	sort(masks_and_count, masks_entries, sizeof(*masks_and_count),
	     compare_mask_and_count, NULL);

	/* If the order is unchanged, there is nothing to do. */
	for (i = 0; i < masks_entries; i++) {
		if (i != masks_and_count[i].index)
			break;
	}
	if (i == masks_entries)
		goto free_mask_entries;

	/* Rebuild the new list in order of usage. */
	new = tbl_mask_array_alloc(ma->max);
	if (!new)
		goto free_mask_entries;

	for (i = 0; i < masks_entries; i++) {
		int index = masks_and_count[i].index;

		if (ovsl_dereference(ma->masks[index]))
			new->masks[new->count++] = ma->masks[index];
	}

	rcu_assign_pointer(table->mask_array, new);
	call_rcu(&ma->rcu, mask_array_rcu_cb);

free_mask_entries:
	kfree(masks_and_count);
}

/* Initializes the flow module.
 * Returns zero if successful or a negative error code.
 */
int ovs_flow_init(void)
{
	BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long));
	BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));

	flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow)
				       + (nr_cpu_ids
					  * sizeof(struct sw_flow_stats *)),
				       0, 0, NULL);
	if (flow_cache == NULL)
		return -ENOMEM;

	flow_stats_cache
		= kmem_cache_create("sw_flow_stats", sizeof(struct sw_flow_stats),
				    0, SLAB_HWCACHE_ALIGN, NULL);
	if (flow_stats_cache == NULL) {
		kmem_cache_destroy(flow_cache);
		flow_cache = NULL;
		return -ENOMEM;
	}

	return 0;
}

/* Uninitializes the flow module. */
void ovs_flow_exit(void)
{
	kmem_cache_destroy(flow_stats_cache);
	kmem_cache_destroy(flow_cache);
}