/*
 * hfi1 CPU/IRQ affinity management.
 *
 * Distributes SDMA engine, receive context, and completion vector
 * interrupts across the CPUs of a device's NUMA node, and recommends
 * CPUs for user processes opening contexts on the device.
 */

#include <linux/topology.h>
#include <linux/cpumask.h>
#include <linux/interrupt.h>
#include <linux/numa.h>

#include "hfi.h"
#include "affinity.h"
#include "sdma.h"
#include "trace.h"

struct hfi1_affinity_node_list node_affinity = {
	.list = LIST_HEAD_INIT(node_affinity.list),
	.lock = __MUTEX_INITIALIZER(node_affinity.lock)
};

static const char * const irq_type_names[] = {
	"SDMA",
	"RCVCTXT",
	"NETDEVCTXT",
	"GENERAL",
	"OTHER",
};

/* Per NUMA node count of HFI devices */
static unsigned int *hfi1_per_node_cntr;

static inline void init_cpu_mask_set(struct cpu_mask_set *set)
{
	cpumask_clear(&set->mask);
	cpumask_clear(&set->used);
	set->gen = 0;
}

/* Increment generation of CPU mask set */
static void _cpu_mask_set_gen_inc(struct cpu_mask_set *set)
{
	if (cpumask_equal(&set->mask, &set->used)) {
		/*
		 * We've used up all the CPUs, bump up the generation
		 * and reset the 'used' map
		 */
		set->gen++;
		cpumask_clear(&set->used);
	}
}

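/*
 * Decrement generation of CPU mask set: when the 'used' mask drains
 * completely while an older generation is still outstanding, step the
 * generation back and mark the whole mask as used on its behalf.
 */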
static void _cpu_mask_set_gen_dec(struct cpu_mask_set *set)
{
	if (cpumask_empty(&set->used) && set->gen) {
		set->gen--;
		cpumask_copy(&set->used, &set->mask);
	}
}

/* Get the first CPU from the list of unused CPUs in a CPU mask set */
static int cpu_mask_set_get_first(struct cpu_mask_set *set, cpumask_var_t diff)
{
	int cpu;

	if (!diff || !set)
		return -EINVAL;

	_cpu_mask_set_gen_inc(set);

	/* Find out CPUs left in CPU mask set */
	cpumask_andnot(diff, &set->mask, &set->used);

	cpu = cpumask_first(diff);
	if (cpu >= nr_cpu_ids)
		cpu = -EINVAL;
	else
		cpumask_set_cpu(cpu, &set->used);

	return cpu;
}

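/*
 * Return a CPU to a CPU mask set and, if that empties the 'used' mask,
 * roll the generation back via _cpu_mask_set_gen_dec().
 */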
static void cpu_mask_set_put(struct cpu_mask_set *set, int cpu)
{
	if (!set)
		return;

	cpumask_clear_cpu(cpu, &set->used);
	_cpu_mask_set_gen_dec(set);
}

/* Initialize non-HT cpu cores mask */
void init_real_cpu_mask(void)
{
	int possible, curr_cpu, i, ht;

	cpumask_clear(&node_affinity.real_cpu_mask);

	/* Start with cpu online mask as the real cpu mask */
	cpumask_copy(&node_affinity.real_cpu_mask, cpu_online_mask);

	/*
	 * Remove HT cores from the real cpu mask.  Do this in two steps below.
	 */
	possible = cpumask_weight(&node_affinity.real_cpu_mask);
	ht = cpumask_weight(topology_sibling_cpumask(
				cpumask_first(&node_affinity.real_cpu_mask)));
	/*
	 * Step 1.  Skip over the first possible/ht CPUs; these remain in the
	 * mask as the "real" cores (assumes HT siblings are enumerated after
	 * the physical cores rather than interleaved with them).
	 */
	curr_cpu = cpumask_first(&node_affinity.real_cpu_mask);
	for (i = 0; i < possible / ht; i++)
		curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask);
	/*
	 * Step 2.  Remove the remaining HT siblings.  Use cpumask_next() to
	 * skip any gaps.
	 */
	for (; i < possible; i++) {
		cpumask_clear_cpu(curr_cpu, &node_affinity.real_cpu_mask);
		curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask);
	}
}

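/*
 * Driver-wide affinity initialization: seed the process CPU set from
 * the online mask and count how many hfi1 devices sit on each NUMA
 * node so per-node resources can be divided fairly.
 */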
int node_affinity_init(void)
{
	int node;
	struct pci_dev *dev = NULL;
	const struct pci_device_id *ids = hfi1_pci_tbl;

	cpumask_clear(&node_affinity.proc.used);
	cpumask_copy(&node_affinity.proc.mask, cpu_online_mask);

	node_affinity.proc.gen = 0;
	node_affinity.num_core_siblings =
				cpumask_weight(topology_sibling_cpumask(
					cpumask_first(&node_affinity.proc.mask)
					));
	node_affinity.num_possible_nodes = num_possible_nodes();
	node_affinity.num_online_nodes = num_online_nodes();
	node_affinity.num_online_cpus = num_online_cpus();

	/*
	 * The real cpu mask is part of the affinity structure but it has to
	 * be initialized early. It is needed to calculate the number of user
	 * contexts in set_up_context_variables().
	 */
	init_real_cpu_mask();

	hfi1_per_node_cntr = kcalloc(node_affinity.num_possible_nodes,
				     sizeof(*hfi1_per_node_cntr), GFP_KERNEL);
	if (!hfi1_per_node_cntr)
		return -ENOMEM;

	while (ids->vendor) {
		dev = NULL;
		while ((dev = pci_get_device(ids->vendor, ids->device, dev))) {
			node = pcibus_to_node(dev->bus);
			if (node < 0)
				goto out;

			hfi1_per_node_cntr[node]++;
		}
		ids++;
	}

	return 0;

out:
	/*
	 * Invalid PCI NUMA node information found, note it, and populate
	 * our database 1:1.
	 */
	pr_err("HFI: Invalid PCI NUMA node. Performance may be affected\n");
	pr_err("HFI: System BIOS may need to be upgraded\n");
	for (node = 0; node < node_affinity.num_possible_nodes; node++)
		hfi1_per_node_cntr[node] = 1;

	return 0;
}

static void node_affinity_destroy(struct hfi1_affinity_node *entry)
{
	free_percpu(entry->comp_vect_affinity);
	kfree(entry);
}

void node_affinity_destroy_all(void)
{
	struct list_head *pos, *q;
	struct hfi1_affinity_node *entry;

	mutex_lock(&node_affinity.lock);
	list_for_each_safe(pos, q, &node_affinity.list) {
		entry = list_entry(pos, struct hfi1_affinity_node,
				   list);
		list_del(pos);
		node_affinity_destroy(entry);
	}
	mutex_unlock(&node_affinity.lock);
	kfree(hfi1_per_node_cntr);
}

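/*
 * Allocate a per-NUMA-node affinity entry and its per-cpu completion
 * vector usage counters; the caller adds it to node_affinity.list.
 */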
static struct hfi1_affinity_node *node_affinity_allocate(int node)
{
	struct hfi1_affinity_node *entry;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return NULL;
	entry->node = node;
	entry->comp_vect_affinity = alloc_percpu(u16);
	INIT_LIST_HEAD(&entry->list);

	return entry;
}

/*
 * It appends an entry to the list.
 * It *must* be called with node_affinity.lock held.
 */
static void node_affinity_add_tail(struct hfi1_affinity_node *entry)
{
	list_add_tail(&entry->list, &node_affinity.list);
}

/* It must be called with node_affinity.lock held */
static struct hfi1_affinity_node *node_affinity_lookup(int node)
{
	struct list_head *pos;
	struct hfi1_affinity_node *entry;

	list_for_each(pos, &node_affinity.list) {
		entry = list_entry(pos, struct hfi1_affinity_node, list);
		if (entry->node == node)
			return entry;
	}

	return NULL;
}

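/*
 * Pick the CPU from possible_cpumask with the lowest comp_vect_affinity
 * counter (i.e. the least-loaded CPU) and charge it with one more
 * completion vector.  Returns the chosen CPU or -EINVAL.
 */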
static int per_cpu_affinity_get(cpumask_var_t possible_cpumask,
				u16 __percpu *comp_vect_affinity)
{
	int curr_cpu;
	u16 cntr;
	u16 prev_cntr;
	int ret_cpu;

	if (!possible_cpumask) {
		ret_cpu = -EINVAL;
		goto fail;
	}

	if (!comp_vect_affinity) {
		ret_cpu = -EINVAL;
		goto fail;
	}

	ret_cpu = cpumask_first(possible_cpumask);
	if (ret_cpu >= nr_cpu_ids) {
		ret_cpu = -EINVAL;
		goto fail;
	}

	prev_cntr = *per_cpu_ptr(comp_vect_affinity, ret_cpu);
	for_each_cpu(curr_cpu, possible_cpumask) {
		cntr = *per_cpu_ptr(comp_vect_affinity, curr_cpu);

		if (cntr < prev_cntr) {
			ret_cpu = curr_cpu;
			prev_cntr = cntr;
		}
	}

	*per_cpu_ptr(comp_vect_affinity, ret_cpu) += 1;

fail:
	return ret_cpu;
}

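/*
 * Release one completion vector from the most-loaded CPU in
 * possible_cpumask (highest comp_vect_affinity counter) and return that
 * CPU, or -EINVAL on bad arguments.
 */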
static int per_cpu_affinity_put_max(cpumask_var_t possible_cpumask,
				    u16 __percpu *comp_vect_affinity)
{
	int curr_cpu;
	int max_cpu;
	u16 cntr;
	u16 prev_cntr;

	if (!possible_cpumask)
		return -EINVAL;

	if (!comp_vect_affinity)
		return -EINVAL;

	max_cpu = cpumask_first(possible_cpumask);
	if (max_cpu >= nr_cpu_ids)
		return -EINVAL;

	prev_cntr = *per_cpu_ptr(comp_vect_affinity, max_cpu);
	for_each_cpu(curr_cpu, possible_cpumask) {
		cntr = *per_cpu_ptr(comp_vect_affinity, curr_cpu);

		if (cntr > prev_cntr) {
			max_cpu = curr_cpu;
			prev_cntr = cntr;
		}
	}

	*per_cpu_ptr(comp_vect_affinity, max_cpu) -= 1;

	return max_cpu;
}

/*
 * Non-interrupt CPUs are used first, then interrupt CPUs.
 * Two already allocated cpu masks must be passed.
 */
static int _dev_comp_vect_cpu_get(struct hfi1_devdata *dd,
				  struct hfi1_affinity_node *entry,
				  cpumask_var_t non_intr_cpus,
				  cpumask_var_t available_cpus)
	__must_hold(&node_affinity.lock)
{
	int cpu;
	struct cpu_mask_set *set = dd->comp_vect;

	lockdep_assert_held(&node_affinity.lock);
	if (!non_intr_cpus) {
		cpu = -1;
		goto fail;
	}

	if (!available_cpus) {
		cpu = -1;
		goto fail;
	}

	/* Available CPUs for pinning completion vectors */
	_cpu_mask_set_gen_inc(set);
	cpumask_andnot(available_cpus, &set->mask, &set->used);

	/* Available CPUs without SDMA engine interrupts */
	cpumask_andnot(non_intr_cpus, available_cpus,
		       &entry->def_intr.used);

	/* If there are non-interrupt CPUs available, use them first */
	if (!cpumask_empty(non_intr_cpus))
		cpu = cpumask_first(non_intr_cpus);
	else
		cpu = cpumask_first(available_cpus);

	if (cpu >= nr_cpu_ids) {
		cpu = -1;
		goto fail;
	}
	cpumask_set_cpu(cpu, &set->used);

fail:
	return cpu;
}

static void _dev_comp_vect_cpu_put(struct hfi1_devdata *dd, int cpu)
{
	struct cpu_mask_set *set = dd->comp_vect;

	if (cpu < 0)
		return;

	cpu_mask_set_put(set, cpu);
}

/* _dev_comp_vect_mappings_destroy() is reentrant */
static void _dev_comp_vect_mappings_destroy(struct hfi1_devdata *dd)
{
	int i, cpu;

	if (!dd->comp_vect_mappings)
		return;

	for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
		cpu = dd->comp_vect_mappings[i];
		_dev_comp_vect_cpu_put(dd, cpu);
		dd->comp_vect_mappings[i] = -1;
		hfi1_cdbg(AFFINITY,
			  "[%s] Release CPU %d from completion vector %d",
			  rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), cpu, i);
	}

	kfree(dd->comp_vect_mappings);
	dd->comp_vect_mappings = NULL;
}

/*
 * Create the completion vector to CPU mapping table for a device.
 * It expects dd->comp_vect_possible_cpus to already be set up and
 * must be called with node_affinity.lock held.
 */
static int _dev_comp_vect_mappings_create(struct hfi1_devdata *dd,
					  struct hfi1_affinity_node *entry)
	__must_hold(&node_affinity.lock)
{
	int i, cpu, ret;
	cpumask_var_t non_intr_cpus;
	cpumask_var_t available_cpus;

	lockdep_assert_held(&node_affinity.lock);

	if (!zalloc_cpumask_var(&non_intr_cpus, GFP_KERNEL))
		return -ENOMEM;

	if (!zalloc_cpumask_var(&available_cpus, GFP_KERNEL)) {
		free_cpumask_var(non_intr_cpus);
		return -ENOMEM;
	}

	dd->comp_vect_mappings = kcalloc(dd->comp_vect_possible_cpus,
					 sizeof(*dd->comp_vect_mappings),
					 GFP_KERNEL);
	if (!dd->comp_vect_mappings) {
		ret = -ENOMEM;
		goto fail;
	}
	for (i = 0; i < dd->comp_vect_possible_cpus; i++)
		dd->comp_vect_mappings[i] = -1;

	for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
		cpu = _dev_comp_vect_cpu_get(dd, entry, non_intr_cpus,
					     available_cpus);
		if (cpu < 0) {
			ret = -EINVAL;
			goto fail;
		}

		dd->comp_vect_mappings[i] = cpu;
		hfi1_cdbg(AFFINITY,
			  "[%s] Completion Vector %d -> CPU %d",
			  rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), i, cpu);
	}

	free_cpumask_var(available_cpus);
	free_cpumask_var(non_intr_cpus);
	return 0;

fail:
	free_cpumask_var(available_cpus);
	free_cpumask_var(non_intr_cpus);
	_dev_comp_vect_mappings_destroy(dd);

	return ret;
}

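/*
 * Look up the device's NUMA affinity entry and build its completion
 * vector to CPU mapping under node_affinity.lock.
 */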
int hfi1_comp_vectors_set_up(struct hfi1_devdata *dd)
{
	int ret;
	struct hfi1_affinity_node *entry;

	mutex_lock(&node_affinity.lock);
	entry = node_affinity_lookup(dd->node);
	if (!entry) {
		ret = -EINVAL;
		goto unlock;
	}
	ret = _dev_comp_vect_mappings_create(dd, entry);
unlock:
	mutex_unlock(&node_affinity.lock);

	return ret;
}

void hfi1_comp_vectors_clean_up(struct hfi1_devdata *dd)
{
	_dev_comp_vect_mappings_destroy(dd);
}

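/*
 * Return the CPU assigned to a given completion vector, or -EINVAL if
 * the vector is out of range or no mapping has been created.
 */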
int hfi1_comp_vect_mappings_lookup(struct rvt_dev_info *rdi, int comp_vect)
{
	struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
	struct hfi1_devdata *dd = dd_from_dev(verbs_dev);

	if (!dd->comp_vect_mappings)
		return -EINVAL;
	if (comp_vect >= dd->comp_vect_possible_cpus)
		return -EINVAL;

	return dd->comp_vect_mappings[comp_vect];
}

/*
 * Reserve a share of the node's completion vector CPUs for this device
 * and record it in dd->comp_vect->mask.
 */
static int _dev_comp_vect_cpu_mask_init(struct hfi1_devdata *dd,
					struct hfi1_affinity_node *entry,
					bool first_dev_init)
	__must_hold(&node_affinity.lock)
{
	int i, j, curr_cpu;
	int possible_cpus_comp_vect = 0;
	struct cpumask *dev_comp_vect_mask = &dd->comp_vect->mask;

	lockdep_assert_held(&node_affinity.lock);

	/*
	 * If there's only one CPU available for completion vectors, then
	 * there will only be one completion vector available. Otherwise,
	 * the number of completion vectors available will be the number of
	 * available CPUs divided by the number of devices in the local
	 * NUMA node.
	 */
	if (cpumask_weight(&entry->comp_vect_mask) == 1) {
		possible_cpus_comp_vect = 1;
		dd_dev_warn(dd,
			    "Number of kernel receive queues is too large for completion vector affinity to be effective\n");
	} else {
		possible_cpus_comp_vect +=
			cpumask_weight(&entry->comp_vect_mask) /
				       hfi1_per_node_cntr[dd->node];

		/*
		 * If the completion vector CPUs available don't divide
		 * evenly among devices, the first device to be initialized
		 * gets an extra CPU.
		 */
		if (first_dev_init &&
		    cpumask_weight(&entry->comp_vect_mask) %
		    hfi1_per_node_cntr[dd->node] != 0)
			possible_cpus_comp_vect++;
	}

	dd->comp_vect_possible_cpus = possible_cpus_comp_vect;

	/* Reserving CPUs for device completion vectors */
	for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
		curr_cpu = per_cpu_affinity_get(&entry->comp_vect_mask,
						entry->comp_vect_affinity);
		if (curr_cpu < 0)
			goto fail;

		cpumask_set_cpu(curr_cpu, dev_comp_vect_mask);
	}

	hfi1_cdbg(AFFINITY,
		  "[%s] Completion vector affinity CPU set(s) %*pbl",
		  rvt_get_ibdev_name(&(dd)->verbs_dev.rdi),
		  cpumask_pr_args(dev_comp_vect_mask));

	return 0;

fail:
	for (j = 0; j < i; j++)
		per_cpu_affinity_put_max(&entry->comp_vect_mask,
					 entry->comp_vect_affinity);

	return curr_cpu;
}

/*
 * Release this device's completion vector CPUs back to the node-wide
 * accounting so future devices can reuse them.
 */
static void _dev_comp_vect_cpu_mask_clean_up(struct hfi1_devdata *dd,
					     struct hfi1_affinity_node *entry)
	__must_hold(&node_affinity.lock)
{
	int i, cpu;

	lockdep_assert_held(&node_affinity.lock);
	if (!dd->comp_vect_possible_cpus)
		return;

	for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
		cpu = per_cpu_affinity_put_max(&dd->comp_vect->mask,
					       entry->comp_vect_affinity);
		/* Clearing CPU in device completion vector cpu mask */
		if (cpu >= 0)
			cpumask_clear_cpu(cpu, &dd->comp_vect->mask);
	}

	dd->comp_vect_possible_cpus = 0;
}

/*
 * Interrupt affinity.
 *
 * Builds (or reuses) the per-NUMA-node affinity entry for the device:
 * the general/control interrupt gets the first real CPU of the node,
 * kernel receive contexts get the next CPUs, and whatever remains is
 * left for SDMA engines and completion vectors.  Called once per
 * device during initialization.
 */
int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
{
	struct hfi1_affinity_node *entry;
	const struct cpumask *local_mask;
	int curr_cpu, possible, i, ret;
	bool new_entry = false;

	local_mask = cpumask_of_node(dd->node);
	if (cpumask_first(local_mask) >= nr_cpu_ids)
		local_mask = topology_core_cpumask(0);

	mutex_lock(&node_affinity.lock);
	entry = node_affinity_lookup(dd->node);

	/*
	 * If this is the first time this NUMA node's affinity is used,
	 * create an entry in the global affinity structure and initialize it.
	 */
	if (!entry) {
		entry = node_affinity_allocate(dd->node);
		if (!entry) {
			dd_dev_err(dd,
				   "Unable to allocate global affinity node\n");
			ret = -ENOMEM;
			goto fail;
		}
		new_entry = true;

		init_cpu_mask_set(&entry->def_intr);
		init_cpu_mask_set(&entry->rcv_intr);
		cpumask_clear(&entry->comp_vect_mask);
		cpumask_clear(&entry->general_intr_mask);
		/* Use the "real" (non-HT) CPUs of this node by default */
		cpumask_and(&entry->def_intr.mask, &node_affinity.real_cpu_mask,
			    local_mask);

		/* fill in the receive list */
		possible = cpumask_weight(&entry->def_intr.mask);
		curr_cpu = cpumask_first(&entry->def_intr.mask);

		if (possible == 1) {
			/* only one CPU, everyone will use it */
			cpumask_set_cpu(curr_cpu, &entry->rcv_intr.mask);
			cpumask_set_cpu(curr_cpu, &entry->general_intr_mask);
		} else {
			/*
			 * The general/control context will be the first CPU in
			 * the default list, so it is removed from the default
			 * list and added to the general interrupt list.
			 */
			cpumask_clear_cpu(curr_cpu, &entry->def_intr.mask);
			cpumask_set_cpu(curr_cpu, &entry->general_intr_mask);
			curr_cpu = cpumask_next(curr_cpu,
						&entry->def_intr.mask);

			/*
			 * Remove the remaining kernel receive queues from
			 * the default list and add them to the receive list.
			 */
			for (i = 0;
			     i < (dd->n_krcv_queues - 1) *
				  hfi1_per_node_cntr[dd->node];
			     i++) {
				cpumask_clear_cpu(curr_cpu,
						  &entry->def_intr.mask);
				cpumask_set_cpu(curr_cpu,
						&entry->rcv_intr.mask);
				curr_cpu = cpumask_next(curr_cpu,
							&entry->def_intr.mask);
				if (curr_cpu >= nr_cpu_ids)
					break;
			}

			/*
			 * If there ends up being 0 CPU cores leftover for SDMA
			 * engines, use the same CPU cores as general/control
			 * context.
			 */
			if (cpumask_empty(&entry->def_intr.mask))
				cpumask_copy(&entry->def_intr.mask,
					     &entry->general_intr_mask);
		}

		/* Determine completion vector CPUs for the entire node */
		cpumask_and(&entry->comp_vect_mask,
			    &node_affinity.real_cpu_mask, local_mask);
		cpumask_andnot(&entry->comp_vect_mask,
			       &entry->comp_vect_mask,
			       &entry->rcv_intr.mask);
		cpumask_andnot(&entry->comp_vect_mask,
			       &entry->comp_vect_mask,
			       &entry->general_intr_mask);

		/*
		 * If there ends up being 0 CPU cores leftover for completion
		 * vectors, use the same CPU core as the general/control
		 * context.
		 */
		if (cpumask_empty(&entry->comp_vect_mask))
			cpumask_copy(&entry->comp_vect_mask,
				     &entry->general_intr_mask);
	}

	ret = _dev_comp_vect_cpu_mask_init(dd, entry, new_entry);
	if (ret < 0)
		goto fail;

	if (new_entry)
		node_affinity_add_tail(entry);

	dd->affinity_entry = entry;
	mutex_unlock(&node_affinity.lock);

	return 0;

fail:
	if (new_entry)
		node_affinity_destroy(entry);
	mutex_unlock(&node_affinity.lock);
	return ret;
}

void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd)
{
	struct hfi1_affinity_node *entry;

	mutex_lock(&node_affinity.lock);
	if (!dd->affinity_entry)
		goto unlock;
	entry = node_affinity_lookup(dd->node);
	if (!entry)
		goto unlock;

	/*
	 * Free device completion vector CPUs to be used by future
	 * completion vectors
	 */
	_dev_comp_vect_cpu_mask_clean_up(dd, entry);
unlock:
	dd->affinity_entry = NULL;
	mutex_unlock(&node_affinity.lock);
}

/*
 * Function updates the irq affinity hint for msix after it has been changed
 * by the user using the /proc/irq interface. This function only accepts
 * one cpu in the mask.
 */
static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu)
{
	struct sdma_engine *sde = msix->arg;
	struct hfi1_devdata *dd = sde->dd;
	struct hfi1_affinity_node *entry;
	struct cpu_mask_set *set;
	int i, old_cpu;

	if (cpu > num_online_cpus() || cpu == sde->cpu)
		return;

	mutex_lock(&node_affinity.lock);
	entry = node_affinity_lookup(dd->node);
	if (!entry)
		goto unlock;

	old_cpu = sde->cpu;
	sde->cpu = cpu;
	cpumask_clear(&msix->mask);
	cpumask_set_cpu(cpu, &msix->mask);
	dd_dev_dbg(dd, "IRQ: %u, type %s engine %u -> cpu: %d\n",
		   msix->irq, irq_type_names[msix->type],
		   sde->this_idx, cpu);
	irq_set_affinity_hint(msix->irq, &msix->mask);

	/*
	 * Record the new cpu in the node's default interrupt set and clear
	 * the old cpu only if no other SDMA vector is still affined to it.
	 */
	set = &entry->def_intr;
	cpumask_set_cpu(cpu, &set->mask);
	cpumask_set_cpu(cpu, &set->used);
	for (i = 0; i < dd->msix_info.max_requested; i++) {
		struct hfi1_msix_entry *other_msix;

		other_msix = &dd->msix_info.msix_entries[i];
		if (other_msix->type != IRQ_SDMA || other_msix == msix)
			continue;

		if (cpumask_test_cpu(old_cpu, &other_msix->mask))
			goto unlock;
	}
	cpumask_clear_cpu(old_cpu, &set->mask);
	cpumask_clear_cpu(old_cpu, &set->used);
unlock:
	mutex_unlock(&node_affinity.lock);
}

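/*
 * Affinity notifier callback: invoked when user space changes an SDMA
 * IRQ's affinity through /proc/irq; only the first CPU of the new mask
 * is honored.
 */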
static void hfi1_irq_notifier_notify(struct irq_affinity_notify *notify,
				     const cpumask_t *mask)
{
	int cpu = cpumask_first(mask);
	struct hfi1_msix_entry *msix = container_of(notify,
						    struct hfi1_msix_entry,
						    notify);

	/* Only one CPU configuration supported currently */
	hfi1_update_sdma_affinity(msix, cpu);
}

static void hfi1_irq_notifier_release(struct kref *ref)
{
	/*
	 * This is required by affinity notifier. We don't have anything to
	 * free here.
	 */
}

static void hfi1_setup_sdma_notifier(struct hfi1_msix_entry *msix)
{
	struct irq_affinity_notify *notify = &msix->notify;

	notify->irq = msix->irq;
	notify->notify = hfi1_irq_notifier_notify;
	notify->release = hfi1_irq_notifier_release;

	if (irq_set_affinity_notifier(notify->irq, notify))
		pr_err("Failed to register sdma irq affinity notifier for irq %d\n",
		       notify->irq);
}

static void hfi1_cleanup_sdma_notifier(struct hfi1_msix_entry *msix)
{
	struct irq_affinity_notify *notify = &msix->notify;

	if (irq_set_affinity_notifier(notify->irq, NULL))
		pr_err("Failed to cleanup sdma irq affinity notifier for irq %d\n",
		       notify->irq);
}

/*
 * Function sets the irq affinity for msix.
 * It *must* be called with node_affinity.lock held.
 */
static int get_irq_affinity(struct hfi1_devdata *dd,
			    struct hfi1_msix_entry *msix)
{
	cpumask_var_t diff;
	struct hfi1_affinity_node *entry;
	struct cpu_mask_set *set = NULL;
	struct sdma_engine *sde = NULL;
	struct hfi1_ctxtdata *rcd = NULL;
	char extra[64];
	int cpu = -1;

	extra[0] = '\0';
	cpumask_clear(&msix->mask);

	entry = node_affinity_lookup(dd->node);

	switch (msix->type) {
	case IRQ_SDMA:
		sde = (struct sdma_engine *)msix->arg;
		scnprintf(extra, 64, "engine %u", sde->this_idx);
		set = &entry->def_intr;
		break;
	case IRQ_GENERAL:
		cpu = cpumask_first(&entry->general_intr_mask);
		break;
	case IRQ_RCVCTXT:
		rcd = (struct hfi1_ctxtdata *)msix->arg;
		if (rcd->ctxt == HFI1_CTRL_CTXT)
			cpu = cpumask_first(&entry->general_intr_mask);
		else
			set = &entry->rcv_intr;
		scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
		break;
	case IRQ_NETDEVCTXT:
		rcd = (struct hfi1_ctxtdata *)msix->arg;
		set = &entry->def_intr;
		scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
		break;
	default:
		dd_dev_err(dd, "Invalid IRQ type %d\n", msix->type);
		return -EINVAL;
	}

	/*
	 * The general and control contexts are placed on a particular
	 * CPU, which is set above. Skip accounting for it. Everything else
	 * finds its CPU here.
	 */
	if (cpu == -1 && set) {
		if (!zalloc_cpumask_var(&diff, GFP_KERNEL))
			return -ENOMEM;

		cpu = cpu_mask_set_get_first(set, diff);
		if (cpu < 0) {
			free_cpumask_var(diff);
			dd_dev_err(dd, "Failure to obtain CPU for IRQ\n");
			return cpu;
		}

		free_cpumask_var(diff);
	}

	cpumask_set_cpu(cpu, &msix->mask);
	dd_dev_info(dd, "IRQ: %u, type %s %s -> cpu: %d\n",
		    msix->irq, irq_type_names[msix->type],
		    extra, cpu);
	irq_set_affinity_hint(msix->irq, &msix->mask);

	if (msix->type == IRQ_SDMA) {
		sde->cpu = cpu;
		hfi1_setup_sdma_notifier(msix);
	}

	return 0;
}

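/* Locked wrapper around get_irq_affinity() */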
int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
{
	int ret;

	mutex_lock(&node_affinity.lock);
	ret = get_irq_affinity(dd, msix);
	mutex_unlock(&node_affinity.lock);
	return ret;
}

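/*
 * Undo hfi1_get_irq_affinity(): return the IRQ's CPU to the proper CPU
 * mask set, drop the affinity hint, and clear the MSI-X entry's mask.
 */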
void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
			   struct hfi1_msix_entry *msix)
{
	struct cpu_mask_set *set = NULL;
	struct hfi1_affinity_node *entry;

	mutex_lock(&node_affinity.lock);
	entry = node_affinity_lookup(dd->node);

	switch (msix->type) {
	case IRQ_SDMA:
		set = &entry->def_intr;
		hfi1_cleanup_sdma_notifier(msix);
		break;
	case IRQ_GENERAL:
		/* Don't do accounting for general contexts */
		break;
	case IRQ_RCVCTXT: {
		struct hfi1_ctxtdata *rcd = msix->arg;

		/* Don't do accounting for control contexts */
		if (rcd->ctxt != HFI1_CTRL_CTXT)
			set = &entry->rcv_intr;
		break;
	}
	case IRQ_NETDEVCTXT:
		set = &entry->def_intr;
		break;
	default:
		mutex_unlock(&node_affinity.lock);
		return;
	}

	if (set) {
		cpumask_andnot(&set->used, &set->used, &msix->mask);
		_cpu_mask_set_gen_dec(set);
	}

	irq_set_affinity_hint(msix->irq, NULL);
	cpumask_clear(&msix->mask);
	mutex_unlock(&node_affinity.lock);
}

/*
 * Build the mask of CPUs corresponding to hardware thread number
 * 'hw_thread_no' across all online sockets, starting from the process
 * CPU mask set.
 */
static void find_hw_thread_mask(uint hw_thread_no, cpumask_var_t hw_thread_mask,
				struct hfi1_affinity_node_list *affinity)
{
	int possible, curr_cpu, i;
	uint num_cores_per_socket = node_affinity.num_online_cpus /
					affinity->num_core_siblings /
						node_affinity.num_online_nodes;

	cpumask_copy(hw_thread_mask, &affinity->proc.mask);
	if (affinity->num_core_siblings > 0) {
		/* Keep only the CPUs for one HW thread per physical core */
		possible = cpumask_weight(hw_thread_mask);
		curr_cpu = cpumask_first(hw_thread_mask);
		for (i = 0;
		     i < num_cores_per_socket * node_affinity.num_online_nodes;
		     i++)
			curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);

		for (; i < possible; i++) {
			cpumask_clear_cpu(curr_cpu, hw_thread_mask);
			curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);
		}

		/* Shift to select the requested HW thread within each core */
		cpumask_shift_left(hw_thread_mask, hw_thread_mask,
				   num_cores_per_socket *
				   node_affinity.num_online_nodes *
				   hw_thread_no);
	}
}

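/*
 * Recommend a CPU for a user process opening a context on the device on
 * NUMA node 'node'.  Real cores are preferred over HT siblings, the
 * device's own node over remote nodes, and CPUs not running interrupt
 * handlers over those that are.  Returns the chosen CPU (marked used in
 * the process CPU set) or -1.
 */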
int hfi1_get_proc_affinity(int node)
{
	int cpu = -1, ret, i;
	struct hfi1_affinity_node *entry;
	cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask;
	const struct cpumask *node_mask,
		*proc_mask = current->cpus_ptr;
	struct hfi1_affinity_node_list *affinity = &node_affinity;
	struct cpu_mask_set *set = &affinity->proc;

	/*
	 * check whether process/context affinity has already
	 * been set
	 */
	if (current->nr_cpus_allowed == 1) {
		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl",
			  current->pid, current->comm,
			  cpumask_pr_args(proc_mask));
		/*
		 * Mark the pre-set CPU as used. This is atomic so we don't
		 * need the lock
		 */
		cpu = cpumask_first(proc_mask);
		cpumask_set_cpu(cpu, &set->used);
		goto done;
	} else if (current->nr_cpus_allowed < cpumask_weight(&set->mask)) {
		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl",
			  current->pid, current->comm,
			  cpumask_pr_args(proc_mask));
		goto done;
	}

	/*
	 * The process does not have a preset CPU affinity, so find one to
	 * recommend using the following algorithm:
	 *
	 * For each user process that is opening a context on HFI Y:
	 *  a) If all cores are filled, reinitialize the bitmask
	 *  b) Fill real cores first, then HT cores (First set of HT
	 *     cores on all physical cores, then second set of HT core,
	 *     and, so on) in the following order:
	 *
	 *     1. Same NUMA node as HFI Y and not running an IRQ
	 *        handler
	 *     2. Same NUMA node as HFI Y and running an IRQ handler
	 *     3. Different NUMA node to HFI Y and not running an IRQ
	 *        handler
	 *     4. Different NUMA node to HFI Y and running an IRQ
	 *        handler
	 *  c) Mark core as filled in the bitmask. As user processes are
	 *     done, clear cores that were previously allocated.
	 */
	ret = zalloc_cpumask_var(&diff, GFP_KERNEL);
	if (!ret)
		goto done;
	ret = zalloc_cpumask_var(&hw_thread_mask, GFP_KERNEL);
	if (!ret)
		goto free_diff;
	ret = zalloc_cpumask_var(&available_mask, GFP_KERNEL);
	if (!ret)
		goto free_hw_thread_mask;
	ret = zalloc_cpumask_var(&intrs_mask, GFP_KERNEL);
	if (!ret)
		goto free_available_mask;

	mutex_lock(&affinity->lock);
	/*
	 * If we've used all available HW threads, clear the mask and start
	 * overloading.
	 */
	_cpu_mask_set_gen_inc(set);

	/*
	 * If the NUMA node has CPUs used by interrupt handlers, include them
	 * in the interrupt handler mask.
	 */
	entry = node_affinity_lookup(node);
	if (entry) {
		cpumask_copy(intrs_mask, (entry->def_intr.gen ?
					  &entry->def_intr.mask :
					  &entry->def_intr.used));
		cpumask_or(intrs_mask, intrs_mask, (entry->rcv_intr.gen ?
						    &entry->rcv_intr.mask :
						    &entry->rcv_intr.used));
		cpumask_or(intrs_mask, intrs_mask, &entry->general_intr_mask);
	}
	hfi1_cdbg(PROC, "CPUs used by interrupts: %*pbl",
		  cpumask_pr_args(intrs_mask));

	cpumask_copy(hw_thread_mask, &set->mask);

	/*
	 * If HT cores are enabled, identify which HW threads within the
	 * physical cores should be used first.
	 */
	if (affinity->num_core_siblings > 0) {
		for (i = 0; i < affinity->num_core_siblings; i++) {
			find_hw_thread_mask(i, hw_thread_mask, affinity);

			/*
			 * If there's at least one available core for this HW
			 * thread number, stop looking for a core.
			 *
			 * diff will always be not empty at least once in this
			 * loop as the used mask gets reset when
			 * (set->mask == set->used) before this loop.
			 */
			cpumask_andnot(diff, hw_thread_mask, &set->used);
			if (!cpumask_empty(diff))
				break;
		}
	}
	hfi1_cdbg(PROC, "Same available HW thread on all physical CPUs: %*pbl",
		  cpumask_pr_args(hw_thread_mask));

	node_mask = cpumask_of_node(node);
	hfi1_cdbg(PROC, "Device on NUMA %u, CPUs %*pbl", node,
		  cpumask_pr_args(node_mask));

	/* Get the CPUs on the preferred NUMA node that are not yet used */
	cpumask_and(available_mask, hw_thread_mask, node_mask);
	cpumask_andnot(available_mask, available_mask, &set->used);
	hfi1_cdbg(PROC, "Available CPUs on NUMA %u: %*pbl", node,
		  cpumask_pr_args(available_mask));

	/*
	 * Prefer CPUs that are not running interrupt handlers:
	 *
	 * 1) If diff (available CPUs minus interrupt CPUs) is not empty,
	 *    use it as the available mask.
	 * 2) If diff is empty, keep available_mask as is and accept
	 *    sharing a CPU with an interrupt handler.
	 */
	cpumask_andnot(diff, available_mask, intrs_mask);
	if (!cpumask_empty(diff))
		cpumask_copy(available_mask, diff);

	/* If the preferred NUMA node has no free CPUs, use the other nodes */
	if (cpumask_empty(available_mask)) {
		cpumask_andnot(available_mask, hw_thread_mask, &set->used);
		/* Excluding preferred NUMA cores */
		cpumask_andnot(available_mask, available_mask, node_mask);
		hfi1_cdbg(PROC,
			  "Preferred NUMA node cores are taken, cores available in other NUMA nodes: %*pbl",
			  cpumask_pr_args(available_mask));

		/*
		 * At first, we don't want to place processes on the same
		 * CPUs as interrupt handlers.
		 */
		cpumask_andnot(diff, available_mask, intrs_mask);
		if (!cpumask_empty(diff))
			cpumask_copy(available_mask, diff);
	}
	hfi1_cdbg(PROC, "Possible CPUs for process: %*pbl",
		  cpumask_pr_args(available_mask));

	cpu = cpumask_first(available_mask);
	if (cpu >= nr_cpu_ids)
		cpu = -1;
	else
		cpumask_set_cpu(cpu, &set->used);

	mutex_unlock(&affinity->lock);
	hfi1_cdbg(PROC, "Process assigned to CPU %d", cpu);

	free_cpumask_var(intrs_mask);
free_available_mask:
	free_cpumask_var(available_mask);
free_hw_thread_mask:
	free_cpumask_var(hw_thread_mask);
free_diff:
	free_cpumask_var(diff);
done:
	return cpu;
}

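/* Return a CPU handed out by hfi1_get_proc_affinity() to the process CPU set */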
void hfi1_put_proc_affinity(int cpu)
{
	struct hfi1_affinity_node_list *affinity = &node_affinity;
	struct cpu_mask_set *set = &affinity->proc;

	if (cpu < 0)
		return;

	mutex_lock(&affinity->lock);
	cpu_mask_set_put(set, cpu);
	hfi1_cdbg(PROC, "Returning CPU %d for future process assignment", cpu);
	mutex_unlock(&affinity->lock);
}