0001 // SPDX-License-Identifier: GPL-2.0-only
0002 #include <linux/module.h>
0003 
0004 #include <asm/cpu_device_id.h>
0005 #include <asm/intel-family.h>
0006 #include "uncore.h"
0007 #include "uncore_discovery.h"
0008 
0009 static bool uncore_no_discover;
0010 module_param(uncore_no_discover, bool, 0);
0011 MODULE_PARM_DESC(uncore_no_discover, "Don't enable the Intel uncore PerfMon discovery mechanism "
0012                      "(default: enable the discovery mechanism).");
0013 struct intel_uncore_type *empty_uncore[] = { NULL, };
0014 struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
0015 struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
0016 struct intel_uncore_type **uncore_mmio_uncores = empty_uncore;
0017 
0018 static bool pcidrv_registered;
0019 struct pci_driver *uncore_pci_driver;
0020 /* The PCI driver for the devices which the uncore driver doesn't own. */
0021 struct pci_driver *uncore_pci_sub_driver;
0022 /* pci bus to die id mapping */
0023 DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
0024 struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
0025 struct pci_extra_dev *uncore_extra_pci_dev;
0026 int __uncore_max_dies;
0027 
0028 /* mask of cpus that collect uncore events */
0029 static cpumask_t uncore_cpu_mask;
0030 
0031 /* constraint for the fixed counter */
0032 static struct event_constraint uncore_constraint_fixed =
0033     EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
0034 struct event_constraint uncore_constraint_empty =
0035     EVENT_CONSTRAINT(0, 0, 0);
0036 
0037 MODULE_LICENSE("GPL");
0038 
0039 int uncore_pcibus_to_dieid(struct pci_bus *bus)
0040 {
0041     struct pci2phy_map *map;
0042     int die_id = -1;
0043 
0044     raw_spin_lock(&pci2phy_map_lock);
0045     list_for_each_entry(map, &pci2phy_map_head, list) {
0046         if (map->segment == pci_domain_nr(bus)) {
0047             die_id = map->pbus_to_dieid[bus->number];
0048             break;
0049         }
0050     }
0051     raw_spin_unlock(&pci2phy_map_lock);
0052 
0053     return die_id;
0054 }
0055 
0056 int uncore_die_to_segment(int die)
0057 {
0058     struct pci_bus *bus = NULL;
0059 
0060     /* Find the first pci bus that belongs to the specified die. */
0061     while ((bus = pci_find_next_bus(bus)) &&
0062            (die != uncore_pcibus_to_dieid(bus)))
0063         ;
0064 
0065     return bus ? pci_domain_nr(bus) : -EINVAL;
0066 }
0067 
0068 static void uncore_free_pcibus_map(void)
0069 {
0070     struct pci2phy_map *map, *tmp;
0071 
0072     list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) {
0073         list_del(&map->list);
0074         kfree(map);
0075     }
0076 }
0077 
0078 struct pci2phy_map *__find_pci2phy_map(int segment)
0079 {
0080     struct pci2phy_map *map, *alloc = NULL;
0081     int i;
0082 
0083     lockdep_assert_held(&pci2phy_map_lock);
0084 
0085 lookup:
0086     list_for_each_entry(map, &pci2phy_map_head, list) {
0087         if (map->segment == segment)
0088             goto end;
0089     }
0090 
0091     if (!alloc) {
0092         raw_spin_unlock(&pci2phy_map_lock);
0093         alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
0094         raw_spin_lock(&pci2phy_map_lock);
0095 
0096         if (!alloc)
0097             return NULL;
0098 
0099         goto lookup;
0100     }
0101 
0102     map = alloc;
0103     alloc = NULL;
0104     map->segment = segment;
0105     for (i = 0; i < 256; i++)
0106         map->pbus_to_dieid[i] = -1;
0107     list_add_tail(&map->list, &pci2phy_map_head);
0108 
0109 end:
0110     kfree(alloc);
0111     return map;
0112 }
0113 
0114 ssize_t uncore_event_show(struct device *dev,
0115               struct device_attribute *attr, char *buf)
0116 {
0117     struct uncore_event_desc *event =
0118         container_of(attr, struct uncore_event_desc, attr);
0119     return sprintf(buf, "%s", event->config);
0120 }
0121 
0122 struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
0123 {
0124     unsigned int dieid = topology_logical_die_id(cpu);
0125 
0126     /*
0127      * The unsigned check also catches the '-1' return value for
0128      * non-existent mappings in the topology map.
0129      */
0130     return dieid < uncore_max_dies() ? pmu->boxes[dieid] : NULL;
0131 }
0132 
0133 u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
0134 {
0135     u64 count;
0136 
0137     rdmsrl(event->hw.event_base, count);
0138 
0139     return count;
0140 }
0141 
0142 void uncore_mmio_exit_box(struct intel_uncore_box *box)
0143 {
0144     if (box->io_addr)
0145         iounmap(box->io_addr);
0146 }
0147 
0148 u64 uncore_mmio_read_counter(struct intel_uncore_box *box,
0149                  struct perf_event *event)
0150 {
0151     if (!box->io_addr)
0152         return 0;
0153 
0154     if (!uncore_mmio_is_valid_offset(box, event->hw.event_base))
0155         return 0;
0156 
0157     return readq(box->io_addr + event->hw.event_base);
0158 }
0159 
0160 /*
0161  * generic get constraint function for shared match/mask registers.
0162  */
0163 struct event_constraint *
0164 uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
0165 {
0166     struct intel_uncore_extra_reg *er;
0167     struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
0168     struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
0169     unsigned long flags;
0170     bool ok = false;
0171 
0172     /*
0173      * reg->alloc can be set due to existing state, so for fake box we
0174      * need to ignore this, otherwise we might fail to allocate proper
0175      * fake state for this extra reg constraint.
0176      */
0177     if (reg1->idx == EXTRA_REG_NONE ||
0178         (!uncore_box_is_fake(box) && reg1->alloc))
0179         return NULL;
0180 
0181     er = &box->shared_regs[reg1->idx];
0182     raw_spin_lock_irqsave(&er->lock, flags);
0183     if (!atomic_read(&er->ref) ||
0184         (er->config1 == reg1->config && er->config2 == reg2->config)) {
0185         atomic_inc(&er->ref);
0186         er->config1 = reg1->config;
0187         er->config2 = reg2->config;
0188         ok = true;
0189     }
0190     raw_spin_unlock_irqrestore(&er->lock, flags);
0191 
0192     if (ok) {
0193         if (!uncore_box_is_fake(box))
0194             reg1->alloc = 1;
0195         return NULL;
0196     }
0197 
0198     return &uncore_constraint_empty;
0199 }
0200 
0201 void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
0202 {
0203     struct intel_uncore_extra_reg *er;
0204     struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
0205 
0206     /*
0207      * Only put constraint if extra reg was actually allocated. Also
0208      * takes care of events which do not use an extra shared reg.
0209      *
0210      * Also, if this is a fake box we shouldn't touch any event state
0211      * (reg->alloc) and we don't care about leaving inconsistent box
0212      * state either since it will be thrown out.
0213      */
0214     if (uncore_box_is_fake(box) || !reg1->alloc)
0215         return;
0216 
0217     er = &box->shared_regs[reg1->idx];
0218     atomic_dec(&er->ref);
0219     reg1->alloc = 0;
0220 }
0221 
0222 u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
0223 {
0224     struct intel_uncore_extra_reg *er;
0225     unsigned long flags;
0226     u64 config;
0227 
0228     er = &box->shared_regs[idx];
0229 
0230     raw_spin_lock_irqsave(&er->lock, flags);
0231     config = er->config;
0232     raw_spin_unlock_irqrestore(&er->lock, flags);
0233 
0234     return config;
0235 }
0236 
0237 static void uncore_assign_hw_event(struct intel_uncore_box *box,
0238                    struct perf_event *event, int idx)
0239 {
0240     struct hw_perf_event *hwc = &event->hw;
0241 
0242     hwc->idx = idx;
0243     hwc->last_tag = ++box->tags[idx];
0244 
0245     if (uncore_pmc_fixed(hwc->idx)) {
0246         hwc->event_base = uncore_fixed_ctr(box);
0247         hwc->config_base = uncore_fixed_ctl(box);
0248         return;
0249     }
0250 
0251     hwc->config_base = uncore_event_ctl(box, hwc->idx);
0252     hwc->event_base  = uncore_perf_ctr(box, hwc->idx);
0253 }
0254 
0255 void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
0256 {
0257     u64 prev_count, new_count, delta;
0258     int shift;
0259 
0260     if (uncore_pmc_freerunning(event->hw.idx))
0261         shift = 64 - uncore_freerunning_bits(box, event);
0262     else if (uncore_pmc_fixed(event->hw.idx))
0263         shift = 64 - uncore_fixed_ctr_bits(box);
0264     else
0265         shift = 64 - uncore_perf_ctr_bits(box);
0266 
0267     /* the hrtimer might modify the previous event value */
0268 again:
0269     prev_count = local64_read(&event->hw.prev_count);
0270     new_count = uncore_read_counter(box, event);
0271     if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
0272         goto again;
0273 
0274     delta = (new_count << shift) - (prev_count << shift);
0275     delta >>= shift;
0276 
0277     local64_add(delta, &event->count);
0278 }
0279 
0280 /*
0281  * The overflow interrupt is unavailable for SandyBridge-EP and is broken
0282  * on SandyBridge. So we use an hrtimer to periodically poll the counter
0283  * to avoid overflow.
0284  */
0285 static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
0286 {
0287     struct intel_uncore_box *box;
0288     struct perf_event *event;
0289     unsigned long flags;
0290     int bit;
0291 
0292     box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
0293     if (!box->n_active || box->cpu != smp_processor_id())
0294         return HRTIMER_NORESTART;
0295     /*
0296      * disable local interrupts to prevent uncore_pmu_event_start/stop
0297      * from interrupting the update process
0298      */
0299     local_irq_save(flags);
0300 
0301     /*
0302      * handle boxes with an active event list as opposed to active
0303      * counters
0304      */
0305     list_for_each_entry(event, &box->active_list, active_entry) {
0306         uncore_perf_event_update(box, event);
0307     }
0308 
0309     for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
0310         uncore_perf_event_update(box, box->events[bit]);
0311 
0312     local_irq_restore(flags);
0313 
0314     hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
0315     return HRTIMER_RESTART;
0316 }
0317 
0318 void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
0319 {
0320     hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
0321               HRTIMER_MODE_REL_PINNED);
0322 }
0323 
0324 void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
0325 {
0326     hrtimer_cancel(&box->hrtimer);
0327 }
0328 
0329 static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
0330 {
0331     hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
0332     box->hrtimer.function = uncore_pmu_hrtimer;
0333 }
0334 
0335 static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
0336                          int node)
0337 {
0338     int i, size, numshared = type->num_shared_regs;
0339     struct intel_uncore_box *box;
0340 
0341     size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg);
0342 
0343     box = kzalloc_node(size, GFP_KERNEL, node);
0344     if (!box)
0345         return NULL;
0346 
0347     for (i = 0; i < numshared; i++)
0348         raw_spin_lock_init(&box->shared_regs[i].lock);
0349 
0350     uncore_pmu_init_hrtimer(box);
0351     box->cpu = -1;
0352     box->dieid = -1;
0353 
0354     /* set default hrtimer timeout */
0355     box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
0356 
0357     INIT_LIST_HEAD(&box->active_list);
0358 
0359     return box;
0360 }
0361 
0362 /*
0363  * The uncore_pmu_event_init() pmu event_init callback is used
0364  * as a detection point for uncore events.
0365  */
0366 static int uncore_pmu_event_init(struct perf_event *event);
0367 
0368 static bool is_box_event(struct intel_uncore_box *box, struct perf_event *event)
0369 {
0370     return &box->pmu->pmu == event->pmu;
0371 }
0372 
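/*
 * Append the leader and, if dogrp is set, its sibling events to
 * box->event_list, skipping events that do not belong to this box.
 * Returns the new number of collected events, or -EINVAL if the box's
 * counters would be oversubscribed.
 */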
0373 static int
0374 uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
0375               bool dogrp)
0376 {
0377     struct perf_event *event;
0378     int n, max_count;
0379 
0380     max_count = box->pmu->type->num_counters;
0381     if (box->pmu->type->fixed_ctl)
0382         max_count++;
0383 
0384     if (box->n_events >= max_count)
0385         return -EINVAL;
0386 
0387     n = box->n_events;
0388 
0389     if (is_box_event(box, leader)) {
0390         box->event_list[n] = leader;
0391         n++;
0392     }
0393 
0394     if (!dogrp)
0395         return n;
0396 
0397     for_each_sibling_event(event, leader) {
0398         if (!is_box_event(box, event) ||
0399             event->state <= PERF_EVENT_STATE_OFF)
0400             continue;
0401 
0402         if (n >= max_count)
0403             return -EINVAL;
0404 
0405         box->event_list[n] = event;
0406         n++;
0407     }
0408     return n;
0409 }
0410 
0411 static struct event_constraint *
0412 uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
0413 {
0414     struct intel_uncore_type *type = box->pmu->type;
0415     struct event_constraint *c;
0416 
0417     if (type->ops->get_constraint) {
0418         c = type->ops->get_constraint(box, event);
0419         if (c)
0420             return c;
0421     }
0422 
0423     if (event->attr.config == UNCORE_FIXED_EVENT)
0424         return &uncore_constraint_fixed;
0425 
0426     if (type->constraints) {
0427         for_each_event_constraint(c, type->constraints) {
0428             if ((event->hw.config & c->cmask) == c->code)
0429                 return c;
0430         }
0431     }
0432 
0433     return &type->unconstrainted;
0434 }
0435 
0436 static void uncore_put_event_constraint(struct intel_uncore_box *box,
0437                     struct perf_event *event)
0438 {
0439     if (box->pmu->type->ops->put_constraint)
0440         box->pmu->type->ops->put_constraint(box, event);
0441 }
0442 
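/*
 * Assign hardware counters to the first n events of the box. The fast
 * path keeps events on the counters they already occupy when the
 * constraints still allow it; otherwise the generic perf_assign_events()
 * solver is used. Constraints are released again if no assignment is
 * requested or the assignment fails.
 */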
0443 static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
0444 {
0445     unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
0446     struct event_constraint *c;
0447     int i, wmin, wmax, ret = 0;
0448     struct hw_perf_event *hwc;
0449 
0450     bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
0451 
0452     for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
0453         c = uncore_get_event_constraint(box, box->event_list[i]);
0454         box->event_constraint[i] = c;
0455         wmin = min(wmin, c->weight);
0456         wmax = max(wmax, c->weight);
0457     }
0458 
0459     /* fastpath, try to reuse previous register */
0460     for (i = 0; i < n; i++) {
0461         hwc = &box->event_list[i]->hw;
0462         c = box->event_constraint[i];
0463 
0464         /* never assigned */
0465         if (hwc->idx == -1)
0466             break;
0467 
0468         /* constraint still honored */
0469         if (!test_bit(hwc->idx, c->idxmsk))
0470             break;
0471 
0472         /* not already used */
0473         if (test_bit(hwc->idx, used_mask))
0474             break;
0475 
0476         __set_bit(hwc->idx, used_mask);
0477         if (assign)
0478             assign[i] = hwc->idx;
0479     }
0480     /* slow path */
0481     if (i != n)
0482         ret = perf_assign_events(box->event_constraint, n,
0483                      wmin, wmax, n, assign);
0484 
0485     if (!assign || ret) {
0486         for (i = 0; i < n; i++)
0487             uncore_put_event_constraint(box, box->event_list[i]);
0488     }
0489     return ret ? -EINVAL : 0;
0490 }
0491 
0492 void uncore_pmu_event_start(struct perf_event *event, int flags)
0493 {
0494     struct intel_uncore_box *box = uncore_event_to_box(event);
0495     int idx = event->hw.idx;
0496 
0497     if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
0498         return;
0499 
0500     /*
0501      * Free running counter is read-only and always active.
0502      * Use the current counter value as start point.
0503      * There is no overflow interrupt for free running counter.
0504      * Use hrtimer to periodically poll the counter to avoid overflow.
0505      */
0506     if (uncore_pmc_freerunning(event->hw.idx)) {
0507         list_add_tail(&event->active_entry, &box->active_list);
0508         local64_set(&event->hw.prev_count,
0509                 uncore_read_counter(box, event));
0510         if (box->n_active++ == 0)
0511             uncore_pmu_start_hrtimer(box);
0512         return;
0513     }
0514 
0515     if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
0516         return;
0517 
0518     event->hw.state = 0;
0519     box->events[idx] = event;
0520     box->n_active++;
0521     __set_bit(idx, box->active_mask);
0522 
0523     local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
0524     uncore_enable_event(box, event);
0525 
0526     if (box->n_active == 1)
0527         uncore_pmu_start_hrtimer(box);
0528 }
0529 
0530 void uncore_pmu_event_stop(struct perf_event *event, int flags)
0531 {
0532     struct intel_uncore_box *box = uncore_event_to_box(event);
0533     struct hw_perf_event *hwc = &event->hw;
0534 
0535     /* Cannot disable free running counter which is read-only */
0536     if (uncore_pmc_freerunning(hwc->idx)) {
0537         list_del(&event->active_entry);
0538         if (--box->n_active == 0)
0539             uncore_pmu_cancel_hrtimer(box);
0540         uncore_perf_event_update(box, event);
0541         return;
0542     }
0543 
0544     if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
0545         uncore_disable_event(box, event);
0546         box->n_active--;
0547         box->events[hwc->idx] = NULL;
0548         WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
0549         hwc->state |= PERF_HES_STOPPED;
0550 
0551         if (box->n_active == 0)
0552             uncore_pmu_cancel_hrtimer(box);
0553     }
0554 
0555     if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
0556         /*
0557          * Drain the remaining delta count out of an event
0558          * that we are disabling:
0559          */
0560         uncore_perf_event_update(box, event);
0561         hwc->state |= PERF_HES_UPTODATE;
0562     }
0563 }
0564 
0565 int uncore_pmu_event_add(struct perf_event *event, int flags)
0566 {
0567     struct intel_uncore_box *box = uncore_event_to_box(event);
0568     struct hw_perf_event *hwc = &event->hw;
0569     int assign[UNCORE_PMC_IDX_MAX];
0570     int i, n, ret;
0571 
0572     if (!box)
0573         return -ENODEV;
0574 
0575     /*
0576      * The free running counter is assigned in event_init().
0577      * The free running counter event and free running counter
0578      * are 1:1 mapped. It doesn't need to be tracked in event_list.
0579      */
0580     if (uncore_pmc_freerunning(hwc->idx)) {
0581         if (flags & PERF_EF_START)
0582             uncore_pmu_event_start(event, 0);
0583         return 0;
0584     }
0585 
0586     ret = n = uncore_collect_events(box, event, false);
0587     if (ret < 0)
0588         return ret;
0589 
0590     hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
0591     if (!(flags & PERF_EF_START))
0592         hwc->state |= PERF_HES_ARCH;
0593 
0594     ret = uncore_assign_events(box, assign, n);
0595     if (ret)
0596         return ret;
0597 
0598     /* save events moving to new counters */
0599     for (i = 0; i < box->n_events; i++) {
0600         event = box->event_list[i];
0601         hwc = &event->hw;
0602 
0603         if (hwc->idx == assign[i] &&
0604             hwc->last_tag == box->tags[assign[i]])
0605             continue;
0606         /*
0607          * Ensure we don't accidentally enable a stopped
0608          * counter simply because we rescheduled.
0609          */
0610         if (hwc->state & PERF_HES_STOPPED)
0611             hwc->state |= PERF_HES_ARCH;
0612 
0613         uncore_pmu_event_stop(event, PERF_EF_UPDATE);
0614     }
0615 
0616     /* reprogram moved events into new counters */
0617     for (i = 0; i < n; i++) {
0618         event = box->event_list[i];
0619         hwc = &event->hw;
0620 
0621         if (hwc->idx != assign[i] ||
0622             hwc->last_tag != box->tags[assign[i]])
0623             uncore_assign_hw_event(box, event, assign[i]);
0624         else if (i < box->n_events)
0625             continue;
0626 
0627         if (hwc->state & PERF_HES_ARCH)
0628             continue;
0629 
0630         uncore_pmu_event_start(event, 0);
0631     }
0632     box->n_events = n;
0633 
0634     return 0;
0635 }
0636 
0637 void uncore_pmu_event_del(struct perf_event *event, int flags)
0638 {
0639     struct intel_uncore_box *box = uncore_event_to_box(event);
0640     int i;
0641 
0642     uncore_pmu_event_stop(event, PERF_EF_UPDATE);
0643 
0644     /*
0645      * The event for a free running counter is not tracked by event_list.
0646      * There is no need to force event->hw.idx = -1 to reassign the counter,
0647      * because the event and the free running counter are 1:1 mapped.
0648      */
0649     if (uncore_pmc_freerunning(event->hw.idx))
0650         return;
0651 
0652     for (i = 0; i < box->n_events; i++) {
0653         if (event == box->event_list[i]) {
0654             uncore_put_event_constraint(box, event);
0655 
0656             for (++i; i < box->n_events; i++)
0657                 box->event_list[i - 1] = box->event_list[i];
0658 
0659             --box->n_events;
0660             break;
0661         }
0662     }
0663 
0664     event->hw.idx = -1;
0665     event->hw.last_tag = ~0ULL;
0666 }
0667 
0668 void uncore_pmu_event_read(struct perf_event *event)
0669 {
0670     struct intel_uncore_box *box = uncore_event_to_box(event);
0671     uncore_perf_event_update(box, event);
0672 }
0673 
0674 /*
0675  * Validation ensures the group could be loaded onto the
0676  * PMU if it were the only group available.
0677  */
0678 static int uncore_validate_group(struct intel_uncore_pmu *pmu,
0679                 struct perf_event *event)
0680 {
0681     struct perf_event *leader = event->group_leader;
0682     struct intel_uncore_box *fake_box;
0683     int ret = -EINVAL, n;
0684 
0685     /* The free running counter is always active. */
0686     if (uncore_pmc_freerunning(event->hw.idx))
0687         return 0;
0688 
0689     fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
0690     if (!fake_box)
0691         return -ENOMEM;
0692 
0693     fake_box->pmu = pmu;
0694     /*
0695      * The event is not yet connected with its siblings,
0696      * therefore we must first collect the existing siblings
0697      * and then add the new event before we can simulate
0698      * the scheduling.
0699      */
0700     n = uncore_collect_events(fake_box, leader, true);
0701     if (n < 0)
0702         goto out;
0703 
0704     fake_box->n_events = n;
0705     n = uncore_collect_events(fake_box, event, false);
0706     if (n < 0)
0707         goto out;
0708 
0709     fake_box->n_events = n;
0710 
0711     ret = uncore_assign_events(fake_box, NULL, n);
0712 out:
0713     kfree(fake_box);
0714     return ret;
0715 }
0716 
0717 static int uncore_pmu_event_init(struct perf_event *event)
0718 {
0719     struct intel_uncore_pmu *pmu;
0720     struct intel_uncore_box *box;
0721     struct hw_perf_event *hwc = &event->hw;
0722     int ret;
0723 
0724     if (event->attr.type != event->pmu->type)
0725         return -ENOENT;
0726 
0727     pmu = uncore_event_to_pmu(event);
0728     /* no device found for this pmu */
0729     if (pmu->func_id < 0)
0730         return -ENOENT;
0731 
0732     /* Sampling not supported yet */
0733     if (hwc->sample_period)
0734         return -EINVAL;
0735 
0736     /*
0737      * Place all uncore events for a particular physical package
0738      * onto a single cpu
0739      */
0740     if (event->cpu < 0)
0741         return -EINVAL;
0742     box = uncore_pmu_to_box(pmu, event->cpu);
0743     if (!box || box->cpu < 0)
0744         return -EINVAL;
0745     event->cpu = box->cpu;
0746     event->pmu_private = box;
0747 
0748     event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
0749 
0750     event->hw.idx = -1;
0751     event->hw.last_tag = ~0ULL;
0752     event->hw.extra_reg.idx = EXTRA_REG_NONE;
0753     event->hw.branch_reg.idx = EXTRA_REG_NONE;
0754 
0755     if (event->attr.config == UNCORE_FIXED_EVENT) {
0756         /* no fixed counter */
0757         if (!pmu->type->fixed_ctl)
0758             return -EINVAL;
0759         /*
0760          * if there is only one fixed counter, only the first pmu
0761          * can access the fixed counter
0762          */
0763         if (pmu->type->single_fixed && pmu->pmu_idx > 0)
0764             return -EINVAL;
0765 
0766         /* fixed counters have event field hardcoded to zero */
0767         hwc->config = 0ULL;
0768     } else if (is_freerunning_event(event)) {
0769         hwc->config = event->attr.config;
0770         if (!check_valid_freerunning_event(box, event))
0771             return -EINVAL;
0772         event->hw.idx = UNCORE_PMC_IDX_FREERUNNING;
0773         /*
0774          * The free running counter event and free running counter
0775          * are always 1:1 mapped.
0776          * The free running counter is always active.
0777          * Assign the free running counter here.
0778          */
0779         event->hw.event_base = uncore_freerunning_counter(box, event);
0780     } else {
0781         hwc->config = event->attr.config &
0782                   (pmu->type->event_mask | ((u64)pmu->type->event_mask_ext << 32));
0783         if (pmu->type->ops->hw_config) {
0784             ret = pmu->type->ops->hw_config(box, event);
0785             if (ret)
0786                 return ret;
0787         }
0788     }
0789 
0790     if (event->group_leader != event)
0791         ret = uncore_validate_group(pmu, event);
0792     else
0793         ret = 0;
0794 
0795     return ret;
0796 }
0797 
0798 static void uncore_pmu_enable(struct pmu *pmu)
0799 {
0800     struct intel_uncore_pmu *uncore_pmu;
0801     struct intel_uncore_box *box;
0802 
0803     uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
0804 
0805     box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
0806     if (!box)
0807         return;
0808 
0809     if (uncore_pmu->type->ops->enable_box)
0810         uncore_pmu->type->ops->enable_box(box);
0811 }
0812 
0813 static void uncore_pmu_disable(struct pmu *pmu)
0814 {
0815     struct intel_uncore_pmu *uncore_pmu;
0816     struct intel_uncore_box *box;
0817 
0818     uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
0819 
0820     box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
0821     if (!box)
0822         return;
0823 
0824     if (uncore_pmu->type->ops->disable_box)
0825         uncore_pmu->type->ops->disable_box(box);
0826 }
0827 
0828 static ssize_t uncore_get_attr_cpumask(struct device *dev,
0829                 struct device_attribute *attr, char *buf)
0830 {
0831     return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask);
0832 }
0833 
0834 static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
0835 
0836 static struct attribute *uncore_pmu_attrs[] = {
0837     &dev_attr_cpumask.attr,
0838     NULL,
0839 };
0840 
0841 static const struct attribute_group uncore_pmu_attr_group = {
0842     .attrs = uncore_pmu_attrs,
0843 };
0844 
0845 void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu)
0846 {
0847     struct intel_uncore_type *type = pmu->type;
0848 
0849     if (type->num_boxes == 1)
0850         sprintf(pmu_name, "uncore_type_%u", type->type_id);
0851     else {
0852         sprintf(pmu_name, "uncore_type_%u_%d",
0853             type->type_id, type->box_ids[pmu->pmu_idx]);
0854     }
0855 }
0856 
0857 static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu)
0858 {
0859     struct intel_uncore_type *type = pmu->type;
0860 
0861     /*
0862      * No uncore block name in the discovery table.
0863      * Use uncore_type_<typeid>_<boxid> as the name.
0864      */
0865     if (!type->name) {
0866         uncore_get_alias_name(pmu->name, pmu);
0867         return;
0868     }
0869 
0870     if (type->num_boxes == 1) {
0871         if (strlen(type->name) > 0)
0872             sprintf(pmu->name, "uncore_%s", type->name);
0873         else
0874             sprintf(pmu->name, "uncore");
0875     } else {
0876         /*
0877          * Use the box ID from the discovery table if applicable.
0878          */
0879         sprintf(pmu->name, "uncore_%s_%d", type->name,
0880             type->box_ids ? type->box_ids[pmu->pmu_idx] : pmu->pmu_idx);
0881     }
0882 }
0883 
0884 static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
0885 {
0886     int ret;
0887 
0888     if (!pmu->type->pmu) {
0889         pmu->pmu = (struct pmu) {
0890             .attr_groups    = pmu->type->attr_groups,
0891             .task_ctx_nr    = perf_invalid_context,
0892             .pmu_enable = uncore_pmu_enable,
0893             .pmu_disable    = uncore_pmu_disable,
0894             .event_init = uncore_pmu_event_init,
0895             .add        = uncore_pmu_event_add,
0896             .del        = uncore_pmu_event_del,
0897             .start      = uncore_pmu_event_start,
0898             .stop       = uncore_pmu_event_stop,
0899             .read       = uncore_pmu_event_read,
0900             .module     = THIS_MODULE,
0901             .capabilities   = PERF_PMU_CAP_NO_EXCLUDE,
0902             .attr_update    = pmu->type->attr_update,
0903         };
0904     } else {
0905         pmu->pmu = *pmu->type->pmu;
0906         pmu->pmu.attr_groups = pmu->type->attr_groups;
0907         pmu->pmu.attr_update = pmu->type->attr_update;
0908     }
0909 
0910     uncore_get_pmu_name(pmu);
0911 
0912     ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
0913     if (!ret)
0914         pmu->registered = true;
0915     return ret;
0916 }
0917 
0918 static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
0919 {
0920     if (!pmu->registered)
0921         return;
0922     perf_pmu_unregister(&pmu->pmu);
0923     pmu->registered = false;
0924 }
0925 
0926 static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
0927 {
0928     int die;
0929 
0930     for (die = 0; die < uncore_max_dies(); die++)
0931         kfree(pmu->boxes[die]);
0932     kfree(pmu->boxes);
0933 }
0934 
0935 static void uncore_type_exit(struct intel_uncore_type *type)
0936 {
0937     struct intel_uncore_pmu *pmu = type->pmus;
0938     int i;
0939 
0940     if (type->cleanup_mapping)
0941         type->cleanup_mapping(type);
0942 
0943     if (pmu) {
0944         for (i = 0; i < type->num_boxes; i++, pmu++) {
0945             uncore_pmu_unregister(pmu);
0946             uncore_free_boxes(pmu);
0947         }
0948         kfree(type->pmus);
0949         type->pmus = NULL;
0950     }
0951     if (type->box_ids) {
0952         kfree(type->box_ids);
0953         type->box_ids = NULL;
0954     }
0955     kfree(type->events_group);
0956     type->events_group = NULL;
0957 }
0958 
0959 static void uncore_types_exit(struct intel_uncore_type **types)
0960 {
0961     for (; *types; types++)
0962         uncore_type_exit(*types);
0963 }
0964 
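/*
 * Allocate one PMU per box of the type, build the "events" attribute
 * group from the event descriptors, and set up the unconstrained event
 * constraint that covers all generic counters of the type.
 */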
0965 static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
0966 {
0967     struct intel_uncore_pmu *pmus;
0968     size_t size;
0969     int i, j;
0970 
0971     pmus = kcalloc(type->num_boxes, sizeof(*pmus), GFP_KERNEL);
0972     if (!pmus)
0973         return -ENOMEM;
0974 
0975     size = uncore_max_dies() * sizeof(struct intel_uncore_box *);
0976 
0977     for (i = 0; i < type->num_boxes; i++) {
0978         pmus[i].func_id = setid ? i : -1;
0979         pmus[i].pmu_idx = i;
0980         pmus[i].type    = type;
0981         pmus[i].boxes   = kzalloc(size, GFP_KERNEL);
0982         if (!pmus[i].boxes)
0983             goto err;
0984     }
0985 
0986     type->pmus = pmus;
0987     type->unconstrainted = (struct event_constraint)
0988         __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
0989                 0, type->num_counters, 0, 0);
0990 
0991     if (type->event_descs) {
0992         struct {
0993             struct attribute_group group;
0994             struct attribute *attrs[];
0995         } *attr_group;
0996         for (i = 0; type->event_descs[i].attr.attr.name; i++);
0997 
0998         attr_group = kzalloc(struct_size(attr_group, attrs, i + 1),
0999                                 GFP_KERNEL);
1000         if (!attr_group)
1001             goto err;
1002 
1003         attr_group->group.name = "events";
1004         attr_group->group.attrs = attr_group->attrs;
1005 
1006         for (j = 0; j < i; j++)
1007             attr_group->attrs[j] = &type->event_descs[j].attr.attr;
1008 
1009         type->events_group = &attr_group->group;
1010     }
1011 
1012     type->pmu_group = &uncore_pmu_attr_group;
1013 
1014     if (type->set_mapping)
1015         type->set_mapping(type);
1016 
1017     return 0;
1018 
1019 err:
1020     for (i = 0; i < type->num_boxes; i++)
1021         kfree(pmus[i].boxes);
1022     kfree(pmus);
1023 
1024     return -ENOMEM;
1025 }
1026 
1027 static int __init
1028 uncore_types_init(struct intel_uncore_type **types, bool setid)
1029 {
1030     int ret;
1031 
1032     for (; *types; types++) {
1033         ret = uncore_type_init(*types, setid);
1034         if (ret)
1035             return ret;
1036     }
1037     return 0;
1038 }
1039 
1040 /*
1041  * Get the die information of a PCI device.
1042  * @pdev: The PCI device.
1043  * @die: The die id which the device maps to.
1044  */
1045 static int uncore_pci_get_dev_die_info(struct pci_dev *pdev, int *die)
1046 {
1047     *die = uncore_pcibus_to_dieid(pdev->bus);
1048     if (*die < 0)
1049         return -EINVAL;
1050 
1051     return 0;
1052 }
1053 
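/*
 * Look up the PMU of a PCI device by matching its domain/bus/devfn
 * against the box control addresses from the discovery table.
 */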
1054 static struct intel_uncore_pmu *
1055 uncore_pci_find_dev_pmu_from_types(struct pci_dev *pdev)
1056 {
1057     struct intel_uncore_type **types = uncore_pci_uncores;
1058     struct intel_uncore_type *type;
1059     u64 box_ctl;
1060     int i, die;
1061 
1062     for (; *types; types++) {
1063         type = *types;
1064         for (die = 0; die < __uncore_max_dies; die++) {
1065             for (i = 0; i < type->num_boxes; i++) {
1066                 if (!type->box_ctls[die])
1067                     continue;
1068                 box_ctl = type->box_ctls[die] + type->pci_offsets[i];
1069                 if (pdev->devfn == UNCORE_DISCOVERY_PCI_DEVFN(box_ctl) &&
1070                     pdev->bus->number == UNCORE_DISCOVERY_PCI_BUS(box_ctl) &&
1071                     pci_domain_nr(pdev->bus) == UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl))
1072                     return &type->pmus[i];
1073             }
1074         }
1075     }
1076 
1077     return NULL;
1078 }
1079 
1080 /*
1081  * Find the PMU of a PCI device.
1082  * @pdev: The PCI device.
1083  * @ids: The ID table of the available PCI devices with a PMU.
1084  *       If NULL, search the whole uncore_pci_uncores.
1085  */
1086 static struct intel_uncore_pmu *
1087 uncore_pci_find_dev_pmu(struct pci_dev *pdev, const struct pci_device_id *ids)
1088 {
1089     struct intel_uncore_pmu *pmu = NULL;
1090     struct intel_uncore_type *type;
1091     kernel_ulong_t data;
1092     unsigned int devfn;
1093 
1094     if (!ids)
1095         return uncore_pci_find_dev_pmu_from_types(pdev);
1096 
1097     while (ids && ids->vendor) {
1098         if ((ids->vendor == pdev->vendor) &&
1099             (ids->device == pdev->device)) {
1100             data = ids->driver_data;
1101             devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(data),
1102                       UNCORE_PCI_DEV_FUNC(data));
1103             if (devfn == pdev->devfn) {
1104                 type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(data)];
1105                 pmu = &type->pmus[UNCORE_PCI_DEV_IDX(data)];
1106                 break;
1107             }
1108         }
1109         ids++;
1110     }
1111     return pmu;
1112 }
1113 
1114 /*
1115  * Register the PMU for a PCI device
1116  * @pdev: The PCI device.
1117  * @type: The corresponding PMU type of the device.
1118  * @pmu: The corresponding PMU of the device.
1119  * @die: The die id which the device maps to.
1120  */
1121 static int uncore_pci_pmu_register(struct pci_dev *pdev,
1122                    struct intel_uncore_type *type,
1123                    struct intel_uncore_pmu *pmu,
1124                    int die)
1125 {
1126     struct intel_uncore_box *box;
1127     int ret;
1128 
1129     if (WARN_ON_ONCE(pmu->boxes[die] != NULL))
1130         return -EINVAL;
1131 
1132     box = uncore_alloc_box(type, NUMA_NO_NODE);
1133     if (!box)
1134         return -ENOMEM;
1135 
1136     if (pmu->func_id < 0)
1137         pmu->func_id = pdev->devfn;
1138     else
1139         WARN_ON_ONCE(pmu->func_id != pdev->devfn);
1140 
1141     atomic_inc(&box->refcnt);
1142     box->dieid = die;
1143     box->pci_dev = pdev;
1144     box->pmu = pmu;
1145     uncore_box_init(box);
1146 
1147     pmu->boxes[die] = box;
1148     if (atomic_inc_return(&pmu->activeboxes) > 1)
1149         return 0;
1150 
1151     /* First active box registers the pmu */
1152     ret = uncore_pmu_register(pmu);
1153     if (ret) {
1154         pmu->boxes[die] = NULL;
1155         uncore_box_exit(box);
1156         kfree(box);
1157     }
1158     return ret;
1159 }
1160 
1161 /*
1162  * add a pci uncore device
1163  */
1164 static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
1165 {
1166     struct intel_uncore_type *type;
1167     struct intel_uncore_pmu *pmu = NULL;
1168     int die, ret;
1169 
1170     ret = uncore_pci_get_dev_die_info(pdev, &die);
1171     if (ret)
1172         return ret;
1173 
1174     if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
1175         int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
1176 
1177         uncore_extra_pci_dev[die].dev[idx] = pdev;
1178         pci_set_drvdata(pdev, NULL);
1179         return 0;
1180     }
1181 
1182     type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
1183 
1184     /*
1185      * Some platforms, e.g. Knights Landing, use a common PCI device ID
1186      * for multiple instances of an uncore PMU device type. We should check
1187      * the PCI slot and function to identify the uncore box.
1188      */
1189     if (id->driver_data & ~0xffff) {
1190         struct pci_driver *pci_drv = to_pci_driver(pdev->dev.driver);
1191 
1192         pmu = uncore_pci_find_dev_pmu(pdev, pci_drv->id_table);
1193         if (pmu == NULL)
1194             return -ENODEV;
1195     } else {
1196         /*
1197          * For a performance monitoring unit with multiple boxes,
1198          * each box has a different function id.
1199          */
1200         pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
1201     }
1202 
1203     ret = uncore_pci_pmu_register(pdev, type, pmu, die);
1204 
1205     pci_set_drvdata(pdev, pmu->boxes[die]);
1206 
1207     return ret;
1208 }
1209 
1210 /*
1211  * Unregister the PMU of a PCI device
1212  * @pmu: The PMU to be unregistered.
1213  * @die: The die id which the device maps to.
1214  */
1215 static void uncore_pci_pmu_unregister(struct intel_uncore_pmu *pmu, int die)
1216 {
1217     struct intel_uncore_box *box = pmu->boxes[die];
1218 
1219     pmu->boxes[die] = NULL;
1220     if (atomic_dec_return(&pmu->activeboxes) == 0)
1221         uncore_pmu_unregister(pmu);
1222     uncore_box_exit(box);
1223     kfree(box);
1224 }
1225 
1226 static void uncore_pci_remove(struct pci_dev *pdev)
1227 {
1228     struct intel_uncore_box *box;
1229     struct intel_uncore_pmu *pmu;
1230     int i, die;
1231 
1232     if (uncore_pci_get_dev_die_info(pdev, &die))
1233         return;
1234 
1235     box = pci_get_drvdata(pdev);
1236     if (!box) {
1237         for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
1238             if (uncore_extra_pci_dev[die].dev[i] == pdev) {
1239                 uncore_extra_pci_dev[die].dev[i] = NULL;
1240                 break;
1241             }
1242         }
1243         WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
1244         return;
1245     }
1246 
1247     pmu = box->pmu;
1248 
1249     pci_set_drvdata(pdev, NULL);
1250 
1251     uncore_pci_pmu_unregister(pmu, die);
1252 }
1253 
1254 static int uncore_bus_notify(struct notifier_block *nb,
1255                  unsigned long action, void *data,
1256                  const struct pci_device_id *ids)
1257 {
1258     struct device *dev = data;
1259     struct pci_dev *pdev = to_pci_dev(dev);
1260     struct intel_uncore_pmu *pmu;
1261     int die;
1262 
1263     /* Unregister the PMU when the device is going to be deleted. */
1264     if (action != BUS_NOTIFY_DEL_DEVICE)
1265         return NOTIFY_DONE;
1266 
1267     pmu = uncore_pci_find_dev_pmu(pdev, ids);
1268     if (!pmu)
1269         return NOTIFY_DONE;
1270 
1271     if (uncore_pci_get_dev_die_info(pdev, &die))
1272         return NOTIFY_DONE;
1273 
1274     uncore_pci_pmu_unregister(pmu, die);
1275 
1276     return NOTIFY_OK;
1277 }
1278 
1279 static int uncore_pci_sub_bus_notify(struct notifier_block *nb,
1280                      unsigned long action, void *data)
1281 {
1282     return uncore_bus_notify(nb, action, data,
1283                  uncore_pci_sub_driver->id_table);
1284 }
1285 
1286 static struct notifier_block uncore_pci_sub_notifier = {
1287     .notifier_call = uncore_pci_sub_bus_notify,
1288 };
1289 
1290 static void uncore_pci_sub_driver_init(void)
1291 {
1292     const struct pci_device_id *ids = uncore_pci_sub_driver->id_table;
1293     struct intel_uncore_type *type;
1294     struct intel_uncore_pmu *pmu;
1295     struct pci_dev *pci_sub_dev;
1296     bool notify = false;
1297     unsigned int devfn;
1298     int die;
1299 
1300     while (ids && ids->vendor) {
1301         pci_sub_dev = NULL;
1302         type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(ids->driver_data)];
1303         /*
1304          * Search for the available device and register the
1305          * corresponding PMU.
1306          */
1307         while ((pci_sub_dev = pci_get_device(PCI_VENDOR_ID_INTEL,
1308                              ids->device, pci_sub_dev))) {
1309             devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data),
1310                       UNCORE_PCI_DEV_FUNC(ids->driver_data));
1311             if (devfn != pci_sub_dev->devfn)
1312                 continue;
1313 
1314             pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)];
1315             if (!pmu)
1316                 continue;
1317 
1318             if (uncore_pci_get_dev_die_info(pci_sub_dev, &die))
1319                 continue;
1320 
1321             if (!uncore_pci_pmu_register(pci_sub_dev, type, pmu,
1322                              die))
1323                 notify = true;
1324         }
1325         ids++;
1326     }
1327 
1328     if (notify && bus_register_notifier(&pci_bus_type, &uncore_pci_sub_notifier))
1329         notify = false;
1330 
1331     if (!notify)
1332         uncore_pci_sub_driver = NULL;
1333 }
1334 
1335 static int uncore_pci_bus_notify(struct notifier_block *nb,
1336                      unsigned long action, void *data)
1337 {
1338     return uncore_bus_notify(nb, action, data, NULL);
1339 }
1340 
1341 static struct notifier_block uncore_pci_notifier = {
1342     .notifier_call = uncore_pci_bus_notify,
1343 };
1344 
1345 
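/*
 * Register the PCI type PMUs found via the discovery table when no
 * dedicated PCI driver is available, and install a bus notifier to
 * catch device removal.
 */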
1346 static void uncore_pci_pmus_register(void)
1347 {
1348     struct intel_uncore_type **types = uncore_pci_uncores;
1349     struct intel_uncore_type *type;
1350     struct intel_uncore_pmu *pmu;
1351     struct pci_dev *pdev;
1352     u64 box_ctl;
1353     int i, die;
1354 
1355     for (; *types; types++) {
1356         type = *types;
1357         for (die = 0; die < __uncore_max_dies; die++) {
1358             for (i = 0; i < type->num_boxes; i++) {
1359                 if (!type->box_ctls[die])
1360                     continue;
1361                 box_ctl = type->box_ctls[die] + type->pci_offsets[i];
1362                 pdev = pci_get_domain_bus_and_slot(UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl),
1363                                    UNCORE_DISCOVERY_PCI_BUS(box_ctl),
1364                                    UNCORE_DISCOVERY_PCI_DEVFN(box_ctl));
1365                 if (!pdev)
1366                     continue;
1367                 pmu = &type->pmus[i];
1368 
1369                 uncore_pci_pmu_register(pdev, type, pmu, die);
1370             }
1371         }
1372     }
1373 
1374     bus_register_notifier(&pci_bus_type, &uncore_pci_notifier);
1375 }
1376 
1377 static int __init uncore_pci_init(void)
1378 {
1379     size_t size;
1380     int ret;
1381 
1382     size = uncore_max_dies() * sizeof(struct pci_extra_dev);
1383     uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
1384     if (!uncore_extra_pci_dev) {
1385         ret = -ENOMEM;
1386         goto err;
1387     }
1388 
1389     ret = uncore_types_init(uncore_pci_uncores, false);
1390     if (ret)
1391         goto errtype;
1392 
1393     if (uncore_pci_driver) {
1394         uncore_pci_driver->probe = uncore_pci_probe;
1395         uncore_pci_driver->remove = uncore_pci_remove;
1396 
1397         ret = pci_register_driver(uncore_pci_driver);
1398         if (ret)
1399             goto errtype;
1400     } else
1401         uncore_pci_pmus_register();
1402 
1403     if (uncore_pci_sub_driver)
1404         uncore_pci_sub_driver_init();
1405 
1406     pcidrv_registered = true;
1407     return 0;
1408 
1409 errtype:
1410     uncore_types_exit(uncore_pci_uncores);
1411     kfree(uncore_extra_pci_dev);
1412     uncore_extra_pci_dev = NULL;
1413     uncore_free_pcibus_map();
1414 err:
1415     uncore_pci_uncores = empty_uncore;
1416     return ret;
1417 }
1418 
1419 static void uncore_pci_exit(void)
1420 {
1421     if (pcidrv_registered) {
1422         pcidrv_registered = false;
1423         if (uncore_pci_sub_driver)
1424             bus_unregister_notifier(&pci_bus_type, &uncore_pci_sub_notifier);
1425         if (uncore_pci_driver)
1426             pci_unregister_driver(uncore_pci_driver);
1427         else
1428             bus_unregister_notifier(&pci_bus_type, &uncore_pci_notifier);
1429         uncore_types_exit(uncore_pci_uncores);
1430         kfree(uncore_extra_pci_dev);
1431         uncore_free_pcibus_map();
1432     }
1433 }
1434 
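/*
 * Move the boxes of one uncore type from old_cpu to new_cpu on the
 * affected die, migrating any active perf context so that the events
 * keep counting on the new CPU.
 */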
1435 static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
1436                    int new_cpu)
1437 {
1438     struct intel_uncore_pmu *pmu = type->pmus;
1439     struct intel_uncore_box *box;
1440     int i, die;
1441 
1442     die = topology_logical_die_id(old_cpu < 0 ? new_cpu : old_cpu);
1443     for (i = 0; i < type->num_boxes; i++, pmu++) {
1444         box = pmu->boxes[die];
1445         if (!box)
1446             continue;
1447 
1448         if (old_cpu < 0) {
1449             WARN_ON_ONCE(box->cpu != -1);
1450             box->cpu = new_cpu;
1451             continue;
1452         }
1453 
1454         WARN_ON_ONCE(box->cpu != old_cpu);
1455         box->cpu = -1;
1456         if (new_cpu < 0)
1457             continue;
1458 
1459         uncore_pmu_cancel_hrtimer(box);
1460         perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu);
1461         box->cpu = new_cpu;
1462     }
1463 }
1464 
1465 static void uncore_change_context(struct intel_uncore_type **uncores,
1466                   int old_cpu, int new_cpu)
1467 {
1468     for (; *uncores; uncores++)
1469         uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
1470 }
1471 
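/* Drop one reference on each box of the given die; exit a box on its last reference. */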
1472 static void uncore_box_unref(struct intel_uncore_type **types, int id)
1473 {
1474     struct intel_uncore_type *type;
1475     struct intel_uncore_pmu *pmu;
1476     struct intel_uncore_box *box;
1477     int i;
1478 
1479     for (; *types; types++) {
1480         type = *types;
1481         pmu = type->pmus;
1482         for (i = 0; i < type->num_boxes; i++, pmu++) {
1483             box = pmu->boxes[id];
1484             if (box && atomic_dec_return(&box->refcnt) == 0)
1485                 uncore_box_exit(box);
1486         }
1487     }
1488 }
1489 
1490 static int uncore_event_cpu_offline(unsigned int cpu)
1491 {
1492     int die, target;
1493 
1494     /* Check if the exiting cpu is used for collecting uncore events */
1495     if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
1496         goto unref;
1497     /* Find a new cpu to collect uncore events */
1498     target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
1499 
1500     /* Migrate uncore events to the new target */
1501     if (target < nr_cpu_ids)
1502         cpumask_set_cpu(target, &uncore_cpu_mask);
1503     else
1504         target = -1;
1505 
1506     uncore_change_context(uncore_msr_uncores, cpu, target);
1507     uncore_change_context(uncore_mmio_uncores, cpu, target);
1508     uncore_change_context(uncore_pci_uncores, cpu, target);
1509 
1510 unref:
1511     /* Clear the references */
1512     die = topology_logical_die_id(cpu);
1513     uncore_box_unref(uncore_msr_uncores, die);
1514     uncore_box_unref(uncore_mmio_uncores, die);
1515     return 0;
1516 }
1517 
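/*
 * Allocate the boxes that are still missing for this die on the node of
 * the onlining CPU. Boxes are installed all-or-nothing: on allocation
 * failure everything allocated here is freed again.
 */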
1518 static int allocate_boxes(struct intel_uncore_type **types,
1519              unsigned int die, unsigned int cpu)
1520 {
1521     struct intel_uncore_box *box, *tmp;
1522     struct intel_uncore_type *type;
1523     struct intel_uncore_pmu *pmu;
1524     LIST_HEAD(allocated);
1525     int i;
1526 
1527     /* Try to allocate all required boxes */
1528     for (; *types; types++) {
1529         type = *types;
1530         pmu = type->pmus;
1531         for (i = 0; i < type->num_boxes; i++, pmu++) {
1532             if (pmu->boxes[die])
1533                 continue;
1534             box = uncore_alloc_box(type, cpu_to_node(cpu));
1535             if (!box)
1536                 goto cleanup;
1537             box->pmu = pmu;
1538             box->dieid = die;
1539             list_add(&box->active_list, &allocated);
1540         }
1541     }
1542     /* Install them in the pmus */
1543     list_for_each_entry_safe(box, tmp, &allocated, active_list) {
1544         list_del_init(&box->active_list);
1545         box->pmu->boxes[die] = box;
1546     }
1547     return 0;
1548 
1549 cleanup:
1550     list_for_each_entry_safe(box, tmp, &allocated, active_list) {
1551         list_del_init(&box->active_list);
1552         kfree(box);
1553     }
1554     return -ENOMEM;
1555 }
1556 
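/* Take one reference on each box of the given die; init a box on its first reference. */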
1557 static int uncore_box_ref(struct intel_uncore_type **types,
1558               int id, unsigned int cpu)
1559 {
1560     struct intel_uncore_type *type;
1561     struct intel_uncore_pmu *pmu;
1562     struct intel_uncore_box *box;
1563     int i, ret;
1564 
1565     ret = allocate_boxes(types, id, cpu);
1566     if (ret)
1567         return ret;
1568 
1569     for (; *types; types++) {
1570         type = *types;
1571         pmu = type->pmus;
1572         for (i = 0; i < type->num_boxes; i++, pmu++) {
1573             box = pmu->boxes[id];
1574             if (box && atomic_inc_return(&box->refcnt) == 1)
1575                 uncore_box_init(box);
1576         }
1577     }
1578     return 0;
1579 }
1580 
1581 static int uncore_event_cpu_online(unsigned int cpu)
1582 {
1583     int die, target, msr_ret, mmio_ret;
1584 
1585     die = topology_logical_die_id(cpu);
1586     msr_ret = uncore_box_ref(uncore_msr_uncores, die, cpu);
1587     mmio_ret = uncore_box_ref(uncore_mmio_uncores, die, cpu);
1588     if (msr_ret && mmio_ret)
1589         return -ENOMEM;
1590 
1591     /*
1592      * Check if there is an online cpu in the package
1593      * which collects uncore events already.
1594      */
1595     target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu));
1596     if (target < nr_cpu_ids)
1597         return 0;
1598 
1599     cpumask_set_cpu(cpu, &uncore_cpu_mask);
1600 
1601     if (!msr_ret)
1602         uncore_change_context(uncore_msr_uncores, -1, cpu);
1603     if (!mmio_ret)
1604         uncore_change_context(uncore_mmio_uncores, -1, cpu);
1605     uncore_change_context(uncore_pci_uncores, -1, cpu);
1606     return 0;
1607 }
1608 
1609 static int __init type_pmu_register(struct intel_uncore_type *type)
1610 {
1611     int i, ret;
1612 
1613     for (i = 0; i < type->num_boxes; i++) {
1614         ret = uncore_pmu_register(&type->pmus[i]);
1615         if (ret)
1616             return ret;
1617     }
1618     return 0;
1619 }
1620 
1621 static int __init uncore_msr_pmus_register(void)
1622 {
1623     struct intel_uncore_type **types = uncore_msr_uncores;
1624     int ret;
1625 
1626     for (; *types; types++) {
1627         ret = type_pmu_register(*types);
1628         if (ret)
1629             return ret;
1630     }
1631     return 0;
1632 }
1633 
1634 static int __init uncore_cpu_init(void)
1635 {
1636     int ret;
1637 
1638     ret = uncore_types_init(uncore_msr_uncores, true);
1639     if (ret)
1640         goto err;
1641 
1642     ret = uncore_msr_pmus_register();
1643     if (ret)
1644         goto err;
1645     return 0;
1646 err:
1647     uncore_types_exit(uncore_msr_uncores);
1648     uncore_msr_uncores = empty_uncore;
1649     return ret;
1650 }
1651 
1652 static int __init uncore_mmio_init(void)
1653 {
1654     struct intel_uncore_type **types = uncore_mmio_uncores;
1655     int ret;
1656 
1657     ret = uncore_types_init(types, true);
1658     if (ret)
1659         goto err;
1660 
1661     for (; *types; types++) {
1662         ret = type_pmu_register(*types);
1663         if (ret)
1664             goto err;
1665     }
1666     return 0;
1667 err:
1668     uncore_types_exit(uncore_mmio_uncores);
1669     uncore_mmio_uncores = empty_uncore;
1670     return ret;
1671 }
1672 
1673 struct intel_uncore_init_fun {
1674     void    (*cpu_init)(void);
1675     int (*pci_init)(void);
1676     void    (*mmio_init)(void);
1677     bool    use_discovery;
1678 };
1679 
1680 static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
1681     .cpu_init = nhm_uncore_cpu_init,
1682 };
1683 
1684 static const struct intel_uncore_init_fun snb_uncore_init __initconst = {
1685     .cpu_init = snb_uncore_cpu_init,
1686     .pci_init = snb_uncore_pci_init,
1687 };
1688 
1689 static const struct intel_uncore_init_fun ivb_uncore_init __initconst = {
1690     .cpu_init = snb_uncore_cpu_init,
1691     .pci_init = ivb_uncore_pci_init,
1692 };
1693 
1694 static const struct intel_uncore_init_fun hsw_uncore_init __initconst = {
1695     .cpu_init = snb_uncore_cpu_init,
1696     .pci_init = hsw_uncore_pci_init,
1697 };
1698 
1699 static const struct intel_uncore_init_fun bdw_uncore_init __initconst = {
1700     .cpu_init = snb_uncore_cpu_init,
1701     .pci_init = bdw_uncore_pci_init,
1702 };
1703 
1704 static const struct intel_uncore_init_fun snbep_uncore_init __initconst = {
1705     .cpu_init = snbep_uncore_cpu_init,
1706     .pci_init = snbep_uncore_pci_init,
1707 };
1708 
1709 static const struct intel_uncore_init_fun nhmex_uncore_init __initconst = {
1710     .cpu_init = nhmex_uncore_cpu_init,
1711 };
1712 
1713 static const struct intel_uncore_init_fun ivbep_uncore_init __initconst = {
1714     .cpu_init = ivbep_uncore_cpu_init,
1715     .pci_init = ivbep_uncore_pci_init,
1716 };
1717 
1718 static const struct intel_uncore_init_fun hswep_uncore_init __initconst = {
1719     .cpu_init = hswep_uncore_cpu_init,
1720     .pci_init = hswep_uncore_pci_init,
1721 };
1722 
1723 static const struct intel_uncore_init_fun bdx_uncore_init __initconst = {
1724     .cpu_init = bdx_uncore_cpu_init,
1725     .pci_init = bdx_uncore_pci_init,
1726 };
1727 
1728 static const struct intel_uncore_init_fun knl_uncore_init __initconst = {
1729     .cpu_init = knl_uncore_cpu_init,
1730     .pci_init = knl_uncore_pci_init,
1731 };
1732 
1733 static const struct intel_uncore_init_fun skl_uncore_init __initconst = {
1734     .cpu_init = skl_uncore_cpu_init,
1735     .pci_init = skl_uncore_pci_init,
1736 };
1737 
1738 static const struct intel_uncore_init_fun skx_uncore_init __initconst = {
1739     .cpu_init = skx_uncore_cpu_init,
1740     .pci_init = skx_uncore_pci_init,
1741 };
1742 
1743 static const struct intel_uncore_init_fun icl_uncore_init __initconst = {
1744     .cpu_init = icl_uncore_cpu_init,
1745     .pci_init = skl_uncore_pci_init,
1746 };
1747 
1748 static const struct intel_uncore_init_fun tgl_uncore_init __initconst = {
1749     .cpu_init = tgl_uncore_cpu_init,
1750     .mmio_init = tgl_uncore_mmio_init,
1751 };
1752 
1753 static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = {
1754     .cpu_init = tgl_uncore_cpu_init,
1755     .mmio_init = tgl_l_uncore_mmio_init,
1756 };
1757 
1758 static const struct intel_uncore_init_fun rkl_uncore_init __initconst = {
1759     .cpu_init = tgl_uncore_cpu_init,
1760     .pci_init = skl_uncore_pci_init,
1761 };
1762 
1763 static const struct intel_uncore_init_fun adl_uncore_init __initconst = {
1764     .cpu_init = adl_uncore_cpu_init,
1765     .mmio_init = adl_uncore_mmio_init,
1766 };
1767 
1768 static const struct intel_uncore_init_fun icx_uncore_init __initconst = {
1769     .cpu_init = icx_uncore_cpu_init,
1770     .pci_init = icx_uncore_pci_init,
1771     .mmio_init = icx_uncore_mmio_init,
1772 };
1773 
1774 static const struct intel_uncore_init_fun snr_uncore_init __initconst = {
1775     .cpu_init = snr_uncore_cpu_init,
1776     .pci_init = snr_uncore_pci_init,
1777     .mmio_init = snr_uncore_mmio_init,
1778 };
1779 
1780 static const struct intel_uncore_init_fun spr_uncore_init __initconst = {
1781     .cpu_init = spr_uncore_cpu_init,
1782     .pci_init = spr_uncore_pci_init,
1783     .mmio_init = spr_uncore_mmio_init,
1784     .use_discovery = true,
1785 };
1786 
1787 static const struct intel_uncore_init_fun generic_uncore_init __initconst = {
1788     .cpu_init = intel_uncore_generic_uncore_cpu_init,
1789     .pci_init = intel_uncore_generic_uncore_pci_init,
1790     .mmio_init = intel_uncore_generic_uncore_mmio_init,
1791 };
1792 
1793 static const struct x86_cpu_id intel_uncore_match[] __initconst = {
1794     X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,      &nhm_uncore_init),
1795     X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,     &nhm_uncore_init),
1796     X86_MATCH_INTEL_FAM6_MODEL(WESTMERE,        &nhm_uncore_init),
1797     X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP,     &nhm_uncore_init),
1798     X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,     &snb_uncore_init),
1799     X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,       &ivb_uncore_init),
1800     X86_MATCH_INTEL_FAM6_MODEL(HASWELL,     &hsw_uncore_init),
1801     X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,       &hsw_uncore_init),
1802     X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,       &hsw_uncore_init),
1803     X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,       &bdw_uncore_init),
1804     X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,     &bdw_uncore_init),
1805     X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,   &snbep_uncore_init),
1806     X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,      &nhmex_uncore_init),
1807     X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX,     &nhmex_uncore_init),
1808     X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,     &ivbep_uncore_init),
1809     X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,       &hswep_uncore_init),
1810     X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,     &bdx_uncore_init),
1811     X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,     &bdx_uncore_init),
1812     X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,    &knl_uncore_init),
1813     X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,    &knl_uncore_init),
1814     X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,     &skl_uncore_init),
1815     X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,       &skl_uncore_init),
1816     X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,       &skx_uncore_init),
1817     X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,      &skl_uncore_init),
1818     X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,        &skl_uncore_init),
1819     X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L,     &skl_uncore_init),
1820     X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE,       &skl_uncore_init),
1821     X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L,       &icl_uncore_init),
1822     X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI,    &icl_uncore_init),
1823     X86_MATCH_INTEL_FAM6_MODEL(ICELAKE,     &icl_uncore_init),
1824     X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,       &icx_uncore_init),
1825     X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,       &icx_uncore_init),
1826     X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L,     &tgl_l_uncore_init),
1827     X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE,       &tgl_uncore_init),
1828     X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE,      &rkl_uncore_init),
1829     X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,       &adl_uncore_init),
1830     X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,     &adl_uncore_init),
1831     X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N,     &adl_uncore_init),
1832     X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE,      &adl_uncore_init),
1833     X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P,    &adl_uncore_init),
1834     X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,    &spr_uncore_init),
1835     X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,  &snr_uncore_init),
1836     {},
1837 };
1838 MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match);
1839 
1840 static int __init intel_uncore_init(void)
1841 {
1842     const struct x86_cpu_id *id;
1843     struct intel_uncore_init_fun *uncore_init;
1844     int pret = 0, cret = 0, mret = 0, ret;
1845 
1846     if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
1847         return -ENODEV;
1848 
1849     __uncore_max_dies =
1850         topology_max_packages() * topology_max_die_per_package();
1851 
1852     id = x86_match_cpu(intel_uncore_match);
1853     if (!id) {
1854         if (!uncore_no_discover && intel_uncore_has_discovery_tables())
1855             uncore_init = (struct intel_uncore_init_fun *)&generic_uncore_init;
1856         else
1857             return -ENODEV;
1858     } else {
1859         uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
1860         if (uncore_no_discover && uncore_init->use_discovery)
1861             return -ENODEV;
1862         if (uncore_init->use_discovery && !intel_uncore_has_discovery_tables())
1863             return -ENODEV;
1864     }
1865 
1866     if (uncore_init->pci_init) {
1867         pret = uncore_init->pci_init();
1868         if (!pret)
1869             pret = uncore_pci_init();
1870     }
1871 
1872     if (uncore_init->cpu_init) {
1873         uncore_init->cpu_init();
1874         cret = uncore_cpu_init();
1875     }
1876 
1877     if (uncore_init->mmio_init) {
1878         uncore_init->mmio_init();
1879         mret = uncore_mmio_init();
1880     }
1881 
1882     if (cret && pret && mret) {
1883         ret = -ENODEV;
1884         goto free_discovery;
1885     }
1886 
1887     /* Install hotplug callbacks to set up the targets for each package */
1888     ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE,
1889                 "perf/x86/intel/uncore:online",
1890                 uncore_event_cpu_online,
1891                 uncore_event_cpu_offline);
1892     if (ret)
1893         goto err;
1894     return 0;
1895 
1896 err:
1897     uncore_types_exit(uncore_msr_uncores);
1898     uncore_types_exit(uncore_mmio_uncores);
1899     uncore_pci_exit();
1900 free_discovery:
1901     intel_uncore_clear_discovery_tables();
1902     return ret;
1903 }
1904 module_init(intel_uncore_init);
1905 
1906 static void __exit intel_uncore_exit(void)
1907 {
1908     cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE);
1909     uncore_types_exit(uncore_msr_uncores);
1910     uncore_types_exit(uncore_mmio_uncores);
1911     uncore_pci_exit();
1912     intel_uncore_clear_discovery_tables();
1913 }
1914 module_exit(intel_uncore_exit);