// SPDX-License-Identifier: GPL-2.0
/*
 *    Copyright IBM Corp. 2007, 2011
 */

#define KMSG_COMPONENT "cpu"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/workqueue.h>
#include <linux/memblock.h>
#include <linux/uaccess.h>
#include <linux/sysctl.h>
#include <linux/cpuset.h>
#include <linux/device.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/topology.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/mm.h>
#include <linux/nodemask.h>
#include <linux/node.h>
#include <asm/sysinfo.h>

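/*
 * Function codes for the PTF (Perform Topology Function) instruction:
 * request horizontal or vertical CPU polarization, or check whether a
 * topology-change report is pending.
 */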
#define PTF_HORIZONTAL  (0UL)
#define PTF_VERTICAL    (1UL)
#define PTF_CHECK   (2UL)

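/*
 * How topology is presented to the scheduler: HW uses the
 * machine-reported topology (SYSINFO 15.1.x), SINGLE puts every CPU in a
 * group of its own, and PACKAGE groups all present CPUs into a single
 * package.
 */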
enum {
    TOPOLOGY_MODE_HW,
    TOPOLOGY_MODE_SINGLE,
    TOPOLOGY_MODE_PACKAGE,
    TOPOLOGY_MODE_UNINITIALIZED
};

struct mask_info {
    struct mask_info *next;
    unsigned char id;
    cpumask_t mask;
};

static int topology_mode = TOPOLOGY_MODE_UNINITIALIZED;
static void set_topology_timer(void);
static void topology_work_fn(struct work_struct *work);
static struct sysinfo_15_1_x *tl_info;

static DECLARE_WORK(topology_work, topology_work_fn);

/*
 * Socket/Book linked lists and cpu_topology updates are
 * protected by "sched_domains_mutex".
 */
static struct mask_info socket_info;
static struct mask_info book_info;
static struct mask_info drawer_info;

struct cpu_topology_s390 cpu_topology[NR_CPUS];
EXPORT_SYMBOL_GPL(cpu_topology);

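/*
 * Compute the cpumask of all CPUs that share a topology container
 * (socket, book or drawer, depending on which mask_info list is passed)
 * with @cpu. CPUs outside cpu_setup_mask are filtered out.
 */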
static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int cpu)
{
    static cpumask_t mask;

    cpumask_clear(&mask);
    if (!cpumask_test_cpu(cpu, &cpu_setup_mask))
        goto out;
    cpumask_set_cpu(cpu, &mask);
    switch (topology_mode) {
    case TOPOLOGY_MODE_HW:
        while (info) {
            if (cpumask_test_cpu(cpu, &info->mask)) {
                cpumask_copy(&mask, &info->mask);
                break;
            }
            info = info->next;
        }
        break;
    case TOPOLOGY_MODE_PACKAGE:
        cpumask_copy(&mask, cpu_present_mask);
        break;
    default:
        fallthrough;
    case TOPOLOGY_MODE_SINGLE:
        break;
    }
    cpumask_and(&mask, &mask, &cpu_setup_mask);
out:
    cpumask_copy(dst, &mask);
}

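/*
 * Compute the mask of hardware threads (SMT siblings) of @cpu. The core
 * base is @cpu rounded down to a multiple of smp_cpu_mtid + 1; e.g. with
 * smp_cpu_mtid == 1 (two threads per core), cpu 5 yields base 4 and the
 * sibling mask {4, 5}.
 */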
static void cpu_thread_map(cpumask_t *dst, unsigned int cpu)
{
    static cpumask_t mask;
    int i;

    cpumask_clear(&mask);
    if (!cpumask_test_cpu(cpu, &cpu_setup_mask))
        goto out;
    cpumask_set_cpu(cpu, &mask);
    if (topology_mode != TOPOLOGY_MODE_HW)
        goto out;
    cpu -= cpu % (smp_cpu_mtid + 1);
    for (i = 0; i <= smp_cpu_mtid; i++) {
        if (cpumask_test_cpu(cpu + i, &cpu_setup_mask))
            cpumask_set_cpu(cpu + i, &mask);
    }
out:
    cpumask_copy(dst, &mask);
}

#define TOPOLOGY_CORE_BITS  64

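/*
 * Walk the 64-bit core mask of one topology-list CPU entry and fill in
 * the per-CPU topology IDs. The mask is stored most-significant-bit
 * first, so bit position @core maps to core number
 * TOPOLOGY_CORE_BITS - 1 - core + origin (with origin == 0, bit 63 is
 * core 0). Each core is then expanded into its smp_cpu_mtid + 1 threads.
 */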
static void add_cpus_to_mask(struct topology_core *tl_core,
                 struct mask_info *drawer,
                 struct mask_info *book,
                 struct mask_info *socket)
{
    struct cpu_topology_s390 *topo;
    unsigned int core;

    for_each_set_bit(core, &tl_core->mask, TOPOLOGY_CORE_BITS) {
        unsigned int rcore;
        int lcpu, i;

        rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin;
        lcpu = smp_find_processor_id(rcore << smp_cpu_mt_shift);
        if (lcpu < 0)
            continue;
        for (i = 0; i <= smp_cpu_mtid; i++) {
            topo = &cpu_topology[lcpu + i];
            topo->drawer_id = drawer->id;
            topo->book_id = book->id;
            topo->socket_id = socket->id;
            topo->core_id = rcore;
            topo->thread_id = lcpu + i;
            topo->dedicated = tl_core->d;
            cpumask_set_cpu(lcpu + i, &drawer->mask);
            cpumask_set_cpu(lcpu + i, &book->mask);
            cpumask_set_cpu(lcpu + i, &socket->mask);
            smp_cpu_set_polarization(lcpu + i, tl_core->pp);
        }
    }
}

static void clear_masks(void)
{
    struct mask_info *info;

    info = &socket_info;
    while (info) {
        cpumask_clear(&info->mask);
        info = info->next;
    }
    info = &book_info;
    while (info) {
        cpumask_clear(&info->mask);
        info = info->next;
    }
    info = &drawer_info;
    while (info) {
        cpumask_clear(&info->mask);
        info = info->next;
    }
}

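/*
 * Advance to the next topology-list entry. Core entries (nesting level
 * 0) and container entries have different sizes, so the step width
 * depends on the entry type.
 */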
static union topology_entry *next_tle(union topology_entry *tle)
{
    if (!tle->nl)
        return (union topology_entry *)((struct topology_core *)tle + 1);
    return (union topology_entry *)((struct topology_container *)tle + 1);
}

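/*
 * Convert the flat topology list reported by the machine into the
 * drawer/book/socket mask lists. Entries arrive top-down: nesting level
 * 3 opens a new drawer, 2 a new book, 1 a new socket, and level 0
 * describes the cores within the current socket.
 */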
static void tl_to_masks(struct sysinfo_15_1_x *info)
{
    struct mask_info *socket = &socket_info;
    struct mask_info *book = &book_info;
    struct mask_info *drawer = &drawer_info;
    union topology_entry *tle, *end;

    clear_masks();
    tle = info->tle;
    end = (union topology_entry *)((unsigned long)info + info->length);
    while (tle < end) {
        switch (tle->nl) {
        case 3:
            drawer = drawer->next;
            drawer->id = tle->container.id;
            break;
        case 2:
            book = book->next;
            book->id = tle->container.id;
            break;
        case 1:
            socket = socket->next;
            socket->id = tle->container.id;
            break;
        case 0:
            add_cpus_to_mask(&tle->cpu, drawer, book, socket);
            break;
        default:
            clear_masks();
            return;
        }
        tle = next_tle(tle);
    }
}

static void topology_update_polarization_simple(void)
{
    int cpu;

    for_each_possible_cpu(cpu)
        smp_cpu_set_polarization(cpu, POLARIZATION_HRZ);
}

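/*
 * Issue the PTF instruction (hand-encoded via .insn, opcode 0xb9a2)
 * with the given function code and return the resulting condition code,
 * which ipm/srl extract from the PSW.
 */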
static int ptf(unsigned long fc)
{
    int rc;

    asm volatile(
        "   .insn   rre,0xb9a20000,%1,%1\n"
        "   ipm %0\n"
        "   srl %0,28\n"
        : "=d" (rc)
        : "d" (fc)  : "cc");
    return rc;
}

int topology_set_cpu_management(int fc)
{
    int cpu, rc;

    if (!MACHINE_HAS_TOPOLOGY)
        return -EOPNOTSUPP;
    if (fc)
        rc = ptf(PTF_VERTICAL);
    else
        rc = ptf(PTF_HORIZONTAL);
    if (rc)
        return -EBUSY;
    for_each_possible_cpu(cpu)
        smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
    return rc;
}

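/*
 * Rebuild all per-CPU topology masks and IDs. booted_cores is computed
 * once for the first CPU of each package: every sibling that is the
 * first thread of its core counts as one booted core; all other CPUs of
 * the package simply inherit that count.
 */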
void update_cpu_masks(void)
{
    struct cpu_topology_s390 *topo, *topo_package, *topo_sibling;
    int cpu, sibling, pkg_first, smt_first, id;

    for_each_possible_cpu(cpu) {
        topo = &cpu_topology[cpu];
        cpu_thread_map(&topo->thread_mask, cpu);
        cpu_group_map(&topo->core_mask, &socket_info, cpu);
        cpu_group_map(&topo->book_mask, &book_info, cpu);
        cpu_group_map(&topo->drawer_mask, &drawer_info, cpu);
        topo->booted_cores = 0;
        if (topology_mode != TOPOLOGY_MODE_HW) {
            id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu;
            topo->thread_id = cpu;
            topo->core_id = cpu;
            topo->socket_id = id;
            topo->book_id = id;
            topo->drawer_id = id;
        }
    }
    for_each_online_cpu(cpu) {
        topo = &cpu_topology[cpu];
        pkg_first = cpumask_first(&topo->core_mask);
        topo_package = &cpu_topology[pkg_first];
        if (cpu == pkg_first) {
            for_each_cpu(sibling, &topo->core_mask) {
                topo_sibling = &cpu_topology[sibling];
                smt_first = cpumask_first(&topo_sibling->thread_mask);
                if (sibling == smt_first)
                    topo_package->booted_cores++;
            }
        } else {
            topo->booted_cores = topo_package->booted_cores;
        }
    }
}

void store_topology(struct sysinfo_15_1_x *info)
{
    stsi(info, 15, 1, topology_mnest_limit());
}

static void __arch_update_dedicated_flag(void *arg)
{
    if (topology_cpu_dedicated(smp_processor_id()))
        set_cpu_flag(CIF_DEDICATED_CPU);
    else
        clear_cpu_flag(CIF_DEDICATED_CPU);
}

static int __arch_update_cpu_topology(void)
{
    struct sysinfo_15_1_x *info = tl_info;
    int rc = 0;

    mutex_lock(&smp_cpu_state_mutex);
    if (MACHINE_HAS_TOPOLOGY) {
        rc = 1;
        store_topology(info);
        tl_to_masks(info);
    }
    update_cpu_masks();
    if (!MACHINE_HAS_TOPOLOGY)
        topology_update_polarization_simple();
    mutex_unlock(&smp_cpu_state_mutex);
    return rc;
}

int arch_update_cpu_topology(void)
{
    struct device *dev;
    int cpu, rc;

    rc = __arch_update_cpu_topology();
    on_each_cpu(__arch_update_dedicated_flag, NULL, 0);
    for_each_online_cpu(cpu) {
        dev = get_cpu_device(cpu);
        if (dev)
            kobject_uevent(&dev->kobj, KOBJ_CHANGE);
    }
    return rc;
}

static void topology_work_fn(struct work_struct *work)
{
    rebuild_sched_domains();
}

void topology_schedule_update(void)
{
    schedule_work(&topology_work);
}

static void topology_flush_work(void)
{
    flush_work(&topology_work);
}

static void topology_timer_fn(struct timer_list *unused)
{
    if (ptf(PTF_CHECK))
        topology_schedule_update();
    set_topology_timer();
}

static struct timer_list topology_timer;

static atomic_t topology_poll = ATOMIC_INIT(0);

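/*
 * Re-arm the topology polling timer: while topology_poll holds credit
 * (see topology_expect_change()), poll every 100 ms, otherwise fall
 * back to polling once per minute.
 */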
static void set_topology_timer(void)
{
    if (atomic_add_unless(&topology_poll, -1, 0))
        mod_timer(&topology_timer, jiffies + msecs_to_jiffies(100));
    else
        mod_timer(&topology_timer, jiffies + msecs_to_jiffies(60 * MSEC_PER_SEC));
}

void topology_expect_change(void)
{
    if (!MACHINE_HAS_TOPOLOGY)
        return;
    /* This is racy, but it doesn't matter since it is just a heuristic.
     * Worst case is that we poll in a higher frequency for a bit longer.
     */
    if (atomic_read(&topology_poll) > 60)
        return;
    atomic_add(60, &topology_poll);
    set_topology_timer();
}

static int cpu_management;

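/*
 * The "dispatching" attribute is created below cpu_subsys, i.e. as
 * /sys/devices/system/cpu/dispatching. Writing 0 selects horizontal and
 * 1 vertical CPU polarization, for example:
 *
 *   echo 1 > /sys/devices/system/cpu/dispatching
 */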
static ssize_t dispatching_show(struct device *dev,
                struct device_attribute *attr,
                char *buf)
{
    ssize_t count;

    mutex_lock(&smp_cpu_state_mutex);
    count = sprintf(buf, "%d\n", cpu_management);
    mutex_unlock(&smp_cpu_state_mutex);
    return count;
}

static ssize_t dispatching_store(struct device *dev,
                 struct device_attribute *attr,
                 const char *buf,
                 size_t count)
{
    int val, rc;
    char delim;

    if (sscanf(buf, "%d %c", &val, &delim) != 1)
        return -EINVAL;
    if (val != 0 && val != 1)
        return -EINVAL;
    rc = 0;
    cpus_read_lock();
    mutex_lock(&smp_cpu_state_mutex);
    if (cpu_management == val)
        goto out;
    rc = topology_set_cpu_management(val);
    if (rc)
        goto out;
    cpu_management = val;
    topology_expect_change();
out:
    mutex_unlock(&smp_cpu_state_mutex);
    cpus_read_unlock();
    return rc ? rc : count;
}
static DEVICE_ATTR_RW(dispatching);

static ssize_t cpu_polarization_show(struct device *dev,
                     struct device_attribute *attr, char *buf)
{
    int cpu = dev->id;
    ssize_t count;

    mutex_lock(&smp_cpu_state_mutex);
    switch (smp_cpu_get_polarization(cpu)) {
    case POLARIZATION_HRZ:
        count = sprintf(buf, "horizontal\n");
        break;
    case POLARIZATION_VL:
        count = sprintf(buf, "vertical:low\n");
        break;
    case POLARIZATION_VM:
        count = sprintf(buf, "vertical:medium\n");
        break;
    case POLARIZATION_VH:
        count = sprintf(buf, "vertical:high\n");
        break;
    default:
        count = sprintf(buf, "unknown\n");
        break;
    }
    mutex_unlock(&smp_cpu_state_mutex);
    return count;
}
static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL);

static struct attribute *topology_cpu_attrs[] = {
    &dev_attr_polarization.attr,
    NULL,
};

static struct attribute_group topology_cpu_attr_group = {
    .attrs = topology_cpu_attrs,
};

static ssize_t cpu_dedicated_show(struct device *dev,
                  struct device_attribute *attr, char *buf)
{
    int cpu = dev->id;
    ssize_t count;

    mutex_lock(&smp_cpu_state_mutex);
    count = sprintf(buf, "%d\n", topology_cpu_dedicated(cpu));
    mutex_unlock(&smp_cpu_state_mutex);
    return count;
}
static DEVICE_ATTR(dedicated, 0444, cpu_dedicated_show, NULL);

static struct attribute *topology_extra_cpu_attrs[] = {
    &dev_attr_dedicated.attr,
    NULL,
};

static struct attribute_group topology_extra_cpu_attr_group = {
    .attrs = topology_extra_cpu_attrs,
};

int topology_cpu_init(struct cpu *cpu)
{
    int rc;

    rc = sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group);
    if (rc || !MACHINE_HAS_TOPOLOGY)
        return rc;
    rc = sysfs_create_group(&cpu->dev.kobj, &topology_extra_cpu_attr_group);
    if (rc)
        sysfs_remove_group(&cpu->dev.kobj, &topology_cpu_attr_group);
    return rc;
}

static const struct cpumask *cpu_thread_mask(int cpu)
{
    return &cpu_topology[cpu].thread_mask;
}

const struct cpumask *cpu_coregroup_mask(int cpu)
{
    return &cpu_topology[cpu].core_mask;
}

static const struct cpumask *cpu_book_mask(int cpu)
{
    return &cpu_topology[cpu].book_mask;
}

static const struct cpumask *cpu_drawer_mask(int cpu)
{
    return &cpu_topology[cpu].drawer_mask;
}

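/*
 * Scheduling domain hierarchy handed to the scheduler, from the
 * smallest grouping to the largest: SMT siblings, cores of a socket
 * (MC), books, drawers, and finally the whole machine (DIE).
 */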
static struct sched_domain_topology_level s390_topology[] = {
    { cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
    { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
    { cpu_book_mask, SD_INIT_NAME(BOOK) },
    { cpu_drawer_mask, SD_INIT_NAME(DRAWER) },
    { cpu_cpu_mask, SD_INIT_NAME(DIE) },
    { NULL, },
};

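/*
 * Allocate the linked list of mask_info structures for one container
 * level. The magnitude array of SYSINFO 15.1.x gives the maximum number
 * of entries per nesting level, so the product of the relevant
 * magnitudes is a safe upper bound on how many masks this level can
 * ever need.
 */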
static void __init alloc_masks(struct sysinfo_15_1_x *info,
                   struct mask_info *mask, int offset)
{
    int i, nr_masks;

    nr_masks = info->mag[TOPOLOGY_NR_MAG - offset];
    for (i = 0; i < info->mnest - offset; i++)
        nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
    nr_masks = max(nr_masks, 1);
    for (i = 0; i < nr_masks; i++) {
        mask->next = memblock_alloc(sizeof(*mask->next), 8);
        if (!mask->next)
            panic("%s: Failed to allocate %zu bytes align=0x%x\n",
                  __func__, sizeof(*mask->next), 8);
        mask = mask->next;
    }
}

void __init topology_init_early(void)
{
    struct sysinfo_15_1_x *info;

    set_sched_topology(s390_topology);
    if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) {
        if (MACHINE_HAS_TOPOLOGY)
            topology_mode = TOPOLOGY_MODE_HW;
        else
            topology_mode = TOPOLOGY_MODE_SINGLE;
    }
    if (!MACHINE_HAS_TOPOLOGY)
        goto out;
    tl_info = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
    if (!tl_info)
        panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
              __func__, PAGE_SIZE, PAGE_SIZE);
    info = tl_info;
    store_topology(info);
    pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n",
        info->mag[0], info->mag[1], info->mag[2], info->mag[3],
        info->mag[4], info->mag[5], info->mnest);
    alloc_masks(info, &socket_info, 1);
    alloc_masks(info, &book_info, 2);
    alloc_masks(info, &drawer_info, 3);
out:
    cpumask_set_cpu(0, &cpu_setup_mask);
    __arch_update_cpu_topology();
    __arch_update_dedicated_flag(NULL);
}

static inline int topology_get_mode(int enabled)
{
    if (!enabled)
        return TOPOLOGY_MODE_SINGLE;
    return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE;
}

static inline int topology_is_enabled(void)
{
    return topology_mode != TOPOLOGY_MODE_SINGLE;
}

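/*
 * Early kernel parameter "topology=on|off": disabling it forces
 * TOPOLOGY_MODE_SINGLE before the machine-provided topology is ever
 * consulted.
 */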
static int __init topology_setup(char *str)
{
    bool enabled;
    int rc;

    rc = kstrtobool(str, &enabled);
    if (rc)
        return rc;
    topology_mode = topology_get_mode(enabled);
    return 0;
}
early_param("topology", topology_setup);

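/*
 * Handler for /proc/sys/s390/topology: reads report whether topology is
 * enabled; writing 0 or 1 switches the mode and synchronously rebuilds
 * the scheduling domains via topology_flush_work().
 */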
static int topology_ctl_handler(struct ctl_table *ctl, int write,
                void *buffer, size_t *lenp, loff_t *ppos)
{
    int enabled = topology_is_enabled();
    int new_mode;
    int rc;
    struct ctl_table ctl_entry = {
        .procname   = ctl->procname,
        .data       = &enabled,
        .maxlen     = sizeof(int),
        .extra1     = SYSCTL_ZERO,
        .extra2     = SYSCTL_ONE,
    };

    rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
    if (rc < 0 || !write)
        return rc;

    mutex_lock(&smp_cpu_state_mutex);
    new_mode = topology_get_mode(enabled);
    if (topology_mode != new_mode) {
        topology_mode = new_mode;
        topology_schedule_update();
    }
    mutex_unlock(&smp_cpu_state_mutex);
    topology_flush_work();

    return rc;
}

static struct ctl_table topology_ctl_table[] = {
    {
        .procname   = "topology",
        .mode       = 0644,
        .proc_handler   = topology_ctl_handler,
    },
    { },
};

static struct ctl_table topology_dir_table[] = {
    {
        .procname   = "s390",
        .maxlen     = 0,
        .mode       = 0555,
        .child      = topology_ctl_table,
    },
    { },
};

static int __init topology_init(void)
{
    timer_setup(&topology_timer, topology_timer_fn, TIMER_DEFERRABLE);
    if (MACHINE_HAS_TOPOLOGY)
        set_topology_timer();
    else
        topology_update_polarization_simple();
    register_sysctl_table(topology_dir_table);
    return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching);
}
device_initcall(topology_init);