// SPDX-License-Identifier: GPL-2.0
/*
 * Basic Node interface support
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/memory.h>
#include <linux/vmstat.h>
#include <linux/notifier.h>
#include <linux/node.h>
#include <linux/hugetlb.h>
#include <linux/compaction.h>
#include <linux/cpumask.h>
#include <linux/topology.h>
#include <linux/nodemask.h>
#include <linux/cpu.h>
#include <linux/device.h>
#include <linux/pm_runtime.h>
#include <linux/swap.h>
#include <linux/slab.h>

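/*
 * All node devices are registered on this subsystem; each one shows up in
 * sysfs as /sys/devices/system/node/nodeN.
 */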
static struct bus_type node_subsys = {
	.name = "node",
	.dev_name = "node",
};

static inline ssize_t cpumap_read(struct file *file, struct kobject *kobj,
				  struct bin_attribute *attr, char *buf,
				  loff_t off, size_t count)
{
	struct device *dev = kobj_to_dev(kobj);
	struct node *node_dev = to_node(dev);
	cpumask_var_t mask;
	ssize_t n;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return 0;

	cpumask_and(mask, cpumask_of_node(node_dev->dev.id), cpu_online_mask);
	n = cpumap_print_bitmask_to_buf(buf, mask, off, count);
	free_cpumask_var(mask);

	return n;
}

static BIN_ATTR_RO(cpumap, CPUMAP_FILE_MAX_BYTES);

static inline ssize_t cpulist_read(struct file *file, struct kobject *kobj,
				   struct bin_attribute *attr, char *buf,
				   loff_t off, size_t count)
{
	struct device *dev = kobj_to_dev(kobj);
	struct node *node_dev = to_node(dev);
	cpumask_var_t mask;
	ssize_t n;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return 0;

	cpumask_and(mask, cpumask_of_node(node_dev->dev.id), cpu_online_mask);
	n = cpumap_print_list_to_buf(buf, mask, off, count);
	free_cpumask_var(mask);

	return n;
}

static BIN_ATTR_RO(cpulist, CPULIST_FILE_MAX_BYTES);
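
/*
 * Reading back these attributes yields the node's *online* CPUs; e.g. a node
 * holding online CPUs 0-3 would typically show "f" in cpumap and "0-3" in
 * cpulist (illustrative values; actual contents depend on the topology).
 */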

/**
 * struct node_access_nodes - Access class device to hold user visible
 * 			      relationships to other nodes.
 * @dev:	Device for this memory access class
 * @list_node:	List element in the node's access list
 * @access:	The access class rank
 * @hmem_attrs:	Heterogeneous memory performance attributes
 */
struct node_access_nodes {
	struct device		dev;
	struct list_head	list_node;
	unsigned int		access;
#ifdef CONFIG_HMEM_REPORTING
	struct node_hmem_attrs	hmem_attrs;
#endif
};
#define to_access_nodes(dev) container_of(dev, struct node_access_nodes, dev)

static struct attribute *node_init_access_node_attrs[] = {
	NULL,
};

static struct attribute *node_targ_access_node_attrs[] = {
	NULL,
};

static const struct attribute_group initiators = {
	.name	= "initiators",
	.attrs	= node_init_access_node_attrs,
};

static const struct attribute_group targets = {
	.name	= "targets",
	.attrs	= node_targ_access_node_attrs,
};

static const struct attribute_group *node_access_node_groups[] = {
	&initiators,
	&targets,
	NULL,
};
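
/*
 * Each access class device thus carries an initially empty "initiators" and
 * "targets" group, i.e. /sys/devices/system/node/nodeN/accessM/{initiators,
 * targets}; symlinks and performance attributes are filled in later.
 */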

static void node_remove_accesses(struct node *node)
{
	struct node_access_nodes *c, *cnext;

	list_for_each_entry_safe(c, cnext, &node->access_list, list_node) {
		list_del(&c->list_node);
		device_unregister(&c->dev);
	}
}

static void node_access_release(struct device *dev)
{
	kfree(to_access_nodes(dev));
}

static struct node_access_nodes *node_init_node_access(struct node *node,
						       unsigned int access)
{
	struct node_access_nodes *access_node;
	struct device *dev;

	list_for_each_entry(access_node, &node->access_list, list_node)
		if (access_node->access == access)
			return access_node;

	access_node = kzalloc(sizeof(*access_node), GFP_KERNEL);
	if (!access_node)
		return NULL;

	access_node->access = access;
	dev = &access_node->dev;
	dev->parent = &node->dev;
	dev->release = node_access_release;
	dev->groups = node_access_node_groups;
	if (dev_set_name(dev, "access%u", access))
		goto free;

	if (device_register(dev))
		goto free_name;

	pm_runtime_no_callbacks(dev);
	list_add_tail(&access_node->list_node, &node->access_list);
	return access_node;
free_name:
	kfree_const(dev->kobj.name);
free:
	kfree(access_node);
	return NULL;
}

#ifdef CONFIG_HMEM_REPORTING
#define ACCESS_ATTR(name)						\
static ssize_t name##_show(struct device *dev,				\
			   struct device_attribute *attr,		\
			   char *buf)					\
{									\
	return sysfs_emit(buf, "%u\n",					\
			  to_access_nodes(dev)->hmem_attrs.name);	\
}									\
static DEVICE_ATTR_RO(name)

ACCESS_ATTR(read_bandwidth);
ACCESS_ATTR(read_latency);
ACCESS_ATTR(write_bandwidth);
ACCESS_ATTR(write_latency);
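
/*
 * Each ACCESS_ATTR(name) above expands to a name##_show() helper plus a
 * read-only dev_attr_##name, so these four lines define the read_bandwidth,
 * read_latency, write_bandwidth and write_latency attributes that
 * node_set_perf_attrs() below attaches to an access class.
 */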

static struct attribute *access_attrs[] = {
	&dev_attr_read_bandwidth.attr,
	&dev_attr_read_latency.attr,
	&dev_attr_write_bandwidth.attr,
	&dev_attr_write_latency.attr,
	NULL,
};

/**
 * node_set_perf_attrs - Set the performance values for given access class
 * @nid: Node identifier to be set
 * @hmem_attrs: Heterogeneous memory performance attributes
 * @access: The access class for the given attributes
 */
void node_set_perf_attrs(unsigned int nid, struct node_hmem_attrs *hmem_attrs,
			 unsigned int access)
{
	struct node_access_nodes *c;
	struct node *node;
	int i;

	if (WARN_ON_ONCE(!node_online(nid)))
		return;

	node = node_devices[nid];
	c = node_init_node_access(node, access);
	if (!c)
		return;

	c->hmem_attrs = *hmem_attrs;
	for (i = 0; access_attrs[i] != NULL; i++) {
		if (sysfs_add_file_to_group(&c->dev.kobj, access_attrs[i],
					    "initiators")) {
			pr_info("failed to add performance attribute to node %d\n",
				nid);
			break;
		}
	}
}

/**
 * struct node_cache_info - Internal tracking for memory node caches
 * @dev:	Device representing the cache level
 * @node:	List element for tracking in the node
 * @cache_attrs:Attributes for the cache level
 */
struct node_cache_info {
	struct device dev;
	struct list_head node;
	struct node_cache_attrs cache_attrs;
};
#define to_cache_info(device) container_of(device, struct node_cache_info, dev)

#define CACHE_ATTR(name, fmt)						\
static ssize_t name##_show(struct device *dev,				\
			   struct device_attribute *attr,		\
			   char *buf)					\
{									\
	return sysfs_emit(buf, fmt "\n",				\
			  to_cache_info(dev)->cache_attrs.name);	\
}									\
static DEVICE_ATTR_RO(name);

CACHE_ATTR(size, "%llu")
CACHE_ATTR(line_size, "%u")
CACHE_ATTR(indexing, "%u")
CACHE_ATTR(write_policy, "%u")
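
/*
 * As above, each CACHE_ATTR(name, fmt) expands to a show helper plus a
 * read-only device attribute; the resulting size, line_size, indexing and
 * write_policy files end up under
 * /sys/devices/system/node/nodeN/memory_side_cache/index<level>/.
 */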

static struct attribute *cache_attrs[] = {
	&dev_attr_indexing.attr,
	&dev_attr_size.attr,
	&dev_attr_line_size.attr,
	&dev_attr_write_policy.attr,
	NULL,
};
ATTRIBUTE_GROUPS(cache);

static void node_cache_release(struct device *dev)
{
	kfree(dev);
}

static void node_cacheinfo_release(struct device *dev)
{
	struct node_cache_info *info = to_cache_info(dev);

	kfree(info);
}

static void node_init_cache_dev(struct node *node)
{
	struct device *dev;

	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
	if (!dev)
		return;

	device_initialize(dev);
	dev->parent = &node->dev;
	dev->release = node_cache_release;
	if (dev_set_name(dev, "memory_side_cache"))
		goto put_device;

	if (device_add(dev))
		goto put_device;

	pm_runtime_no_callbacks(dev);
	node->cache_dev = dev;
	return;
put_device:
	put_device(dev);
}

/**
 * node_add_cache() - add cache attribute to a memory node
 * @nid: Node identifier that has new cache attributes
 * @cache_attrs: Attributes for the cache being added
 */
void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs)
{
	struct node_cache_info *info;
	struct device *dev;
	struct node *node;

	if (!node_online(nid) || !node_devices[nid])
		return;

	node = node_devices[nid];
	list_for_each_entry(info, &node->cache_attrs, node) {
		if (info->cache_attrs.level == cache_attrs->level) {
			dev_warn(&node->dev,
				 "attempt to add duplicate cache level:%d\n",
				 cache_attrs->level);
			return;
		}
	}

	if (!node->cache_dev)
		node_init_cache_dev(node);
	if (!node->cache_dev)
		return;

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info)
		return;

	dev = &info->dev;
	device_initialize(dev);
	dev->parent = node->cache_dev;
	dev->release = node_cacheinfo_release;
	dev->groups = cache_groups;
	if (dev_set_name(dev, "index%d", cache_attrs->level))
		goto put_device;

	info->cache_attrs = *cache_attrs;
	if (device_add(dev)) {
		dev_warn(&node->dev, "failed to add cache level:%d\n",
			 cache_attrs->level);
		goto put_device;
	}
	pm_runtime_no_callbacks(dev);
	list_add_tail(&info->node, &node->cache_attrs);
	return;
put_device:
	put_device(dev);
}

static void node_remove_caches(struct node *node)
{
	struct node_cache_info *info, *next;

	if (!node->cache_dev)
		return;

	list_for_each_entry_safe(info, next, &node->cache_attrs, node) {
		list_del(&info->node);
		device_unregister(&info->dev);
	}
	device_unregister(node->cache_dev);
}

static void node_init_caches(unsigned int nid)
{
	INIT_LIST_HEAD(&node_devices[nid]->cache_attrs);
}
#else
static void node_init_caches(unsigned int nid) { }
static void node_remove_caches(struct node *node) { }
#endif

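/*
 * K(x) converts a count of pages to kilobytes: PAGE_SHIFT - 10 is
 * log2(PAGE_SIZE / 1024), so with 4K pages K(x) is x << 2, i.e. x * 4.
 */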
#define K(x) ((x) << (PAGE_SHIFT - 10))
static ssize_t node_read_meminfo(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	int len = 0;
	int nid = dev->id;
	struct pglist_data *pgdat = NODE_DATA(nid);
	struct sysinfo i;
	unsigned long sreclaimable, sunreclaimable;
	unsigned long swapcached = 0;

	si_meminfo_node(&i, nid);
	sreclaimable = node_page_state_pages(pgdat, NR_SLAB_RECLAIMABLE_B);
	sunreclaimable = node_page_state_pages(pgdat, NR_SLAB_UNRECLAIMABLE_B);
#ifdef CONFIG_SWAP
	swapcached = node_page_state_pages(pgdat, NR_SWAPCACHE);
#endif
	len = sysfs_emit_at(buf, len,
			    "Node %d MemTotal:       %8lu kB\n"
			    "Node %d MemFree:        %8lu kB\n"
			    "Node %d MemUsed:        %8lu kB\n"
			    "Node %d SwapCached:     %8lu kB\n"
			    "Node %d Active:         %8lu kB\n"
			    "Node %d Inactive:       %8lu kB\n"
			    "Node %d Active(anon):   %8lu kB\n"
			    "Node %d Inactive(anon): %8lu kB\n"
			    "Node %d Active(file):   %8lu kB\n"
			    "Node %d Inactive(file): %8lu kB\n"
			    "Node %d Unevictable:    %8lu kB\n"
			    "Node %d Mlocked:        %8lu kB\n",
			    nid, K(i.totalram),
			    nid, K(i.freeram),
			    nid, K(i.totalram - i.freeram),
			    nid, K(swapcached),
			    nid, K(node_page_state(pgdat, NR_ACTIVE_ANON) +
				   node_page_state(pgdat, NR_ACTIVE_FILE)),
			    nid, K(node_page_state(pgdat, NR_INACTIVE_ANON) +
				   node_page_state(pgdat, NR_INACTIVE_FILE)),
			    nid, K(node_page_state(pgdat, NR_ACTIVE_ANON)),
			    nid, K(node_page_state(pgdat, NR_INACTIVE_ANON)),
			    nid, K(node_page_state(pgdat, NR_ACTIVE_FILE)),
			    nid, K(node_page_state(pgdat, NR_INACTIVE_FILE)),
			    nid, K(node_page_state(pgdat, NR_UNEVICTABLE)),
			    nid, K(sum_zone_node_page_state(nid, NR_MLOCK)));

#ifdef CONFIG_HIGHMEM
	len += sysfs_emit_at(buf, len,
			     "Node %d HighTotal:      %8lu kB\n"
			     "Node %d HighFree:       %8lu kB\n"
			     "Node %d LowTotal:       %8lu kB\n"
			     "Node %d LowFree:        %8lu kB\n",
			     nid, K(i.totalhigh),
			     nid, K(i.freehigh),
			     nid, K(i.totalram - i.totalhigh),
			     nid, K(i.freeram - i.freehigh));
#endif
	len += sysfs_emit_at(buf, len,
			     "Node %d Dirty:          %8lu kB\n"
			     "Node %d Writeback:      %8lu kB\n"
			     "Node %d FilePages:      %8lu kB\n"
			     "Node %d Mapped:         %8lu kB\n"
			     "Node %d AnonPages:      %8lu kB\n"
			     "Node %d Shmem:          %8lu kB\n"
			     "Node %d KernelStack:    %8lu kB\n"
#ifdef CONFIG_SHADOW_CALL_STACK
			     "Node %d ShadowCallStack:%8lu kB\n"
#endif
			     "Node %d PageTables:     %8lu kB\n"
			     "Node %d NFS_Unstable:   %8lu kB\n"
			     "Node %d Bounce:         %8lu kB\n"
			     "Node %d WritebackTmp:   %8lu kB\n"
			     "Node %d KReclaimable:   %8lu kB\n"
			     "Node %d Slab:           %8lu kB\n"
			     "Node %d SReclaimable:   %8lu kB\n"
			     "Node %d SUnreclaim:     %8lu kB\n"
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
			     "Node %d AnonHugePages:  %8lu kB\n"
			     "Node %d ShmemHugePages: %8lu kB\n"
			     "Node %d ShmemPmdMapped: %8lu kB\n"
			     "Node %d FileHugePages:  %8lu kB\n"
			     "Node %d FilePmdMapped:  %8lu kB\n"
#endif
			     ,
			     nid, K(node_page_state(pgdat, NR_FILE_DIRTY)),
			     nid, K(node_page_state(pgdat, NR_WRITEBACK)),
			     nid, K(node_page_state(pgdat, NR_FILE_PAGES)),
			     nid, K(node_page_state(pgdat, NR_FILE_MAPPED)),
			     nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
			     nid, K(i.sharedram),
			     nid, node_page_state(pgdat, NR_KERNEL_STACK_KB),
#ifdef CONFIG_SHADOW_CALL_STACK
			     nid, node_page_state(pgdat, NR_KERNEL_SCS_KB),
#endif
			     nid, K(node_page_state(pgdat, NR_PAGETABLE)),
			     nid, 0UL,
			     nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
			     nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
			     nid, K(sreclaimable +
				    node_page_state(pgdat, NR_KERNEL_MISC_RECLAIMABLE)),
			     nid, K(sreclaimable + sunreclaimable),
			     nid, K(sreclaimable),
			     nid, K(sunreclaimable)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
			     ,
			     nid, K(node_page_state(pgdat, NR_ANON_THPS)),
			     nid, K(node_page_state(pgdat, NR_SHMEM_THPS)),
			     nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED)),
			     nid, K(node_page_state(pgdat, NR_FILE_THPS)),
			     nid, K(node_page_state(pgdat, NR_FILE_PMDMAPPED))
#endif
			     );
	len += hugetlb_report_node_meminfo(buf, len, nid);
	return len;
}

#undef K
static DEVICE_ATTR(meminfo, 0444, node_read_meminfo, NULL);

static ssize_t node_read_numastat(struct device *dev,
				  struct device_attribute *attr, char *buf)
{
	fold_vm_numa_events();
	return sysfs_emit(buf,
			  "numa_hit %lu\n"
			  "numa_miss %lu\n"
			  "numa_foreign %lu\n"
			  "interleave_hit %lu\n"
			  "local_node %lu\n"
			  "other_node %lu\n",
			  sum_zone_numa_event_state(dev->id, NUMA_HIT),
			  sum_zone_numa_event_state(dev->id, NUMA_MISS),
			  sum_zone_numa_event_state(dev->id, NUMA_FOREIGN),
			  sum_zone_numa_event_state(dev->id, NUMA_INTERLEAVE_HIT),
			  sum_zone_numa_event_state(dev->id, NUMA_LOCAL),
			  sum_zone_numa_event_state(dev->id, NUMA_OTHER));
}
static DEVICE_ATTR(numastat, 0444, node_read_numastat, NULL);
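
/*
 * A read of nodeN/numastat produces one "<counter> <value>" line per event,
 * e.g. (illustrative numbers only):
 *
 *	numa_hit 5201817
 *	numa_miss 0
 *	numa_foreign 0
 *	interleave_hit 1135
 *	local_node 5194246
 *	other_node 7571
 */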

static ssize_t node_read_vmstat(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	int nid = dev->id;
	struct pglist_data *pgdat = NODE_DATA(nid);
	int i;
	int len = 0;

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		len += sysfs_emit_at(buf, len, "%s %lu\n",
				     zone_stat_name(i),
				     sum_zone_node_page_state(nid, i));

#ifdef CONFIG_NUMA
	fold_vm_numa_events();
	for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++)
		len += sysfs_emit_at(buf, len, "%s %lu\n",
				     numa_stat_name(i),
				     sum_zone_numa_event_state(nid, i));

#endif
	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
		unsigned long pages = node_page_state_pages(pgdat, i);

		if (vmstat_item_print_in_thp(i))
			pages /= HPAGE_PMD_NR;
		len += sysfs_emit_at(buf, len, "%s %lu\n", node_stat_name(i),
				     pages);
	}

	return len;
}
static DEVICE_ATTR(vmstat, 0444, node_read_vmstat, NULL);

static ssize_t node_read_distance(struct device *dev,
				  struct device_attribute *attr, char *buf)
{
	int nid = dev->id;
	int len = 0;
	int i;

	/*
	 * buf is currently PAGE_SIZE in length and each node needs 4 chars
	 * at the most (distance + space or newline).
	 */
	BUILD_BUG_ON(MAX_NUMNODES * 4 > PAGE_SIZE);

	for_each_online_node(i) {
		len += sysfs_emit_at(buf, len, "%s%d",
				     i ? " " : "", node_distance(nid, i));
	}

	len += sysfs_emit_at(buf, len, "\n");
	return len;
}
static DEVICE_ATTR(distance, 0444, node_read_distance, NULL);
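
/*
 * nodeN/distance prints one entry per online node; on a hypothetical two
 * node machine, node0/distance might read "10 21" (SLIT convention: local
 * access is normalized to 10, remote access costs more).
 */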

static struct attribute *node_dev_attrs[] = {
	&dev_attr_meminfo.attr,
	&dev_attr_numastat.attr,
	&dev_attr_distance.attr,
	&dev_attr_vmstat.attr,
	NULL
};

static struct bin_attribute *node_dev_bin_attrs[] = {
	&bin_attr_cpumap,
	&bin_attr_cpulist,
	NULL
};

static const struct attribute_group node_dev_group = {
	.attrs = node_dev_attrs,
	.bin_attrs = node_dev_bin_attrs
};

static const struct attribute_group *node_dev_groups[] = {
	&node_dev_group,
#ifdef CONFIG_HAVE_ARCH_NODE_DEV_GROUP
	&arch_node_dev_group,
#endif
	NULL
};

#ifdef CONFIG_HUGETLBFS
/*
 * hugetlbfs per node attributes registration interface:
 * When/if hugetlb[fs] subsystem initializes [sometime after this module],
 * it will register its per node attributes for all online nodes with
 * memory.  It will also call register_hugetlbfs_with_node(), below, to
 * register its attribute registration functions with this node driver.
 * Once these hooks have been initialized, the node driver will call into
 * the hugetlb module to [un]register attributes for hot-plugged nodes.
 */
static node_registration_func_t __hugetlb_register_node;
static node_registration_func_t __hugetlb_unregister_node;

static inline bool hugetlb_register_node(struct node *node)
{
	if (__hugetlb_register_node &&
			node_state(node->dev.id, N_MEMORY)) {
		__hugetlb_register_node(node);
		return true;
	}
	return false;
}

static inline void hugetlb_unregister_node(struct node *node)
{
	if (__hugetlb_unregister_node)
		__hugetlb_unregister_node(node);
}

void register_hugetlbfs_with_node(node_registration_func_t doregister,
				  node_registration_func_t unregister)
{
	__hugetlb_register_node   = doregister;
	__hugetlb_unregister_node = unregister;
}
#else
static inline void hugetlb_register_node(struct node *node) {}

static inline void hugetlb_unregister_node(struct node *node) {}
#endif

static void node_device_release(struct device *dev)
{
	struct node *node = to_node(dev);

#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_HUGETLBFS)
	/*
	 * We schedule the work only when a memory section is
	 * onlined/offlined on this node. When we come here,
	 * all the memory on this node has been offlined,
	 * so we won't enqueue new work to this work.
	 *
	 * The work is using node->node_work, so we should
	 * flush work before the node is released.
	 */
	flush_work(&node->node_work);
#endif
	kfree(node);
}

/*
 * register_node - Setup a sysfs device for a node.
 * @num - Node number to use when creating the device.
 *
 * Initialize and register the node device.
 */
static int register_node(struct node *node, int num)
{
	int error;

	node->dev.id = num;
	node->dev.bus = &node_subsys;
	node->dev.release = node_device_release;
	node->dev.groups = node_dev_groups;
	error = device_register(&node->dev);

	if (error)
		put_device(&node->dev);
	else {
		hugetlb_register_node(node);

		compaction_register_node(node);
	}
	return error;
}

/**
 * unregister_node - unregister a node device
 * @node: node going away
 *
 * Unregisters a node device @node.  All the devices on the node must be
 * unregistered before calling this function.
 */
void unregister_node(struct node *node)
{
	compaction_unregister_node(node);
	hugetlb_unregister_node(node);
	node_remove_accesses(node);
	node_remove_caches(node);
	device_unregister(&node->dev);
}

struct node *node_devices[MAX_NUMNODES];

/*
 * register cpu under node
 */
int register_cpu_under_node(unsigned int cpu, unsigned int nid)
{
	int ret;
	struct device *obj;

	if (!node_online(nid))
		return 0;

	obj = get_cpu_device(cpu);
	if (!obj)
		return 0;

	ret = sysfs_create_link(&node_devices[nid]->dev.kobj,
				&obj->kobj,
				kobject_name(&obj->kobj));
	if (ret)
		return ret;

	return sysfs_create_link(&obj->kobj,
				 &node_devices[nid]->dev.kobj,
				 kobject_name(&node_devices[nid]->dev.kobj));
}

/**
 * register_memory_node_under_compute_node - link memory node to its compute
 *					     node for a given access class.
 * @mem_nid:	Memory node number
 * @cpu_nid:	CPU node number
 * @access:	Access class to register
 *
 * Description:
 *	For use with platforms that may have separate memory and compute nodes.
 *	This function will export node relationships linking which memory
 *	initiator nodes can access memory targets at a given ranked access
 *	class.
 */
int register_memory_node_under_compute_node(unsigned int mem_nid,
					    unsigned int cpu_nid,
					    unsigned int access)
{
	struct node *init_node, *targ_node;
	struct node_access_nodes *initiator, *target;
	int ret;

	if (!node_online(cpu_nid) || !node_online(mem_nid))
		return -ENODEV;

	init_node = node_devices[cpu_nid];
	targ_node = node_devices[mem_nid];
	initiator = node_init_node_access(init_node, access);
	target = node_init_node_access(targ_node, access);
	if (!initiator || !target)
		return -ENOMEM;

	ret = sysfs_add_link_to_group(&initiator->dev.kobj, "targets",
				      &targ_node->dev.kobj,
				      dev_name(&targ_node->dev));
	if (ret)
		return ret;

	ret = sysfs_add_link_to_group(&target->dev.kobj, "initiators",
				      &init_node->dev.kobj,
				      dev_name(&init_node->dev));
	if (ret)
		goto err;

	return 0;
err:
	sysfs_remove_link_from_group(&initiator->dev.kobj, "targets",
				     dev_name(&targ_node->dev));
	return ret;
}
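
/*
 * For example (hypothetical node numbers), linking memory node 1 under
 * compute node 0 at access class 0 creates the symlinks
 *
 *	/sys/devices/system/node/node0/access0/targets/node1
 *	/sys/devices/system/node/node1/access0/initiators/node0
 */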

int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
{
	struct device *obj;

	if (!node_online(nid))
		return 0;

	obj = get_cpu_device(cpu);
	if (!obj)
		return 0;

	sysfs_remove_link(&node_devices[nid]->dev.kobj,
			  kobject_name(&obj->kobj));
	sysfs_remove_link(&obj->kobj,
			  kobject_name(&node_devices[nid]->dev.kobj));

	return 0;
}

#ifdef CONFIG_MEMORY_HOTPLUG
static int __ref get_nid_for_pfn(unsigned long pfn)
{
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
	if (system_state < SYSTEM_RUNNING)
		return early_pfn_to_nid(pfn);
#endif
	return pfn_to_nid(pfn);
}

static void do_register_memory_block_under_node(int nid,
						struct memory_block *mem_blk,
						enum meminit_context context)
{
	int ret;

	memory_block_add_nid(mem_blk, nid, context);

	ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj,
				       &mem_blk->dev.kobj,
				       kobject_name(&mem_blk->dev.kobj));
	if (ret && ret != -EEXIST)
		dev_err_ratelimited(&node_devices[nid]->dev,
				    "can't create link to %s in sysfs (%d)\n",
				    kobject_name(&mem_blk->dev.kobj), ret);

	ret = sysfs_create_link_nowarn(&mem_blk->dev.kobj,
				       &node_devices[nid]->dev.kobj,
				       kobject_name(&node_devices[nid]->dev.kobj));
	if (ret && ret != -EEXIST)
		dev_err_ratelimited(&mem_blk->dev,
				    "can't create link to %s in sysfs (%d)\n",
				    kobject_name(&node_devices[nid]->dev.kobj),
				    ret);
}

/* register memory section under specified node if it spans that node */
static int register_mem_block_under_node_early(struct memory_block *mem_blk,
					       void *arg)
{
	unsigned long memory_block_pfns = memory_block_size_bytes() / PAGE_SIZE;
	unsigned long start_pfn = section_nr_to_pfn(mem_blk->start_section_nr);
	unsigned long end_pfn = start_pfn + memory_block_pfns - 1;
	int nid = *(int *)arg;
	unsigned long pfn;

	for (pfn = start_pfn; pfn <= end_pfn; pfn++) {
		int page_nid;

		/*
		 * memory block could have several absent sections from start.
		 * skip pfn range from absent section
		 */
		if (!pfn_in_present_section(pfn)) {
			pfn = round_down(pfn + PAGES_PER_SECTION,
					 PAGES_PER_SECTION) - 1;
			continue;
		}

		/*
		 * We need to check if page belongs to nid only at the boot
		 * case because node's ranges can be interleaved.
		 */
		page_nid = get_nid_for_pfn(pfn);
		if (page_nid < 0)
			continue;
		if (page_nid != nid)
			continue;

		do_register_memory_block_under_node(nid, mem_blk, MEMINIT_EARLY);
		return 0;
	}

	return 0;
}

/*
 * During hotplug we know that all pages in the memory block belong to the same
 * node.
 */
static int register_mem_block_under_node_hotplug(struct memory_block *mem_blk,
						 void *arg)
{
	int nid = *(int *)arg;

	do_register_memory_block_under_node(nid, mem_blk, MEMINIT_HOTPLUG);
	return 0;
}

/*
 * Unregister a memory block device under the node it spans. Memory blocks
 * with multiple nodes cannot be offlined and therefore also never be removed.
 */
void unregister_memory_block_under_nodes(struct memory_block *mem_blk)
{
	if (mem_blk->nid == NUMA_NO_NODE)
		return;

	sysfs_remove_link(&node_devices[mem_blk->nid]->dev.kobj,
			  kobject_name(&mem_blk->dev.kobj));
	sysfs_remove_link(&mem_blk->dev.kobj,
			  kobject_name(&node_devices[mem_blk->nid]->dev.kobj));
}

void register_memory_blocks_under_node(int nid, unsigned long start_pfn,
				       unsigned long end_pfn,
				       enum meminit_context context)
{
	walk_memory_blocks_func_t func;

	if (context == MEMINIT_HOTPLUG)
		func = register_mem_block_under_node_hotplug;
	else
		func = register_mem_block_under_node_early;

	walk_memory_blocks(PFN_PHYS(start_pfn), PFN_PHYS(end_pfn - start_pfn),
			   (void *)&nid, func);
}

#ifdef CONFIG_HUGETLBFS
/*
 * Handle per node hstate attribute [un]registration on transitions
 * to/from memory{less,full} nodes.
 */
static void node_hugetlb_work(struct work_struct *work)
{
	struct node *node = container_of(work, struct node, node_work);

	/*
	 * We only get here when a node transitions to/from memoryless state.
	 * We can detect which transition occurred by examining whether the
	 * node has memory now.  hugetlb_register_node() already checks this
	 * so we try to register the attributes.  If that fails, then the
	 * node has transitioned to memoryless, try to unregister the
	 * attributes.
	 */
	if (!hugetlb_register_node(node))
		hugetlb_unregister_node(node);
}

static void init_node_hugetlb_work(int nid)
{
	INIT_WORK(&node_devices[nid]->node_work, node_hugetlb_work);
}

static int node_memory_callback(struct notifier_block *self,
				unsigned long action, void *arg)
{
	struct memory_notify *mnb = arg;
	int nid = mnb->status_change_nid;

	switch (action) {
	case MEM_ONLINE:
	case MEM_OFFLINE:
		/*
		 * offload per node hstate [un]registration to a work thread
		 * when handling memory hot[un]plug events.
		 */
		if (nid != NUMA_NO_NODE)
			schedule_work(&node_devices[nid]->node_work);
		break;

	case MEM_GOING_ONLINE:
	case MEM_GOING_OFFLINE:
	case MEM_CANCEL_ONLINE:
	case MEM_CANCEL_OFFLINE:
	default:
		break;
	}

	return NOTIFY_OK;
}
#endif	/* CONFIG_HUGETLBFS */
#endif	/* CONFIG_MEMORY_HOTPLUG */

#if !defined(CONFIG_MEMORY_HOTPLUG) || !defined(CONFIG_HUGETLBFS)
static inline int node_memory_callback(struct notifier_block *self,
				       unsigned long action, void *arg)
{
	return NOTIFY_OK;
}

static void init_node_hugetlb_work(int nid) { }

#endif

int __register_one_node(int nid)
{
	int error;
	int cpu;

	node_devices[nid] = kzalloc(sizeof(struct node), GFP_KERNEL);
	if (!node_devices[nid])
		return -ENOMEM;

	error = register_node(node_devices[nid], nid);

	/* link cpu under this node */
	for_each_present_cpu(cpu) {
		if (cpu_to_node(cpu) == nid)
			register_cpu_under_node(cpu, nid);
	}

	INIT_LIST_HEAD(&node_devices[nid]->access_list);

	init_node_hugetlb_work(nid);
	node_init_caches(nid);

	return error;
}

void unregister_one_node(int nid)
{
	if (!node_devices[nid])
		return;

	unregister_node(node_devices[nid]);
	node_devices[nid] = NULL;
}

/*
 * node states attributes
 */
struct node_attr {
	struct device_attribute attr;
	enum node_states state;
};

static ssize_t show_node_state(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	struct node_attr *na = container_of(attr, struct node_attr, attr);

	return sysfs_emit(buf, "%*pbl\n",
			  nodemask_pr_args(&node_states[na->state]));
}

#define _NODE_ATTR(name, state) \
	{ __ATTR(name, 0444, show_node_state, NULL), state }
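
/*
 * For instance, _NODE_ATTR(online, N_ONLINE) ties show_node_state() to the
 * N_ONLINE nodemask, backing the top-level /sys/devices/system/node/online
 * file, whose contents look like "0-1" on a two node machine.
 */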

static struct node_attr node_state_attr[] = {
	[N_POSSIBLE] = _NODE_ATTR(possible, N_POSSIBLE),
	[N_ONLINE] = _NODE_ATTR(online, N_ONLINE),
	[N_NORMAL_MEMORY] = _NODE_ATTR(has_normal_memory, N_NORMAL_MEMORY),
#ifdef CONFIG_HIGHMEM
	[N_HIGH_MEMORY] = _NODE_ATTR(has_high_memory, N_HIGH_MEMORY),
#endif
	[N_MEMORY] = _NODE_ATTR(has_memory, N_MEMORY),
	[N_CPU] = _NODE_ATTR(has_cpu, N_CPU),
	[N_GENERIC_INITIATOR] = _NODE_ATTR(has_generic_initiator,
					   N_GENERIC_INITIATOR),
};

static struct attribute *node_state_attrs[] = {
	&node_state_attr[N_POSSIBLE].attr.attr,
	&node_state_attr[N_ONLINE].attr.attr,
	&node_state_attr[N_NORMAL_MEMORY].attr.attr,
#ifdef CONFIG_HIGHMEM
	&node_state_attr[N_HIGH_MEMORY].attr.attr,
#endif
	&node_state_attr[N_MEMORY].attr.attr,
	&node_state_attr[N_CPU].attr.attr,
	&node_state_attr[N_GENERIC_INITIATOR].attr.attr,
	NULL
};

static const struct attribute_group memory_root_attr_group = {
	.attrs = node_state_attrs,
};

static const struct attribute_group *cpu_root_attr_groups[] = {
	&memory_root_attr_group,
	NULL,
};

#define NODE_CALLBACK_PRI	2	/* lower than SLAB */
void __init node_dev_init(void)
{
	static struct notifier_block node_memory_callback_nb = {
		.notifier_call = node_memory_callback,
		.priority = NODE_CALLBACK_PRI,
	};
	int ret, i;

	BUILD_BUG_ON(ARRAY_SIZE(node_state_attr) != NR_NODE_STATES);
	BUILD_BUG_ON(ARRAY_SIZE(node_state_attrs) - 1 != NR_NODE_STATES);

	ret = subsys_system_register(&node_subsys, cpu_root_attr_groups);
	if (ret)
		panic("%s() failed to register subsystem: %d\n", __func__, ret);

	register_hotmemory_notifier(&node_memory_callback_nb);

	/*
	 * Create all node devices, which will properly link the node
	 * to applicable memory block devices and already created cpu devices.
	 */
	for_each_online_node(i) {
		ret = register_one_node(i);
		if (ret)
			panic("%s() failed to add node: %d\n", __func__, ret);
	}
}