0001
0002
0003
0004
0005
0006
0007
0008
0009 #include <linux/ctype.h>
0010 #include <linux/delay.h>
0011 #include <linux/errno.h>
0012 #include <linux/fs.h>
0013 #include <linux/gfp.h>
0014 #include <linux/kthread.h>
0015 #include <linux/module.h>
0016 #include <linux/oom.h>
0017 #include <linux/reboot.h>
0018 #include <linux/sched.h>
0019 #include <linux/stringify.h>
0020 #include <linux/swap.h>
0021 #include <linux/device.h>
0022 #include <linux/balloon_compaction.h>
0023 #include <asm/firmware.h>
0024 #include <asm/hvcall.h>
0025 #include <asm/mmu.h>
0026 #include <linux/uaccess.h>
0027 #include <linux/memory.h>
0028 #include <asm/plpar_wrappers.h>
0029
0030 #include "pseries.h"
0031
/* Version string published through MODULE_VERSION(). */
#define CMM_DRIVER_VERSION	"1.0.0"

/*
 * Built-in defaults for the module parameters. They must stay plain
 * literals because they are pasted into the parameter descriptions with
 * __stringify().
 */
#define CMM_DEFAULT_DELAY	1	/* default for "delay" (seconds) */
#define CMM_HOTPLUG_DELAY	5	/* default for "hotplug_delay" (seconds) */
#define CMM_DEBUG		0	/* default for "debug" */
#define CMM_DISABLE		0	/* default for "disable" */
#define CMM_OOM_KB		1024	/* default for "oom_kb" (kB) */
#define CMM_MIN_MEM_MB		256	/* default for "min_mem_mb" (MB) */

/* Convert between a size in kB and a number of PAGE_SIZE pages. */
#define KB2PAGES(_kb)		((_kb) >> (PAGE_SHIFT - 10))
#define PAGES2KB(_pages)	((_pages) << (PAGE_SHIFT - 10))

/* Priority used for the memory hotplug notifier block. */
#define CMM_MEM_HOTPLUG_PRI	1
0043
0044 static unsigned int delay = CMM_DEFAULT_DELAY;
0045 static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY;
0046 static unsigned int oom_kb = CMM_OOM_KB;
0047 static unsigned int cmm_debug = CMM_DEBUG;
0048 static unsigned int cmm_disabled = CMM_DISABLE;
0049 static unsigned long min_mem_mb = CMM_MIN_MEM_MB;
0050 static bool __read_mostly simulate;
0051 static unsigned long simulate_loan_target_kb;
0052 static struct device cmm_dev;
0053
0054 MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>");
0055 MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager");
0056 MODULE_LICENSE("GPL");
0057 MODULE_VERSION(CMM_DRIVER_VERSION);
0058
0059 module_param_named(delay, delay, uint, 0644);
0060 MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
0061 "[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
0062 module_param_named(hotplug_delay, hotplug_delay, uint, 0644);
0063 MODULE_PARM_DESC(hotplug_delay, "Delay (in seconds) after memory hotplug remove "
0064 "before loaning resumes. "
0065 "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]");
0066 module_param_named(oom_kb, oom_kb, uint, 0644);
0067 MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. "
0068 "[Default=" __stringify(CMM_OOM_KB) "]");
0069 module_param_named(min_mem_mb, min_mem_mb, ulong, 0644);
0070 MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. "
0071 "[Default=" __stringify(CMM_MIN_MEM_MB) "]");
0072 module_param_named(debug, cmm_debug, uint, 0644);
0073 MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. "
0074 "[Default=" __stringify(CMM_DEBUG) "]");
0075 module_param_named(simulate, simulate, bool, 0444);
0076 MODULE_PARM_DESC(simulate, "Enable simulation mode (no communication with hw).");
0077
/*
 * Emit a "cmm: "-prefixed KERN_INFO message when the "debug" module
 * parameter (cmm_debug) is non-zero.
 *
 * Wrapped in do { } while (0) so the macro expands to exactly one
 * statement and stays correct as the body of an unbraced if/else.
 */
#define cmm_dbg(...)							\
	do {								\
		if (cmm_debug)						\
			printk(KERN_INFO "cmm: " __VA_ARGS__);		\
	} while (0)
0079
0080 static atomic_long_t loaned_pages;
0081 static unsigned long loaned_pages_target;
0082 static unsigned long oom_freed_pages;
0083
0084 static DEFINE_MUTEX(hotplug_mutex);
0085 static int hotplug_occurred;
0086
0087 static struct task_struct *cmm_thread_ptr;
0088 static struct balloon_dev_info b_dev_info;
0089
0090 static long plpar_page_set_loaned(struct page *page)
0091 {
0092 const unsigned long vpa = page_to_phys(page);
0093 unsigned long cmo_page_sz = cmo_get_page_size();
0094 long rc = 0;
0095 int i;
0096
0097 if (unlikely(simulate))
0098 return 0;
0099
0100 for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
0101 rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa + i, 0);
0102
0103 for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz)
0104 plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE,
0105 vpa + i - cmo_page_sz, 0);
0106
0107 return rc;
0108 }
0109
0110 static long plpar_page_set_active(struct page *page)
0111 {
0112 const unsigned long vpa = page_to_phys(page);
0113 unsigned long cmo_page_sz = cmo_get_page_size();
0114 long rc = 0;
0115 int i;
0116
0117 if (unlikely(simulate))
0118 return 0;
0119
0120 for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
0121 rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa + i, 0);
0122
0123 for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz)
0124 plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED,
0125 vpa + i - cmo_page_sz, 0);
0126
0127 return rc;
0128 }
0129
0130
0131
0132
0133
0134
0135
0136
0137 static long cmm_alloc_pages(long nr)
0138 {
0139 struct page *page;
0140 long rc;
0141
0142 cmm_dbg("Begin request for %ld pages\n", nr);
0143
0144 while (nr) {
0145
0146 if (mutex_trylock(&hotplug_mutex)) {
0147 if (hotplug_occurred) {
0148 mutex_unlock(&hotplug_mutex);
0149 break;
0150 }
0151 mutex_unlock(&hotplug_mutex);
0152 } else {
0153 break;
0154 }
0155
0156 page = balloon_page_alloc();
0157 if (!page)
0158 break;
0159 rc = plpar_page_set_loaned(page);
0160 if (rc) {
0161 pr_err("%s: Can not set page to loaned. rc=%ld\n", __func__, rc);
0162 __free_page(page);
0163 break;
0164 }
0165
0166 balloon_page_enqueue(&b_dev_info, page);
0167 atomic_long_inc(&loaned_pages);
0168 adjust_managed_page_count(page, -1);
0169 nr--;
0170 }
0171
0172 cmm_dbg("End request with %ld pages unfulfilled\n", nr);
0173 return nr;
0174 }
0175
0176
0177
0178
0179
0180
0181
0182
0183 static long cmm_free_pages(long nr)
0184 {
0185 struct page *page;
0186
0187 cmm_dbg("Begin free of %ld pages.\n", nr);
0188 while (nr) {
0189 page = balloon_page_dequeue(&b_dev_info);
0190 if (!page)
0191 break;
0192 plpar_page_set_active(page);
0193 adjust_managed_page_count(page, 1);
0194 __free_page(page);
0195 atomic_long_dec(&loaned_pages);
0196 nr--;
0197 }
0198 cmm_dbg("End request with %ld pages unfulfilled\n", nr);
0199 return nr;
0200 }
0201
0202
0203
0204
0205
0206
0207
0208
0209
0210
0211 static int cmm_oom_notify(struct notifier_block *self,
0212 unsigned long dummy, void *parm)
0213 {
0214 unsigned long *freed = parm;
0215 long nr = KB2PAGES(oom_kb);
0216
0217 cmm_dbg("OOM processing started\n");
0218 nr = cmm_free_pages(nr);
0219 loaned_pages_target = atomic_long_read(&loaned_pages);
0220 *freed += KB2PAGES(oom_kb) - nr;
0221 oom_freed_pages += KB2PAGES(oom_kb) - nr;
0222 cmm_dbg("OOM processing complete\n");
0223 return NOTIFY_OK;
0224 }
0225
0226
0227
0228
0229
0230
0231
0232
0233
0234 static void cmm_get_mpp(void)
0235 {
0236 const long __loaned_pages = atomic_long_read(&loaned_pages);
0237 const long total_pages = totalram_pages() + __loaned_pages;
0238 int rc;
0239 struct hvcall_mpp_data mpp_data;
0240 signed long active_pages_target, page_loan_request, target;
0241 signed long min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE;
0242
0243 if (likely(!simulate)) {
0244 rc = h_get_mpp(&mpp_data);
0245 if (rc != H_SUCCESS)
0246 return;
0247 page_loan_request = div_s64((s64)mpp_data.loan_request,
0248 PAGE_SIZE);
0249 target = page_loan_request + __loaned_pages;
0250 } else {
0251 target = KB2PAGES(simulate_loan_target_kb);
0252 page_loan_request = target - __loaned_pages;
0253 }
0254
0255 if (target < 0 || total_pages < min_mem_pages)
0256 target = 0;
0257
0258 if (target > oom_freed_pages)
0259 target -= oom_freed_pages;
0260 else
0261 target = 0;
0262
0263 active_pages_target = total_pages - target;
0264
0265 if (min_mem_pages > active_pages_target)
0266 target = total_pages - min_mem_pages;
0267
0268 if (target < 0)
0269 target = 0;
0270
0271 loaned_pages_target = target;
0272
0273 cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n",
0274 page_loan_request, __loaned_pages, loaned_pages_target,
0275 oom_freed_pages, totalram_pages());
0276 }
0277
0278 static struct notifier_block cmm_oom_nb = {
0279 .notifier_call = cmm_oom_notify
0280 };
0281
0282
0283
0284
0285
0286
0287
0288
0289 static int cmm_thread(void *dummy)
0290 {
0291 unsigned long timeleft;
0292 long __loaned_pages;
0293
0294 while (1) {
0295 timeleft = msleep_interruptible(delay * 1000);
0296
0297 if (kthread_should_stop() || timeleft)
0298 break;
0299
0300 if (mutex_trylock(&hotplug_mutex)) {
0301 if (hotplug_occurred) {
0302 hotplug_occurred = 0;
0303 mutex_unlock(&hotplug_mutex);
0304 cmm_dbg("Hotplug operation has occurred, "
0305 "loaning activity suspended "
0306 "for %d seconds.\n",
0307 hotplug_delay);
0308 timeleft = msleep_interruptible(hotplug_delay *
0309 1000);
0310 if (kthread_should_stop() || timeleft)
0311 break;
0312 continue;
0313 }
0314 mutex_unlock(&hotplug_mutex);
0315 } else {
0316 cmm_dbg("Hotplug operation in progress, activity "
0317 "suspended\n");
0318 continue;
0319 }
0320
0321 cmm_get_mpp();
0322
0323 __loaned_pages = atomic_long_read(&loaned_pages);
0324 if (loaned_pages_target > __loaned_pages) {
0325 if (cmm_alloc_pages(loaned_pages_target - __loaned_pages))
0326 loaned_pages_target = __loaned_pages;
0327 } else if (loaned_pages_target < __loaned_pages)
0328 cmm_free_pages(__loaned_pages - loaned_pages_target);
0329 }
0330 return 0;
0331 }
0332
0333 #define CMM_SHOW(name, format, args...) \
0334 static ssize_t show_##name(struct device *dev, \
0335 struct device_attribute *attr, \
0336 char *buf) \
0337 { \
0338 return sprintf(buf, format, ##args); \
0339 } \
0340 static DEVICE_ATTR(name, 0444, show_##name, NULL)
0341
0342 CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(atomic_long_read(&loaned_pages)));
0343 CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target));
0344
0345 static ssize_t show_oom_pages(struct device *dev,
0346 struct device_attribute *attr, char *buf)
0347 {
0348 return sprintf(buf, "%lu\n", PAGES2KB(oom_freed_pages));
0349 }
0350
0351 static ssize_t store_oom_pages(struct device *dev,
0352 struct device_attribute *attr,
0353 const char *buf, size_t count)
0354 {
0355 unsigned long val = simple_strtoul (buf, NULL, 10);
0356
0357 if (!capable(CAP_SYS_ADMIN))
0358 return -EPERM;
0359 if (val != 0)
0360 return -EBADMSG;
0361
0362 oom_freed_pages = 0;
0363 return count;
0364 }
0365
0366 static DEVICE_ATTR(oom_freed_kb, 0644,
0367 show_oom_pages, store_oom_pages);
0368
0369 static struct device_attribute *cmm_attrs[] = {
0370 &dev_attr_loaned_kb,
0371 &dev_attr_loaned_target_kb,
0372 &dev_attr_oom_freed_kb,
0373 };
0374
0375 static DEVICE_ULONG_ATTR(simulate_loan_target_kb, 0644,
0376 simulate_loan_target_kb);
0377
0378 static struct bus_type cmm_subsys = {
0379 .name = "cmm",
0380 .dev_name = "cmm",
0381 };
0382
/*
 * Release callback for the cmm device. Intentionally empty: the only
 * device it is installed on is the statically allocated cmm_dev, so
 * there is nothing to free.
 */
static void cmm_release_device(struct device *dev)
{
}
0386
0387
0388
0389
0390
0391
0392
0393 static int cmm_sysfs_register(struct device *dev)
0394 {
0395 int i, rc;
0396
0397 if ((rc = subsys_system_register(&cmm_subsys, NULL)))
0398 return rc;
0399
0400 dev->id = 0;
0401 dev->bus = &cmm_subsys;
0402 dev->release = cmm_release_device;
0403
0404 if ((rc = device_register(dev)))
0405 goto subsys_unregister;
0406
0407 for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) {
0408 if ((rc = device_create_file(dev, cmm_attrs[i])))
0409 goto fail;
0410 }
0411
0412 if (!simulate)
0413 return 0;
0414 rc = device_create_file(dev, &dev_attr_simulate_loan_target_kb.attr);
0415 if (rc)
0416 goto fail;
0417 return 0;
0418
0419 fail:
0420 while (--i >= 0)
0421 device_remove_file(dev, cmm_attrs[i]);
0422 device_unregister(dev);
0423 subsys_unregister:
0424 bus_unregister(&cmm_subsys);
0425 return rc;
0426 }
0427
0428
0429
0430
0431
0432 static void cmm_unregister_sysfs(struct device *dev)
0433 {
0434 int i;
0435
0436 for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++)
0437 device_remove_file(dev, cmm_attrs[i]);
0438 device_unregister(dev);
0439 bus_unregister(&cmm_subsys);
0440 }
0441
0442
0443
0444
0445
0446 static int cmm_reboot_notifier(struct notifier_block *nb,
0447 unsigned long action, void *unused)
0448 {
0449 if (action == SYS_RESTART) {
0450 if (cmm_thread_ptr)
0451 kthread_stop(cmm_thread_ptr);
0452 cmm_thread_ptr = NULL;
0453 cmm_free_pages(atomic_long_read(&loaned_pages));
0454 }
0455 return NOTIFY_DONE;
0456 }
0457
0458 static struct notifier_block cmm_reboot_nb = {
0459 .notifier_call = cmm_reboot_notifier,
0460 };
0461
0462
0463
0464
0465
0466
0467
0468
0469
0470
0471
0472 static int cmm_memory_cb(struct notifier_block *self,
0473 unsigned long action, void *arg)
0474 {
0475 switch (action) {
0476 case MEM_GOING_OFFLINE:
0477 mutex_lock(&hotplug_mutex);
0478 hotplug_occurred = 1;
0479 break;
0480 case MEM_OFFLINE:
0481 case MEM_CANCEL_OFFLINE:
0482 mutex_unlock(&hotplug_mutex);
0483 cmm_dbg("Memory offline operation complete.\n");
0484 break;
0485 case MEM_GOING_ONLINE:
0486 case MEM_ONLINE:
0487 case MEM_CANCEL_ONLINE:
0488 break;
0489 }
0490
0491 return NOTIFY_OK;
0492 }
0493
0494 static struct notifier_block cmm_mem_nb = {
0495 .notifier_call = cmm_memory_cb,
0496 .priority = CMM_MEM_HOTPLUG_PRI
0497 };
0498
0499 #ifdef CONFIG_BALLOON_COMPACTION
0500 static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
0501 struct page *newpage, struct page *page,
0502 enum migrate_mode mode)
0503 {
0504 unsigned long flags;
0505
0506
0507
0508
0509
0510
0511
0512
0513
0514 if (plpar_page_set_loaned(newpage)) {
0515
0516 pr_err_ratelimited("%s: Cannot set page to loaned.", __func__);
0517 return -EBUSY;
0518 }
0519
0520
0521 get_page(newpage);
0522
0523
0524
0525
0526
0527
0528 if (page_zone(page) != page_zone(newpage)) {
0529 adjust_managed_page_count(page, 1);
0530 adjust_managed_page_count(newpage, -1);
0531 }
0532
0533 spin_lock_irqsave(&b_dev_info->pages_lock, flags);
0534 balloon_page_insert(b_dev_info, newpage);
0535 balloon_page_delete(page);
0536 b_dev_info->isolated_pages--;
0537 spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
0538
0539
0540
0541
0542
0543 plpar_page_set_active(page);
0544
0545
0546 put_page(page);
0547
0548 return MIGRATEPAGE_SUCCESS;
0549 }
0550
0551 static void cmm_balloon_compaction_init(void)
0552 {
0553 balloon_devinfo_init(&b_dev_info);
0554 b_dev_info.migratepage = cmm_migratepage;
0555 }
0556 #else
0557 static void cmm_balloon_compaction_init(void)
0558 {
0559 }
0560 #endif
0561
0562
0563
0564
0565
0566
0567
0568 static int cmm_init(void)
0569 {
0570 int rc;
0571
0572 if (!firmware_has_feature(FW_FEATURE_CMO) && !simulate)
0573 return -EOPNOTSUPP;
0574
0575 cmm_balloon_compaction_init();
0576
0577 rc = register_oom_notifier(&cmm_oom_nb);
0578 if (rc < 0)
0579 goto out_balloon_compaction;
0580
0581 if ((rc = register_reboot_notifier(&cmm_reboot_nb)))
0582 goto out_oom_notifier;
0583
0584 if ((rc = cmm_sysfs_register(&cmm_dev)))
0585 goto out_reboot_notifier;
0586
0587 rc = register_memory_notifier(&cmm_mem_nb);
0588 if (rc)
0589 goto out_unregister_notifier;
0590
0591 if (cmm_disabled)
0592 return 0;
0593
0594 cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
0595 if (IS_ERR(cmm_thread_ptr)) {
0596 rc = PTR_ERR(cmm_thread_ptr);
0597 goto out_unregister_notifier;
0598 }
0599
0600 return 0;
0601 out_unregister_notifier:
0602 unregister_memory_notifier(&cmm_mem_nb);
0603 cmm_unregister_sysfs(&cmm_dev);
0604 out_reboot_notifier:
0605 unregister_reboot_notifier(&cmm_reboot_nb);
0606 out_oom_notifier:
0607 unregister_oom_notifier(&cmm_oom_nb);
0608 out_balloon_compaction:
0609 return rc;
0610 }
0611
0612
0613
0614
0615
0616
0617
0618 static void cmm_exit(void)
0619 {
0620 if (cmm_thread_ptr)
0621 kthread_stop(cmm_thread_ptr);
0622 unregister_oom_notifier(&cmm_oom_nb);
0623 unregister_reboot_notifier(&cmm_reboot_nb);
0624 unregister_memory_notifier(&cmm_mem_nb);
0625 cmm_free_pages(atomic_long_read(&loaned_pages));
0626 cmm_unregister_sysfs(&cmm_dev);
0627 }
0628
0629
0630
0631
0632
0633
0634
0635 static int cmm_set_disable(const char *val, const struct kernel_param *kp)
0636 {
0637 int disable = simple_strtoul(val, NULL, 10);
0638
0639 if (disable != 0 && disable != 1)
0640 return -EINVAL;
0641
0642 if (disable && !cmm_disabled) {
0643 if (cmm_thread_ptr)
0644 kthread_stop(cmm_thread_ptr);
0645 cmm_thread_ptr = NULL;
0646 cmm_free_pages(atomic_long_read(&loaned_pages));
0647 } else if (!disable && cmm_disabled) {
0648 cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
0649 if (IS_ERR(cmm_thread_ptr))
0650 return PTR_ERR(cmm_thread_ptr);
0651 }
0652
0653 cmm_disabled = disable;
0654 return 0;
0655 }
0656
0657 module_param_call(disable, cmm_set_disable, param_get_uint,
0658 &cmm_disabled, 0644);
0659 MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. "
0660 "[Default=" __stringify(CMM_DISABLE) "]");
0661
0662 module_init(cmm_init);
0663 module_exit(cmm_exit);