0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039 #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
0040
0041 #include <linux/cpu.h>
0042 #include <linux/kernel.h>
0043 #include <linux/sched.h>
0044 #include <linux/cred.h>
0045 #include <linux/errno.h>
0046 #include <linux/freezer.h>
0047 #include <linux/kthread.h>
0048 #include <linux/mm.h>
0049 #include <linux/memblock.h>
0050 #include <linux/pagemap.h>
0051 #include <linux/highmem.h>
0052 #include <linux/mutex.h>
0053 #include <linux/list.h>
0054 #include <linux/gfp.h>
0055 #include <linux/notifier.h>
0056 #include <linux/memory.h>
0057 #include <linux/memory_hotplug.h>
0058 #include <linux/percpu-defs.h>
0059 #include <linux/slab.h>
0060 #include <linux/sysctl.h>
0061 #include <linux/moduleparam.h>
0062 #include <linux/jiffies.h>
0063
0064 #include <asm/page.h>
0065 #include <asm/tlb.h>
0066
0067 #include <asm/xen/hypervisor.h>
0068 #include <asm/xen/hypercall.h>
0069
0070 #include <xen/xen.h>
0071 #include <xen/interface/xen.h>
0072 #include <xen/interface/memory.h>
0073 #include <xen/balloon.h>
0074 #include <xen/features.h>
0075 #include <xen/page.h>
0076 #include <xen/mem-reservation.h>
0077
0078 #undef MODULE_PARAM_PREFIX
0079 #define MODULE_PARAM_PREFIX "xen."
0080
0081 static uint __read_mostly balloon_boot_timeout = 180;
0082 module_param(balloon_boot_timeout, uint, 0444);
0083
#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
/*
 * Sysctl knob (xen/balloon/hotplug_unpopulated): when non-zero, requests
 * for unpopulated pages may be satisfied by hotplugging new memory
 * sections instead of ballooning out already-populated RAM.
 */
static int xen_hotplug_unpopulated;

/* Leaf table: the single boolean entry, clamped to 0/1. */
static struct ctl_table balloon_table[] = {
	{
		.procname	= "hotplug_unpopulated",
		.data		= &xen_hotplug_unpopulated,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
	{ }
};

/* Intermediate directory: xen/balloon. */
static struct ctl_table balloon_root[] = {
	{
		.procname	= "balloon",
		.mode		= 0555,
		.child		= balloon_table,
	},
	{ }
};

/* Top-level directory: xen (registered from balloon_init()). */
static struct ctl_table xen_root[] = {
	{
		.procname	= "xen",
		.mode		= 0555,
		.child		= balloon_root,
	},
	{ }
};

#else
#define xen_hotplug_unpopulated 0
#endif
0121
/*
 * Use one extent per PAGE_SIZE to avoid breaking down the page into
 * multiple frames.
 */
#define EXTENT_ORDER (fls(XEN_PFN_PER_PAGE) - 1)
0127
0128
0129
0130
0131
0132
0133
0134
0135
0136
/*
 * balloon_state tracks the outcome of the last balloon operation:
 *  BP_DONE      - done or nothing to do
 *  BP_WAIT      - wait to be rescheduled (e.g. memory being onlined)
 *  BP_EAGAIN    - error, go to sleep and retry later
 *  BP_ECANCELED - error, balloon operation canceled
 */
static enum bp_state {
	BP_DONE,
	BP_WAIT,
	BP_EAGAIN,
	BP_ECANCELED
} balloon_state = BP_DONE;

/* Main waiting point for xen-balloon thread. */
static DECLARE_WAIT_QUEUE_HEAD(balloon_thread_wq);

/* Serializes balloon operations and protects the lists/stats below. */
static DEFINE_MUTEX(balloon_mutex);

struct balloon_stats balloon_stats;
EXPORT_SYMBOL_GPL(balloon_stats);

/* We increase/decrease in batches which fit in a page. */
static xen_pfn_t frame_list[PAGE_SIZE / sizeof(xen_pfn_t)];

/* List of ballooned pages, threaded through the mem_map array. */
static LIST_HEAD(ballooned_pages);
/* Woken whenever pages are appended to ballooned_pages. */
static DECLARE_WAIT_QUEUE_HEAD(balloon_wq);

/*
 * When ballooning out (allocating memory to return to Xen) we don't really
 * want the kernel to try too hard since that can trigger the oom killer.
 */
#define GFP_BALLOON \
	(GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC)
0164
0165
0166 static void balloon_append(struct page *page)
0167 {
0168 __SetPageOffline(page);
0169
0170
0171 if (PageHighMem(page)) {
0172 list_add_tail(&page->lru, &ballooned_pages);
0173 balloon_stats.balloon_high++;
0174 } else {
0175 list_add(&page->lru, &ballooned_pages);
0176 balloon_stats.balloon_low++;
0177 }
0178 wake_up(&balloon_wq);
0179 }
0180
0181
0182 static struct page *balloon_retrieve(bool require_lowmem)
0183 {
0184 struct page *page;
0185
0186 if (list_empty(&ballooned_pages))
0187 return NULL;
0188
0189 page = list_entry(ballooned_pages.next, struct page, lru);
0190 if (require_lowmem && PageHighMem(page))
0191 return NULL;
0192 list_del(&page->lru);
0193
0194 if (PageHighMem(page))
0195 balloon_stats.balloon_high--;
0196 else
0197 balloon_stats.balloon_low--;
0198
0199 __ClearPageOffline(page);
0200 return page;
0201 }
0202
0203 static struct page *balloon_next_page(struct page *page)
0204 {
0205 struct list_head *next = page->lru.next;
0206 if (next == &ballooned_pages)
0207 return NULL;
0208 return list_entry(next, struct page, lru);
0209 }
0210
/*
 * Compute the follow-up state/delay after a balloon operation:
 *  - BP_WAIT and BP_ECANCELED are sticky until new work arrives;
 *  - BP_DONE resets the retry bookkeeping;
 *  - otherwise back off exponentially, giving up (BP_ECANCELED) once
 *    max_retry_count consecutive attempts have failed.
 * Called with balloon_mutex held.
 */
static void update_schedule(void)
{
	if (balloon_state == BP_WAIT || balloon_state == BP_ECANCELED)
		return;

	if (balloon_state == BP_DONE) {
		/* Success: restart from the shortest delay. */
		balloon_stats.schedule_delay = 1;
		balloon_stats.retry_count = 1;
		return;
	}

	++balloon_stats.retry_count;

	if (balloon_stats.max_retry_count != RETRY_UNLIMITED &&
	    balloon_stats.retry_count > balloon_stats.max_retry_count) {
		/* Retry budget exhausted: reset and cancel the request. */
		balloon_stats.schedule_delay = 1;
		balloon_stats.retry_count = 1;
		balloon_state = BP_ECANCELED;
		return;
	}

	/* Exponential backoff, capped at max_schedule_delay (seconds). */
	balloon_stats.schedule_delay <<= 1;

	if (balloon_stats.schedule_delay > balloon_stats.max_schedule_delay)
		balloon_stats.schedule_delay = balloon_stats.max_schedule_delay;

	balloon_state = BP_EAGAIN;
}
0239
0240 #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
/* Undo additional_memory_resource(): tolerate a NULL @resource. */
static void release_memory_resource(struct resource *resource)
{
	if (resource) {
		/*
		 * No need to reset the region to identity mapped since we now
		 * know that no I/O can be in this region.
		 */
		release_resource(resource);
		kfree(resource);
	}
}
0253
0254 static struct resource *additional_memory_resource(phys_addr_t size)
0255 {
0256 struct resource *res;
0257 int ret;
0258
0259 res = kzalloc(sizeof(*res), GFP_KERNEL);
0260 if (!res)
0261 return NULL;
0262
0263 res->name = "System RAM";
0264 res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
0265
0266 ret = allocate_resource(&iomem_resource, res,
0267 size, 0, -1,
0268 PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
0269 if (ret < 0) {
0270 pr_err("Cannot allocate new System RAM resource\n");
0271 kfree(res);
0272 return NULL;
0273 }
0274
0275 return res;
0276 }
0277
/*
 * Grow total_pages by hotplugging a new, section-aligned memory region.
 * Called with balloon_mutex held (dropped temporarily around
 * add_memory_resource(), see below).  Returns BP_WAIT while waiting for
 * the new memory to be onlined, BP_ECANCELED on failure.
 */
static enum bp_state reserve_additional_memory(void)
{
	long credit;
	struct resource *resource;
	int nid, rc;
	unsigned long balloon_hotplug;

	credit = balloon_stats.target_pages + balloon_stats.target_unpopulated
		- balloon_stats.total_pages;

	/*
	 * Already hotplugged enough pages?  Wait for them to be
	 * onlined.
	 */
	if (credit <= 0)
		return BP_WAIT;

	/* Memory is added in whole sections. */
	balloon_hotplug = round_up(credit, PAGES_PER_SECTION);

	resource = additional_memory_resource(balloon_hotplug * PAGE_SIZE);
	if (!resource)
		goto err;

	nid = memory_add_physaddr_to_nid(resource->start);

#ifdef CONFIG_XEN_HAVE_PVMMU
	/*
	 * We don't support PV MMU when Linux and Xen are using
	 * different page granularity.
	 */
	BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);

	/*
	 * add_memory() will build page tables for the new memory so
	 * the p2m must contain invalid entries so the correct
	 * non-present PTEs will be written.
	 *
	 * If a failure occurs, the original (identity) p2m entries
	 * are not restored since this region is now known not to
	 * conflict with any devices.
	 */
	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
		unsigned long pfn, i;

		pfn = PFN_DOWN(resource->start);
		for (i = 0; i < balloon_hotplug; i++) {
			if (!set_phys_to_machine(pfn + i, INVALID_P2M_ENTRY)) {
				pr_warn("set_phys_to_machine() failed, no memory added\n");
				goto err;
			}
		}
	}
#endif

	/*
	 * add_memory_resource() will call online_pages() which in its turn
	 * will call xen_online_page() callback causing deadlock if we don't
	 * release balloon_mutex here.  Unlocking here is safe because the
	 * callers drop the mutex before trying again.
	 */
	mutex_unlock(&balloon_mutex);
	/* add_memory_resource() requires the device_hotplug lock. */
	lock_device_hotplug();
	rc = add_memory_resource(nid, resource, MHP_MERGE_RESOURCE);
	unlock_device_hotplug();
	mutex_lock(&balloon_mutex);

	if (rc) {
		pr_warn("Cannot add additional memory (%i)\n", rc);
		goto err;
	}

	balloon_stats.total_pages += balloon_hotplug;

	return BP_WAIT;
  err:
	release_memory_resource(resource);
	return BP_ECANCELED;
}
0357
0358 static void xen_online_page(struct page *page, unsigned int order)
0359 {
0360 unsigned long i, size = (1 << order);
0361 unsigned long start_pfn = page_to_pfn(page);
0362 struct page *p;
0363
0364 pr_debug("Online %lu pages starting at pfn 0x%lx\n", size, start_pfn);
0365 mutex_lock(&balloon_mutex);
0366 for (i = 0; i < size; i++) {
0367 p = pfn_to_page(start_pfn + i);
0368 balloon_append(p);
0369 }
0370 mutex_unlock(&balloon_mutex);
0371 }
0372
0373 static int xen_memory_notifier(struct notifier_block *nb, unsigned long val, void *v)
0374 {
0375 if (val == MEM_ONLINE)
0376 wake_up(&balloon_thread_wq);
0377
0378 return NOTIFY_OK;
0379 }
0380
/* Registered in balloon_init() to learn about MEM_ONLINE events. */
static struct notifier_block xen_memory_nb = {
	.notifier_call = xen_memory_notifier,
	.priority = 0
};
0385 #else
/*
 * Memory hotplug is not configured: we cannot grow the reservation, so
 * clamp the target to what is currently populated and report the
 * request as canceled.
 */
static enum bp_state reserve_additional_memory(void)
{
	balloon_stats.target_pages = balloon_stats.current_pages +
				     balloon_stats.target_unpopulated;
	return BP_ECANCELED;
}
0392 #endif
0393
/* Pages still to be added to (>0) or removed from (<0) the domain. */
static long current_credit(void)
{
	return balloon_stats.target_pages - balloon_stats.current_pages;
}
0398
/* True if any pages are currently held on the ballooned list. */
static bool balloon_is_inflated(void)
{
	return balloon_stats.balloon_low || balloon_stats.balloon_high;
}
0403
/*
 * Deflate the balloon: ask Xen to populate frames for up to @nr_pages
 * ballooned pages and return them to the page allocator.  Called with
 * balloon_mutex held.  Returns BP_DONE on (possibly partial) success,
 * BP_EAGAIN if the hypercall populated nothing.
 */
static enum bp_state increase_reservation(unsigned long nr_pages)
{
	int rc;
	unsigned long i;
	struct page *page;

	/* Batch size is limited by the static frame_list buffer. */
	if (nr_pages > ARRAY_SIZE(frame_list))
		nr_pages = ARRAY_SIZE(frame_list);

	/* Collect the target frame numbers without dequeuing anything yet. */
	page = list_first_entry_or_null(&ballooned_pages, struct page, lru);
	for (i = 0; i < nr_pages; i++) {
		if (!page) {
			/* Fewer ballooned pages available than requested. */
			nr_pages = i;
			break;
		}

		frame_list[i] = page_to_xen_pfn(page);
		page = balloon_next_page(page);
	}

	rc = xenmem_reservation_increase(nr_pages, frame_list);
	if (rc <= 0)
		return BP_EAGAIN;

	/* rc is the number of frames Xen actually populated. */
	for (i = 0; i < rc; i++) {
		page = balloon_retrieve(false);
		BUG_ON(page == NULL);

		xenmem_reservation_va_mapping_update(1, &page, &frame_list[i]);

		/* Relinquish the page back to the allocator. */
		free_reserved_page(page);
	}

	balloon_stats.current_pages += rc;

	return BP_DONE;
}
0442
/*
 * Inflate the balloon: allocate up to @nr_pages pages with @gfp, strip
 * their mappings, hand the backing frames to Xen and park the struct
 * pages on the ballooned list.  Called with balloon_mutex held.
 * Returns BP_DONE, or BP_EAGAIN if allocation fell short (the pages
 * obtained so far are still ballooned out).
 */
static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
{
	enum bp_state state = BP_DONE;
	unsigned long i;
	struct page *page, *tmp;
	int ret;
	LIST_HEAD(pages);

	/* Batch size is limited by the static frame_list buffer. */
	if (nr_pages > ARRAY_SIZE(frame_list))
		nr_pages = ARRAY_SIZE(frame_list);

	for (i = 0; i < nr_pages; i++) {
		page = alloc_page(gfp);
		if (page == NULL) {
			/* Proceed with what we managed to allocate. */
			nr_pages = i;
			state = BP_EAGAIN;
			break;
		}
		adjust_managed_page_count(page, -1);
		xenmem_reservation_scrub_page(page);
		list_add(&page->lru, &pages);
	}

	/*
	 * Ensure that ballooned highmem pages don't have stale kmap
	 * entries before the backing frames are handed to Xen.
	 */
	kmap_flush_unused();

	/*
	 * Setup the frame, update direct mapping, invalidate P2M,
	 * and add to balloon.
	 */
	i = 0;
	list_for_each_entry_safe(page, tmp, &pages, lru) {
		frame_list[i++] = xen_page_to_gfn(page);

		xenmem_reservation_va_mapping_reset(1, &page);

		list_del(&page->lru);

		balloon_append(page);
	}

	flush_tlb_all();

	ret = xenmem_reservation_decrease(nr_pages, frame_list);
	/* Giving back frames we own must not fail. */
	BUG_ON(ret != nr_pages);

	balloon_stats.current_pages -= nr_pages;

	return state;
}
0499
/*
 * Wake condition for the balloon thread: stop waiting when the credit
 * has changed since it was sampled (with BP_DONE any non-zero credit is
 * new work, hence the reset to 0), or when the thread should exit.
 */
static bool balloon_thread_cond(long credit)
{
	if (balloon_state == BP_DONE)
		credit = 0;

	return current_credit() != credit || kthread_should_stop();
}
0511
/*
 * As this is a kthread it is guaranteed to run as a single instance only.
 * We may of course race updates of the target counts (which are protected
 * by the balloon lock), or with changes to the Xen hard limit, but we will
 * recover from these in time.
 */
static int balloon_thread(void *unused)
{
	long credit;
	unsigned long timeout;

	set_freezable();
	for (;;) {
		/* Pick the sleep interval from the current state. */
		switch (balloon_state) {
		case BP_DONE:
		case BP_ECANCELED:
			timeout = 3600 * HZ;
			break;
		case BP_EAGAIN:
			timeout = balloon_stats.schedule_delay * HZ;
			break;
		case BP_WAIT:
			timeout = HZ;
			break;
		}

		/* Sample the credit so a change wakes us early. */
		credit = current_credit();

		wait_event_freezable_timeout(balloon_thread_wq,
					     balloon_thread_cond(credit), timeout);

		if (kthread_should_stop())
			return 0;

		mutex_lock(&balloon_mutex);

		credit = current_credit();

		if (credit > 0) {
			/* Prefer re-populating ballooned pages over hotplug. */
			if (balloon_is_inflated())
				balloon_state = increase_reservation(credit);
			else
				balloon_state = reserve_additional_memory();
		}

		if (credit < 0) {
			long n_pages;

			/* Never balloon out more than is safely available. */
			n_pages = min(-credit, si_mem_available());
			balloon_state = decrease_reservation(n_pages,
							     GFP_BALLOON);
			/*
			 * Retry later if the request had to be clamped and
			 * free memory is running low.
			 */
			if (balloon_state == BP_DONE && n_pages != -credit &&
			    n_pages < totalreserve_pages)
				balloon_state = BP_EAGAIN;
		}

		update_schedule();

		mutex_unlock(&balloon_mutex);

		cond_resched();
	}
}
0575
/* Set the new balloon target (in pages) and poke the balloon thread. */
void balloon_set_new_target(unsigned long target)
{
	/* No need for lock. Not read-modify-write updates. */
	balloon_stats.target_pages = target;
	wake_up(&balloon_thread_wq);
}
EXPORT_SYMBOL_GPL(balloon_set_new_target);
0584
/*
 * Make at least @nr_pages more ballooned pages available, either by
 * hotplugging unpopulated memory (if enabled via sysctl) or by
 * ballooning out currently-populated pages.  Called with balloon_mutex
 * held; may drop and re-take it while sleeping.  Returns 0 or -ENOMEM.
 */
static int add_ballooned_pages(unsigned int nr_pages)
{
	enum bp_state st;

	if (xen_hotplug_unpopulated) {
		st = reserve_additional_memory();
		if (st != BP_ECANCELED) {
			int rc;

			/*
			 * Drop the mutex while sleeping: onlining the new
			 * memory appends pages and wakes balloon_wq.
			 */
			mutex_unlock(&balloon_mutex);
			rc = wait_event_interruptible(balloon_wq,
				   !list_empty(&ballooned_pages));
			mutex_lock(&balloon_mutex);
			/* NOTE(review): interrupted wait is reported as -ENOMEM, not -EINTR. */
			return rc ? -ENOMEM : 0;
		}
	}

	/* Don't balloon out so much that the kernel itself starves. */
	if (si_mem_available() < nr_pages)
		return -ENOMEM;

	st = decrease_reservation(nr_pages, GFP_USER);
	if (st != BP_DONE)
		return -ENOMEM;

	return 0;
}
0611
/**
 * xen_alloc_ballooned_pages - get pages that have been ballooned out
 * @nr_pages: Number of pages to get
 * @pages: pages returned
 *
 * Return: 0 on success, negative errno otherwise.
 */
int xen_alloc_ballooned_pages(unsigned int nr_pages, struct page **pages)
{
	unsigned int pgno = 0;
	struct page *page;
	int ret;

	mutex_lock(&balloon_mutex);

	/* Reserve the pages up front; undone (partially) on failure below. */
	balloon_stats.target_unpopulated += nr_pages;

	while (pgno < nr_pages) {
		/* Only lowmem pages can be handed out here. */
		page = balloon_retrieve(true);
		if (page) {
			pages[pgno++] = page;
#ifdef CONFIG_XEN_HAVE_PVMMU
			/*
			 * We don't support PV MMU when Linux and Xen are using
			 * different page granularity.
			 */
			BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);

			if (!xen_feature(XENFEAT_auto_translated_physmap)) {
				ret = xen_alloc_p2m_entry(page_to_pfn(page));
				if (ret < 0)
					goto out_undo;
			}
#endif
		} else {
			/* Balloon empty: try to grow it, then loop again. */
			ret = add_ballooned_pages(nr_pages - pgno);
			if (ret < 0)
				goto out_undo;
		}
	}
	mutex_unlock(&balloon_mutex);
	return 0;
 out_undo:
	mutex_unlock(&balloon_mutex);
	xen_free_ballooned_pages(pgno, pages);
	/*
	 * NB: xen_free_ballooned_pages will only subtract pgno pages, but
	 * since target_unpopulated is incremented with nr_pages at the start
	 * we need to remove the remaining ones also, or accounting will be
	 * screwed.
	 */
	balloon_stats.target_unpopulated -= nr_pages - pgno;
	return ret;
}
EXPORT_SYMBOL(xen_alloc_ballooned_pages);
0665
0666
0667
0668
0669
0670
0671 void xen_free_ballooned_pages(unsigned int nr_pages, struct page **pages)
0672 {
0673 unsigned int i;
0674
0675 mutex_lock(&balloon_mutex);
0676
0677 for (i = 0; i < nr_pages; i++) {
0678 if (pages[i])
0679 balloon_append(pages[i]);
0680 }
0681
0682 balloon_stats.target_unpopulated -= nr_pages;
0683
0684
0685 if (current_credit())
0686 wake_up(&balloon_thread_wq);
0687
0688 mutex_unlock(&balloon_mutex);
0689 }
0690 EXPORT_SYMBOL(xen_free_ballooned_pages);
0691
/*
 * Seed the balloon at boot with the PV "extra memory" regions: physical
 * ranges reserved by the Xen setup code that are not yet populated.
 */
static void __init balloon_add_regions(void)
{
#if defined(CONFIG_XEN_PV)
	unsigned long start_pfn, pages;
	unsigned long pfn, extra_pfn_end;
	unsigned int i;

	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
		pages = xen_extra_mem[i].n_pfns;
		if (!pages)
			continue;

		start_pfn = xen_extra_mem[i].start_pfn;

		/*
		 * If the amount of usable memory has been limited (e.g., with
		 * the 'mem' command line parameter), don't add pages beyond
		 * this limit.
		 */
		extra_pfn_end = min(max_pfn, start_pfn + pages);

		for (pfn = start_pfn; pfn < extra_pfn_end; pfn++)
			balloon_append(pfn_to_page(pfn));

		balloon_stats.total_pages += extra_pfn_end - start_pfn;
	}
#endif
}
0720
/* Driver setup: stats, hotplug hooks, extra-mem regions, worker thread. */
static int __init balloon_init(void)
{
	struct task_struct *task;

	if (!xen_domain())
		return -ENODEV;

	pr_info("Initialising balloon driver\n");

#ifdef CONFIG_XEN_PV
	/*
	 * PV domains start from the start-info page count (minus any
	 * pre-released pages, capped at max_pfn); others use the number
	 * of present physical pages.
	 */
	balloon_stats.current_pages = xen_pv_domain()
		? min(xen_start_info->nr_pages - xen_released_pages, max_pfn)
		: get_num_physpages();
#else
	balloon_stats.current_pages = get_num_physpages();
#endif
	balloon_stats.target_pages  = balloon_stats.current_pages;
	balloon_stats.balloon_low   = 0;
	balloon_stats.balloon_high  = 0;
	balloon_stats.total_pages   = balloon_stats.current_pages;

	/* Retry/backoff defaults; see update_schedule(). */
	balloon_stats.schedule_delay = 1;
	balloon_stats.max_schedule_delay = 32;
	balloon_stats.retry_count = 1;
	balloon_stats.max_retry_count = 4;

#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
	set_online_page_callback(&xen_online_page);
	register_memory_notifier(&xen_memory_nb);
	register_sysctl_table(xen_root);
#endif

	balloon_add_regions();

	task = kthread_run(balloon_thread, NULL, "xen-balloon");
	if (IS_ERR(task)) {
		pr_err("xen-balloon thread could not be started, ballooning will not work!\n");
		return PTR_ERR(task);
	}

	/* Init the xen-balloon driver. */
	xen_balloon_init();

	return 0;
}
subsys_initcall(balloon_init);
0767
/*
 * Late-initcall: block until initial ballooning down has finished.
 * If ballooning was canceled and no progress is made for
 * balloon_boot_timeout seconds, the boot cannot succeed — panic.
 */
static int __init balloon_wait_finish(void)
{
	long credit, last_credit = 0;
	unsigned long last_changed = 0;

	if (!xen_domain())
		return -ENODEV;

	/* PV guests don't need to wait. */
	if (xen_pv_domain() || !current_credit())
		return 0;

	pr_notice("Waiting for initial ballooning down having finished.\n");

	/* Negative credit: pages still need to be returned to Xen. */
	while ((credit = current_credit()) < 0) {
		if (credit != last_credit) {
			/* Progress was made; restart the timeout window. */
			last_changed = jiffies;
			last_credit = credit;
		}
		if (balloon_state == BP_ECANCELED) {
			pr_warn_once("Initial ballooning failed, %ld pages need to be freed.\n",
				     -credit);
			if (time_is_before_eq_jiffies(last_changed + HZ * balloon_boot_timeout))
				panic("Initial ballooning failed!\n");
		}

		schedule_timeout_interruptible(HZ / 10);
	}

	pr_notice("Initial ballooning down finished.\n");

	return 0;
}
late_initcall_sync(balloon_wait_finish);