// SPDX-License-Identifier: GPL-2.0-only
/*
 * Hyper-V Dynamic Memory (balloon and memory hot-add) driver.
 *
 * Copyright (c) 2012, Microsoft Corporation.
 */

0009 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
0010
0011 #include <linux/kernel.h>
0012 #include <linux/jiffies.h>
0013 #include <linux/mman.h>
0014 #include <linux/debugfs.h>
0015 #include <linux/delay.h>
0016 #include <linux/init.h>
0017 #include <linux/module.h>
0018 #include <linux/slab.h>
0019 #include <linux/kthread.h>
0020 #include <linux/completion.h>
0021 #include <linux/count_zeros.h>
0022 #include <linux/memory_hotplug.h>
0023 #include <linux/memory.h>
0024 #include <linux/notifier.h>
0025 #include <linux/percpu_counter.h>
0026 #include <linux/page_reporting.h>
0027
0028 #include <linux/hyperv.h>
0029 #include <asm/hyperv-tlfs.h>
0030
0031 #include <asm/mshyperv.h>
0032
0033 #define CREATE_TRACE_POINTS
0034 #include "hv_trace_balloon.h"
0035
/*
 * We begin with definitions supporting the Dynamic Memory protocol
 * with the host.
 *
 * Begin protocol definitions.
 */

/*
 * Protocol versions. The low word is the minor version, the high word the
 * major version.
 */
0057 #define DYNMEM_MAKE_VERSION(Major, Minor) ((__u32)(((Major) << 16) | (Minor)))
0058 #define DYNMEM_MAJOR_VERSION(Version) ((__u32)(Version) >> 16)
0059 #define DYNMEM_MINOR_VERSION(Version) ((__u32)(Version) & 0xff)
0060
0061 enum {
0062 DYNMEM_PROTOCOL_VERSION_1 = DYNMEM_MAKE_VERSION(0, 3),
0063 DYNMEM_PROTOCOL_VERSION_2 = DYNMEM_MAKE_VERSION(1, 0),
0064 DYNMEM_PROTOCOL_VERSION_3 = DYNMEM_MAKE_VERSION(2, 0),
0065
0066 DYNMEM_PROTOCOL_VERSION_WIN7 = DYNMEM_PROTOCOL_VERSION_1,
0067 DYNMEM_PROTOCOL_VERSION_WIN8 = DYNMEM_PROTOCOL_VERSION_2,
0068 DYNMEM_PROTOCOL_VERSION_WIN10 = DYNMEM_PROTOCOL_VERSION_3,
0069
0070 DYNMEM_PROTOCOL_VERSION_CURRENT = DYNMEM_PROTOCOL_VERSION_WIN10
0071 };
0072
/*
 * Message Types
 */
0079 enum dm_message_type {
	/*
	 * Version 0.3
	 */
0083 DM_ERROR = 0,
0084 DM_VERSION_REQUEST = 1,
0085 DM_VERSION_RESPONSE = 2,
0086 DM_CAPABILITIES_REPORT = 3,
0087 DM_CAPABILITIES_RESPONSE = 4,
0088 DM_STATUS_REPORT = 5,
0089 DM_BALLOON_REQUEST = 6,
0090 DM_BALLOON_RESPONSE = 7,
0091 DM_UNBALLOON_REQUEST = 8,
0092 DM_UNBALLOON_RESPONSE = 9,
0093 DM_MEM_HOT_ADD_REQUEST = 10,
0094 DM_MEM_HOT_ADD_RESPONSE = 11,
0095 DM_VERSION_03_MAX = 11,
	/*
	 * Version 1.0
	 */
0099 DM_INFO_MESSAGE = 12,
0100 DM_VERSION_1_MAX = 12
0101 };
0102
/*
 * Structures defining the dynamic memory management
 * protocol.
 */
0109 union dm_version {
0110 struct {
0111 __u16 minor_version;
0112 __u16 major_version;
0113 };
0114 __u32 version;
0115 } __packed;
0116
0117
0118 union dm_caps {
0119 struct {
0120 __u64 balloon:1;
0121 __u64 hot_add:1;
		/*
		 * To support guests that may have alignment
		 * limitations on hot-add, the host can specify
		 * the alignment.
		 */
0128 __u64 hot_add_alignment:4;
0129 __u64 reservedz:58;
0130 } cap_bits;
0131 __u64 caps;
0132 } __packed;
0133
0134 union dm_mem_page_range {
0135 struct {
		/*
		 * The PFN number of the first page in the range.
		 * 40 bits is the architectural limit of a PFN
		 * number for AMD64.
		 */
0141 __u64 start_page:40;
		/*
		 * The number of pages in the range.
		 */
0145 __u64 page_cnt:24;
0146 } finfo;
0147 __u64 page_range;
0148 } __packed;
0149
/*
 * The header for all dynamic memory messages:
 *
 * type: Type of the message.
 * size: Size of the message in bytes; including the header.
 * trans_id: The guest is responsible for manufacturing this ID.
 */
0160 struct dm_header {
0161 __u16 type;
0162 __u16 size;
0163 __u32 trans_id;
0164 } __packed;
0165
/*
 * A generic message format for dynamic memory.
 * Specific message formats are defined later in the file.
 */
0171 struct dm_message {
0172 struct dm_header hdr;
0173 __u8 data[];
0174 } __packed;
0175
/*
 * Version negotiation message. Sent from the guest to the host.
 * The guest is free to try different versions until the host
 * accepts the version.
 *
 * version: The protocol version requested.
 * is_last_attempt: If TRUE, this is the last version the guest will request.
 * reservedz: Reserved field, set to zero.
 */
0191 struct dm_version_request {
0192 struct dm_header hdr;
0193 union dm_version version;
0194 __u32 is_last_attempt:1;
0195 __u32 reservedz:31;
0196 } __packed;
0197
/*
 * Version response message; host to guest, indicating
 * whether the host has accepted the version sent by the guest.
 *
 * is_accepted: If TRUE, the host has accepted the version and the guest
 * should proceed to the next stage of the protocol. FALSE indicates that
 * the guest should retry with a different version.
 *
 * reservedz: Reserved field, set to zero.
 */
0209 struct dm_version_response {
0210 struct dm_header hdr;
0211 __u64 is_accepted:1;
0212 __u64 reservedz:63;
0213 } __packed;
0214
/*
 * Message reporting capabilities. This is sent from the guest to the
 * host.
 */
0220 struct dm_capabilities {
0221 struct dm_header hdr;
0222 union dm_caps caps;
0223 __u64 min_page_cnt;
0224 __u64 max_page_number;
0225 } __packed;
0226
/*
 * Response to the capabilities message. This is sent from the host to the
 * guest and indicates whether the host has accepted the guest's
 * capabilities. If the host has not accepted, the guest must shut down
 * the service.
 *
 * is_accepted: Indicates if the host has accepted the guest's capabilities.
 * reservedz: Must be 0.
 */
0237 struct dm_capabilities_resp_msg {
0238 struct dm_header hdr;
0239 __u64 is_accepted:1;
0240 __u64 reservedz:63;
0241 } __packed;
0242
/*
 * Memory status message.
 * The memory status message is sent periodically to the host.
 * The information sent in the status message is similar to
 * what is reported by the memory manager.
 *
 * num_avail: Available memory in pages.
 * num_committed: Committed memory in pages.
 * page_file_size: The size of the page file in pages.
 * zero_free: The number of zero and free pages.
 * page_file_writes: The writes to the page file in pages.
 * io_diff: An indicator of file cache efficiency or page file activity,
 * calculated as File Cache Page Fault Count - Page Read Count.
 * This value is in pages.
 *
 * Some of these metrics are Windows specific and the corresponding fields
 * are set to zero by the guest.
 */
0263 struct dm_status {
0264 struct dm_header hdr;
0265 __u64 num_avail;
0266 __u64 num_committed;
0267 __u64 page_file_size;
0268 __u64 zero_free;
0269 __u32 page_file_writes;
0270 __u32 io_diff;
0271 } __packed;
0272
/*
 * Message to ask the guest to allocate memory - balloon up message.
 * This message is sent from the host to the guest. The guest may not be
 * able to allocate as much memory as requested.
 *
 * num_pages: number of pages to allocate.
 */
0282 struct dm_balloon {
0283 struct dm_header hdr;
0284 __u32 num_pages;
0285 __u32 reservedz;
0286 } __packed;
0287
/*
 * Balloon response message; this message is sent from the guest
 * to the host in response to the balloon message.
 *
 * reservedz: Reserved; must be set to zero.
 * more_pages: If TRUE, the guest has more pages to report to the host.
 * If FALSE, a complete response has been sent to the host.
 *
 * range_count: The number of ranges in the range array.
 * range_array: An array of page ranges returned to the host.
 */
0303 struct dm_balloon_response {
0304 struct dm_header hdr;
0305 __u32 reservedz;
0306 __u32 more_pages:1;
0307 __u32 range_count:31;
0308 union dm_mem_page_range range_array[];
0309 } __packed;
0310
/*
 * Un-balloon message; this message is sent from the host
 * to the guest to give the guest more memory.
 *
 * more_pages: If TRUE, the guest has more pages to be un-ballooned.
 * If FALSE, all requested pages have been un-ballooned.
 *
 * reservedz: Reserved; must be set to zero.
 * range_count: The number of ranges in the range array.
 * range_array: An array of page ranges returned to the host.
 */
0326 struct dm_unballoon_request {
0327 struct dm_header hdr;
0328 __u32 more_pages:1;
0329 __u32 reservedz:31;
0330 __u32 range_count;
0331 union dm_mem_page_range range_array[];
0332 } __packed;
0333
/*
 * Un-balloon response message; this message is sent from the guest
 * to the host in response to an unballoon request.
 */
0340 struct dm_unballoon_response {
0341 struct dm_header hdr;
0342 } __packed;
0343
/*
 * Hot add request message. Message sent from the host to the guest.
 *
 * range: Memory range to hot add.
 */
0352 struct dm_hot_add {
0353 struct dm_header hdr;
0354 union dm_mem_page_range range;
0355 } __packed;
0356
/*
 * Hot add response message.
 * This message is sent by the guest to report the status of a hot add request.
 * If page_count is less than the requested page count, then the host should
 * assume all further hot add requests will fail, since this indicates that
 * the guest has hit an upper physical memory barrier.
 *
 * Hot adds may also fail due to low resources; in this case, the guest must
 * not complete this message until the hot add can succeed, and the host must
 * not send a new hot add request until the response is sent.
 *
 * page_count: number of pages that were successfully hot added.
 * result: result of the operation, 1: success, 0: failure.
 */
0377 struct dm_hot_add_response {
0378 struct dm_header hdr;
0379 __u32 page_count;
0380 __u32 result;
0381 } __packed;
0382
/*
 * Types of information sent from host to the guest.
 */
0387 enum dm_info_type {
0388 INFO_TYPE_MAX_PAGE_CNT = 0,
0389 MAX_INFO_TYPE
0390 };
0391
/*
 * Header for the information message.
 */
0397 struct dm_info_header {
0398 enum dm_info_type type;
0399 __u32 data_size;
0400 } __packed;
0401
/*
 * This message is sent from the host to the guest to pass
 * some relevant information (win8 addition).
 *
 * reserved: not used.
 * info_size: size of the information blob.
 * info: information blob.
 */
0411 struct dm_info_msg {
0412 struct dm_header hdr;
0413 __u32 reserved;
0414 __u32 info_size;
0415 __u8 info[];
0416 };
0417
/*
 * End protocol definitions.
 */

/*
 * State to manage hot adding memory into the guest.
 * The range start_pfn : end_pfn specifies the range
 * that the host has asked us to hot add. The range
 * start_pfn : ha_end_pfn specifies the range that we have
 * currently hot added. We hot add in multiples of 128M
 * chunks; it is possible that we may not be able to bring
 * online all the pages in the region. The range
 * covered_start_pfn : covered_end_pfn defines the pages that can
 * be brought online.
 */
0434 struct hv_hotadd_state {
0435 struct list_head list;
0436 unsigned long start_pfn;
0437 unsigned long covered_start_pfn;
0438 unsigned long covered_end_pfn;
0439 unsigned long ha_end_pfn;
0440 unsigned long end_pfn;
	/*
	 * A list of gaps.
	 */
0444 struct list_head gap_list;
0445 };
0446
0447 struct hv_hotadd_gap {
0448 struct list_head list;
0449 unsigned long start_pfn;
0450 unsigned long end_pfn;
0451 };
0452
0453 struct balloon_state {
0454 __u32 num_pages;
0455 struct work_struct wrk;
0456 };
0457
0458 struct hot_add_wrk {
0459 union dm_mem_page_range ha_page_range;
0460 union dm_mem_page_range ha_region_range;
0461 struct work_struct wrk;
0462 };
0463
0464 static bool allow_hibernation;
0465 static bool hot_add = true;
0466 static bool do_hot_add;
0467
/*
 * Delay reporting memory pressure by
 * the specified number of seconds.
 */
0471 static uint pressure_report_delay = 45;
0472
/*
 * The last time we posted a pressure report to host.
 */
0476 static unsigned long last_post_time;
0477
0478 module_param(hot_add, bool, (S_IRUGO | S_IWUSR));
0479 MODULE_PARM_DESC(hot_add, "If set attempt memory hot_add");
0480
0481 module_param(pressure_report_delay, uint, (S_IRUGO | S_IWUSR));
0482 MODULE_PARM_DESC(pressure_report_delay, "Delay in secs in reporting pressure");
0483 static atomic_t trans_id = ATOMIC_INIT(0);
0484
0485 static int dm_ring_size = VMBUS_RING_SIZE(16 * 1024);
0486
/*
 * Driver specific state.
 */
0491 enum hv_dm_state {
0492 DM_INITIALIZING = 0,
0493 DM_INITIALIZED,
0494 DM_BALLOON_UP,
0495 DM_BALLOON_DOWN,
0496 DM_HOT_ADD,
0497 DM_INIT_ERROR
0498 };
0499
0500
0501 static __u8 recv_buffer[HV_HYP_PAGE_SIZE];
0502 static __u8 balloon_up_send_buffer[HV_HYP_PAGE_SIZE];
0503 #define PAGES_IN_2M (2 * 1024 * 1024 / PAGE_SIZE)
0504 #define HA_CHUNK (128 * 1024 * 1024 / PAGE_SIZE)
0505
0506 struct hv_dynmem_device {
0507 struct hv_device *dev;
0508 enum hv_dm_state state;
0509 struct completion host_event;
0510 struct completion config_event;

	/*
	 * Number of pages we have currently ballooned out.
	 */
0515 unsigned int num_pages_ballooned;
0516 unsigned int num_pages_onlined;
0517 unsigned int num_pages_added;

	/*
	 * State to manage the ballooning (up) operation.
	 */
0522 struct balloon_state balloon_wrk;
0523
	/*
	 * State to execute the "hot-add" operation.
	 */
0527 struct hot_add_wrk ha_wrk;
0528
	/*
	 * This state tracks if the host has specified a hot-add
	 * region.
	 */
0533 bool host_specified_ha_region;
0534
	/*
	 * State to synchronize hot-add.
	 */
0538 struct completion ol_waitevent;
0539
	/*
	 * This thread handles hot-add
	 * requests from the host as well as notifying
	 * the host with regards to memory pressure in
	 * the guest.
	 */
0545 struct task_struct *thread;
0546
	/*
	 * Protects ha_region_list, num_pages_onlined counter and individual
	 * regions from ha_region_list.
	 */
0551 spinlock_t ha_lock;
0552
	/*
	 * A list of hot-add regions.
	 */
0556 struct list_head ha_region_list;
0557
	/*
	 * We start with the highest version we can support
	 * and downgrade based on the host; we save here the
	 * next version to try.
	 */
0563 __u32 next_version;
0564
	/*
	 * The negotiated version agreed by host.
	 */
0568 __u32 version;
0569
0570 struct page_reporting_dev_info pr_dev_info;
0571
	/*
	 * Maximum number of pages that can be hot_add-ed.
	 */
0575 __u64 max_dynamic_page_count;
0576 };
0577
0578 static struct hv_dynmem_device dm_device;
0579
0580 static void post_status(struct hv_dynmem_device *dm);
0581
0582 #ifdef CONFIG_MEMORY_HOTPLUG
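
/*
 * Check whether the given pfn within a hot-add region is actually backed by
 * hot-added memory, i.e. it lies in the covered range and not inside a gap.
 */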
0583 static inline bool has_pfn_is_backed(struct hv_hotadd_state *has,
0584 unsigned long pfn)
0585 {
0586 struct hv_hotadd_gap *gap;
0587
	/* The page is not backed. */
0589 if ((pfn < has->covered_start_pfn) || (pfn >= has->covered_end_pfn))
0590 return false;
0591
	/* Check for gaps. */
0593 list_for_each_entry(gap, &has->gap_list, list) {
0594 if ((pfn >= gap->start_pfn) && (pfn < gap->end_pfn))
0595 return false;
0596 }
0597
0598 return true;
0599 }
0600
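/*
 * Count how many pfns in [start_pfn, start_pfn + nr_pages) belong to a
 * hot-add region and are backed; used to keep num_pages_onlined accurate
 * when memory is offlined.
 */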
0601 static unsigned long hv_page_offline_check(unsigned long start_pfn,
0602 unsigned long nr_pages)
0603 {
0604 unsigned long pfn = start_pfn, count = 0;
0605 struct hv_hotadd_state *has;
0606 bool found;
0607
0608 while (pfn < start_pfn + nr_pages) {
		/*
		 * Search for the hot-add state (HAS) structure covering
		 * the specified pfn and verify the pfn is backed:
		 */
0613 found = false;
0614 list_for_each_entry(has, &dm_device.ha_region_list, list) {
0615 while ((pfn >= has->start_pfn) &&
0616 (pfn < has->end_pfn) &&
0617 (pfn < start_pfn + nr_pages)) {
0618 found = true;
0619 if (has_pfn_is_backed(has, pfn))
0620 count++;
0621 pfn++;
0622 }
0623 }
0624

		/*
		 * This PFN is not in any HAS (e.g. we're offlining a region
		 * which was present at boot), no need to account for it. Go
		 * to the next one.
		 */
0630 if (!found)
0631 pfn++;
0632 }
0633
0634 return count;
0635 }
0636
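/*
 * Memory hotplug notifier: wake up waiters when a block goes (or fails to
 * go) online, and adjust num_pages_onlined when a block is offlined.
 */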
0637 static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
0638 void *v)
0639 {
0640 struct memory_notify *mem = (struct memory_notify *)v;
0641 unsigned long flags, pfn_count;
0642
0643 switch (val) {
0644 case MEM_ONLINE:
0645 case MEM_CANCEL_ONLINE:
0646 complete(&dm_device.ol_waitevent);
0647 break;
0648
0649 case MEM_OFFLINE:
0650 spin_lock_irqsave(&dm_device.ha_lock, flags);
0651 pfn_count = hv_page_offline_check(mem->start_pfn,
0652 mem->nr_pages);
0653 if (pfn_count <= dm_device.num_pages_onlined) {
0654 dm_device.num_pages_onlined -= pfn_count;
0655 } else {
			/*
			 * We're offlining more pages than we managed to
			 * online. This is unexpected. In any case don't let
			 * num_pages_onlined wrap around zero.
			 */
0661 WARN_ON_ONCE(1);
0662 dm_device.num_pages_onlined = 0;
0663 }
0664 spin_unlock_irqrestore(&dm_device.ha_lock, flags);
0665 break;
0666 case MEM_GOING_ONLINE:
0667 case MEM_GOING_OFFLINE:
0668 case MEM_CANCEL_OFFLINE:
0669 break;
0670 }
0671 return NOTIFY_OK;
0672 }
0673
0674 static struct notifier_block hv_memory_nb = {
0675 .notifier_call = hv_memory_notifier,
0676 .priority = 0
0677 };
0678
0679
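/*
 * Online a single page of a hot-added block, skipping pfns that are not
 * backed; called with the ha_lock held.
 */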
0680 static void hv_page_online_one(struct hv_hotadd_state *has, struct page *pg)
0681 {
0682 if (!has_pfn_is_backed(has, page_to_pfn(pg))) {
0683 if (!PageOffline(pg))
0684 __SetPageOffline(pg);
0685 return;
0686 }
0687 if (PageOffline(pg))
0688 __ClearPageOffline(pg);
0689
	/* This frame is currently backed; online the page. */
0691 generic_online_page(pg, 0);
0692
0693 lockdep_assert_held(&dm_device.ha_lock);
0694 dm_device.num_pages_onlined++;
0695 }
0696
0697 static void hv_bring_pgs_online(struct hv_hotadd_state *has,
0698 unsigned long start_pfn, unsigned long size)
0699 {
0700 int i;
0701
0702 pr_debug("Online %lu pages starting at pfn 0x%lx\n", size, start_pfn);
0703 for (i = 0; i < size; i++)
0704 hv_page_online_one(has, pfn_to_page(start_pfn + i));
0705 }
0706
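/*
 * Add memory to the guest in HA_CHUNK granularity and wait (briefly) for
 * each chunk to be onlined before moving on to the next one.
 */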
0707 static void hv_mem_hot_add(unsigned long start, unsigned long size,
0708 unsigned long pfn_count,
0709 struct hv_hotadd_state *has)
0710 {
0711 int ret = 0;
0712 int i, nid;
0713 unsigned long start_pfn;
0714 unsigned long processed_pfn;
0715 unsigned long total_pfn = pfn_count;
0716 unsigned long flags;
0717
0718 for (i = 0; i < (size/HA_CHUNK); i++) {
0719 start_pfn = start + (i * HA_CHUNK);
0720
0721 spin_lock_irqsave(&dm_device.ha_lock, flags);
0722 has->ha_end_pfn += HA_CHUNK;
0723
0724 if (total_pfn > HA_CHUNK) {
0725 processed_pfn = HA_CHUNK;
0726 total_pfn -= HA_CHUNK;
0727 } else {
0728 processed_pfn = total_pfn;
0729 total_pfn = 0;
0730 }
0731
0732 has->covered_end_pfn += processed_pfn;
0733 spin_unlock_irqrestore(&dm_device.ha_lock, flags);
0734
0735 reinit_completion(&dm_device.ol_waitevent);
0736
0737 nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
0738 ret = add_memory(nid, PFN_PHYS((start_pfn)),
0739 (HA_CHUNK << PAGE_SHIFT), MHP_MERGE_RESOURCE);
0740
0741 if (ret) {
0742 pr_err("hot_add memory failed error is %d\n", ret);
0743 if (ret == -EEXIST) {
				/*
				 * This error indicates that the error
				 * is not a transient failure. This is the
				 * case where the guest's physical address map
				 * precludes hot adding memory. Stop all further
				 * memory hot-add.
				 */
0751 do_hot_add = false;
0752 }
0753 spin_lock_irqsave(&dm_device.ha_lock, flags);
0754 has->ha_end_pfn -= HA_CHUNK;
0755 has->covered_end_pfn -= processed_pfn;
0756 spin_unlock_irqrestore(&dm_device.ha_lock, flags);
0757 break;
0758 }
0759
		/*
		 * Wait for memory to get onlined. If the kernel onlined the
		 * memory when adding it, this will return directly. Otherwise,
		 * it will wait for user space to online the memory. This helps
		 * to avoid adding memory faster than it is getting onlined. As
		 * adding succeeded, it is ok to proceed even if the memory was
		 * not onlined in time.
		 */
0768 wait_for_completion_timeout(&dm_device.ol_waitevent, 5 * HZ);
0769 post_status(&dm_device);
0770 }
0771 }
0772
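/* Online-page callback invoked by the core hotplug code for hot-added memory. */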
0773 static void hv_online_page(struct page *pg, unsigned int order)
0774 {
0775 struct hv_hotadd_state *has;
0776 unsigned long flags;
0777 unsigned long pfn = page_to_pfn(pg);
0778
0779 spin_lock_irqsave(&dm_device.ha_lock, flags);
0780 list_for_each_entry(has, &dm_device.ha_region_list, list) {
		/* The page belongs to a different HAS. */
0782 if ((pfn < has->start_pfn) ||
0783 (pfn + (1UL << order) > has->end_pfn))
0784 continue;
0785
0786 hv_bring_pgs_online(has, pfn, 1UL << order);
0787 break;
0788 }
0789 spin_unlock_irqrestore(&dm_device.ha_lock, flags);
0790 }
0791
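/*
 * Check if the requested page range falls within an already known hot-add
 * region; record any gap and extend the region if needed. Returns 1 if the
 * range is covered, 0 if not, and a negative errno on failure.
 */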
0792 static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
0793 {
0794 struct hv_hotadd_state *has;
0795 struct hv_hotadd_gap *gap;
0796 unsigned long residual, new_inc;
0797 int ret = 0;
0798 unsigned long flags;
0799
0800 spin_lock_irqsave(&dm_device.ha_lock, flags);
0801 list_for_each_entry(has, &dm_device.ha_region_list, list) {
		/*
		 * If the pfn range we are dealing with is not in the current
		 * "hot add block", move on.
		 */
0806 if (start_pfn < has->start_pfn || start_pfn >= has->end_pfn)
0807 continue;
0808
		/*
		 * If the current start pfn is not where the covered_end
		 * is, create a gap and update covered_end_pfn.
		 */
0813 if (has->covered_end_pfn != start_pfn) {
0814 gap = kzalloc(sizeof(struct hv_hotadd_gap), GFP_ATOMIC);
0815 if (!gap) {
0816 ret = -ENOMEM;
0817 break;
0818 }
0819
0820 INIT_LIST_HEAD(&gap->list);
0821 gap->start_pfn = has->covered_end_pfn;
0822 gap->end_pfn = start_pfn;
0823 list_add_tail(&gap->list, &has->gap_list);
0824
0825 has->covered_end_pfn = start_pfn;
0826 }
0827
		/*
		 * If the current hot add-request extends beyond
		 * our current limit; extend it.
		 */
0832 if ((start_pfn + pfn_cnt) > has->end_pfn) {
0833 residual = (start_pfn + pfn_cnt - has->end_pfn);
			/*
			 * Extend the region by multiples of HA_CHUNK.
			 */
0837 new_inc = (residual / HA_CHUNK) * HA_CHUNK;
0838 if (residual % HA_CHUNK)
0839 new_inc += HA_CHUNK;
0840
0841 has->end_pfn += new_inc;
0842 }
0843
0844 ret = 1;
0845 break;
0846 }
0847 spin_unlock_irqrestore(&dm_device.ha_lock, flags);
0848
0849 return ret;
0850 }
0851
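/*
 * Bring the requested page range online, hot-adding additional chunks to the
 * covering region as needed. Returns the number of newly covered pages.
 */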
0852 static unsigned long handle_pg_range(unsigned long pg_start,
0853 unsigned long pg_count)
0854 {
0855 unsigned long start_pfn = pg_start;
0856 unsigned long pfn_cnt = pg_count;
0857 unsigned long size;
0858 struct hv_hotadd_state *has;
0859 unsigned long pgs_ol = 0;
0860 unsigned long old_covered_state;
0861 unsigned long res = 0, flags;
0862
0863 pr_debug("Hot adding %lu pages starting at pfn 0x%lx.\n", pg_count,
0864 pg_start);
0865
0866 spin_lock_irqsave(&dm_device.ha_lock, flags);
0867 list_for_each_entry(has, &dm_device.ha_region_list, list) {
		/*
		 * If the pfn range we are dealing with is not in the current
		 * "hot add block", move on.
		 */
0872 if (start_pfn < has->start_pfn || start_pfn >= has->end_pfn)
0873 continue;
0874
0875 old_covered_state = has->covered_end_pfn;
0876
0877 if (start_pfn < has->ha_end_pfn) {
			/*
			 * This is the case where we are backing pages online
			 * in an already hot-added region. Bring these pages
			 * online first.
			 */
0883 pgs_ol = has->ha_end_pfn - start_pfn;
0884 if (pgs_ol > pfn_cnt)
0885 pgs_ol = pfn_cnt;
0886
0887 has->covered_end_pfn += pgs_ol;
0888 pfn_cnt -= pgs_ol;
0889
			/*
			 * Online the pages right away only if the memory
			 * section backing them is already online; otherwise
			 * they are onlined via hv_online_page() when the
			 * containing memory block goes online.
			 */
0897 if (start_pfn > has->start_pfn &&
0898 online_section_nr(pfn_to_section_nr(start_pfn)))
0899 hv_bring_pgs_online(has, start_pfn, pgs_ol);
0900
0901 }
0902
0903 if ((has->ha_end_pfn < has->end_pfn) && (pfn_cnt > 0)) {
			/*
			 * We have some residual hot add range
			 * that needs to be hot added; hot add
			 * it now. Hot add a multiple of
			 * HA_CHUNK that fully covers the pages
			 * we have.
			 */
0911 size = (has->end_pfn - has->ha_end_pfn);
0912 if (pfn_cnt <= size) {
0913 size = ((pfn_cnt / HA_CHUNK) * HA_CHUNK);
0914 if (pfn_cnt % HA_CHUNK)
0915 size += HA_CHUNK;
0916 } else {
0917 pfn_cnt = size;
0918 }
0919 spin_unlock_irqrestore(&dm_device.ha_lock, flags);
0920 hv_mem_hot_add(has->ha_end_pfn, size, pfn_cnt, has);
0921 spin_lock_irqsave(&dm_device.ha_lock, flags);
0922 }
0923
		/*
		 * If we managed to online any pages that were given to us,
		 * we declare success.
		 */
0927 res = has->covered_end_pfn - old_covered_state;
0928 break;
0929 }
0930 spin_unlock_irqrestore(&dm_device.ha_lock, flags);
0931
0932 return res;
0933 }
0934
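/*
 * Top-level hot-add handler: create a new hot-add region if needed and then
 * process the requested page range.
 */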
0935 static unsigned long process_hot_add(unsigned long pg_start,
0936 unsigned long pfn_cnt,
0937 unsigned long rg_start,
0938 unsigned long rg_size)
0939 {
0940 struct hv_hotadd_state *ha_region = NULL;
0941 int covered;
0942 unsigned long flags;
0943
0944 if (pfn_cnt == 0)
0945 return 0;
0946
0947 if (!dm_device.host_specified_ha_region) {
0948 covered = pfn_covered(pg_start, pfn_cnt);
0949 if (covered < 0)
0950 return 0;
0951
0952 if (covered)
0953 goto do_pg_range;
0954 }
0955
	/*
	 * If the host has specified a hot-add range; deal with it first.
	 */
0960 if (rg_size != 0) {
0961 ha_region = kzalloc(sizeof(struct hv_hotadd_state), GFP_KERNEL);
0962 if (!ha_region)
0963 return 0;
0964
0965 INIT_LIST_HEAD(&ha_region->list);
0966 INIT_LIST_HEAD(&ha_region->gap_list);
0967
0968 ha_region->start_pfn = rg_start;
0969 ha_region->ha_end_pfn = rg_start;
0970 ha_region->covered_start_pfn = pg_start;
0971 ha_region->covered_end_pfn = pg_start;
0972 ha_region->end_pfn = rg_start + rg_size;
0973
0974 spin_lock_irqsave(&dm_device.ha_lock, flags);
0975 list_add_tail(&ha_region->list, &dm_device.ha_region_list);
0976 spin_unlock_irqrestore(&dm_device.ha_lock, flags);
0977 }
0978
0979 do_pg_range:
	/*
	 * Process the page range specified; bringing them
	 * online if possible.
	 */
0984 return handle_pg_range(pg_start, pfn_cnt);
0985 }
0986
0987 #endif
0988
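/*
 * Work function servicing a DM_MEM_HOT_ADD_REQUEST: perform the hot add
 * (when enabled and supported) and report the result back to the host.
 */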
0989 static void hot_add_req(struct work_struct *dummy)
0990 {
0991 struct dm_hot_add_response resp;
0992 #ifdef CONFIG_MEMORY_HOTPLUG
0993 unsigned long pg_start, pfn_cnt;
0994 unsigned long rg_start, rg_sz;
0995 #endif
0996 struct hv_dynmem_device *dm = &dm_device;
0997
0998 memset(&resp, 0, sizeof(struct dm_hot_add_response));
0999 resp.hdr.type = DM_MEM_HOT_ADD_RESPONSE;
1000 resp.hdr.size = sizeof(struct dm_hot_add_response);
1001
1002 #ifdef CONFIG_MEMORY_HOTPLUG
1003 pg_start = dm->ha_wrk.ha_page_range.finfo.start_page;
1004 pfn_cnt = dm->ha_wrk.ha_page_range.finfo.page_cnt;
1005
1006 rg_start = dm->ha_wrk.ha_region_range.finfo.start_page;
1007 rg_sz = dm->ha_wrk.ha_region_range.finfo.page_cnt;
1008
1009 if ((rg_start == 0) && (!dm->host_specified_ha_region)) {
1010 unsigned long region_size;
1011 unsigned long region_start;
1012
		/*
		 * The host has not specified the hot-add region.
		 * Based on the hot-add page range being specified,
		 * compute a hot-add region that can cover the pages
		 * that need to be hot-added while ensuring the alignment
		 * and size requirements of Linux as it relates to hot-add.
		 */
1020 region_size = (pfn_cnt / HA_CHUNK) * HA_CHUNK;
1021 if (pfn_cnt % HA_CHUNK)
1022 region_size += HA_CHUNK;
1023
1024 region_start = (pg_start / HA_CHUNK) * HA_CHUNK;
1025
1026 rg_start = region_start;
1027 rg_sz = region_size;
1028 }
1029
1030 if (do_hot_add)
1031 resp.page_count = process_hot_add(pg_start, pfn_cnt,
1032 rg_start, rg_sz);
1033
1034 dm->num_pages_added += resp.page_count;
1035 #endif
1036
	/*
	 * The result field of the response structure has the
	 * following semantics:
	 *
	 * 1. If all or some pages hot-added: Guest should return success.
	 *
	 * 2. If no pages could be hot-added:
	 *
	 * If the guest returns success, then the host
	 * will not attempt any further hot-add operations. This
	 * signifies a permanent failure.
	 *
	 * If the guest returns failure, then this failure will be
	 * treated as a transient failure and the host may retry the
	 * hot-add operation after some delay.
	 */
1052 if (resp.page_count > 0)
1053 resp.result = 1;
1054 else if (!do_hot_add)
1055 resp.result = 1;
1056 else
1057 resp.result = 0;
1058
1059 if (!do_hot_add || resp.page_count == 0) {
1060 if (!allow_hibernation)
1061 pr_err("Memory hot add failed\n");
1062 else
1063 pr_info("Ignore hot-add request!\n");
1064 }
1065
1066 dm->state = DM_INITIALIZED;
1067 resp.hdr.trans_id = atomic_inc_return(&trans_id);
1068 vmbus_sendpacket(dm->dev->channel, &resp,
1069 sizeof(struct dm_hot_add_response),
1070 (unsigned long)NULL,
1071 VM_PKT_DATA_INBAND, 0);
1072 }
1073
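/* Handle a DM_INFO_MESSAGE from the host (currently only the max page count). */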
1074 static void process_info(struct hv_dynmem_device *dm, struct dm_info_msg *msg)
1075 {
1076 struct dm_info_header *info_hdr;
1077
1078 info_hdr = (struct dm_info_header *)msg->info;
1079
1080 switch (info_hdr->type) {
1081 case INFO_TYPE_MAX_PAGE_CNT:
1082 if (info_hdr->data_size == sizeof(__u64)) {
1083 __u64 *max_page_count = (__u64 *)&info_hdr[1];
1084
1085 pr_info("Max. dynamic memory size: %llu MB\n",
1086 (*max_page_count) >> (20 - HV_HYP_PAGE_SHIFT));
1087 dm->max_dynamic_page_count = *max_page_count;
1088 }
1089
1090 break;
1091 default:
1092 pr_warn("Received Unknown type: %d\n", info_hdr->type);
1093 }
1094 }
1095
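/*
 * Compute the minimum number of pages the guest should keep, below which we
 * refuse to balloon out memory.
 */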
1096 static unsigned long compute_balloon_floor(void)
1097 {
1098 unsigned long min_pages;
1099 unsigned long nr_pages = totalram_pages();
1100 #define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
1101
	/* Simple continuous piecewise linear function:
	 *  max MiB -> min MiB  gradient
	 *       0         0
	 *      16        16
	 *      32        24
	 *     128        72    (1/2)
	 *     512       168    (1/4)
	 *    2048       360    (1/8)
	 *    8192       744    (1/16)
	 *   32768      1512    (1/32)
	 */
1112 if (nr_pages < MB2PAGES(128))
1113 min_pages = MB2PAGES(8) + (nr_pages >> 1);
1114 else if (nr_pages < MB2PAGES(512))
1115 min_pages = MB2PAGES(40) + (nr_pages >> 2);
1116 else if (nr_pages < MB2PAGES(2048))
1117 min_pages = MB2PAGES(104) + (nr_pages >> 3);
1118 else if (nr_pages < MB2PAGES(8192))
1119 min_pages = MB2PAGES(232) + (nr_pages >> 4);
1120 else
1121 min_pages = MB2PAGES(488) + (nr_pages >> 5);
1122 #undef MB2PAGES
1123 return min_pages;
1124 }
1125
/*
 * Compute total committed memory pages.
 */
1130 static unsigned long get_pages_committed(struct hv_dynmem_device *dm)
1131 {
1132 return vm_memory_committed() +
1133 dm->num_pages_ballooned +
1134 (dm->num_pages_added > dm->num_pages_onlined ?
1135 dm->num_pages_added - dm->num_pages_onlined : 0) +
1136 compute_balloon_floor();
1137 }
1138
/*
 * Post our status as it relates memory pressure to the
 * host. Host expects the guests to post this status
 * periodically at 1 second intervals.
 *
 * The metrics specified in this protocol are very Windows
 * specific and so we cook up numbers here to convey our memory
 * pressure.
 */
1149 static void post_status(struct hv_dynmem_device *dm)
1150 {
1151 struct dm_status status;
1152 unsigned long now = jiffies;
1153 unsigned long last_post = last_post_time;
1154 unsigned long num_pages_avail, num_pages_committed;
1155
1156 if (pressure_report_delay > 0) {
1157 --pressure_report_delay;
1158 return;
1159 }
1160
1161 if (!time_after(now, (last_post_time + HZ)))
1162 return;
1163
1164 memset(&status, 0, sizeof(struct dm_status));
1165 status.hdr.type = DM_STATUS_REPORT;
1166 status.hdr.size = sizeof(struct dm_status);
1167 status.hdr.trans_id = atomic_inc_return(&trans_id);
1168
	/*
	 * The host expects the guest to report free and committed memory.
	 * Furthermore, the host expects the pressure information to include
	 * the ballooned out pages. For a given amount of memory that we are
	 * managing we need to compute a floor below which we should not
	 * balloon. Compute this and add it to the pressure report.
	 * We also need to report all offline pages (num_pages_added -
	 * num_pages_onlined) as committed to the host, otherwise it can try
	 * asking us to balloon them out.
	 */
1179 num_pages_avail = si_mem_available();
1180 num_pages_committed = get_pages_committed(dm);
1181
1182 trace_balloon_status(num_pages_avail, num_pages_committed,
1183 vm_memory_committed(), dm->num_pages_ballooned,
1184 dm->num_pages_added, dm->num_pages_onlined);
1185
	/* Convert numbers of pages into numbers of HV_HYP_PAGEs. */
1187 status.num_avail = num_pages_avail * NR_HV_HYP_PAGES_IN_PAGE;
1188 status.num_committed = num_pages_committed * NR_HV_HYP_PAGES_IN_PAGE;
1189
	/*
	 * If our transaction ID is no longer current, just don't
	 * send the status. This can happen if we were interrupted
	 * after we picked our transaction ID.
	 */
1195 if (status.hdr.trans_id != atomic_read(&trans_id))
1196 return;
1197
	/*
	 * If the last post time that we sampled has changed,
	 * we have raced, don't post the status.
	 */
1202 if (last_post != last_post_time)
1203 return;
1204
1205 last_post_time = jiffies;
1206 vmbus_sendpacket(dm->dev->channel, &status,
1207 sizeof(struct dm_status),
1208 (unsigned long)NULL,
1209 VM_PKT_DATA_INBAND, 0);
1210
1211 }
1212
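/* Return a previously ballooned-out page range to the guest. */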
1213 static void free_balloon_pages(struct hv_dynmem_device *dm,
1214 union dm_mem_page_range *range_array)
1215 {
1216 int num_pages = range_array->finfo.page_cnt;
1217 __u64 start_frame = range_array->finfo.start_page;
1218 struct page *pg;
1219 int i;
1220
1221 for (i = 0; i < num_pages; i++) {
1222 pg = pfn_to_page(i + start_frame);
1223 __ClearPageOffline(pg);
1224 __free_page(pg);
1225 dm->num_pages_ballooned--;
1226 adjust_managed_page_count(pg, 1);
1227 }
1228 }
1229
1230
1231
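/*
 * Allocate up to num_pages pages in alloc_unit-sized chunks, mark them
 * offline and record the ranges in the balloon response message. Returns
 * the number of pages ballooned out in this pass.
 */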
1232 static unsigned int alloc_balloon_pages(struct hv_dynmem_device *dm,
1233 unsigned int num_pages,
1234 struct dm_balloon_response *bl_resp,
1235 int alloc_unit)
1236 {
1237 unsigned int i, j;
1238 struct page *pg;
1239
1240 for (i = 0; i < num_pages / alloc_unit; i++) {
1241 if (bl_resp->hdr.size + sizeof(union dm_mem_page_range) >
1242 HV_HYP_PAGE_SIZE)
1243 return i * alloc_unit;
1244
		/*
		 * We execute this code in a thread context. Furthermore,
		 * we don't want the kernel to try too hard.
		 */
1249 pg = alloc_pages(GFP_HIGHUSER | __GFP_NORETRY |
1250 __GFP_NOMEMALLOC | __GFP_NOWARN,
1251 get_order(alloc_unit << PAGE_SHIFT));
1252
1253 if (!pg)
1254 return i * alloc_unit;
1255
1256 dm->num_pages_ballooned += alloc_unit;
1257
		/*
		 * If we allocated 2M pages; split them so we
		 * can free them in any order we get.
		 */
1263 if (alloc_unit != 1)
1264 split_page(pg, get_order(alloc_unit << PAGE_SHIFT));
1265
		/* Mark all the allocated pages offline. */
1267 for (j = 0; j < alloc_unit; j++) {
1268 __SetPageOffline(pg + j);
1269 adjust_managed_page_count(pg + j, -1);
1270 }
1271
1272 bl_resp->range_count++;
1273 bl_resp->range_array[i].finfo.start_page =
1274 page_to_pfn(pg);
1275 bl_resp->range_array[i].finfo.page_cnt = alloc_unit;
1276 bl_resp->hdr.size += sizeof(union dm_mem_page_range);
1277
1278 }
1279
1280 return i * alloc_unit;
1281 }
1282
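/*
 * Work function servicing a DM_BALLOON_REQUEST: allocate the requested
 * number of pages (respecting the balloon floor) and report the ranges to
 * the host.
 */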
1283 static void balloon_up(struct work_struct *dummy)
1284 {
1285 unsigned int num_pages = dm_device.balloon_wrk.num_pages;
1286 unsigned int num_ballooned = 0;
1287 struct dm_balloon_response *bl_resp;
1288 int alloc_unit;
1289 int ret;
1290 bool done = false;
1291 int i;
1292 long avail_pages;
1293 unsigned long floor;
1294
	/*
	 * We will attempt 2M allocations. However, if we fail to
	 * allocate 2M chunks, we will go back to PAGE_SIZE allocations.
	 */
1299 alloc_unit = PAGES_IN_2M;
1300
1301 avail_pages = si_mem_available();
1302 floor = compute_balloon_floor();
1303
	/* Refuse to balloon below the floor. */
1305 if (avail_pages < num_pages || avail_pages - num_pages < floor) {
1306 pr_info("Balloon request will be partially fulfilled. %s\n",
1307 avail_pages < num_pages ? "Not enough memory." :
1308 "Balloon floor reached.");
1309
1310 num_pages = avail_pages > floor ? (avail_pages - floor) : 0;
1311 }
1312
1313 while (!done) {
1314 memset(balloon_up_send_buffer, 0, HV_HYP_PAGE_SIZE);
1315 bl_resp = (struct dm_balloon_response *)balloon_up_send_buffer;
1316 bl_resp->hdr.type = DM_BALLOON_RESPONSE;
1317 bl_resp->hdr.size = sizeof(struct dm_balloon_response);
1318 bl_resp->more_pages = 1;
1319
1320 num_pages -= num_ballooned;
1321 num_ballooned = alloc_balloon_pages(&dm_device, num_pages,
1322 bl_resp, alloc_unit);
1323
1324 if (alloc_unit != 1 && num_ballooned == 0) {
1325 alloc_unit = 1;
1326 continue;
1327 }
1328
1329 if (num_ballooned == 0 || num_ballooned == num_pages) {
1330 pr_debug("Ballooned %u out of %u requested pages.\n",
1331 num_pages, dm_device.balloon_wrk.num_pages);
1332
1333 bl_resp->more_pages = 0;
1334 done = true;
1335 dm_device.state = DM_INITIALIZED;
1336 }
1337
		/*
		 * We are pushing a lot of data through the channel;
		 * deal with transient failures caused because of the
		 * lack of space in the ring buffer.
		 */
1344 do {
1345 bl_resp->hdr.trans_id = atomic_inc_return(&trans_id);
1346 ret = vmbus_sendpacket(dm_device.dev->channel,
1347 bl_resp,
1348 bl_resp->hdr.size,
1349 (unsigned long)NULL,
1350 VM_PKT_DATA_INBAND, 0);
1351
1352 if (ret == -EAGAIN)
1353 msleep(20);
1354 post_status(&dm_device);
1355 } while (ret == -EAGAIN);
1356
1357 if (ret) {
			/*
			 * Free up the memory we allocated.
			 */
1361 pr_err("Balloon response failed\n");
1362
1363 for (i = 0; i < bl_resp->range_count; i++)
1364 free_balloon_pages(&dm_device,
1365 &bl_resp->range_array[i]);
1366
1367 done = true;
1368 }
1369 }
1370
1371 }
1372
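/* Handle a DM_UNBALLOON_REQUEST: free the listed ranges back to the guest. */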
1373 static void balloon_down(struct hv_dynmem_device *dm,
1374 struct dm_unballoon_request *req)
1375 {
1376 union dm_mem_page_range *range_array = req->range_array;
1377 int range_count = req->range_count;
1378 struct dm_unballoon_response resp;
1379 int i;
1380 unsigned int prev_pages_ballooned = dm->num_pages_ballooned;
1381
1382 for (i = 0; i < range_count; i++) {
1383 free_balloon_pages(dm, &range_array[i]);
1384 complete(&dm_device.config_event);
1385 }
1386
1387 pr_debug("Freed %u ballooned pages.\n",
1388 prev_pages_ballooned - dm->num_pages_ballooned);
1389
1390 if (req->more_pages == 1)
1391 return;
1392
1393 memset(&resp, 0, sizeof(struct dm_unballoon_response));
1394 resp.hdr.type = DM_UNBALLOON_RESPONSE;
1395 resp.hdr.trans_id = atomic_inc_return(&trans_id);
1396 resp.hdr.size = sizeof(struct dm_unballoon_response);
1397
1398 vmbus_sendpacket(dm_device.dev->channel, &resp,
1399 sizeof(struct dm_unballoon_response),
1400 (unsigned long)NULL,
1401 VM_PKT_DATA_INBAND, 0);
1402
1403 dm->state = DM_INITIALIZED;
1404 }
1405
1406 static void balloon_onchannelcallback(void *context);
1407
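/* Kernel thread posting a memory pressure report to the host every second. */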
1408 static int dm_thread_func(void *dm_dev)
1409 {
1410 struct hv_dynmem_device *dm = dm_dev;
1411
1412 while (!kthread_should_stop()) {
1413 wait_for_completion_interruptible_timeout(
1414 &dm_device.config_event, 1*HZ);
1415
		/*
		 * The host expects us to post information on the memory
		 * pressure every second.
		 */
1419 reinit_completion(&dm_device.config_event);
1420 post_status(dm);
1421 }
1422
1423 return 0;
1424 }
1425
1426
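/*
 * Handle the host's response to our version request, retrying with an older
 * protocol version if the host rejected the one we offered.
 */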
1427 static void version_resp(struct hv_dynmem_device *dm,
1428 struct dm_version_response *vresp)
1429 {
1430 struct dm_version_request version_req;
1431 int ret;
1432
1433 if (vresp->is_accepted) {
		/*
		 * We are done; wakeup the
		 * context waiting for version
		 * negotiation.
		 */
1439 complete(&dm->host_event);
1440 return;
1441 }
1442
	/*
	 * If there are more versions to try, continue
	 * with negotiations; if not
	 * shutdown the service since we are not able
	 * to negotiate a suitable version number
	 * with the host.
	 */
1449 if (dm->next_version == 0)
1450 goto version_error;
1451
1452 memset(&version_req, 0, sizeof(struct dm_version_request));
1453 version_req.hdr.type = DM_VERSION_REQUEST;
1454 version_req.hdr.size = sizeof(struct dm_version_request);
1455 version_req.hdr.trans_id = atomic_inc_return(&trans_id);
1456 version_req.version.version = dm->next_version;
1457 dm->version = version_req.version.version;
1458
	/*
	 * Set the next version to try in case the current version fails.
	 * The Win7 protocol has 0.3 as its maximum version.
	 */
1463 switch (version_req.version.version) {
1464 case DYNMEM_PROTOCOL_VERSION_WIN8:
1465 dm->next_version = DYNMEM_PROTOCOL_VERSION_WIN7;
1466 version_req.is_last_attempt = 0;
1467 break;
1468 default:
1469 dm->next_version = 0;
1470 version_req.is_last_attempt = 1;
1471 }
1472
1473 ret = vmbus_sendpacket(dm->dev->channel, &version_req,
1474 sizeof(struct dm_version_request),
1475 (unsigned long)NULL,
1476 VM_PKT_DATA_INBAND, 0);
1477
1478 if (ret)
1479 goto version_error;
1480
1481 return;
1482
1483 version_error:
1484 dm->state = DM_INIT_ERROR;
1485 complete(&dm->host_event);
1486 }
1487
1488 static void cap_resp(struct hv_dynmem_device *dm,
1489 struct dm_capabilities_resp_msg *cap_resp)
1490 {
1491 if (!cap_resp->is_accepted) {
1492 pr_err("Capabilities not accepted by host\n");
1493 dm->state = DM_INIT_ERROR;
1494 }
1495 complete(&dm->host_event);
1496 }
1497
1498 static void balloon_onchannelcallback(void *context)
1499 {
1500 struct hv_device *dev = context;
1501 u32 recvlen;
1502 u64 requestid;
1503 struct dm_message *dm_msg;
1504 struct dm_header *dm_hdr;
1505 struct hv_dynmem_device *dm = hv_get_drvdata(dev);
1506 struct dm_balloon *bal_msg;
1507 struct dm_hot_add *ha_msg;
1508 union dm_mem_page_range *ha_pg_range;
1509 union dm_mem_page_range *ha_region;
1510
1511 memset(recv_buffer, 0, sizeof(recv_buffer));
1512 vmbus_recvpacket(dev->channel, recv_buffer,
1513 HV_HYP_PAGE_SIZE, &recvlen, &requestid);
1514
1515 if (recvlen > 0) {
1516 dm_msg = (struct dm_message *)recv_buffer;
1517 dm_hdr = &dm_msg->hdr;
1518
1519 switch (dm_hdr->type) {
1520 case DM_VERSION_RESPONSE:
1521 version_resp(dm,
1522 (struct dm_version_response *)dm_msg);
1523 break;
1524
1525 case DM_CAPABILITIES_RESPONSE:
1526 cap_resp(dm,
1527 (struct dm_capabilities_resp_msg *)dm_msg);
1528 break;
1529
1530 case DM_BALLOON_REQUEST:
1531 if (allow_hibernation) {
1532 pr_info("Ignore balloon-up request!\n");
1533 break;
1534 }
1535
1536 if (dm->state == DM_BALLOON_UP)
1537 pr_warn("Currently ballooning\n");
1538 bal_msg = (struct dm_balloon *)recv_buffer;
1539 dm->state = DM_BALLOON_UP;
1540 dm_device.balloon_wrk.num_pages = bal_msg->num_pages;
1541 schedule_work(&dm_device.balloon_wrk.wrk);
1542 break;
1543
1544 case DM_UNBALLOON_REQUEST:
1545 if (allow_hibernation) {
1546 pr_info("Ignore balloon-down request!\n");
1547 break;
1548 }
1549
1550 dm->state = DM_BALLOON_DOWN;
1551 balloon_down(dm,
1552 (struct dm_unballoon_request *)recv_buffer);
1553 break;
1554
1555 case DM_MEM_HOT_ADD_REQUEST:
1556 if (dm->state == DM_HOT_ADD)
1557 pr_warn("Currently hot-adding\n");
1558 dm->state = DM_HOT_ADD;
1559 ha_msg = (struct dm_hot_add *)recv_buffer;
1560 if (ha_msg->hdr.size == sizeof(struct dm_hot_add)) {
				/*
				 * This is a normal hot-add request specifying
				 * hot-add memory.
				 */
1565 dm->host_specified_ha_region = false;
1566 ha_pg_range = &ha_msg->range;
1567 dm->ha_wrk.ha_page_range = *ha_pg_range;
1568 dm->ha_wrk.ha_region_range.page_range = 0;
1569 } else {
				/*
				 * Host is specifying that we first hot-add
				 * a region and then partially populate this
				 * region.
				 */
1575 dm->host_specified_ha_region = true;
1576 ha_pg_range = &ha_msg->range;
1577 ha_region = &ha_pg_range[1];
1578 dm->ha_wrk.ha_page_range = *ha_pg_range;
1579 dm->ha_wrk.ha_region_range = *ha_region;
1580 }
1581 schedule_work(&dm_device.ha_wrk.wrk);
1582 break;
1583
1584 case DM_INFO_MESSAGE:
1585 process_info(dm, (struct dm_info_msg *)dm_msg);
1586 break;
1587
1588 default:
1589 pr_warn_ratelimited("Unhandled message: type: %d\n", dm_hdr->type);
1590
1591 }
1592 }
1593
1594 }
1595
/* Hyper-V only supports reporting 2MB pages or higher */
1597 #define HV_MIN_PAGE_REPORTING_ORDER 9
1598 #define HV_MIN_PAGE_REPORTING_LEN (HV_HYP_PAGE_SIZE << HV_MIN_PAGE_REPORTING_ORDER)
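
/*
 * Free-page reporting callback: pass the reported ranges to Hyper-V via the
 * cold memory discard hint hypercall.
 */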
1599 static int hv_free_page_report(struct page_reporting_dev_info *pr_dev_info,
1600 struct scatterlist *sgl, unsigned int nents)
1601 {
1602 unsigned long flags;
1603 struct hv_memory_hint *hint;
1604 int i;
1605 u64 status;
1606 struct scatterlist *sg;
1607
1608 WARN_ON_ONCE(nents > HV_MEMORY_HINT_MAX_GPA_PAGE_RANGES);
1609 WARN_ON_ONCE(sgl->length < HV_MIN_PAGE_REPORTING_LEN);
1610 local_irq_save(flags);
1611 hint = *(struct hv_memory_hint **)this_cpu_ptr(hyperv_pcpu_input_arg);
1612 if (!hint) {
1613 local_irq_restore(flags);
1614 return -ENOSPC;
1615 }
1616
1617 hint->type = HV_EXT_MEMORY_HEAT_HINT_TYPE_COLD_DISCARD;
1618 hint->reserved = 0;
1619 for_each_sg(sgl, sg, nents, i) {
1620 union hv_gpa_page_range *range;
1621
1622 range = &hint->ranges[i];
1623 range->address_space = 0;
1624
1625 range->page.largepage = 1;
1626 range->page.additional_pages =
1627 (sg->length / HV_MIN_PAGE_REPORTING_LEN) - 1;
1628 range->page_size = HV_GPA_PAGE_RANGE_PAGE_SIZE_2MB;
1629 range->base_large_pfn =
1630 page_to_hvpfn(sg_page(sg)) >> HV_MIN_PAGE_REPORTING_ORDER;
1631 }
1632
1633 status = hv_do_rep_hypercall(HV_EXT_CALL_MEMORY_HEAT_HINT, nents, 0,
1634 hint, NULL);
1635 local_irq_restore(flags);
1636 if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) {
1637 pr_err("Cold memory discard hypercall failed with status %llx\n",
1638 status);
1639 return -EINVAL;
1640 }
1641
1642 return 0;
1643 }
1644
1645 static void enable_page_reporting(void)
1646 {
1647 int ret;
1648
1649
1650 if (pageblock_order < HV_MIN_PAGE_REPORTING_ORDER) {
1651 pr_debug("Cold memory discard is only supported on 2MB pages and above\n");
1652 return;
1653 }
1654
1655 if (!hv_query_ext_cap(HV_EXT_CAPABILITY_MEMORY_COLD_DISCARD_HINT)) {
1656 pr_debug("Cold memory discard hint not supported by Hyper-V\n");
1657 return;
1658 }
1659
1660 BUILD_BUG_ON(PAGE_REPORTING_CAPACITY > HV_MEMORY_HINT_MAX_GPA_PAGE_RANGES);
1661 dm_device.pr_dev_info.report = hv_free_page_report;
1662 ret = page_reporting_register(&dm_device.pr_dev_info);
1663 if (ret < 0) {
1664 dm_device.pr_dev_info.report = NULL;
1665 pr_err("Failed to enable cold memory discard: %d\n", ret);
1666 } else {
1667 pr_info("Cold memory discard hint enabled\n");
1668 }
1669 }
1670
1671 static void disable_page_reporting(void)
1672 {
1673 if (dm_device.pr_dev_info.report) {
1674 page_reporting_unregister(&dm_device.pr_dev_info);
1675 dm_device.pr_dev_info.report = NULL;
1676 }
1677 }
1678
1679 static int ballooning_enabled(void)
1680 {
	/*
	 * Disable ballooning if the page size is not 4k (HV_HYP_PAGE_SIZE),
	 * since currently it's unclear to us whether an unballoon request can
	 * make sure all page ranges are guest page size aligned.
	 */
1686 if (PAGE_SIZE != HV_HYP_PAGE_SIZE) {
1687 pr_info("Ballooning disabled because page size is not 4096 bytes\n");
1688 return 0;
1689 }
1690
1691 return 1;
1692 }
1693
1694 static int hot_add_enabled(void)
1695 {
	/*
	 * Disable hot add on ARM64, because we currently rely on
	 * memory_add_physaddr_to_nid() to get a node id of a hot add range,
	 * however ARM64's memory_add_physaddr_to_nid() always return 0 and
	 * DM_MEM_HOT_ADD_REQUEST doesn't have the NUMA node information for
	 * add_memory().
	 */
1703 if (IS_ENABLED(CONFIG_ARM64)) {
1704 pr_info("Memory hot add disabled on ARM64\n");
1705 return 0;
1706 }
1707
1708 return 1;
1709 }
1710
1711 static int balloon_connect_vsp(struct hv_device *dev)
1712 {
1713 struct dm_version_request version_req;
1714 struct dm_capabilities cap_msg;
1715 unsigned long t;
1716 int ret;
1717
	/*
	 * max_pkt_size should be large enough for one vmbus packet header plus
	 * our receive buffer size. Hyper-V sends messages up to
	 * HV_HYP_PAGE_SIZE bytes long on balloon channel.
	 */
1723 dev->channel->max_pkt_size = HV_HYP_PAGE_SIZE * 2;
1724
1725 ret = vmbus_open(dev->channel, dm_ring_size, dm_ring_size, NULL, 0,
1726 balloon_onchannelcallback, dev);
1727 if (ret)
1728 return ret;
1729
	/*
	 * Initiate the hand shake with the host and negotiate
	 * a version that the host can support. We start with the
	 * highest version number and go down if the host cannot
	 * support it.
	 */
1736 memset(&version_req, 0, sizeof(struct dm_version_request));
1737 version_req.hdr.type = DM_VERSION_REQUEST;
1738 version_req.hdr.size = sizeof(struct dm_version_request);
1739 version_req.hdr.trans_id = atomic_inc_return(&trans_id);
1740 version_req.version.version = DYNMEM_PROTOCOL_VERSION_WIN10;
1741 version_req.is_last_attempt = 0;
1742 dm_device.version = version_req.version.version;
1743
1744 ret = vmbus_sendpacket(dev->channel, &version_req,
1745 sizeof(struct dm_version_request),
1746 (unsigned long)NULL, VM_PKT_DATA_INBAND, 0);
1747 if (ret)
1748 goto out;
1749
1750 t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ);
1751 if (t == 0) {
1752 ret = -ETIMEDOUT;
1753 goto out;
1754 }
1755
	/*
	 * If we could not negotiate a compatible version with the host
	 * fail the probe function.
	 */
1760 if (dm_device.state == DM_INIT_ERROR) {
1761 ret = -EPROTO;
1762 goto out;
1763 }
1764
1765 pr_info("Using Dynamic Memory protocol version %u.%u\n",
1766 DYNMEM_MAJOR_VERSION(dm_device.version),
1767 DYNMEM_MINOR_VERSION(dm_device.version));
1768
	/*
	 * Now submit our capabilities to the host.
	 */
1772 memset(&cap_msg, 0, sizeof(struct dm_capabilities));
1773 cap_msg.hdr.type = DM_CAPABILITIES_REPORT;
1774 cap_msg.hdr.size = sizeof(struct dm_capabilities);
1775 cap_msg.hdr.trans_id = atomic_inc_return(&trans_id);
1776
	/*
	 * When hibernation (i.e. virtual ACPI S4 state) is enabled, the host
	 * currently still requires the bits to be set, so we have to add code
	 * to fail the host's hot-add and balloon up/down requests, if any.
	 */
1782 cap_msg.caps.cap_bits.balloon = ballooning_enabled();
1783 cap_msg.caps.cap_bits.hot_add = hot_add_enabled();
1784
	/*
	 * Specify our alignment requirements as it relates
	 * memory hot-add. Specify 128MB alignment.
	 */
1789 cap_msg.caps.cap_bits.hot_add_alignment = 7;
1790
	/*
	 * Currently the host does not use these
	 * values but we set them anyway.
	 */
1796 cap_msg.min_page_cnt = 0;
1797 cap_msg.max_page_number = -1;
1798
1799 ret = vmbus_sendpacket(dev->channel, &cap_msg,
1800 sizeof(struct dm_capabilities),
1801 (unsigned long)NULL, VM_PKT_DATA_INBAND, 0);
1802 if (ret)
1803 goto out;
1804
1805 t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ);
1806 if (t == 0) {
1807 ret = -ETIMEDOUT;
1808 goto out;
1809 }
1810
	/*
	 * If the host does not like our capabilities,
	 * fail the probe function.
	 */
1815 if (dm_device.state == DM_INIT_ERROR) {
1816 ret = -EPROTO;
1817 goto out;
1818 }
1819
1820 return 0;
1821 out:
1822 vmbus_close(dev->channel);
1823 return ret;
1824 }
1825
/*
 * Debugfs interface exposing balloon driver statistics.
 */
1829 #ifdef CONFIG_DEBUG_FS

/**
 * hv_balloon_debug_show - shows statistics of balloon operations.
 * @f: pointer to the &struct seq_file.
 * @offset: ignored.
 *
 * Provides the statistics that can be accessed in hv-balloon debugfs file.
 *
 * Return: zero on success or an error code.
 */
1840 static int hv_balloon_debug_show(struct seq_file *f, void *offset)
1841 {
1842 struct hv_dynmem_device *dm = f->private;
1843 char *sname;
1844
1845 seq_printf(f, "%-22s: %u.%u\n", "host_version",
1846 DYNMEM_MAJOR_VERSION(dm->version),
1847 DYNMEM_MINOR_VERSION(dm->version));
1848
1849 seq_printf(f, "%-22s:", "capabilities");
1850 if (ballooning_enabled())
1851 seq_puts(f, " enabled");
1852
1853 if (hot_add_enabled())
1854 seq_puts(f, " hot_add");
1855
1856 seq_puts(f, "\n");
1857
1858 seq_printf(f, "%-22s: %u", "state", dm->state);
1859 switch (dm->state) {
1860 case DM_INITIALIZING:
1861 sname = "Initializing";
1862 break;
1863 case DM_INITIALIZED:
1864 sname = "Initialized";
1865 break;
1866 case DM_BALLOON_UP:
1867 sname = "Balloon Up";
1868 break;
1869 case DM_BALLOON_DOWN:
1870 sname = "Balloon Down";
1871 break;
1872 case DM_HOT_ADD:
1873 sname = "Hot Add";
1874 break;
1875 case DM_INIT_ERROR:
1876 sname = "Error";
1877 break;
1878 default:
1879 sname = "Unknown";
1880 }
1881 seq_printf(f, " (%s)\n", sname);
1882
	/* Hyper-V page size */
1884 seq_printf(f, "%-22s: %ld\n", "page_size", HV_HYP_PAGE_SIZE);
1885
	/* Pages added with hot-add */
1887 seq_printf(f, "%-22s: %u\n", "pages_added", dm->num_pages_added);
1888
	/* Pages that are "onlined"/used from pages_added */
1890 seq_printf(f, "%-22s: %u\n", "pages_onlined", dm->num_pages_onlined);
1891
	/* Pages we have given back to host */
1893 seq_printf(f, "%-22s: %u\n", "pages_ballooned", dm->num_pages_ballooned);
1894
1895 seq_printf(f, "%-22s: %lu\n", "total_pages_committed",
1896 get_pages_committed(dm));
1897
1898 seq_printf(f, "%-22s: %llu\n", "max_dynamic_page_count",
1899 dm->max_dynamic_page_count);
1900
1901 return 0;
1902 }
1903
1904 DEFINE_SHOW_ATTRIBUTE(hv_balloon_debug);
1905
1906 static void hv_balloon_debugfs_init(struct hv_dynmem_device *b)
1907 {
1908 debugfs_create_file("hv-balloon", 0444, NULL, b,
1909 &hv_balloon_debug_fops);
1910 }
1911
1912 static void hv_balloon_debugfs_exit(struct hv_dynmem_device *b)
1913 {
1914 debugfs_remove(debugfs_lookup("hv-balloon", NULL));
1915 }
1916
1917 #else
1918
1919 static inline void hv_balloon_debugfs_init(struct hv_dynmem_device *b)
1920 {
1921 }
1922
1923 static inline void hv_balloon_debugfs_exit(struct hv_dynmem_device *b)
1924 {
1925 }
1926
1927 #endif
1928
1929 static int balloon_probe(struct hv_device *dev,
1930 const struct hv_vmbus_device_id *dev_id)
1931 {
1932 int ret;
1933
1934 allow_hibernation = hv_is_hibernation_supported();
1935 if (allow_hibernation)
1936 hot_add = false;
1937
1938 #ifdef CONFIG_MEMORY_HOTPLUG
1939 do_hot_add = hot_add;
1940 #else
1941 do_hot_add = false;
1942 #endif
1943 dm_device.dev = dev;
1944 dm_device.state = DM_INITIALIZING;
1945 dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN8;
1946 init_completion(&dm_device.host_event);
1947 init_completion(&dm_device.config_event);
1948 INIT_LIST_HEAD(&dm_device.ha_region_list);
1949 spin_lock_init(&dm_device.ha_lock);
1950 INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
1951 INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req);
1952 dm_device.host_specified_ha_region = false;
1953
1954 #ifdef CONFIG_MEMORY_HOTPLUG
1955 set_online_page_callback(&hv_online_page);
1956 init_completion(&dm_device.ol_waitevent);
1957 register_memory_notifier(&hv_memory_nb);
1958 #endif
1959
1960 hv_set_drvdata(dev, &dm_device);
1961
1962 ret = balloon_connect_vsp(dev);
1963 if (ret != 0)
1964 goto connect_error;
1965
1966 enable_page_reporting();
1967 dm_device.state = DM_INITIALIZED;
1968
1969 dm_device.thread =
1970 kthread_run(dm_thread_func, &dm_device, "hv_balloon");
1971 if (IS_ERR(dm_device.thread)) {
1972 ret = PTR_ERR(dm_device.thread);
1973 goto probe_error;
1974 }
1975
1976 hv_balloon_debugfs_init(&dm_device);
1977
1978 return 0;
1979
1980 probe_error:
1981 dm_device.state = DM_INIT_ERROR;
1982 dm_device.thread = NULL;
1983 disable_page_reporting();
1984 vmbus_close(dev->channel);
1985 connect_error:
1986 #ifdef CONFIG_MEMORY_HOTPLUG
1987 unregister_memory_notifier(&hv_memory_nb);
1988 restore_online_page_callback(&hv_online_page);
1989 #endif
1990 return ret;
1991 }
1992
1993 static int balloon_remove(struct hv_device *dev)
1994 {
1995 struct hv_dynmem_device *dm = hv_get_drvdata(dev);
1996 struct hv_hotadd_state *has, *tmp;
1997 struct hv_hotadd_gap *gap, *tmp_gap;
1998 unsigned long flags;
1999
2000 if (dm->num_pages_ballooned != 0)
2001 pr_warn("Ballooned pages: %d\n", dm->num_pages_ballooned);
2002
2003 hv_balloon_debugfs_exit(dm);
2004
2005 cancel_work_sync(&dm->balloon_wrk.wrk);
2006 cancel_work_sync(&dm->ha_wrk.wrk);
2007
2008 kthread_stop(dm->thread);
2009
	/*
	 * This is to handle the case when balloon_resume()
	 * call has failed and some cleanup has been done as
	 * a part of the error handling.
	 */
2015 if (dm_device.state != DM_INIT_ERROR) {
2016 disable_page_reporting();
2017 vmbus_close(dev->channel);
2018 #ifdef CONFIG_MEMORY_HOTPLUG
2019 unregister_memory_notifier(&hv_memory_nb);
2020 restore_online_page_callback(&hv_online_page);
2021 #endif
2022 }
2023
2024 spin_lock_irqsave(&dm_device.ha_lock, flags);
2025 list_for_each_entry_safe(has, tmp, &dm->ha_region_list, list) {
2026 list_for_each_entry_safe(gap, tmp_gap, &has->gap_list, list) {
2027 list_del(&gap->list);
2028 kfree(gap);
2029 }
2030 list_del(&has->list);
2031 kfree(has);
2032 }
2033 spin_unlock_irqrestore(&dm_device.ha_lock, flags);
2034
2035 return 0;
2036 }
2037
2038 static int balloon_suspend(struct hv_device *hv_dev)
2039 {
2040 struct hv_dynmem_device *dm = hv_get_drvdata(hv_dev);
2041
2042 tasklet_disable(&hv_dev->channel->callback_event);
2043
2044 cancel_work_sync(&dm->balloon_wrk.wrk);
2045 cancel_work_sync(&dm->ha_wrk.wrk);
2046
2047 if (dm->thread) {
2048 kthread_stop(dm->thread);
2049 dm->thread = NULL;
2050 vmbus_close(hv_dev->channel);
2051 }
2052
2053 tasklet_enable(&hv_dev->channel->callback_event);
2054
2055 return 0;
2056
2057 }
2058
2059 static int balloon_resume(struct hv_device *dev)
2060 {
2061 int ret;
2062
2063 dm_device.state = DM_INITIALIZING;
2064
2065 ret = balloon_connect_vsp(dev);
2066
2067 if (ret != 0)
2068 goto out;
2069
2070 dm_device.thread =
2071 kthread_run(dm_thread_func, &dm_device, "hv_balloon");
2072 if (IS_ERR(dm_device.thread)) {
2073 ret = PTR_ERR(dm_device.thread);
2074 dm_device.thread = NULL;
2075 goto close_channel;
2076 }
2077
2078 dm_device.state = DM_INITIALIZED;
2079 return 0;
2080 close_channel:
2081 vmbus_close(dev->channel);
2082 out:
2083 dm_device.state = DM_INIT_ERROR;
2084 disable_page_reporting();
2085 #ifdef CONFIG_MEMORY_HOTPLUG
2086 unregister_memory_notifier(&hv_memory_nb);
2087 restore_online_page_callback(&hv_online_page);
2088 #endif
2089 return ret;
2090 }
2091
2092 static const struct hv_vmbus_device_id id_table[] = {
	/* Dynamic Memory Class ID */
	/* 525074DC-8985-46e2-8057-A307DC18A502 */
2095 { HV_DM_GUID, },
2096 { },
2097 };
2098
2099 MODULE_DEVICE_TABLE(vmbus, id_table);
2100
2101 static struct hv_driver balloon_drv = {
2102 .name = "hv_balloon",
2103 .id_table = id_table,
2104 .probe = balloon_probe,
2105 .remove = balloon_remove,
2106 .suspend = balloon_suspend,
2107 .resume = balloon_resume,
2108 .driver = {
2109 .probe_type = PROBE_PREFER_ASYNCHRONOUS,
2110 },
2111 };
2112
2113 static int __init init_balloon_drv(void)
2114 {
2115
2116 return vmbus_driver_register(&balloon_drv);
2117 }
2118
2119 module_init(init_balloon_drv);
2120
2121 MODULE_DESCRIPTION("Hyper-V Balloon");
2122 MODULE_LICENSE("GPL");