0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038 #include <inttypes.h>
0039 #include <limits.h>
0040 #include <pthread.h>
0041 #include <sys/mman.h>
0042 #include <sys/types.h>
0043 #include <sys/stat.h>
0044
0045 #include "kvm_util.h"
0046 #include "test_util.h"
0047 #include "perf_test_util.h"
0048 #include "guest_modes.h"
0049
0050
/* Global iteration counter, bumped by the main thread; polled by vCPU threads. */
static int iteration;

/* What each vCPU thread should do during the current iteration. */
static enum {
	/* Run the guest and let it access its memory region. */
	ITERATION_ACCESS_MEMORY,
	/* Mark the vCPU's memory idle via /sys/kernel/mm/page_idle/bitmap. */
	ITERATION_MARK_IDLE,
} iteration_work;

/* Set by the main thread to tell vCPU threads to exit. */
static bool done;

/* Last iteration each vCPU finished; written by vCPU threads, polled by main. */
static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];

/* If true, all vCPUs access a single shared region instead of per-vCPU slices. */
static bool overlap_memory_access;

/* Command-line configurable test parameters. */
struct test_params {
	/* Backing source for guest memory (anonymous, hugetlb, ...). */
	enum vm_mem_backing_src_type backing_src;

	/* Size of the memory region each vCPU accesses. */
	uint64_t vcpu_memory_bytes;

	/* Number of vCPUs (and vCPU threads) to run. */
	int nr_vcpus;
};
0080
/*
 * Read the index'th uint64_t from fd, asserting that the full 8 bytes are
 * returned.  filename is used only to make the assertion message useful.
 */
static uint64_t pread_uint64(int fd, const char *filename, uint64_t index)
{
	uint64_t value;
	off_t offset = index * sizeof(value);
	ssize_t nread;

	nread = pread(fd, &value, sizeof(value), offset);
	TEST_ASSERT(nread == sizeof(value),
		    "pread from %s offset 0x%" PRIx64 " failed!",
		    filename, offset);

	return value;
}
0093
/* /proc/pid/pagemap entry layout: bit 63 = page present, bits 0-54 = PFN. */
#define PAGEMAP_PRESENT (1ULL << 63)
#define PAGEMAP_PFN_MASK ((1ULL << 55) - 1)

/*
 * Translate a guest virtual address to the host physical frame number backing
 * it, via the host virtual address and /proc/self/pagemap.  Returns 0 if the
 * page is not present.  Note pagemap reports PFN 0 for all pages when the
 * caller lacks CAP_SYS_ADMIN, hence the requirement check below.
 */
static uint64_t lookup_pfn(int pagemap_fd, struct kvm_vm *vm, uint64_t gva)
{
	uint64_t hva = (uint64_t) addr_gva2hva(vm, gva);
	uint64_t entry;
	uint64_t pfn;

	/* One 64-bit pagemap entry per host page. */
	entry = pread_uint64(pagemap_fd, "pagemap", hva / getpagesize());
	if (!(entry & PAGEMAP_PRESENT))
		return 0;

	pfn = entry & PAGEMAP_PFN_MASK;
	__TEST_REQUIRE(pfn, "Looking up PFNs requires CAP_SYS_ADMIN");

	return pfn;
}
0112
/*
 * Return whether the idle bit is set for pfn in the page_idle bitmap.  The
 * bitmap packs 64 pages per uint64_t.
 */
static bool is_page_idle(int page_idle_fd, uint64_t pfn)
{
	uint64_t word = pread_uint64(page_idle_fd, "page_idle", pfn / 64);
	uint64_t mask = 1ULL << (pfn % 64);

	return (word & mask) != 0;
}
0119
/*
 * Set the idle bit for pfn by writing the 64-bit word containing it to the
 * page_idle bitmap (writes of set bits mark the corresponding pages idle).
 */
static void mark_page_idle(int page_idle_fd, uint64_t pfn)
{
	uint64_t bit = 1ULL << (pfn % 64);
	off_t offset = 8 * (pfn / 64);

	TEST_ASSERT(pwrite(page_idle_fd, &bit, 8, offset) == 8,
		    "Set page_idle bits for PFN 0x%" PRIx64, pfn);
}
0127
0128 static void mark_vcpu_memory_idle(struct kvm_vm *vm,
0129 struct perf_test_vcpu_args *vcpu_args)
0130 {
0131 int vcpu_idx = vcpu_args->vcpu_idx;
0132 uint64_t base_gva = vcpu_args->gva;
0133 uint64_t pages = vcpu_args->pages;
0134 uint64_t page;
0135 uint64_t still_idle = 0;
0136 uint64_t no_pfn = 0;
0137 int page_idle_fd;
0138 int pagemap_fd;
0139
0140
0141 if (overlap_memory_access && vcpu_idx)
0142 return;
0143
0144 page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR);
0145 TEST_ASSERT(page_idle_fd > 0, "Failed to open page_idle.");
0146
0147 pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
0148 TEST_ASSERT(pagemap_fd > 0, "Failed to open pagemap.");
0149
0150 for (page = 0; page < pages; page++) {
0151 uint64_t gva = base_gva + page * perf_test_args.guest_page_size;
0152 uint64_t pfn = lookup_pfn(pagemap_fd, vm, gva);
0153
0154 if (!pfn) {
0155 no_pfn++;
0156 continue;
0157 }
0158
0159 if (is_page_idle(page_idle_fd, pfn)) {
0160 still_idle++;
0161 continue;
0162 }
0163
0164 mark_page_idle(page_idle_fd, pfn);
0165 }
0166
0167
0168
0169
0170
0171 TEST_ASSERT(no_pfn < pages / 100,
0172 "vCPU %d: No PFN for %" PRIu64 " out of %" PRIu64 " pages.",
0173 vcpu_idx, no_pfn, pages);
0174
0175
0176
0177
0178
0179
0180
0181
0182
0183
0184
0185
0186
0187
0188 if (still_idle < pages / 10)
0189 printf("WARNING: vCPU%d: Too many pages still idle (%" PRIu64
0190 "out of %" PRIu64 "), this will affect performance results"
0191 ".\n",
0192 vcpu_idx, still_idle, pages);
0193
0194 close(page_idle_fd);
0195 close(pagemap_fd);
0196 }
0197
0198 static void assert_ucall(struct kvm_vcpu *vcpu, uint64_t expected_ucall)
0199 {
0200 struct ucall uc;
0201 uint64_t actual_ucall = get_ucall(vcpu, &uc);
0202
0203 TEST_ASSERT(expected_ucall == actual_ucall,
0204 "Guest exited unexpectedly (expected ucall %" PRIu64
0205 ", got %" PRIu64 ")",
0206 expected_ucall, actual_ucall);
0207 }
0208
0209 static bool spin_wait_for_next_iteration(int *current_iteration)
0210 {
0211 int last_iteration = *current_iteration;
0212
0213 do {
0214 if (READ_ONCE(done))
0215 return false;
0216
0217 *current_iteration = READ_ONCE(iteration);
0218 } while (last_iteration == *current_iteration);
0219
0220 return true;
0221 }
0222
0223 static void vcpu_thread_main(struct perf_test_vcpu_args *vcpu_args)
0224 {
0225 struct kvm_vcpu *vcpu = vcpu_args->vcpu;
0226 struct kvm_vm *vm = perf_test_args.vm;
0227 int vcpu_idx = vcpu_args->vcpu_idx;
0228 int current_iteration = 0;
0229
0230 while (spin_wait_for_next_iteration(¤t_iteration)) {
0231 switch (READ_ONCE(iteration_work)) {
0232 case ITERATION_ACCESS_MEMORY:
0233 vcpu_run(vcpu);
0234 assert_ucall(vcpu, UCALL_SYNC);
0235 break;
0236 case ITERATION_MARK_IDLE:
0237 mark_vcpu_memory_idle(vm, vcpu_args);
0238 break;
0239 };
0240
0241 vcpu_last_completed_iteration[vcpu_idx] = current_iteration;
0242 }
0243 }
0244
0245 static void spin_wait_for_vcpu(int vcpu_idx, int target_iteration)
0246 {
0247 while (READ_ONCE(vcpu_last_completed_iteration[vcpu_idx]) !=
0248 target_iteration) {
0249 continue;
0250 }
0251 }
0252
/* Whether an access-memory iteration should read or write guest memory. */
enum access_type {
	ACCESS_READ,
	ACCESS_WRITE,
};
0258
0259 static void run_iteration(struct kvm_vm *vm, int nr_vcpus, const char *description)
0260 {
0261 struct timespec ts_start;
0262 struct timespec ts_elapsed;
0263 int next_iteration, i;
0264
0265
0266 next_iteration = ++iteration;
0267
0268 clock_gettime(CLOCK_MONOTONIC, &ts_start);
0269
0270
0271 for (i = 0; i < nr_vcpus; i++)
0272 spin_wait_for_vcpu(i, next_iteration);
0273
0274 ts_elapsed = timespec_elapsed(ts_start);
0275 pr_info("%-30s: %ld.%09lds\n",
0276 description, ts_elapsed.tv_sec, ts_elapsed.tv_nsec);
0277 }
0278
0279 static void access_memory(struct kvm_vm *vm, int nr_vcpus,
0280 enum access_type access, const char *description)
0281 {
0282 perf_test_set_wr_fract(vm, (access == ACCESS_READ) ? INT_MAX : 1);
0283 iteration_work = ITERATION_ACCESS_MEMORY;
0284 run_iteration(vm, nr_vcpus, description);
0285 }
0286
/*
 * Run one iteration in which each vCPU thread marks its own slice of guest
 * memory idle via the page_idle bitmap.  The pr_debug below flags this as
 * slow -- presumably because it walks pagemap/page_idle one page at a time
 * (see mark_vcpu_memory_idle); confirm before relying on timing here.
 */
static void mark_memory_idle(struct kvm_vm *vm, int nr_vcpus)
{
	pr_debug("Marking VM memory idle (slow)...\n");
	iteration_work = ITERATION_MARK_IDLE;
	run_iteration(vm, nr_vcpus, "Mark memory idle");
}
0299
/*
 * Full test sequence for one guest mode: create the VM, populate memory,
 * time accesses to populated memory as a baseline, then time accesses to
 * memory that was just marked idle (exercising access tracking), and tear
 * everything down.  The iteration ordering is load-bearing: each "idle"
 * access phase must immediately follow a mark_memory_idle pass.
 */
static void run_test(enum vm_guest_mode mode, void *arg)
{
	struct test_params *params = arg;
	struct kvm_vm *vm;
	int nr_vcpus = params->nr_vcpus;

	vm = perf_test_create_vm(mode, nr_vcpus, params->vcpu_memory_bytes, 1,
				 params->backing_src, !overlap_memory_access);

	perf_test_start_vcpu_threads(nr_vcpus, vcpu_thread_main);

	pr_info("\n");
	access_memory(vm, nr_vcpus, ACCESS_WRITE, "Populating memory");

	/* Baseline timings against already-populated (non-idle) memory. */
	access_memory(vm, nr_vcpus, ACCESS_WRITE, "Writing to populated memory");
	access_memory(vm, nr_vcpus, ACCESS_READ, "Reading from populated memory");

	/* Timings against memory that was just marked idle. */
	mark_memory_idle(vm, nr_vcpus);
	access_memory(vm, nr_vcpus, ACCESS_WRITE, "Writing to idle memory");
	mark_memory_idle(vm, nr_vcpus);
	access_memory(vm, nr_vcpus, ACCESS_READ, "Reading from idle memory");

	/* Tell the vCPU threads to exit, then reap them and the VM. */
	done = true;

	perf_test_join_vcpu_threads(nr_vcpus);
	perf_test_destroy_vm(vm);
}
0330
/* Print usage information and exit; never returns. */
static void help(char *name)
{
	puts("");
	printf("usage: %s [-h] [-m mode] [-b vcpu_bytes] [-v vcpus] [-o] [-s mem_type]\n",
	       name);
	puts("");
	/* Fix: the -h line was missing its trailing newline, so the guest
	 * modes help ran onto the same output line.
	 */
	printf(" -h: Display this help message.\n");
	guest_modes_help();
	printf(" -b: specify the size of the memory region which should be\n"
	       "     dirtied by each vCPU. e.g. 10M or 3G.\n"
	       "     (default: 1G)\n");
	printf(" -v: specify the number of vCPUs to run.\n");
	printf(" -o: Overlap guest memory accesses instead of partitioning\n"
	       "     them into a separate region of memory for each vCPU.\n");
	backing_src_help("-s");
	puts("");
	exit(0);
}
0349
0350 int main(int argc, char *argv[])
0351 {
0352 struct test_params params = {
0353 .backing_src = DEFAULT_VM_MEM_SRC,
0354 .vcpu_memory_bytes = DEFAULT_PER_VCPU_MEM_SIZE,
0355 .nr_vcpus = 1,
0356 };
0357 int page_idle_fd;
0358 int opt;
0359
0360 guest_modes_append_default();
0361
0362 while ((opt = getopt(argc, argv, "hm:b:v:os:")) != -1) {
0363 switch (opt) {
0364 case 'm':
0365 guest_modes_cmdline(optarg);
0366 break;
0367 case 'b':
0368 params.vcpu_memory_bytes = parse_size(optarg);
0369 break;
0370 case 'v':
0371 params.nr_vcpus = atoi(optarg);
0372 break;
0373 case 'o':
0374 overlap_memory_access = true;
0375 break;
0376 case 's':
0377 params.backing_src = parse_backing_src_type(optarg);
0378 break;
0379 case 'h':
0380 default:
0381 help(argv[0]);
0382 break;
0383 }
0384 }
0385
0386 page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR);
0387 __TEST_REQUIRE(page_idle_fd >= 0,
0388 "CONFIG_IDLE_PAGE_TRACKING is not enabled");
0389 close(page_idle_fd);
0390
0391 for_each_guest_mode(run_test, ¶ms);
0392
0393 return 0;
0394 }