Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * xapic_ipi_test
0004  *
0005  * Copyright (C) 2020, Google LLC.
0006  *
0007  * This work is licensed under the terms of the GNU GPL, version 2.
0008  *
0009  * Test that when the APIC is in xAPIC mode, a vCPU can send an IPI to wake
0010  * another vCPU that is halted when KVM's backing page for the APIC access
0011  * address has been moved by mm.
0012  *
0013  * The test starts two vCPUs: one that sends IPIs and one that continually
0014  * executes HLT. The sender checks that the halter has woken from the HLT and
0015  * has reentered HLT before sending the next IPI. While the vCPUs are running,
0016  * the host continually calls migrate_pages to move all of the process' pages
0017  * amongst the available numa nodes on the machine.
0018  *
 * Migration is a command line option. When used on non-NUMA machines the test
 * exits with an error. The test is still useful on non-NUMA for testing IPIs.
0021  */
0022 
0023 #define _GNU_SOURCE /* for program_invocation_short_name */
0024 #include <getopt.h>
0025 #include <pthread.h>
0026 #include <inttypes.h>
0027 #include <string.h>
0028 #include <time.h>
0029 
0030 #include "kvm_util.h"
0031 #include "numaif.h"
0032 #include "processor.h"
0033 #include "test_util.h"
0034 #include "vmx.h"
0035 
/* Number of seconds the test runs unless told otherwise */
#define DEFAULT_RUN_SECS 3

/* Pause, in microseconds, between successive migrate_pages calls by default */
#define DEFAULT_DELAY_USECS 500000

/*
 * Interrupt vector used for the IPI that the sender vCPU directs at the
 * halting vCPU. The value is arbitrary; it was picked for its alternating
 * bit pattern and any other value should work as well.
 */
#define IPI_VECTOR 0xa5

/*
 * Bumped by the IPI handler each time it runs, which lets the sender see
 * that its IPI actually reached the destination.
 */
static volatile uint64_t ipis_rcvd;
0054 
/*
 * State shared between the host main thread and the two vCPUs. The host
 * zeroes it before any vCPU runs.
 */
struct test_data_page {
    /* APIC ID of the halter, published by the halter for the sender */
    uint32_t halter_apic_id;

    /* Incremented by the halter right before each HLT */
    volatile uint64_t hlt_count;

    /* Incremented by the halter right after each HLT returns */
    volatile uint64_t wake_count;

    /* Incremented by the sender after each IPI it issues */
    uint64_t ipis_sent;

    /* Host-side bookkeeping of migrate_pages calls and clean completions */
    uint64_t migrations_attempted;
    uint64_t migrations_completed;

    /* ICR / ICR2 values the sender uses, kept for the final report */
    uint32_t icr;
    uint32_t icr2;

    /* Halter task/processor priority, sampled before every HLT */
    uint32_t halter_tpr;
    uint32_t halter_ppr;

    /*
     *  Local version register of the halter, kept as a cross-check that
     *  APIC access worked. It should equal what KVM reports (APIC_VERSION
     *  in arch/x86/kvm/lapic.c). When the test fails, compare the two to
     *  find out whether APIC access exits are working.
     */
    uint32_t halter_lvr;
};
0076 
/* Everything a vCPU host thread needs; one instance per vCPU */
struct thread_params {
    /* Host mapping of the page shared with the guest */
    struct test_data_page *data;

    /* The vCPU this thread runs */
    struct kvm_vcpu *vcpu;

    /* Host address of the guest's ipis_rcvd global */
    uint64_t *pipis_rcvd;
};
0082 
0083 void verify_apic_base_addr(void)
0084 {
0085     uint64_t msr = rdmsr(MSR_IA32_APICBASE);
0086     uint64_t base = GET_APIC_BASE(msr);
0087 
0088     GUEST_ASSERT(base == APIC_DEFAULT_GPA);
0089 }
0090 
0091 static void halter_guest_code(struct test_data_page *data)
0092 {
0093     verify_apic_base_addr();
0094     xapic_enable();
0095 
0096     data->halter_apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID));
0097     data->halter_lvr = xapic_read_reg(APIC_LVR);
0098 
0099     /*
0100      * Loop forever HLTing and recording halts & wakes. Disable interrupts
0101      * each time around to minimize window between signaling the pending
0102      * halt to the sender vCPU and executing the halt. No need to disable on
0103      * first run as this vCPU executes first and the host waits for it to
0104      * signal going into first halt before starting the sender vCPU. Record
0105      * TPR and PPR for diagnostic purposes in case the test fails.
0106      */
0107     for (;;) {
0108         data->halter_tpr = xapic_read_reg(APIC_TASKPRI);
0109         data->halter_ppr = xapic_read_reg(APIC_PROCPRI);
0110         data->hlt_count++;
0111         asm volatile("sti; hlt; cli");
0112         data->wake_count++;
0113     }
0114 }
0115 
0116 /*
0117  * Runs on halter vCPU when IPI arrives. Write an arbitrary non-zero value to
0118  * enable diagnosing errant writes to the APIC access address backing page in
0119  * case of test failure.
0120  */
0121 static void guest_ipi_handler(struct ex_regs *regs)
0122 {
0123     ipis_rcvd++;
0124     xapic_write_reg(APIC_EOI, 77);
0125 }
0126 
0127 static void sender_guest_code(struct test_data_page *data)
0128 {
0129     uint64_t last_wake_count;
0130     uint64_t last_hlt_count;
0131     uint64_t last_ipis_rcvd_count;
0132     uint32_t icr_val;
0133     uint32_t icr2_val;
0134     uint64_t tsc_start;
0135 
0136     verify_apic_base_addr();
0137     xapic_enable();
0138 
0139     /*
0140      * Init interrupt command register for sending IPIs
0141      *
0142      * Delivery mode=fixed, per SDM:
0143      *   "Delivers the interrupt specified in the vector field to the target
0144      *    processor."
0145      *
0146      * Destination mode=physical i.e. specify target by its local APIC
0147      * ID. This vCPU assumes that the halter vCPU has already started and
0148      * set data->halter_apic_id.
0149      */
0150     icr_val = (APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR);
0151     icr2_val = SET_APIC_DEST_FIELD(data->halter_apic_id);
0152     data->icr = icr_val;
0153     data->icr2 = icr2_val;
0154 
0155     last_wake_count = data->wake_count;
0156     last_hlt_count = data->hlt_count;
0157     last_ipis_rcvd_count = ipis_rcvd;
0158     for (;;) {
0159         /*
0160          * Send IPI to halter vCPU.
0161          * First IPI can be sent unconditionally because halter vCPU
0162          * starts earlier.
0163          */
0164         xapic_write_reg(APIC_ICR2, icr2_val);
0165         xapic_write_reg(APIC_ICR, icr_val);
0166         data->ipis_sent++;
0167 
0168         /*
0169          * Wait up to ~1 sec for halter to indicate that it has:
0170          * 1. Received the IPI
0171          * 2. Woken up from the halt
0172          * 3. Gone back into halt
0173          * Current CPUs typically run at 2.x Ghz which is ~2
0174          * billion ticks per second.
0175          */
0176         tsc_start = rdtsc();
0177         while (rdtsc() - tsc_start < 2000000000) {
0178             if ((ipis_rcvd != last_ipis_rcvd_count) &&
0179                 (data->wake_count != last_wake_count) &&
0180                 (data->hlt_count != last_hlt_count))
0181                 break;
0182         }
0183 
0184         GUEST_ASSERT((ipis_rcvd != last_ipis_rcvd_count) &&
0185                  (data->wake_count != last_wake_count) &&
0186                  (data->hlt_count != last_hlt_count));
0187 
0188         last_wake_count = data->wake_count;
0189         last_hlt_count = data->hlt_count;
0190         last_ipis_rcvd_count = ipis_rcvd;
0191     }
0192 }
0193 
0194 static void *vcpu_thread(void *arg)
0195 {
0196     struct thread_params *params = (struct thread_params *)arg;
0197     struct kvm_vcpu *vcpu = params->vcpu;
0198     struct ucall uc;
0199     int old;
0200     int r;
0201     unsigned int exit_reason;
0202 
0203     r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
0204     TEST_ASSERT(r == 0,
0205             "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
0206             vcpu->id, r);
0207 
0208     fprintf(stderr, "vCPU thread running vCPU %u\n", vcpu->id);
0209     vcpu_run(vcpu);
0210     exit_reason = vcpu->run->exit_reason;
0211 
0212     TEST_ASSERT(exit_reason == KVM_EXIT_IO,
0213             "vCPU %u exited with unexpected exit reason %u-%s, expected KVM_EXIT_IO",
0214             vcpu->id, exit_reason, exit_reason_str(exit_reason));
0215 
0216     if (get_ucall(vcpu, &uc) == UCALL_ABORT) {
0217         TEST_ASSERT(false,
0218                 "vCPU %u exited with error: %s.\n"
0219                 "Sending vCPU sent %lu IPIs to halting vCPU\n"
0220                 "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
0221                 "Halter TPR=%#x PPR=%#x LVR=%#x\n"
0222                 "Migrations attempted: %lu\n"
0223                 "Migrations completed: %lu\n",
0224                 vcpu->id, (const char *)uc.args[0],
0225                 params->data->ipis_sent, params->data->hlt_count,
0226                 params->data->wake_count,
0227                 *params->pipis_rcvd, params->data->halter_tpr,
0228                 params->data->halter_ppr, params->data->halter_lvr,
0229                 params->data->migrations_attempted,
0230                 params->data->migrations_completed);
0231     }
0232 
0233     return NULL;
0234 }
0235 
0236 static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
0237 {
0238     void *retval;
0239     int r;
0240 
0241     r = pthread_cancel(thread);
0242     TEST_ASSERT(r == 0,
0243             "pthread_cancel on vcpu_id=%d failed with errno=%d",
0244             vcpu->id, r);
0245 
0246     r = pthread_join(thread, &retval);
0247     TEST_ASSERT(r == 0,
0248             "pthread_join on vcpu_id=%d failed with errno=%d",
0249             vcpu->id, r);
0250     TEST_ASSERT(retval == PTHREAD_CANCELED,
0251             "expected retval=%p, got %p", PTHREAD_CANCELED,
0252             retval);
0253 }
0254 
0255 void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
0256            uint64_t *pipis_rcvd)
0257 {
0258     long pages_not_moved;
0259     unsigned long nodemask = 0;
0260     unsigned long nodemasks[sizeof(nodemask) * 8];
0261     int nodes = 0;
0262     time_t start_time, last_update, now;
0263     time_t interval_secs = 1;
0264     int i, r;
0265     int from, to;
0266     unsigned long bit;
0267     uint64_t hlt_count;
0268     uint64_t wake_count;
0269     uint64_t ipis_sent;
0270 
0271     fprintf(stderr, "Calling migrate_pages every %d microseconds\n",
0272         delay_usecs);
0273 
0274     /* Get set of first 64 numa nodes available */
0275     r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
0276               0, MPOL_F_MEMS_ALLOWED);
0277     TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno);
0278 
0279     fprintf(stderr, "Numa nodes found amongst first %lu possible nodes "
0280         "(each 1-bit indicates node is present): %#lx\n",
0281         sizeof(nodemask) * 8, nodemask);
0282 
0283     /* Init array of masks containing a single-bit in each, one for each
0284      * available node. migrate_pages called below requires specifying nodes
0285      * as bit masks.
0286      */
0287     for (i = 0, bit = 1; i < sizeof(nodemask) * 8; i++, bit <<= 1) {
0288         if (nodemask & bit) {
0289             nodemasks[nodes] = nodemask & bit;
0290             nodes++;
0291         }
0292     }
0293 
0294     TEST_ASSERT(nodes > 1,
0295             "Did not find at least 2 numa nodes. Can't do migration\n");
0296 
0297     fprintf(stderr, "Migrating amongst %d nodes found\n", nodes);
0298 
0299     from = 0;
0300     to = 1;
0301     start_time = time(NULL);
0302     last_update = start_time;
0303 
0304     ipis_sent = data->ipis_sent;
0305     hlt_count = data->hlt_count;
0306     wake_count = data->wake_count;
0307 
0308     while ((int)(time(NULL) - start_time) < run_secs) {
0309         data->migrations_attempted++;
0310 
0311         /*
0312          * migrate_pages with PID=0 will migrate all pages of this
0313          * process between the nodes specified as bitmasks. The page
0314          * backing the APIC access address belongs to this process
0315          * because it is allocated by KVM in the context of the
0316          * KVM_CREATE_VCPU ioctl. If that assumption ever changes this
0317          * test may break or give a false positive signal.
0318          */
0319         pages_not_moved = migrate_pages(0, sizeof(nodemasks[from]),
0320                         &nodemasks[from],
0321                         &nodemasks[to]);
0322         if (pages_not_moved < 0)
0323             fprintf(stderr,
0324                 "migrate_pages failed, errno=%d\n", errno);
0325         else if (pages_not_moved > 0)
0326             fprintf(stderr,
0327                 "migrate_pages could not move %ld pages\n",
0328                 pages_not_moved);
0329         else
0330             data->migrations_completed++;
0331 
0332         from = to;
0333         to++;
0334         if (to == nodes)
0335             to = 0;
0336 
0337         now = time(NULL);
0338         if (((now - start_time) % interval_secs == 0) &&
0339             (now != last_update)) {
0340             last_update = now;
0341             fprintf(stderr,
0342                 "%lu seconds: Migrations attempted=%lu completed=%lu, "
0343                 "IPIs sent=%lu received=%lu, HLTs=%lu wakes=%lu\n",
0344                 now - start_time, data->migrations_attempted,
0345                 data->migrations_completed,
0346                 data->ipis_sent, *pipis_rcvd,
0347                 data->hlt_count, data->wake_count);
0348 
0349             TEST_ASSERT(ipis_sent != data->ipis_sent &&
0350                     hlt_count != data->hlt_count &&
0351                     wake_count != data->wake_count,
0352                     "IPI, HLT and wake count have not increased "
0353                     "in the last %lu seconds. "
0354                     "HLTer is likely hung.\n", interval_secs);
0355 
0356             ipis_sent = data->ipis_sent;
0357             hlt_count = data->hlt_count;
0358             wake_count = data->wake_count;
0359         }
0360         usleep(delay_usecs);
0361     }
0362 }
0363 
0364 void get_cmdline_args(int argc, char *argv[], int *run_secs,
0365               bool *migrate, int *delay_usecs)
0366 {
0367     for (;;) {
0368         int opt = getopt(argc, argv, "s:d:m");
0369 
0370         if (opt == -1)
0371             break;
0372         switch (opt) {
0373         case 's':
0374             *run_secs = parse_size(optarg);
0375             break;
0376         case 'm':
0377             *migrate = true;
0378             break;
0379         case 'd':
0380             *delay_usecs = parse_size(optarg);
0381             break;
0382         default:
0383             TEST_ASSERT(false,
0384                     "Usage: -s <runtime seconds>. Default is %d seconds.\n"
0385                     "-m adds calls to migrate_pages while vCPUs are running."
0386                     " Default is no migrations.\n"
0387                     "-d <delay microseconds> - delay between migrate_pages() calls."
0388                     " Default is %d microseconds.\n",
0389                     DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS);
0390         }
0391     }
0392 }
0393 
0394 int main(int argc, char *argv[])
0395 {
0396     int r;
0397     int wait_secs;
0398     const int max_halter_wait = 10;
0399     int run_secs = 0;
0400     int delay_usecs = 0;
0401     struct test_data_page *data;
0402     vm_vaddr_t test_data_page_vaddr;
0403     bool migrate = false;
0404     pthread_t threads[2];
0405     struct thread_params params[2];
0406     struct kvm_vm *vm;
0407     uint64_t *pipis_rcvd;
0408 
0409     get_cmdline_args(argc, argv, &run_secs, &migrate, &delay_usecs);
0410     if (run_secs <= 0)
0411         run_secs = DEFAULT_RUN_SECS;
0412     if (delay_usecs <= 0)
0413         delay_usecs = DEFAULT_DELAY_USECS;
0414 
0415     vm = vm_create_with_one_vcpu(&params[0].vcpu, halter_guest_code);
0416 
0417     vm_init_descriptor_tables(vm);
0418     vcpu_init_descriptor_tables(params[0].vcpu);
0419     vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
0420 
0421     virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
0422 
0423     params[1].vcpu = vm_vcpu_add(vm, 1, sender_guest_code);
0424 
0425     test_data_page_vaddr = vm_vaddr_alloc_page(vm);
0426     data = addr_gva2hva(vm, test_data_page_vaddr);
0427     memset(data, 0, sizeof(*data));
0428     params[0].data = data;
0429     params[1].data = data;
0430 
0431     vcpu_args_set(params[0].vcpu, 1, test_data_page_vaddr);
0432     vcpu_args_set(params[1].vcpu, 1, test_data_page_vaddr);
0433 
0434     pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd);
0435     params[0].pipis_rcvd = pipis_rcvd;
0436     params[1].pipis_rcvd = pipis_rcvd;
0437 
0438     /* Start halter vCPU thread and wait for it to execute first HLT. */
0439     r = pthread_create(&threads[0], NULL, vcpu_thread, &params[0]);
0440     TEST_ASSERT(r == 0,
0441             "pthread_create halter failed errno=%d", errno);
0442     fprintf(stderr, "Halter vCPU thread started\n");
0443 
0444     wait_secs = 0;
0445     while ((wait_secs < max_halter_wait) && !data->hlt_count) {
0446         sleep(1);
0447         wait_secs++;
0448     }
0449 
0450     TEST_ASSERT(data->hlt_count,
0451             "Halter vCPU did not execute first HLT within %d seconds",
0452             max_halter_wait);
0453 
0454     fprintf(stderr,
0455         "Halter vCPU thread reported its APIC ID: %u after %d seconds.\n",
0456         data->halter_apic_id, wait_secs);
0457 
0458     r = pthread_create(&threads[1], NULL, vcpu_thread, &params[1]);
0459     TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", errno);
0460 
0461     fprintf(stderr,
0462         "IPI sender vCPU thread started. Letting vCPUs run for %d seconds.\n",
0463         run_secs);
0464 
0465     if (!migrate)
0466         sleep(run_secs);
0467     else
0468         do_migrations(data, run_secs, delay_usecs, pipis_rcvd);
0469 
0470     /*
0471      * Cancel threads and wait for them to stop.
0472      */
0473     cancel_join_vcpu_thread(threads[0], params[0].vcpu);
0474     cancel_join_vcpu_thread(threads[1], params[1].vcpu);
0475 
0476     fprintf(stderr,
0477         "Test successful after running for %d seconds.\n"
0478         "Sending vCPU sent %lu IPIs to halting vCPU\n"
0479         "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
0480         "Halter APIC ID=%#x\n"
0481         "Sender ICR value=%#x ICR2 value=%#x\n"
0482         "Halter TPR=%#x PPR=%#x LVR=%#x\n"
0483         "Migrations attempted: %lu\n"
0484         "Migrations completed: %lu\n",
0485         run_secs, data->ipis_sent,
0486         data->hlt_count, data->wake_count, *pipis_rcvd,
0487         data->halter_apic_id,
0488         data->icr, data->icr2,
0489         data->halter_tpr, data->halter_ppr, data->halter_lvr,
0490         data->migrations_attempted, data->migrations_completed);
0491 
0492     kvm_vm_free(vm);
0493 
0494     return 0;
0495 }