// SPDX-License-Identifier: GPL-2.0-only
#define _GNU_SOURCE /* for program_invocation_short_name */
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <syscall.h>
#include <sys/ioctl.h>
#include <sys/sysinfo.h>
#include <asm/barrier.h>
#include <linux/atomic.h>
#include <linux/rseq.h>
#include <linux/unistd.h>

#include "kvm_util.h"
#include "processor.h"
#include "test_util.h"

#include "../rseq/rseq.c"

/*
 * Any bug related to task migration is likely to be timing-dependent; perform
 * a large number of migrations to reduce the odds of a false negative.
 */
#define NR_TASK_MIGRATIONS 100000

static pthread_t migration_thread;
static cpu_set_t possible_mask;
static int min_cpu, max_cpu;
static bool done;

static atomic_t seq_cnt;

static void guest_code(void)
{
	for (;;)
		GUEST_SYNC(0);
}

/*
 * We have to perform a direct system call for getcpu() because it's
 * not available until glibc 2.29.
 */
static void sys_getcpu(unsigned *cpu)
{
	int r;

	r = syscall(__NR_getcpu, cpu, NULL, NULL);
	TEST_ASSERT(!r, "getcpu failed, errno = %d (%s)", errno, strerror(errno));
}

static int next_cpu(int cpu)
{
	/*
	 * Advance to the next CPU, skipping those that weren't in the original
	 * affinity set.  Sadly, there is no CPU_SET_FOR_EACH, and cpu_set_t's
	 * data storage is considered opaque.  Note, if this task is pinned to
	 * a small set of discontiguous CPUs, e.g. 2 and 1023, this loop will
	 * burn a lot of cycles and the test will take longer than normal to
	 * complete.
	 */
	do {
		cpu++;
		if (cpu > max_cpu) {
			cpu = min_cpu;
			TEST_ASSERT(CPU_ISSET(cpu, &possible_mask),
				    "Min CPU = %d must always be usable", cpu);
			break;
		}
	} while (!CPU_ISSET(cpu, &possible_mask));

	return cpu;
}

static void *migration_worker(void *__rseq_tid)
{
	pid_t rseq_tid = (pid_t)(unsigned long)__rseq_tid;
	cpu_set_t allowed_mask;
	int r, i, cpu;

	CPU_ZERO(&allowed_mask);

	for (i = 0, cpu = min_cpu; i < NR_TASK_MIGRATIONS; i++, cpu = next_cpu(cpu)) {
		CPU_SET(cpu, &allowed_mask);

		/*
		 * Bump the sequence count twice to allow the reader to detect
		 * that a migration may have occurred in between rseq and sched
		 * CPU ID reads.  An odd sequence count indicates a migration
		 * is in-progress, while a completely different count indicates
		 * a migration occurred since the count was last read.
		 */
		atomic_inc(&seq_cnt);

		/*
		 * Ensure the odd count is visible while getcpu() isn't
		 * stable, i.e. while changing affinity is in-progress.
		 */
		smp_wmb();
		r = sched_setaffinity(rseq_tid, sizeof(allowed_mask), &allowed_mask);
		TEST_ASSERT(!r, "sched_setaffinity failed, errno = %d (%s)",
			    errno, strerror(errno));
		smp_wmb();
		atomic_inc(&seq_cnt);

		CPU_CLR(cpu, &allowed_mask);

		/*
		 * Wait 1-10us before proceeding to the next iteration and more
		 * specifically, before bumping seq_cnt again.  A delay is
		 * needed on three fronts:
		 *
		 *  1. To allow sched_setaffinity() to prompt migration before
		 *     ioctl(KVM_RUN) enters the guest so that TIF_NOTIFY_RESUME
		 *     (or TIF_NEED_RESCHED, which indirectly leads to handling
		 *     NOTIFY_RESUME) is handled in KVM context.
		 *
		 *     If NOTIFY_RESUME/NEED_RESCHED is set after KVM enters
		 *     the guest, the guest will trigger an IO/MMIO exit all the
		 *     way to userspace and the TIF flags will be handled by
		 *     the generic "exit to userspace" logic, not by KVM.  The
		 *     exit to userspace is necessary to give the test a chance
		 *     to check the rseq CPU ID (see #2).
		 *
		 *     Alternatively, guest_code() could include an instruction
		 *     to trigger an exit that is handled by KVM, but any such
		 *     exit requires architecture specific code.
		 *
		 *  2. To let ioctl(KVM_RUN) make its way back to the test
		 *     before the next round of migration.  The test's check on
		 *     the rseq CPU ID must wait for migration to complete in
		 *     order to avoid false positives, thus any kernel rseq bug
		 *     will be missed if the next migration starts before the
		 *     check completes.
		 *
		 *  3. To ensure the read-side makes efficient forward progress,
		 *     e.g. if getcpu() involves a syscall.  Stalling the read-side
		 *     means the test will spend more time waiting for getcpu()
		 *     to stabilize and less time trying to hit the timing-dependent
		 *     bug.
		 *
		 * Because any bug in this area is likely to be timing-dependent,
		 * run with a range of delays at 1us intervals from 1us to 10us
		 * as a best effort to avoid tuning the test to the point where
		 * it can hit _only_ the original bug and not detect future
		 * regressions.
		 *
		 * The original bug can reproduce with a delay up to ~500us on
		 * x86-64, but starts to require more iterations to reproduce
		 * as the delay creeps above ~10us, and the average runtime of
		 * each iteration obviously increases as well.  Cap the delay
		 * at 10us to keep test runtime reasonable while minimizing
		 * potential coverage loss.
		 *
		 * The lower bound for reproducing the bug is likely below 1us,
		 * e.g. failures occur on x86-64 with nanosleep(0), but at that
		 * point the overhead of the syscall likely dominates the delay.
		 * Use usleep() for simplicity and to avoid unnecessary kernel
		 * dependencies.
		 */
		usleep((i % 10) + 1);
	}
	done = true;
	return NULL;
}

static void calc_min_max_cpu(void)
{
	int i, cnt, nproc;

	TEST_REQUIRE(CPU_COUNT(&possible_mask) >= 2);

	/*
	 * CPU_SET doesn't provide a FOR_EACH helper, get the min/max CPU that
	 * this task is affined to in order to reduce the time spent querying
	 * unusable CPUs, e.g. if this task is pinned to a small percentage of
	 * total CPUs.
	 */
	nproc = get_nprocs_conf();
	min_cpu = -1;
	max_cpu = -1;
	cnt = 0;

	for (i = 0; i < nproc; i++) {
		if (!CPU_ISSET(i, &possible_mask))
			continue;
		if (min_cpu == -1)
			min_cpu = i;
		max_cpu = i;
		cnt++;
	}

	__TEST_REQUIRE(cnt >= 2,
		       "Only one usable CPU, task migration not possible");
}

int main(int argc, char *argv[])
{
	int r, i, snapshot;
	struct kvm_vm *vm;
	struct kvm_vcpu *vcpu;
	u32 cpu, rseq_cpu;

	/* Tell stdout not to buffer its content */
	setbuf(stdout, NULL);

	r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask);
	TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno,
		    strerror(errno));

	calc_min_max_cpu();

	r = rseq_register_current_thread();
	TEST_ASSERT(!r, "rseq_register_current_thread failed, errno = %d (%s)",
		    errno, strerror(errno));

	/*
	 * Create and run a dummy VM that immediately exits to userspace via
	 * GUEST_SYNC, while concurrently migrating the process by setting its
	 * CPU affinity.
	 */
	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
	ucall_init(vm, NULL);

	pthread_create(&migration_thread, NULL, migration_worker,
		       (void *)(unsigned long)syscall(SYS_gettid));

	for (i = 0; !done; i++) {
		vcpu_run(vcpu);
		TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
			    "Guest failed?");

		/*
		 * Verify rseq's CPU matches sched's CPU.  Ensure migration
		 * doesn't occur between getcpu() and reading the rseq cpu_id
		 * by rereading both if the sequence count changes, or if the
		 * count is odd (migration in-progress).
		 */
		do {
			/*
			 * Drop bit 0 to force a mismatch if the count is odd,
			 * i.e. if a migration is in-progress.
			 */
			snapshot = atomic_read(&seq_cnt) & ~1;

			/*
			 * Ensure calling getcpu() and reading rseq.cpu_id complete
			 * in a single "no migration" window, i.e. are not reordered
			 * across the seq_cnt reads.
			 */
			smp_rmb();
			sys_getcpu(&cpu);
			rseq_cpu = rseq_current_cpu_raw();
			smp_rmb();
		} while (snapshot != atomic_read(&seq_cnt));

		TEST_ASSERT(rseq_cpu == cpu,
			    "rseq CPU = %d, sched CPU = %d\n", rseq_cpu, cpu);
	}

	/*
	 * Sanity check that the test was able to enter the guest a reasonable
	 * number of times, e.g. didn't get stalled too often/long waiting for
	 * getcpu() to stabilize.  A 2:1 migration:KVM_RUN ratio is a fairly
	 * conservative ratio on x86-64, which can do _more_ KVM_RUNs than
	 * migrations given the 1us+ delay in the migration task.
	 */
	TEST_ASSERT(i > (NR_TASK_MIGRATIONS / 2),
		    "Only performed %d KVM_RUNs, task stalled too much?\n", i);

	pthread_join(migration_thread, NULL);

	kvm_vm_free(vm);

	rseq_unregister_current_thread();

	return 0;
}
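
For reference, the even/odd sequence count handshake between migration_worker() and the main loop is essentially a user-space seqlock. Below is a minimal standalone sketch of the same pattern, separate from the selftest, assuming C11 atomics in place of the selftest's atomic_t, smp_wmb() and smp_rmb() helpers; writer_update() stands in for the affinity-changing side and reader_read() for the getcpu()/rseq cpu_id check, and the names and value_a/value_b variables are hypothetical, not part of the test.

/* Illustrative sketch only; not part of rseq_test.c. Build with -std=c11. */
#include <stdatomic.h>

static atomic_int sketch_seq;
static atomic_int value_a, value_b;	/* stand-ins for the two CPU ID sources */

/* Writer: the count is odd while the pair is being updated. */
static void writer_update(int a, int b)
{
	atomic_fetch_add_explicit(&sketch_seq, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_release);	/* plays the role of smp_wmb() */
	atomic_store_explicit(&value_a, a, memory_order_relaxed);
	atomic_store_explicit(&value_b, b, memory_order_relaxed);
	atomic_fetch_add_explicit(&sketch_seq, 1, memory_order_release);
}

/* Reader: retry until both values were read in a single even-count window. */
static void reader_read(int *a, int *b)
{
	int snapshot;

	do {
		/* Drop bit 0 so an odd (in-progress) count forces a retry. */
		snapshot = atomic_load_explicit(&sketch_seq, memory_order_acquire) & ~1;
		*a = atomic_load_explicit(&value_a, memory_order_relaxed);
		*b = atomic_load_explicit(&value_b, memory_order_relaxed);
		atomic_thread_fence(memory_order_acquire);	/* plays the role of smp_rmb() */
	} while (snapshot != atomic_load_explicit(&sketch_seq, memory_order_relaxed));
}

As in the test, a reader that observes an odd count, or a count that changed across its reads, simply retries, so it only ever compares a pair of values that were sampled while no update was in flight.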