Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * Copyright 2020, Gustavo Luiz Duarte, IBM Corp.
0004  *
0005  * This test starts a transaction and triggers a signal, forcing a pagefault to
0006  * happen when the kernel signal handling code touches the user signal stack.
0007  *
0008  * In order to avoid pre-faulting the signal stack memory and to force the
0009  * pagefault to happen precisely in the kernel signal handling code, the
0010  * pagefault handling is done in userspace using the userfaultfd facility.
0011  *
0012  * Further pagefaults are triggered by crafting the signal handler's ucontext
0013  * to point to additional memory regions managed by the userfaultfd, so using
0014  * the same mechanism used to avoid pre-faulting the signal stack memory.
0015  *
0016  * On failure (bug is present) the kernel crashes or never returns control to
0017  * userspace. If the bug is not present, the test completes almost immediately.
0018  */
0019 
0020 #include <stdio.h>
0021 #include <stdlib.h>
0022 #include <string.h>
0023 #include <linux/userfaultfd.h>
0024 #include <poll.h>
0025 #include <unistd.h>
0026 #include <sys/ioctl.h>
0027 #include <sys/syscall.h>
0028 #include <fcntl.h>
0029 #include <sys/mman.h>
0030 #include <pthread.h>
0031 #include <signal.h>
0032 #include <errno.h>
0033 
0034 #include "tm.h"
0035 
0036 
/* Size in bytes of the userfaultfd-managed region: 10 pages of 64k each */
#define UF_MEM_SIZE (10 * 64 * 1024)

/* Base address of the region whose page faults are served via userfaultfd */
static char *uf_mem;
/* Offset into uf_mem of the next chunk that get_uf_mem() will hand out */
static size_t uf_mem_offset;

/*
 * Shadow copy of the region, indexed by the same offsets as uf_mem. When a
 * page of uf_mem faults, the page at the same offset in this array is copied
 * in (rather than a zero-filled page). This keeps the test reliable: the
 * signal handler redirects its ucontext to freshly reserved uf_mem, and that
 * memory must hold the expected content once paged in so that returning from
 * the handler does not segfault.
 */
static char backing_mem[UF_MEM_SIZE];

/* System page size, filled in by setup_uf_mem() */
static size_t pagesize;
0053 
0054 /*
0055  * Return a chunk of at least 'size' bytes of memory that will be handled by
0056  * userfaultfd. If 'backing_data' is not NULL, its content will be save to
0057  * 'backing_mem' and then copied into the faulting pages when the page fault
0058  * is handled.
0059  */
0060 void *get_uf_mem(size_t size, void *backing_data)
0061 {
0062     void *ret;
0063 
0064     if (uf_mem_offset + size > UF_MEM_SIZE) {
0065         fprintf(stderr, "Requesting more uf_mem than expected!\n");
0066         exit(EXIT_FAILURE);
0067     }
0068 
0069     ret = &uf_mem[uf_mem_offset];
0070 
0071     /* Save the data that will be copied into the faulting page */
0072     if (backing_data != NULL)
0073         memcpy(&backing_mem[uf_mem_offset], backing_data, size);
0074 
0075     /* Reserve the requested amount of uf_mem */
0076     uf_mem_offset += size;
0077     /* Keep uf_mem_offset aligned to the page size (round up) */
0078     uf_mem_offset = (uf_mem_offset + pagesize - 1) & ~(pagesize - 1);
0079 
0080     return ret;
0081 }
0082 
0083 void *fault_handler_thread(void *arg)
0084 {
0085     struct uffd_msg msg;    /* Data read from userfaultfd */
0086     long uffd;      /* userfaultfd file descriptor */
0087     struct uffdio_copy uffdio_copy;
0088     struct pollfd pollfd;
0089     ssize_t nread, offset;
0090 
0091     uffd = (long) arg;
0092 
0093     for (;;) {
0094         pollfd.fd = uffd;
0095         pollfd.events = POLLIN;
0096         if (poll(&pollfd, 1, -1) == -1) {
0097             perror("poll() failed");
0098             exit(EXIT_FAILURE);
0099         }
0100 
0101         nread = read(uffd, &msg, sizeof(msg));
0102         if (nread == 0) {
0103             fprintf(stderr, "read(): EOF on userfaultfd\n");
0104             exit(EXIT_FAILURE);
0105         }
0106 
0107         if (nread == -1) {
0108             perror("read() failed");
0109             exit(EXIT_FAILURE);
0110         }
0111 
0112         /* We expect only one kind of event */
0113         if (msg.event != UFFD_EVENT_PAGEFAULT) {
0114             fprintf(stderr, "Unexpected event on userfaultfd\n");
0115             exit(EXIT_FAILURE);
0116         }
0117 
0118         /*
0119          * We need to handle page faults in units of pages(!).
0120          * So, round faulting address down to page boundary.
0121          */
0122         uffdio_copy.dst = msg.arg.pagefault.address & ~(pagesize-1);
0123 
0124         offset = (char *) uffdio_copy.dst - uf_mem;
0125         uffdio_copy.src = (unsigned long) &backing_mem[offset];
0126 
0127         uffdio_copy.len = pagesize;
0128         uffdio_copy.mode = 0;
0129         uffdio_copy.copy = 0;
0130         if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == -1) {
0131             perror("ioctl-UFFDIO_COPY failed");
0132             exit(EXIT_FAILURE);
0133         }
0134     }
0135 }
0136 
0137 void setup_uf_mem(void)
0138 {
0139     long uffd;      /* userfaultfd file descriptor */
0140     pthread_t thr;
0141     struct uffdio_api uffdio_api;
0142     struct uffdio_register uffdio_register;
0143     int ret;
0144 
0145     pagesize = sysconf(_SC_PAGE_SIZE);
0146 
0147     /* Create and enable userfaultfd object */
0148     uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
0149     if (uffd == -1) {
0150         perror("userfaultfd() failed");
0151         exit(EXIT_FAILURE);
0152     }
0153     uffdio_api.api = UFFD_API;
0154     uffdio_api.features = 0;
0155     if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) {
0156         perror("ioctl-UFFDIO_API failed");
0157         exit(EXIT_FAILURE);
0158     }
0159 
0160     /*
0161      * Create a private anonymous mapping. The memory will be demand-zero
0162      * paged, that is, not yet allocated. When we actually touch the memory
0163      * the related page will be allocated via the userfaultfd mechanism.
0164      */
0165     uf_mem = mmap(NULL, UF_MEM_SIZE, PROT_READ | PROT_WRITE,
0166               MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
0167     if (uf_mem == MAP_FAILED) {
0168         perror("mmap() failed");
0169         exit(EXIT_FAILURE);
0170     }
0171 
0172     /*
0173      * Register the memory range of the mapping we've just mapped to be
0174      * handled by the userfaultfd object. In 'mode' we request to track
0175      * missing pages (i.e. pages that have not yet been faulted-in).
0176      */
0177     uffdio_register.range.start = (unsigned long) uf_mem;
0178     uffdio_register.range.len = UF_MEM_SIZE;
0179     uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
0180     if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) {
0181         perror("ioctl-UFFDIO_REGISTER");
0182         exit(EXIT_FAILURE);
0183     }
0184 
0185     /* Create a thread that will process the userfaultfd events */
0186     ret = pthread_create(&thr, NULL, fault_handler_thread, (void *) uffd);
0187     if (ret != 0) {
0188         fprintf(stderr, "pthread_create(): Error. Returned %d\n", ret);
0189         exit(EXIT_FAILURE);
0190     }
0191 }
0192 
0193 /*
0194  * Assumption: the signal was delivered while userspace was in transactional or
0195  * suspended state, i.e. uc->uc_link != NULL.
0196  */
0197 void signal_handler(int signo, siginfo_t *si, void *uc)
0198 {
0199     ucontext_t *ucp = uc;
0200 
0201     /* Skip 'trap' after returning, otherwise we get a SIGTRAP again */
0202     ucp->uc_link->uc_mcontext.regs->nip += 4;
0203 
0204     ucp->uc_mcontext.v_regs =
0205         get_uf_mem(sizeof(elf_vrreg_t), ucp->uc_mcontext.v_regs);
0206 
0207     ucp->uc_link->uc_mcontext.v_regs =
0208         get_uf_mem(sizeof(elf_vrreg_t), ucp->uc_link->uc_mcontext.v_regs);
0209 
0210     ucp->uc_link = get_uf_mem(sizeof(ucontext_t), ucp->uc_link);
0211 }
0212 
/*
 * Report whether the userfaultfd system call is available.
 *
 * The syscall is probed with a flags argument of -1 (presumably chosen so
 * that no descriptor is actually created). The result is false only when the
 * call returns non-zero with errno set to ENOSYS; every other outcome is
 * treated as "available".
 */
bool have_userfaultfd(void)
{
    long probe;

    errno = 0;
    probe = syscall(__NR_userfaultfd, -1);

    return !(probe != 0 && errno == ENOSYS);
}
0222 
0223 int tm_signal_pagefault(void)
0224 {
0225     struct sigaction sa;
0226     stack_t ss;
0227 
0228     SKIP_IF(!have_htm());
0229     SKIP_IF(htm_is_synthetic());
0230     SKIP_IF(!have_userfaultfd());
0231 
0232     setup_uf_mem();
0233 
0234     /*
0235      * Set an alternative stack that will generate a page fault when the
0236      * signal is raised. The page fault will be treated via userfaultfd,
0237      * i.e. via fault_handler_thread.
0238      */
0239     ss.ss_sp = get_uf_mem(SIGSTKSZ, NULL);
0240     ss.ss_size = SIGSTKSZ;
0241     ss.ss_flags = 0;
0242     if (sigaltstack(&ss, NULL) == -1) {
0243         perror("sigaltstack() failed");
0244         exit(EXIT_FAILURE);
0245     }
0246 
0247     sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
0248     sa.sa_sigaction = signal_handler;
0249     if (sigaction(SIGTRAP, &sa, NULL) == -1) {
0250         perror("sigaction() failed");
0251         exit(EXIT_FAILURE);
0252     }
0253 
0254     /* Trigger a SIGTRAP in transactional state */
0255     asm __volatile__(
0256             "tbegin.;"
0257             "beq    1f;"
0258             "trap;"
0259             "1: ;"
0260             : : : "memory");
0261 
0262     /* Trigger a SIGTRAP in suspended state */
0263     asm __volatile__(
0264             "tbegin.;"
0265             "beq    1f;"
0266             "tsuspend.;"
0267             "trap;"
0268             "tresume.;"
0269             "1: ;"
0270             : : : "memory");
0271 
0272     return EXIT_SUCCESS;
0273 }
0274 
0275 int main(int argc, char **argv)
0276 {
0277     /*
0278      * Depending on kernel config, the TM Bad Thing might not result in a
0279      * crash, instead the kernel never returns control back to userspace, so
0280      * set a tight timeout. If the test passes it completes almost
0281      * immediately.
0282      */
0283     test_harness_set_timeout(2);
0284     return test_harness(tm_signal_pagefault, "tm_signal_pagefault");
0285 }