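/*
 * espfix64: mitigate the IRET information leak on return to a 16-bit
 * stack segment.  On x86-64, IRET to a 16-bit SS restores only the low
 * 16 bits of RSP, which breaks some 16-bit software and leaks the upper
 * bits of the kernel stack address to user space.  The workaround is a
 * set of tiny per-CPU read-only "ministacks", each mapped at 2^16
 * aliases spaced 64KB apart; the exception return path copies the IRET
 * frame onto a ministack and returns through the alias whose low 16
 * bits match the user stack pointer.
 *
 * This file sets up the ministacks and the page tables that map them;
 * the actual fixup is performed by the entry code.
 */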
#include <linux/init.h>
#include <linux/init_task.h>
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/gfp.h>
#include <linux/random.h>
#include <linux/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/setup.h>
#include <asm/espfix.h>
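/*
 * An IRET frame needs well under 64 bytes, so each ministack is padded
 * to a full cache line to avoid false sharing between adjacent stacks.
 */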
#define ESPFIX_STACK_SIZE (8*8UL)
#define ESPFIX_STACKS_PER_PAGE (PAGE_SIZE/ESPFIX_STACK_SIZE)
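/* How many espfix pages do we have virtual address space for? */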
#define ESPFIX_PAGE_SPACE (1UL << (P4D_SHIFT-PAGE_SHIFT-16))

#define ESPFIX_MAX_CPUS (ESPFIX_STACKS_PER_PAGE * ESPFIX_PAGE_SPACE)
#if CONFIG_NR_CPUS > ESPFIX_MAX_CPUS
# error "Need more virtual address space for the ESPFIX hack"
#endif

#define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO)
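/*
 * Bottom addresses of each CPU's espfix stack: espfix_waddr is the
 * writable kernel alias the IRET frame is copied through, espfix_stack
 * the read-only alias inside the espfix region that RSP points at.
 */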
DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack);
DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr);
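/* Serializes espfix stack-page and page-table allocation */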
static DEFINE_MUTEX(espfix_init_mutex);
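/* Each allocated stack page serves ESPFIX_STACKS_PER_PAGE CPUs */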
#define ESPFIX_MAX_PAGES DIV_ROUND_UP(CONFIG_NR_CPUS, ESPFIX_STACKS_PER_PAGE)
static void *espfix_pages[ESPFIX_MAX_PAGES];

static __page_aligned_bss pud_t espfix_pud_page[PTRS_PER_PUD]
        __aligned(PAGE_SIZE);

static unsigned int page_random, slot_random;
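/*
 * Return the bottom address of the espfix stack for a specific CPU.
 * page_random and slot_random scatter the stacks within the region.
 * The shuffle on the low 16 bits keeps the within-64KB offset intact
 * while moving the page-selecting bits above bit 31, clear of the
 * range covered by the 64KB-spaced aliases.
 */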
static inline unsigned long espfix_base_addr(unsigned int cpu)
{
        unsigned long page, slot;
        unsigned long addr;

        page = (cpu / ESPFIX_STACKS_PER_PAGE) ^ page_random;
        slot = (cpu + slot_random) % ESPFIX_STACKS_PER_PAGE;
        addr = (page << PAGE_SHIFT) + (slot * ESPFIX_STACK_SIZE);
        addr = (addr & 0xffffUL) | ((addr & ~0xffffUL) << 16);
        addr += ESPFIX_BASE_ADDR;
        return addr;
}
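/*
 * Each ministack page must be replicated at every 64KB alias in the
 * espfix region: 2^16 clones in total, built from identical PTE, PMD
 * and PUD entries (32 * 512 * 4 = 65536 with 4KB pages).
 */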
#define PTE_STRIDE (65536/PAGE_SIZE)
#define ESPFIX_PTE_CLONES (PTRS_PER_PTE/PTE_STRIDE)
#define ESPFIX_PMD_CLONES PTRS_PER_PMD
#define ESPFIX_PUD_CLONES (65536/(ESPFIX_PTE_CLONES*ESPFIX_PMD_CLONES))

#define PGTABLE_PROT ((_KERNPG_TABLE & ~_PAGE_RW) | _PAGE_NX)

static void init_espfix_random(void)
{
        unsigned long rand;
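        /*
         * This runs before the entropy pool is up, so a hardware
         * random number is the best we can do; failing that, fall
         * back to scrambling the TSC, which is better than nothing.
         */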
        if (!arch_get_random_longs(&rand, 1)) {
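                /* Mix with an arbitrary large odd constant */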
                rand = rdtsc();
                rand *= 0xc345c6b72fd16123UL;
        }

        slot_random = rand % ESPFIX_STACKS_PER_PAGE;
        page_random = (rand / ESPFIX_STACKS_PER_PAGE)
                & (ESPFIX_PAGE_SPACE - 1);
}

void __init init_espfix_bsp(void)
{
        pgd_t *pgd;
        p4d_t *p4d;
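        /* Install the espfix pud into the kernel page directory */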
        pgd = &init_top_pgt[pgd_index(ESPFIX_BASE_ADDR)];
        p4d = p4d_alloc(&init_mm, pgd, ESPFIX_BASE_ADDR);
        p4d_populate(&init_mm, p4d, espfix_pud_page);
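        /* Randomize the stack locations */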
        init_espfix_random();
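        /* The rest is the same as for any other processor */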
        init_espfix_ap(0);
}

void init_espfix_ap(int cpu)
{
        unsigned int page;
        unsigned long addr;
        pud_t pud, *pud_p;
        pmd_t pmd, *pmd_p;
        pte_t pte, *pte_p;
        int n, node;
        void *stack_page;
        pteval_t ptemask;
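        /* We only have to do this once per CPU */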
        if (likely(per_cpu(espfix_stack, cpu)))
                return;

        addr = espfix_base_addr(cpu);
        page = cpu/ESPFIX_STACKS_PER_PAGE;
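        /* Did another CPU already set this page up? */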
        stack_page = READ_ONCE(espfix_pages[page]);
        if (likely(stack_page))
                goto done;

        mutex_lock(&espfix_init_mutex);
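        /* Did we race on the lock? */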
        stack_page = READ_ONCE(espfix_pages[page]);
        if (stack_page)
                goto unlock_done;

        node = cpu_to_node(cpu);
        ptemask = __supported_pte_mask;

        pud_p = &espfix_pud_page[pud_index(addr)];
        pud = *pud_p;
        if (!pud_present(pud)) {
                struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0);

                pmd_p = (pmd_t *)page_address(page);
                pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask));
                paravirt_alloc_pmd(&init_mm, __pa(pmd_p) >> PAGE_SHIFT);
                for (n = 0; n < ESPFIX_PUD_CLONES; n++)
                        set_pud(&pud_p[n], pud);
        }

        pmd_p = pmd_offset(&pud, addr);
        pmd = *pmd_p;
        if (!pmd_present(pmd)) {
                struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0);

                pte_p = (pte_t *)page_address(page);
                pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask));
                paravirt_alloc_pte(&init_mm, __pa(pte_p) >> PAGE_SHIFT);
                for (n = 0; n < ESPFIX_PMD_CLONES; n++)
                        set_pmd(&pmd_p[n], pmd);
        }

        pte_p = pte_offset_kernel(&pmd, addr);
        stack_page = page_address(alloc_pages_node(node, GFP_KERNEL, 0));
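        /*
         * Map the stack page read-only (writes go through the direct
         * mapping via espfix_waddr); keep _PAGE_ENC so the page stays
         * encrypted when memory encryption is active, and mask by
         * __supported_pte_mask.
         */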
        pte = __pte(__pa(stack_page) | ((__PAGE_KERNEL_RO | _PAGE_ENC) & ptemask));
        for (n = 0; n < ESPFIX_PTE_CLONES; n++)
                set_pte(&pte_p[n*PTE_STRIDE], pte);
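        /* Job done for this CPU and any CPU which shares this page */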
        WRITE_ONCE(espfix_pages[page], stack_page);

unlock_done:
        mutex_unlock(&espfix_init_mutex);
done:
        per_cpu(espfix_stack, cpu) = addr;
        per_cpu(espfix_waddr, cpu) = (unsigned long)stack_page
                + (addr & ~PAGE_MASK);
}