/*
 * Hyper-V remote TLB flush support.
 *
 * (Navigation text from the LXR code-browser page this file was extracted
 * from has been removed; only the source below is meaningful.)
 */
0001 #define pr_fmt(fmt)  "Hyper-V: " fmt
0002 
0003 #include <linux/hyperv.h>
0004 #include <linux/log2.h>
0005 #include <linux/slab.h>
0006 #include <linux/types.h>
0007 
0008 #include <asm/fpu/api.h>
0009 #include <asm/mshyperv.h>
0010 #include <asm/msr.h>
0011 #include <asm/tlbflush.h>
0012 #include <asm/tlb.h>
0013 
0014 #define CREATE_TRACE_POINTS
0015 #include <asm/trace/hyperv.h>
0016 
0017 /* Each gva in gva_list encodes up to 4096 pages to flush */
0018 #define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
0019 
0020 static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
0021                       const struct flush_tlb_info *info);
0022 
0023 /*
0024  * Fills in gva_list starting from offset. Returns the number of items added.
0025  */
0026 static inline int fill_gva_list(u64 gva_list[], int offset,
0027                 unsigned long start, unsigned long end)
0028 {
0029     int gva_n = offset;
0030     unsigned long cur = start, diff;
0031 
0032     do {
0033         diff = end > cur ? end - cur : 0;
0034 
0035         gva_list[gva_n] = cur & PAGE_MASK;
0036         /*
0037          * Lower 12 bits encode the number of additional
0038          * pages to flush (in addition to the 'cur' page).
0039          */
0040         if (diff >= HV_TLB_FLUSH_UNIT) {
0041             gva_list[gva_n] |= ~PAGE_MASK;
0042             cur += HV_TLB_FLUSH_UNIT;
0043         }  else if (diff) {
0044             gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;
0045             cur = end;
0046         }
0047 
0048         gva_n++;
0049 
0050     } while (cur < end);
0051 
0052     return gva_n - offset;
0053 }
0054 
/*
 * hyperv_flush_tlb_multi - flush TLBs on a set of CPUs via Hyper-V hypercall.
 * @cpus: mask of CPUs whose TLBs need flushing
 * @info: describes the mm and the virtual address range to flush
 *
 * Replaces the IPI-based flush with HVCALL_FLUSH_VIRTUAL_ADDRESS_{SPACE,LIST}
 * (or the *_EX variants via hyperv_flush_tlb_others_ex()).  Falls back to
 * native_flush_tlb_multi() whenever the hypercall path cannot be used: no
 * hypercall page, per-cpu input page not yet allocated, an invalid VP number,
 * or an unsuccessful hypercall status.
 */
static void hyperv_flush_tlb_multi(const struct cpumask *cpus,
				   const struct flush_tlb_info *info)
{
	int cpu, vcpu, gva_n, max_gvas;
	struct hv_tlb_flush **flush_pcpu;
	struct hv_tlb_flush *flush;
	u64 status;
	unsigned long flags;

	trace_hyperv_mmu_flush_tlb_multi(cpus, info);

	if (!hv_hypercall_pg)
		goto do_native;

	/*
	 * Interrupts stay disabled until check_status (or an early exit) so
	 * the per-cpu hypercall input page is not reused underneath us.
	 */
	local_irq_save(flags);

	flush_pcpu = (struct hv_tlb_flush **)
		     this_cpu_ptr(hyperv_pcpu_input_arg);

	flush = *flush_pcpu;

	/* Input page may not be allocated yet (e.g. early boot); use IPIs. */
	if (unlikely(!flush)) {
		local_irq_restore(flags);
		goto do_native;
	}

	if (info->mm) {
		/*
		 * AddressSpace argument must match the CR3 with PCID bits
		 * stripped out.
		 */
		flush->address_space = virt_to_phys(info->mm->pgd);
		flush->address_space &= CR3_ADDR_MASK;
		flush->flags = 0;
	} else {
		/* No mm: flush every address space on the target CPUs. */
		flush->address_space = 0;
		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
	}

	flush->processor_mask = 0;
	if (cpumask_equal(cpus, cpu_present_mask)) {
		flush->flags |= HV_FLUSH_ALL_PROCESSORS;
	} else {
		/*
		 * From the supplied CPU set we need to figure out if we can get
		 * away with cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE}
		 * hypercalls. This is possible when the highest VP number in
		 * the set is < 64. As VP numbers are usually in ascending order
		 * and match Linux CPU ids, here is an optimization: we check
		 * the VP number for the highest bit in the supplied set first
		 * so we can quickly find out if using *_EX hypercalls is a
		 * must. We will also check all VP numbers when walking the
		 * supplied CPU set to remain correct in all cases.
		 */
		cpu = cpumask_last(cpus);

		if (cpu < nr_cpumask_bits && hv_cpu_number_to_vp_number(cpu) >= 64)
			goto do_ex_hypercall;

		for_each_cpu(cpu, cpus) {
			vcpu = hv_cpu_number_to_vp_number(cpu);
			if (vcpu == VP_INVAL) {
				local_irq_restore(flags);
				goto do_native;
			}

			/* processor_mask is 64 bits wide; larger VPs need _EX. */
			if (vcpu >= 64)
				goto do_ex_hypercall;

			__set_bit(vcpu, (unsigned long *)
				  &flush->processor_mask);
		}

		/* nothing to flush if 'processor_mask' ends up being empty */
		if (!flush->processor_mask) {
			local_irq_restore(flags);
			return;
		}
	}

	/*
	 * We can flush not more than max_gvas with one hypercall. Flush the
	 * whole address space if we were asked to do more.
	 */
	max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);

	if (info->end == TLB_FLUSH_ALL) {
		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
		status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
					 flush, NULL);
	} else if (info->end &&
		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
		/* Range too large for the gva list; flush the whole space. */
		status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
					 flush, NULL);
	} else {
		gva_n = fill_gva_list(flush->gva_list, 0,
				      info->start, info->end);
		status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
					     gva_n, 0, flush, NULL);
	}
	goto check_status;

do_ex_hypercall:
	status = hyperv_flush_tlb_others_ex(cpus, info);

check_status:
	local_irq_restore(flags);

	if (hv_result_success(status))
		return;
do_native:
	native_flush_tlb_multi(cpus, info);
}
0168 
/*
 * hyperv_flush_tlb_others_ex - flush using the sparse-VP-set (_EX) hypercalls.
 * @cpus: mask of CPUs whose TLBs need flushing
 * @info: describes the mm and the virtual address range to flush
 *
 * Used when some VP number in @cpus is >= 64 and the plain 64-bit
 * processor_mask cannot represent the set.  Called from
 * hyperv_flush_tlb_multi() with interrupts already disabled and the per-cpu
 * input page already validated as non-NULL.
 *
 * Returns the hypercall status; HV_STATUS_INVALID_PARAMETER when the _EX
 * hypercalls are not recommended by the hypervisor or the VP set cannot be
 * built, which makes the caller fall back to the native flush.
 */
static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
				      const struct flush_tlb_info *info)
{
	int nr_bank = 0, max_gvas, gva_n;
	struct hv_tlb_flush_ex **flush_pcpu;
	struct hv_tlb_flush_ex *flush;
	u64 status;

	if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
		return HV_STATUS_INVALID_PARAMETER;

	flush_pcpu = (struct hv_tlb_flush_ex **)
		     this_cpu_ptr(hyperv_pcpu_input_arg);

	flush = *flush_pcpu;

	if (info->mm) {
		/*
		 * AddressSpace argument must match the CR3 with PCID bits
		 * stripped out.
		 */
		flush->address_space = virt_to_phys(info->mm->pgd);
		flush->address_space &= CR3_ADDR_MASK;
		flush->flags = 0;
	} else {
		/* No mm: flush every address space on the target CPUs. */
		flush->address_space = 0;
		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
	}

	flush->hv_vp_set.valid_bank_mask = 0;

	/* Encode @cpus as a sparse set of 64-VP banks. */
	flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
	nr_bank = cpumask_to_vpset(&(flush->hv_vp_set), cpus);
	if (nr_bank < 0)
		return HV_STATUS_INVALID_PARAMETER;

	/*
	 * We can flush not more than max_gvas with one hypercall. Flush the
	 * whole address space if we were asked to do more.
	 * The gva list starts right after the nr_bank variable-length
	 * bank_contents[] entries, which is why it shrinks with nr_bank and
	 * why fill_gva_list() below is given nr_bank as its offset.
	 */
	max_gvas =
		(PAGE_SIZE - sizeof(*flush) - nr_bank *
		 sizeof(flush->hv_vp_set.bank_contents[0])) /
		sizeof(flush->gva_list[0]);

	if (info->end == TLB_FLUSH_ALL) {
		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
		status = hv_do_rep_hypercall(
			HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
			0, nr_bank, flush, NULL);
	} else if (info->end &&
		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
		/* Range too large for the gva list; flush the whole space. */
		status = hv_do_rep_hypercall(
			HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
			0, nr_bank, flush, NULL);
	} else {
		gva_n = fill_gva_list(flush->gva_list, nr_bank,
				      info->start, info->end);
		status = hv_do_rep_hypercall(
			HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
			gva_n, nr_bank, flush, NULL);
	}

	return status;
}
0234 
0235 void hyperv_setup_mmu_ops(void)
0236 {
0237     if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
0238         return;
0239 
0240     pr_info("Using hypercall for remote TLB flush\n");
0241     pv_ops.mmu.flush_tlb_multi = hyperv_flush_tlb_multi;
0242     pv_ops.mmu.tlb_remove_table = tlb_remove_table;
0243 }