// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/hyperv.h>
#include <linux/random.h>
#include <linux/clockchips.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <clocksource/hyperv_timer.h>
#include <asm/mshyperv.h>
#include "hyperv_vmbus.h"

/* The one and only */
struct hv_context hv_context;

/*
 * hv_init - Main initialization routine.
 *
 * This routine must be called before any other routines in here are called
 */
int hv_init(void)
{
    hv_context.cpu_context = alloc_percpu(struct hv_per_cpu_context);
    if (!hv_context.cpu_context)
        return -ENOMEM;
    return 0;
}

/*
 * Functions for allocating and freeing memory with size and
 * alignment HV_HYP_PAGE_SIZE. These functions are needed because
 * the guest page size may not be the same as the Hyper-V page
 * size. We depend upon kmalloc() aligning power-of-two size
 * allocations to the allocation size boundary, so that the
 * allocated memory appears to Hyper-V as a page of the size
 * it expects.
 */

void *hv_alloc_hyperv_page(void)
{
    BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE);

    if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
        return (void *)__get_free_page(GFP_KERNEL);
    else
        return kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
}

void *hv_alloc_hyperv_zeroed_page(void)
{
    if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
        return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
    else
        return kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
}

void hv_free_hyperv_page(unsigned long addr)
{
    if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
        free_page(addr);
    else
        kfree((void *)addr);
}
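
/*
 * Illustrative usage sketch (editor's note, not part of the upstream source):
 * because the helpers above return a HV_HYP_PAGE_SIZE-sized and -aligned
 * buffer whether or not the guest PAGE_SIZE matches the Hyper-V page size,
 * a hypothetical caller that needs a hypervisor-visible page can simply
 * pair them:
 *
 *     void *page = hv_alloc_hyperv_zeroed_page();
 *
 *     if (!page)
 *         return -ENOMEM;
 *     ... hand the page's physical address to the hypervisor ...
 *     hv_free_hyperv_page((unsigned long)page);
 */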

/*
 * hv_post_message - Post a message using the hypervisor message IPC.
 *
 * This involves a hypercall.
 */
int hv_post_message(union hv_connection_id connection_id,
          enum hv_message_type message_type,
          void *payload, size_t payload_size)
{
    struct hv_input_post_message *aligned_msg;
    struct hv_per_cpu_context *hv_cpu;
    u64 status;

    if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
        return -EMSGSIZE;

    hv_cpu = get_cpu_ptr(hv_context.cpu_context);
    aligned_msg = hv_cpu->post_msg_page;
    aligned_msg->connectionid = connection_id;
    aligned_msg->reserved = 0;
    aligned_msg->message_type = message_type;
    aligned_msg->payload_size = payload_size;
    memcpy((void *)aligned_msg->payload, payload, payload_size);

    if (hv_isolation_type_snp())
        status = hv_ghcb_hypercall(HVCALL_POST_MESSAGE,
                (void *)aligned_msg, NULL,
                sizeof(*aligned_msg));
    else
        status = hv_do_hypercall(HVCALL_POST_MESSAGE,
                aligned_msg, NULL);

    /* Preemption must remain disabled until after the hypercall
     * so some other thread can't get scheduled onto this cpu and
     * corrupt the per-cpu post_msg_page
     */
    put_cpu_ptr(hv_cpu);

    return hv_result(status);
}
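
/*
 * Illustrative note (editor's sketch, not part of the upstream source):
 * hv_result() extracts the Hyper-V status code from the raw hypercall
 * result, so a hypothetical caller checks the return value against
 * HV_STATUS_SUCCESS rather than treating it as a plain errno:
 *
 *     ret = hv_post_message(conn_id, msg_type, payload, payload_size);
 *     if (ret != HV_STATUS_SUCCESS)
 *         pr_err("Unable to post message, status %d\n", ret);
 */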

int hv_synic_alloc(void)
{
    int cpu;
    struct hv_per_cpu_context *hv_cpu;

    /*
     * First, zero all per-cpu memory areas so hv_synic_free() can
     * detect what memory has been allocated and clean up properly
     * after any failures.
     */
    for_each_present_cpu(cpu) {
        hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);
        memset(hv_cpu, 0, sizeof(*hv_cpu));
    }

    hv_context.hv_numa_map = kcalloc(nr_node_ids, sizeof(struct cpumask),
                     GFP_KERNEL);
    if (hv_context.hv_numa_map == NULL) {
        pr_err("Unable to allocate NUMA map\n");
        goto err;
    }

    for_each_present_cpu(cpu) {
        hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);

        tasklet_init(&hv_cpu->msg_dpc,
                 vmbus_on_msg_dpc, (unsigned long) hv_cpu);

        /*
         * SynIC message and event pages are allocated by the paravisor.
         * Skip allocating those pages here.
         */
        if (!hv_isolation_type_snp()) {
            hv_cpu->synic_message_page =
                (void *)get_zeroed_page(GFP_ATOMIC);
            if (hv_cpu->synic_message_page == NULL) {
                pr_err("Unable to allocate SYNIC message page\n");
                goto err;
            }

            hv_cpu->synic_event_page =
                (void *)get_zeroed_page(GFP_ATOMIC);
            if (hv_cpu->synic_event_page == NULL) {
                pr_err("Unable to allocate SYNIC event page\n");
                goto err;
            }
        }

        hv_cpu->post_msg_page = (void *)get_zeroed_page(GFP_ATOMIC);
        if (hv_cpu->post_msg_page == NULL) {
            pr_err("Unable to allocate post msg page\n");
            goto err;
        }
    }

    return 0;
err:
    /*
     * Any memory allocations that succeeded will be freed when
     * the caller cleans up by calling hv_synic_free()
     */
    return -ENOMEM;
}
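
/*
 * Illustrative note (editor's sketch, not part of the upstream source):
 * the zeroing pass at the top of hv_synic_alloc() is what makes cleanup
 * after a partial failure safe, so a hypothetical caller can always pair
 * the two:
 *
 *     ret = hv_synic_alloc();
 *     if (ret)
 *         hv_synic_free();    // frees only what was actually allocated
 */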


void hv_synic_free(void)
{
    int cpu;

    for_each_present_cpu(cpu) {
        struct hv_per_cpu_context *hv_cpu
            = per_cpu_ptr(hv_context.cpu_context, cpu);

        free_page((unsigned long)hv_cpu->synic_event_page);
        free_page((unsigned long)hv_cpu->synic_message_page);
        free_page((unsigned long)hv_cpu->post_msg_page);
    }

    kfree(hv_context.hv_numa_map);
}

/*
 * hv_synic_init - Initialize the Synthetic Interrupt Controller.
 *
 * If it is already initialized by another entity (ie x2v shim), we need to
 * retrieve the initialized message and event pages.  Otherwise, we create and
 * initialize the message and event pages.
 */
void hv_synic_enable_regs(unsigned int cpu)
{
    struct hv_per_cpu_context *hv_cpu
        = per_cpu_ptr(hv_context.cpu_context, cpu);
    union hv_synic_simp simp;
    union hv_synic_siefp siefp;
    union hv_synic_sint shared_sint;
    union hv_synic_scontrol sctrl;

    /* Setup the Synic's message page */
    simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
    simp.simp_enabled = 1;

    if (hv_isolation_type_snp()) {
        hv_cpu->synic_message_page
            = memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
                   HV_HYP_PAGE_SIZE, MEMREMAP_WB);
        if (!hv_cpu->synic_message_page)
            pr_err("Failed to map SynIC message page.\n");
    } else {
        simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page)
            >> HV_HYP_PAGE_SHIFT;
    }

    hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);

    /* Setup the Synic's event page */
    siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
    siefp.siefp_enabled = 1;

    if (hv_isolation_type_snp()) {
        hv_cpu->synic_event_page =
            memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT,
                 HV_HYP_PAGE_SIZE, MEMREMAP_WB);

        if (!hv_cpu->synic_event_page)
            pr_err("Failed to map SynIC event page.\n");
    } else {
        siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page)
            >> HV_HYP_PAGE_SHIFT;
    }

    hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);

    /* Setup the shared SINT. */
    if (vmbus_irq != -1)
        enable_percpu_irq(vmbus_irq, 0);
    shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
                    VMBUS_MESSAGE_SINT);

    shared_sint.vector = vmbus_interrupt;
    shared_sint.masked = false;

    /*
     * On architectures where Hyper-V doesn't support AEOI (e.g., ARM64),
     * it doesn't provide a recommendation flag and AEOI must be disabled.
     */
#ifdef HV_DEPRECATING_AEOI_RECOMMENDED
    shared_sint.auto_eoi =
            !(ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED);
#else
    shared_sint.auto_eoi = 0;
#endif
    hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
                shared_sint.as_uint64);

    /* Enable the global synic bit */
    sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
    sctrl.enable = 1;

    hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
}

int hv_synic_init(unsigned int cpu)
{
    hv_synic_enable_regs(cpu);

    hv_stimer_legacy_init(cpu, VMBUS_MESSAGE_SINT);

    return 0;
}

/*
 * hv_synic_cleanup - Cleanup routine for hv_synic_init().
 */
void hv_synic_disable_regs(unsigned int cpu)
{
    struct hv_per_cpu_context *hv_cpu
        = per_cpu_ptr(hv_context.cpu_context, cpu);
    union hv_synic_sint shared_sint;
    union hv_synic_simp simp;
    union hv_synic_siefp siefp;
    union hv_synic_scontrol sctrl;

    shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
                    VMBUS_MESSAGE_SINT);

    shared_sint.masked = 1;

    /* Need to correctly cleanup in the case of SMP!!! */
    /* Disable the interrupt */
    hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
                shared_sint.as_uint64);

    simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
    /*
     * In Isolation VM, sim and sief pages are allocated by
     * paravisor. These pages also will be used by kdump
     * kernel. So just reset enable bit here and keep page
     * addresses.
     */
    simp.simp_enabled = 0;
    if (hv_isolation_type_snp())
        memunmap(hv_cpu->synic_message_page);
    else
        simp.base_simp_gpa = 0;

    hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);

    siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
    siefp.siefp_enabled = 0;

    if (hv_isolation_type_snp())
        memunmap(hv_cpu->synic_event_page);
    else
        siefp.base_siefp_gpa = 0;

    hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);

    /* Disable the global synic bit */
    sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
    sctrl.enable = 0;
    hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);

    if (vmbus_irq != -1)
        disable_percpu_irq(vmbus_irq);
}

#define HV_MAX_TRIES 3
/*
 * Scan the event flags page of 'this' CPU looking for any bit that is set.  If we find one
 * bit set, then wait for a few milliseconds.  Repeat these steps for a maximum of 3 times.
 * Return 'true', if there is still any set bit after this operation; 'false', otherwise.
 *
 * If a bit is set, that means there is a pending channel interrupt.  The expectation is
 * that the normal interrupt handling mechanism will find and process the channel interrupt
 * "very soon", and in the process clear the bit.
 */
static bool hv_synic_event_pending(void)
{
    struct hv_per_cpu_context *hv_cpu = this_cpu_ptr(hv_context.cpu_context);
    union hv_synic_event_flags *event =
        (union hv_synic_event_flags *)hv_cpu->synic_event_page + VMBUS_MESSAGE_SINT;
    unsigned long *recv_int_page = event->flags; /* assumes VMBus version >= VERSION_WIN8 */
    bool pending;
    u32 relid;
    int tries = 0;

retry:
    pending = false;
    for_each_set_bit(relid, recv_int_page, HV_EVENT_FLAGS_COUNT) {
        /* Special case - VMBus channel protocol messages */
        if (relid == 0)
            continue;
        pending = true;
        break;
    }
    if (pending && tries++ < HV_MAX_TRIES) {
        usleep_range(10000, 20000);
        goto retry;
    }
    return pending;
}

int hv_synic_cleanup(unsigned int cpu)
{
    struct vmbus_channel *channel, *sc;
    bool channel_found = false;

    if (vmbus_connection.conn_state != CONNECTED)
        goto always_cleanup;

    /*
     * Hyper-V does not provide a way to change the connect CPU once
     * it is set; we must prevent the connect CPU from going offline
     * while the VM is running normally. But in the panic or kexec()
     * path where the vmbus is already disconnected, the CPU must be
     * allowed to shut down.
     */
    if (cpu == VMBUS_CONNECT_CPU)
        return -EBUSY;

    /*
     * Search for channels which are bound to the CPU we're about to
     * cleanup.  In case we find one and vmbus is still connected, we
     * fail; this will effectively prevent CPU offlining.
     *
     * TODO: Re-bind the channels to different CPUs.
     */
    mutex_lock(&vmbus_connection.channel_mutex);
    list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
        if (channel->target_cpu == cpu) {
            channel_found = true;
            break;
        }
        list_for_each_entry(sc, &channel->sc_list, sc_list) {
            if (sc->target_cpu == cpu) {
                channel_found = true;
                break;
            }
        }
        if (channel_found)
            break;
    }
    mutex_unlock(&vmbus_connection.channel_mutex);

    if (channel_found)
        return -EBUSY;

    /*
     * channel_found == false means that any channels that were previously
     * assigned to the CPU have been reassigned elsewhere with a call of
     * vmbus_send_modifychannel().  Scan the event flags page looking for
     * bits that are set and waiting with a timeout for vmbus_chan_sched()
     * to process such bits.  If bits are still set after this operation
     * and VMBus is connected, fail the CPU offlining operation.
     */
    if (vmbus_proto_version >= VERSION_WIN10_V4_1 && hv_synic_event_pending())
        return -EBUSY;

always_cleanup:
    hv_stimer_legacy_cleanup(cpu);

    hv_synic_disable_regs(cpu);

    return 0;
}
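
/*
 * Illustrative note (editor's sketch, not part of the upstream source):
 * hv_synic_init() and hv_synic_cleanup() are written to serve as CPU
 * hotplug online/offline callbacks, which is why hv_synic_cleanup() can
 * veto CPU offlining by returning -EBUSY. A hypothetical registration by
 * the VMBus driver would look like:
 *
 *     ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
 *                                     "hyperv/vmbus:online",
 *                                     hv_synic_init, hv_synic_cleanup);
 */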