Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  *
0004  * Copyright (c) 2009, Microsoft Corporation.
0005  *
0006  * Authors:
0007  *   Haiyang Zhang <haiyangz@microsoft.com>
0008  *   Hank Janssen  <hjanssen@microsoft.com>
0009  */
0010 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
0011 
0012 #include <linux/kernel.h>
0013 #include <linux/sched.h>
0014 #include <linux/wait.h>
0015 #include <linux/delay.h>
0016 #include <linux/mm.h>
0017 #include <linux/module.h>
0018 #include <linux/slab.h>
0019 #include <linux/vmalloc.h>
0020 #include <linux/hyperv.h>
0021 #include <linux/export.h>
0022 #include <linux/io.h>
0023 #include <linux/set_memory.h>
0024 #include <asm/mshyperv.h>
0025 
0026 #include "hyperv_vmbus.h"
0027 
0028 
0029 struct vmbus_connection vmbus_connection = {
0030     .conn_state     = DISCONNECTED,
0031     .unload_event       = COMPLETION_INITIALIZER(
0032                   vmbus_connection.unload_event),
0033     .next_gpadl_handle  = ATOMIC_INIT(0xE1E10),
0034 
0035     .ready_for_suspend_event = COMPLETION_INITIALIZER(
0036                   vmbus_connection.ready_for_suspend_event),
0037     .ready_for_resume_event = COMPLETION_INITIALIZER(
0038                   vmbus_connection.ready_for_resume_event),
0039 };
0040 EXPORT_SYMBOL_GPL(vmbus_connection);
0041 
0042 /*
0043  * Negotiated protocol version with the host.
0044  */
0045 __u32 vmbus_proto_version;
0046 EXPORT_SYMBOL_GPL(vmbus_proto_version);
0047 
0048 /*
0049  * Table of VMBus versions listed from newest to oldest.
0050  * VERSION_WIN7 and VERSION_WS2008 are no longer supported in
0051  * Linux guests and are not listed.
0052  */
0053 static __u32 vmbus_versions[] = {
0054     VERSION_WIN10_V5_3,
0055     VERSION_WIN10_V5_2,
0056     VERSION_WIN10_V5_1,
0057     VERSION_WIN10_V5,
0058     VERSION_WIN10_V4_1,
0059     VERSION_WIN10,
0060     VERSION_WIN8_1,
0061     VERSION_WIN8
0062 };
0063 
0064 /*
0065  * Maximal VMBus protocol version guests can negotiate.  Useful to cap the
0066  * VMBus version for testing and debugging purpose.
0067  */
0068 static uint max_version = VERSION_WIN10_V5_3;
0069 
0070 module_param(max_version, uint, S_IRUGO);
0071 MODULE_PARM_DESC(max_version,
0072          "Maximal VMBus protocol version which can be negotiated");
0073 
0074 int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version)
0075 {
0076     int ret = 0;
0077     struct vmbus_channel_initiate_contact *msg;
0078     unsigned long flags;
0079 
0080     init_completion(&msginfo->waitevent);
0081 
0082     msg = (struct vmbus_channel_initiate_contact *)msginfo->msg;
0083 
0084     memset(msg, 0, sizeof(*msg));
0085     msg->header.msgtype = CHANNELMSG_INITIATE_CONTACT;
0086     msg->vmbus_version_requested = version;
0087 
0088     /*
0089      * VMBus protocol 5.0 (VERSION_WIN10_V5) and higher require that we must
0090      * use VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate Contact Message,
0091      * and for subsequent messages, we must use the Message Connection ID
0092      * field in the host-returned Version Response Message. And, with
0093      * VERSION_WIN10_V5 and higher, we don't use msg->interrupt_page, but we
0094      * tell the host explicitly that we still use VMBUS_MESSAGE_SINT(2) for
0095      * compatibility.
0096      *
0097      * On old hosts, we should always use VMBUS_MESSAGE_CONNECTION_ID (1).
0098      */
0099     if (version >= VERSION_WIN10_V5) {
0100         msg->msg_sint = VMBUS_MESSAGE_SINT;
0101         vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID_4;
0102     } else {
0103         msg->interrupt_page = virt_to_phys(vmbus_connection.int_page);
0104         vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID;
0105     }
0106 
0107     msg->monitor_page1 = vmbus_connection.monitor_pages_pa[0];
0108     msg->monitor_page2 = vmbus_connection.monitor_pages_pa[1];
0109 
0110     msg->target_vcpu = hv_cpu_number_to_vp_number(VMBUS_CONNECT_CPU);
0111 
0112     /*
0113      * Add to list before we send the request since we may
0114      * receive the response before returning from this routine
0115      */
0116     spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
0117     list_add_tail(&msginfo->msglistentry,
0118               &vmbus_connection.chn_msg_list);
0119 
0120     spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
0121 
0122     ret = vmbus_post_msg(msg,
0123                  sizeof(struct vmbus_channel_initiate_contact),
0124                  true);
0125 
0126     trace_vmbus_negotiate_version(msg, ret);
0127 
0128     if (ret != 0) {
0129         spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
0130         list_del(&msginfo->msglistentry);
0131         spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock,
0132                     flags);
0133         return ret;
0134     }
0135 
0136     /* Wait for the connection response */
0137     wait_for_completion(&msginfo->waitevent);
0138 
0139     spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
0140     list_del(&msginfo->msglistentry);
0141     spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
0142 
0143     /* Check if successful */
0144     if (msginfo->response.version_response.version_supported) {
0145         vmbus_connection.conn_state = CONNECTED;
0146 
0147         if (version >= VERSION_WIN10_V5)
0148             vmbus_connection.msg_conn_id =
0149                 msginfo->response.version_response.msg_conn_id;
0150     } else {
0151         return -ECONNREFUSED;
0152     }
0153 
0154     return ret;
0155 }
0156 
0157 /*
0158  * vmbus_connect - Sends a connect request on the partition service connection
0159  */
0160 int vmbus_connect(void)
0161 {
0162     struct vmbus_channel_msginfo *msginfo = NULL;
0163     int i, ret = 0;
0164     __u32 version;
0165 
0166     /* Initialize the vmbus connection */
0167     vmbus_connection.conn_state = CONNECTING;
0168     vmbus_connection.work_queue = create_workqueue("hv_vmbus_con");
0169     if (!vmbus_connection.work_queue) {
0170         ret = -ENOMEM;
0171         goto cleanup;
0172     }
0173 
0174     vmbus_connection.rescind_work_queue =
0175         create_workqueue("hv_vmbus_rescind");
0176     if (!vmbus_connection.rescind_work_queue) {
0177         ret = -ENOMEM;
0178         goto cleanup;
0179     }
0180     vmbus_connection.ignore_any_offer_msg = false;
0181 
0182     vmbus_connection.handle_primary_chan_wq =
0183         create_workqueue("hv_pri_chan");
0184     if (!vmbus_connection.handle_primary_chan_wq) {
0185         ret = -ENOMEM;
0186         goto cleanup;
0187     }
0188 
0189     vmbus_connection.handle_sub_chan_wq =
0190         create_workqueue("hv_sub_chan");
0191     if (!vmbus_connection.handle_sub_chan_wq) {
0192         ret = -ENOMEM;
0193         goto cleanup;
0194     }
0195 
0196     INIT_LIST_HEAD(&vmbus_connection.chn_msg_list);
0197     spin_lock_init(&vmbus_connection.channelmsg_lock);
0198 
0199     INIT_LIST_HEAD(&vmbus_connection.chn_list);
0200     mutex_init(&vmbus_connection.channel_mutex);
0201 
0202     /*
0203      * Setup the vmbus event connection for channel interrupt
0204      * abstraction stuff
0205      */
0206     vmbus_connection.int_page =
0207     (void *)hv_alloc_hyperv_zeroed_page();
0208     if (vmbus_connection.int_page == NULL) {
0209         ret = -ENOMEM;
0210         goto cleanup;
0211     }
0212 
0213     vmbus_connection.recv_int_page = vmbus_connection.int_page;
0214     vmbus_connection.send_int_page =
0215         (void *)((unsigned long)vmbus_connection.int_page +
0216             (HV_HYP_PAGE_SIZE >> 1));
0217 
0218     /*
0219      * Setup the monitor notification facility. The 1st page for
0220      * parent->child and the 2nd page for child->parent
0221      */
0222     vmbus_connection.monitor_pages[0] = (void *)hv_alloc_hyperv_zeroed_page();
0223     vmbus_connection.monitor_pages[1] = (void *)hv_alloc_hyperv_zeroed_page();
0224     if ((vmbus_connection.monitor_pages[0] == NULL) ||
0225         (vmbus_connection.monitor_pages[1] == NULL)) {
0226         ret = -ENOMEM;
0227         goto cleanup;
0228     }
0229 
0230     vmbus_connection.monitor_pages_original[0]
0231         = vmbus_connection.monitor_pages[0];
0232     vmbus_connection.monitor_pages_original[1]
0233         = vmbus_connection.monitor_pages[1];
0234     vmbus_connection.monitor_pages_pa[0]
0235         = virt_to_phys(vmbus_connection.monitor_pages[0]);
0236     vmbus_connection.monitor_pages_pa[1]
0237         = virt_to_phys(vmbus_connection.monitor_pages[1]);
0238 
0239     if (hv_is_isolation_supported()) {
0240         ret = set_memory_decrypted((unsigned long)
0241                        vmbus_connection.monitor_pages[0],
0242                        1);
0243         ret |= set_memory_decrypted((unsigned long)
0244                         vmbus_connection.monitor_pages[1],
0245                         1);
0246         if (ret)
0247             goto cleanup;
0248 
0249         /*
0250          * Isolation VM with AMD SNP needs to access monitor page via
0251          * address space above shared gpa boundary.
0252          */
0253         if (hv_isolation_type_snp()) {
0254             vmbus_connection.monitor_pages_pa[0] +=
0255                 ms_hyperv.shared_gpa_boundary;
0256             vmbus_connection.monitor_pages_pa[1] +=
0257                 ms_hyperv.shared_gpa_boundary;
0258 
0259             vmbus_connection.monitor_pages[0]
0260                 = memremap(vmbus_connection.monitor_pages_pa[0],
0261                        HV_HYP_PAGE_SIZE,
0262                        MEMREMAP_WB);
0263             if (!vmbus_connection.monitor_pages[0]) {
0264                 ret = -ENOMEM;
0265                 goto cleanup;
0266             }
0267 
0268             vmbus_connection.monitor_pages[1]
0269                 = memremap(vmbus_connection.monitor_pages_pa[1],
0270                        HV_HYP_PAGE_SIZE,
0271                        MEMREMAP_WB);
0272             if (!vmbus_connection.monitor_pages[1]) {
0273                 ret = -ENOMEM;
0274                 goto cleanup;
0275             }
0276         }
0277 
0278         /*
0279          * Set memory host visibility hvcall smears memory
0280          * and so zero monitor pages here.
0281          */
0282         memset(vmbus_connection.monitor_pages[0], 0x00,
0283                HV_HYP_PAGE_SIZE);
0284         memset(vmbus_connection.monitor_pages[1], 0x00,
0285                HV_HYP_PAGE_SIZE);
0286 
0287     }
0288 
0289     msginfo = kzalloc(sizeof(*msginfo) +
0290               sizeof(struct vmbus_channel_initiate_contact),
0291               GFP_KERNEL);
0292     if (msginfo == NULL) {
0293         ret = -ENOMEM;
0294         goto cleanup;
0295     }
0296 
0297     /*
0298      * Negotiate a compatible VMBUS version number with the
0299      * host. We start with the highest number we can support
0300      * and work our way down until we negotiate a compatible
0301      * version.
0302      */
0303 
0304     for (i = 0; ; i++) {
0305         if (i == ARRAY_SIZE(vmbus_versions)) {
0306             ret = -EDOM;
0307             goto cleanup;
0308         }
0309 
0310         version = vmbus_versions[i];
0311         if (version > max_version)
0312             continue;
0313 
0314         ret = vmbus_negotiate_version(msginfo, version);
0315         if (ret == -ETIMEDOUT)
0316             goto cleanup;
0317 
0318         if (vmbus_connection.conn_state == CONNECTED)
0319             break;
0320     }
0321 
0322     if (hv_is_isolation_supported() && version < VERSION_WIN10_V5_2) {
0323         pr_err("Invalid VMBus version %d.%d (expected >= %d.%d) from the host supporting isolation\n",
0324                version >> 16, version & 0xFFFF, VERSION_WIN10_V5_2 >> 16, VERSION_WIN10_V5_2 & 0xFFFF);
0325         ret = -EINVAL;
0326         goto cleanup;
0327     }
0328 
0329     vmbus_proto_version = version;
0330     pr_info("Vmbus version:%d.%d\n",
0331         version >> 16, version & 0xFFFF);
0332 
0333     vmbus_connection.channels = kcalloc(MAX_CHANNEL_RELIDS,
0334                         sizeof(struct vmbus_channel *),
0335                         GFP_KERNEL);
0336     if (vmbus_connection.channels == NULL) {
0337         ret = -ENOMEM;
0338         goto cleanup;
0339     }
0340 
0341     kfree(msginfo);
0342     return 0;
0343 
0344 cleanup:
0345     pr_err("Unable to connect to host\n");
0346 
0347     vmbus_connection.conn_state = DISCONNECTED;
0348     vmbus_disconnect();
0349 
0350     kfree(msginfo);
0351 
0352     return ret;
0353 }
0354 
0355 void vmbus_disconnect(void)
0356 {
0357     /*
0358      * First send the unload request to the host.
0359      */
0360     vmbus_initiate_unload(false);
0361 
0362     if (vmbus_connection.handle_sub_chan_wq)
0363         destroy_workqueue(vmbus_connection.handle_sub_chan_wq);
0364 
0365     if (vmbus_connection.handle_primary_chan_wq)
0366         destroy_workqueue(vmbus_connection.handle_primary_chan_wq);
0367 
0368     if (vmbus_connection.rescind_work_queue)
0369         destroy_workqueue(vmbus_connection.rescind_work_queue);
0370 
0371     if (vmbus_connection.work_queue)
0372         destroy_workqueue(vmbus_connection.work_queue);
0373 
0374     if (vmbus_connection.int_page) {
0375         hv_free_hyperv_page((unsigned long)vmbus_connection.int_page);
0376         vmbus_connection.int_page = NULL;
0377     }
0378 
0379     if (hv_is_isolation_supported()) {
0380         /*
0381          * memunmap() checks input address is ioremap address or not
0382          * inside. It doesn't unmap any thing in the non-SNP CVM and
0383          * so not check CVM type here.
0384          */
0385         memunmap(vmbus_connection.monitor_pages[0]);
0386         memunmap(vmbus_connection.monitor_pages[1]);
0387 
0388         set_memory_encrypted((unsigned long)
0389             vmbus_connection.monitor_pages_original[0],
0390             1);
0391         set_memory_encrypted((unsigned long)
0392             vmbus_connection.monitor_pages_original[1],
0393             1);
0394     }
0395 
0396     hv_free_hyperv_page((unsigned long)
0397         vmbus_connection.monitor_pages_original[0]);
0398     hv_free_hyperv_page((unsigned long)
0399         vmbus_connection.monitor_pages_original[1]);
0400     vmbus_connection.monitor_pages_original[0] =
0401         vmbus_connection.monitor_pages[0] = NULL;
0402     vmbus_connection.monitor_pages_original[1] =
0403         vmbus_connection.monitor_pages[1] = NULL;
0404 }
0405 
0406 /*
0407  * relid2channel - Get the channel object given its
0408  * child relative id (ie channel id)
0409  */
0410 struct vmbus_channel *relid2channel(u32 relid)
0411 {
0412     if (WARN_ON(relid >= MAX_CHANNEL_RELIDS))
0413         return NULL;
0414     return READ_ONCE(vmbus_connection.channels[relid]);
0415 }
0416 
0417 /*
0418  * vmbus_on_event - Process a channel event notification
0419  *
0420  * For batched channels (default) optimize host to guest signaling
0421  * by ensuring:
0422  * 1. While reading the channel, we disable interrupts from host.
0423  * 2. Ensure that we process all posted messages from the host
0424  *    before returning from this callback.
0425  * 3. Once we return, enable signaling from the host. Once this
0426  *    state is set we check to see if additional packets are
0427  *    available to read. In this case we repeat the process.
0428  *    If this tasklet has been running for a long time
0429  *    then reschedule ourselves.
0430  */
0431 void vmbus_on_event(unsigned long data)
0432 {
0433     struct vmbus_channel *channel = (void *) data;
0434     unsigned long time_limit = jiffies + 2;
0435 
0436     trace_vmbus_on_event(channel);
0437 
0438     hv_debug_delay_test(channel, INTERRUPT_DELAY);
0439     do {
0440         void (*callback_fn)(void *);
0441 
0442         /* A channel once created is persistent even when
0443          * there is no driver handling the device. An
0444          * unloading driver sets the onchannel_callback to NULL.
0445          */
0446         callback_fn = READ_ONCE(channel->onchannel_callback);
0447         if (unlikely(callback_fn == NULL))
0448             return;
0449 
0450         (*callback_fn)(channel->channel_callback_context);
0451 
0452         if (channel->callback_mode != HV_CALL_BATCHED)
0453             return;
0454 
0455         if (likely(hv_end_read(&channel->inbound) == 0))
0456             return;
0457 
0458         hv_begin_read(&channel->inbound);
0459     } while (likely(time_before(jiffies, time_limit)));
0460 
0461     /* The time limit (2 jiffies) has been reached */
0462     tasklet_schedule(&channel->callback_event);
0463 }
0464 
0465 /*
0466  * vmbus_post_msg - Send a msg on the vmbus's message connection
0467  */
0468 int vmbus_post_msg(void *buffer, size_t buflen, bool can_sleep)
0469 {
0470     struct vmbus_channel_message_header *hdr;
0471     union hv_connection_id conn_id;
0472     int ret = 0;
0473     int retries = 0;
0474     u32 usec = 1;
0475 
0476     conn_id.asu32 = 0;
0477     conn_id.u.id = vmbus_connection.msg_conn_id;
0478 
0479     /*
0480      * hv_post_message() can have transient failures because of
0481      * insufficient resources. Retry the operation a couple of
0482      * times before giving up.
0483      */
0484     while (retries < 100) {
0485         ret = hv_post_message(conn_id, 1, buffer, buflen);
0486 
0487         switch (ret) {
0488         case HV_STATUS_INVALID_CONNECTION_ID:
0489             /*
0490              * See vmbus_negotiate_version(): VMBus protocol 5.0
0491              * and higher require that we must use
0492              * VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate
0493              * Contact message, but on old hosts that only
0494              * support VMBus protocol 4.0 or lower, here we get
0495              * HV_STATUS_INVALID_CONNECTION_ID and we should
0496              * return an error immediately without retrying.
0497              */
0498             hdr = buffer;
0499             if (hdr->msgtype == CHANNELMSG_INITIATE_CONTACT)
0500                 return -EINVAL;
0501             /*
0502              * We could get this if we send messages too
0503              * frequently.
0504              */
0505             ret = -EAGAIN;
0506             break;
0507         case HV_STATUS_INSUFFICIENT_MEMORY:
0508         case HV_STATUS_INSUFFICIENT_BUFFERS:
0509             ret = -ENOBUFS;
0510             break;
0511         case HV_STATUS_SUCCESS:
0512             return ret;
0513         default:
0514             pr_err("hv_post_msg() failed; error code:%d\n", ret);
0515             return -EINVAL;
0516         }
0517 
0518         retries++;
0519         if (can_sleep && usec > 1000)
0520             msleep(usec / 1000);
0521         else if (usec < MAX_UDELAY_MS * 1000)
0522             udelay(usec);
0523         else
0524             mdelay(usec / 1000);
0525 
0526         if (retries < 22)
0527             usec *= 2;
0528     }
0529     return ret;
0530 }
0531 
0532 /*
0533  * vmbus_set_event - Send an event notification to the parent
0534  */
0535 void vmbus_set_event(struct vmbus_channel *channel)
0536 {
0537     u32 child_relid = channel->offermsg.child_relid;
0538 
0539     if (!channel->is_dedicated_interrupt)
0540         vmbus_send_interrupt(child_relid);
0541 
0542     ++channel->sig_events;
0543 
0544     if (hv_isolation_type_snp())
0545         hv_ghcb_hypercall(HVCALL_SIGNAL_EVENT, &channel->sig_event,
0546                 NULL, sizeof(channel->sig_event));
0547     else
0548         hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, channel->sig_event);
0549 }
0550 EXPORT_SYMBOL_GPL(vmbus_set_event);