0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * Copyright (c) 2009, Microsoft Corporation.
0004  *
0005  * Authors:
0006  *   Haiyang Zhang <haiyangz@microsoft.com>
0007  *   Hank Janssen  <hjanssen@microsoft.com>
0008  */
0009 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
0010 
0011 #include <linux/kernel.h>
0012 #include <linux/interrupt.h>
0013 #include <linux/sched.h>
0014 #include <linux/wait.h>
0015 #include <linux/mm.h>
0016 #include <linux/slab.h>
0017 #include <linux/list.h>
0018 #include <linux/module.h>
0019 #include <linux/completion.h>
0020 #include <linux/delay.h>
0021 #include <linux/cpu.h>
0022 #include <linux/hyperv.h>
0023 #include <asm/mshyperv.h>
0024 #include <linux/sched/isolation.h>
0025 
0026 #include "hyperv_vmbus.h"
0027 
0028 static void init_vp_index(struct vmbus_channel *channel);
0029 
0030 const struct vmbus_device vmbus_devs[] = {
0031     /* IDE */
0032     { .dev_type = HV_IDE,
0033       HV_IDE_GUID,
0034       .perf_device = true,
0035       .allowed_in_isolated = false,
0036     },
0037 
0038     /* SCSI */
0039     { .dev_type = HV_SCSI,
0040       HV_SCSI_GUID,
0041       .perf_device = true,
0042       .allowed_in_isolated = true,
0043     },
0044 
0045     /* Fibre Channel */
0046     { .dev_type = HV_FC,
0047       HV_SYNTHFC_GUID,
0048       .perf_device = true,
0049       .allowed_in_isolated = false,
0050     },
0051 
0052     /* Synthetic NIC */
0053     { .dev_type = HV_NIC,
0054       HV_NIC_GUID,
0055       .perf_device = true,
0056       .allowed_in_isolated = true,
0057     },
0058 
0059     /* Network Direct */
0060     { .dev_type = HV_ND,
0061       HV_ND_GUID,
0062       .perf_device = true,
0063       .allowed_in_isolated = false,
0064     },
0065 
0066     /* PCIE */
0067     { .dev_type = HV_PCIE,
0068       HV_PCIE_GUID,
0069       .perf_device = false,
0070       .allowed_in_isolated = false,
0071     },
0072 
0073     /* Synthetic Frame Buffer */
0074     { .dev_type = HV_FB,
0075       HV_SYNTHVID_GUID,
0076       .perf_device = false,
0077       .allowed_in_isolated = false,
0078     },
0079 
0080     /* Synthetic Keyboard */
0081     { .dev_type = HV_KBD,
0082       HV_KBD_GUID,
0083       .perf_device = false,
0084       .allowed_in_isolated = false,
0085     },
0086 
0087     /* Synthetic MOUSE */
0088     { .dev_type = HV_MOUSE,
0089       HV_MOUSE_GUID,
0090       .perf_device = false,
0091       .allowed_in_isolated = false,
0092     },
0093 
0094     /* KVP */
0095     { .dev_type = HV_KVP,
0096       HV_KVP_GUID,
0097       .perf_device = false,
0098       .allowed_in_isolated = false,
0099     },
0100 
0101     /* Time Synch */
0102     { .dev_type = HV_TS,
0103       HV_TS_GUID,
0104       .perf_device = false,
0105       .allowed_in_isolated = true,
0106     },
0107 
0108     /* Heartbeat */
0109     { .dev_type = HV_HB,
0110       HV_HEART_BEAT_GUID,
0111       .perf_device = false,
0112       .allowed_in_isolated = true,
0113     },
0114 
0115     /* Shutdown */
0116     { .dev_type = HV_SHUTDOWN,
0117       HV_SHUTDOWN_GUID,
0118       .perf_device = false,
0119       .allowed_in_isolated = true,
0120     },
0121 
0122     /* File copy */
0123     { .dev_type = HV_FCOPY,
0124       HV_FCOPY_GUID,
0125       .perf_device = false,
0126       .allowed_in_isolated = false,
0127     },
0128 
0129     /* Backup */
0130     { .dev_type = HV_BACKUP,
0131       HV_VSS_GUID,
0132       .perf_device = false,
0133       .allowed_in_isolated = false,
0134     },
0135 
0136     /* Dynamic Memory */
0137     { .dev_type = HV_DM,
0138       HV_DM_GUID,
0139       .perf_device = false,
0140       .allowed_in_isolated = false,
0141     },
0142 
0143     /* Unknown GUID */
0144     { .dev_type = HV_UNKNOWN,
0145       .perf_device = false,
0146       .allowed_in_isolated = false,
0147     },
0148 };
0149 
0150 static const struct {
0151     guid_t guid;
0152 } vmbus_unsupported_devs[] = {
0153     { HV_AVMA1_GUID },
0154     { HV_AVMA2_GUID },
0155     { HV_RDV_GUID   },
0156     { HV_IMC_GUID   },
0157 };
0158 
0159 /*
0160  * The rescinded channel may be blocked waiting for a response from the host;
0161  * take care of that.
0162  */
0163 static void vmbus_rescind_cleanup(struct vmbus_channel *channel)
0164 {
0165     struct vmbus_channel_msginfo *msginfo;
0166     unsigned long flags;
0167 
0168 
0169     spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
0170     channel->rescind = true;
0171     list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
0172                 msglistentry) {
0173 
0174         if (msginfo->waiting_channel == channel) {
0175             complete(&msginfo->waitevent);
0176             break;
0177         }
0178     }
0179     spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
0180 }
0181 
0182 static bool is_unsupported_vmbus_devs(const guid_t *guid)
0183 {
0184     int i;
0185 
0186     for (i = 0; i < ARRAY_SIZE(vmbus_unsupported_devs); i++)
0187         if (guid_equal(guid, &vmbus_unsupported_devs[i].guid))
0188             return true;
0189     return false;
0190 }
0191 
0192 static u16 hv_get_dev_type(const struct vmbus_channel *channel)
0193 {
0194     const guid_t *guid = &channel->offermsg.offer.if_type;
0195     u16 i;
0196 
0197     if (is_hvsock_channel(channel) || is_unsupported_vmbus_devs(guid))
0198         return HV_UNKNOWN;
0199 
0200     for (i = HV_IDE; i < HV_UNKNOWN; i++) {
0201         if (guid_equal(guid, &vmbus_devs[i].guid))
0202             return i;
0203     }
0204     pr_info("Unknown GUID: %pUl\n", guid);
0205     return i;
0206 }
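/*
 * Illustrative sketch, not part of the original file: the device type
 * returned by hv_get_dev_type() indexes vmbus_devs[], e.g. to query the
 * "perf" property of an offer.  This roughly mirrors hv_is_perf_channel()
 * in hyperv_vmbus.h; the example_ name is hypothetical.
 */
static bool example_channel_is_perf(const struct vmbus_channel *channel)
{
    return vmbus_devs[hv_get_dev_type(channel)].perf_device;
}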
0207 
0208 /**
0209  * vmbus_prep_negotiate_resp() - Create default response for Negotiate message
0210  * @icmsghdrp: Pointer to msg header structure
0211  * @buf: Raw buffer channel data
0212  * @buflen: Length of the raw buffer channel data.
0213  * @fw_version: The framework versions we can support.
0214  * @fw_vercnt: The number of entries in @fw_version.
0215  * @srv_version: The service versions we can support.
0216  * @srv_vercnt: The number of entries in @srv_version.
0217  * @nego_fw_version: The selected framework version.
0218  * @nego_srv_version: The selected service version.
0219  *
0220  * Note: Versions are given in decreasing order.
0221  *
0222  * Set up and fill in default negotiate response message.
0223  * Mainly used by Hyper-V drivers.
0224  */
0225 bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf,
0226                 u32 buflen, const int *fw_version, int fw_vercnt,
0227                 const int *srv_version, int srv_vercnt,
0228                 int *nego_fw_version, int *nego_srv_version)
0229 {
0230     int icframe_major, icframe_minor;
0231     int icmsg_major, icmsg_minor;
0232     int fw_major, fw_minor;
0233     int srv_major, srv_minor;
0234     int i, j;
0235     bool found_match = false;
0236     struct icmsg_negotiate *negop;
0237 
0238     /* Check that there's enough space for icframe_vercnt, icmsg_vercnt */
0239     if (buflen < ICMSG_HDR + offsetof(struct icmsg_negotiate, reserved)) {
0240         pr_err_ratelimited("Invalid icmsg negotiate\n");
0241         return false;
0242     }
0243 
0244     icmsghdrp->icmsgsize = 0x10;
0245     negop = (struct icmsg_negotiate *)&buf[ICMSG_HDR];
0246 
0247     icframe_major = negop->icframe_vercnt;
0248     icframe_minor = 0;
0249 
0250     icmsg_major = negop->icmsg_vercnt;
0251     icmsg_minor = 0;
0252 
0253     /* Validate negop packet */
0254     if (icframe_major > IC_VERSION_NEGOTIATION_MAX_VER_COUNT ||
0255         icmsg_major > IC_VERSION_NEGOTIATION_MAX_VER_COUNT ||
0256         ICMSG_NEGOTIATE_PKT_SIZE(icframe_major, icmsg_major) > buflen) {
0257         pr_err_ratelimited("Invalid icmsg negotiate - icframe_major: %u, icmsg_major: %u\n",
0258                    icframe_major, icmsg_major);
0259         goto fw_error;
0260     }
0261 
0262     /*
0263      * Select the framework version number we will
0264      * support.
0265      */
0266 
0267     for (i = 0; i < fw_vercnt; i++) {
0268         fw_major = (fw_version[i] >> 16);
0269         fw_minor = (fw_version[i] & 0xFFFF);
0270 
0271         for (j = 0; j < negop->icframe_vercnt; j++) {
0272             if ((negop->icversion_data[j].major == fw_major) &&
0273                 (negop->icversion_data[j].minor == fw_minor)) {
0274                 icframe_major = negop->icversion_data[j].major;
0275                 icframe_minor = negop->icversion_data[j].minor;
0276                 found_match = true;
0277                 break;
0278             }
0279         }
0280 
0281         if (found_match)
0282             break;
0283     }
0284 
0285     if (!found_match)
0286         goto fw_error;
0287 
0288     found_match = false;
0289 
0290     for (i = 0; i < srv_vercnt; i++) {
0291         srv_major = (srv_version[i] >> 16);
0292         srv_minor = (srv_version[i] & 0xFFFF);
0293 
0294         for (j = negop->icframe_vercnt;
0295             (j < negop->icframe_vercnt + negop->icmsg_vercnt);
0296             j++) {
0297 
0298             if ((negop->icversion_data[j].major == srv_major) &&
0299                 (negop->icversion_data[j].minor == srv_minor)) {
0300 
0301                 icmsg_major = negop->icversion_data[j].major;
0302                 icmsg_minor = negop->icversion_data[j].minor;
0303                 found_match = true;
0304                 break;
0305             }
0306         }
0307 
0308         if (found_match)
0309             break;
0310     }
0311 
0312     /*
0313      * Respond with the framework and service
0314      * version numbers we can support.
0315      */
0316 
0317 fw_error:
0318     if (!found_match) {
0319         negop->icframe_vercnt = 0;
0320         negop->icmsg_vercnt = 0;
0321     } else {
0322         negop->icframe_vercnt = 1;
0323         negop->icmsg_vercnt = 1;
0324     }
0325 
0326     if (nego_fw_version)
0327         *nego_fw_version = (icframe_major << 16) | icframe_minor;
0328 
0329     if (nego_srv_version)
0330         *nego_srv_version = (icmsg_major << 16) | icmsg_minor;
0331 
0332     negop->icversion_data[0].major = icframe_major;
0333     negop->icversion_data[0].minor = icframe_minor;
0334     negop->icversion_data[1].major = icmsg_major;
0335     negop->icversion_data[1].minor = icmsg_minor;
0336     return found_match;
0337 }
0338 EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
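/*
 * Illustrative sketch, not part of the original file: how an IC utility
 * driver might call vmbus_prep_negotiate_resp() when a negotiate request
 * arrives on its channel.  Loosely modeled on the hv_util drivers; the
 * example_ names and the service version value are hypothetical.
 */
static void example_handle_negotiate(struct vmbus_channel *channel,
                     u8 *recv_buffer, u32 recvlen, u64 requestid)
{
    /* Versions offered back to the host, in decreasing order. */
    static const int example_fw_versions[] = { UTIL_FW_VERSION };
    static const int example_srv_versions[] = { (3 << 16) | 0 };
    struct icmsg_hdr *icmsghdrp;
    int negotiated_srv = 0;

    icmsghdrp = (struct icmsg_hdr *)(recv_buffer + sizeof(struct vmbuspipe_hdr));

    if (vmbus_prep_negotiate_resp(icmsghdrp, recv_buffer, recvlen,
                      example_fw_versions,
                      ARRAY_SIZE(example_fw_versions),
                      example_srv_versions,
                      ARRAY_SIZE(example_srv_versions),
                      NULL, &negotiated_srv))
        pr_info("negotiated service version %d.%d\n",
            negotiated_srv >> 16, negotiated_srv & 0xFFFF);

    /* Echo the (now filled-in) negotiate payload back to the host. */
    icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION | ICMSGHDRFLAG_RESPONSE;
    vmbus_sendpacket(channel, recv_buffer, recvlen, requestid,
             VM_PKT_DATA_INBAND, 0);
}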
0339 
0340 /*
0341  * alloc_channel - Allocate and initialize a vmbus channel object
0342  */
0343 static struct vmbus_channel *alloc_channel(void)
0344 {
0345     struct vmbus_channel *channel;
0346 
0347     channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
0348     if (!channel)
0349         return NULL;
0350 
0351     spin_lock_init(&channel->sched_lock);
0352     init_completion(&channel->rescind_event);
0353 
0354     INIT_LIST_HEAD(&channel->sc_list);
0355 
0356     tasklet_init(&channel->callback_event,
0357              vmbus_on_event, (unsigned long)channel);
0358 
0359     hv_ringbuffer_pre_init(channel);
0360 
0361     return channel;
0362 }
0363 
0364 /*
0365  * free_channel - Release the resources used by the vmbus channel object
0366  */
0367 static void free_channel(struct vmbus_channel *channel)
0368 {
0369     tasklet_kill(&channel->callback_event);
0370     vmbus_remove_channel_attr_group(channel);
0371 
0372     kobject_put(&channel->kobj);
0373 }
0374 
0375 void vmbus_channel_map_relid(struct vmbus_channel *channel)
0376 {
0377     if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
0378         return;
0379     /*
0380      * The mapping of the channel's relid is visible from the CPUs that
0381      * execute vmbus_chan_sched() by the time that vmbus_chan_sched() will
0382      * execute:
0383      *
0384      *  (a) In the "normal (i.e., not resuming from hibernation)" path,
0385      *      the full barrier in virt_store_mb() guarantees that the store
0386      *      is propagated to all CPUs before the add_channel_work work
0387      *      is queued.  In turn, add_channel_work is queued before the
0388      *      channel's ring buffer is allocated/initialized and the
0389      *      OPENCHANNEL message for the channel is sent in vmbus_open().
0390      *      Hyper-V won't start sending the interrupts for the channel
0391      *      before the OPENCHANNEL message is acked.  The memory barrier
0392      *      in vmbus_chan_sched() -> sync_test_and_clear_bit() ensures
0393      *      that vmbus_chan_sched() must find the channel's relid in
0394      *      recv_int_page before retrieving the channel pointer from the
0395      *      array of channels.
0396      *
0397      *  (b) In the "resuming from hibernation" path, the virt_store_mb()
0398      *      guarantees that the store is propagated to all CPUs before
0399      *      the VMBus connection is marked as ready for the resume event
0400      *      (cf. check_ready_for_resume_event()).  The interrupt handler
0401      *      of the VMBus driver and vmbus_chan_sched() can not run before
0402      *      vmbus_bus_resume() has completed execution (cf. resume_noirq).
0403      */
0404     virt_store_mb(
0405         vmbus_connection.channels[channel->offermsg.child_relid],
0406         channel);
0407 }
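/*
 * Illustrative sketch, not part of the original file: the reader side that
 * the virt_store_mb() above pairs with.  This only approximates the relid
 * lookup done on the interrupt path (cf. vmbus_chan_sched() in vmbus_drv.c);
 * the example_ name is hypothetical.
 */
static struct vmbus_channel *example_relid_to_channel(u32 relid)
{
    if (relid >= MAX_CHANNEL_RELIDS)
        return NULL;
    /* Pairs with the barrier in vmbus_channel_map_relid(). */
    return READ_ONCE(vmbus_connection.channels[relid]);
}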
0408 
0409 void vmbus_channel_unmap_relid(struct vmbus_channel *channel)
0410 {
0411     if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
0412         return;
0413     WRITE_ONCE(
0414         vmbus_connection.channels[channel->offermsg.child_relid],
0415         NULL);
0416 }
0417 
0418 static void vmbus_release_relid(u32 relid)
0419 {
0420     struct vmbus_channel_relid_released msg;
0421     int ret;
0422 
0423     memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
0424     msg.child_relid = relid;
0425     msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
0426     ret = vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released),
0427                  true);
0428 
0429     trace_vmbus_release_relid(&msg, ret);
0430 }
0431 
0432 void hv_process_channel_removal(struct vmbus_channel *channel)
0433 {
0434     lockdep_assert_held(&vmbus_connection.channel_mutex);
0435     BUG_ON(!channel->rescind);
0436 
0437     /*
0438      * hv_process_channel_removal() could find INVALID_RELID only for
0439      * hv_sock channels.  See the inline comments in vmbus_onoffer().
0440      */
0441     WARN_ON(channel->offermsg.child_relid == INVALID_RELID &&
0442         !is_hvsock_channel(channel));
0443 
0444     /*
0445      * Upon suspend, an in-use hv_sock channel is removed from the array of
0446      * channels and the relid is invalidated.  After hibernation, when the
0447      * user-space application destroys the channel, it's unnecessary and
0448      * unsafe to remove the channel from the array of channels.  See also
0449      * the inline comments before the call of vmbus_release_relid() below.
0450      */
0451     if (channel->offermsg.child_relid != INVALID_RELID)
0452         vmbus_channel_unmap_relid(channel);
0453 
0454     if (channel->primary_channel == NULL)
0455         list_del(&channel->listentry);
0456     else
0457         list_del(&channel->sc_list);
0458 
0459     /*
0460      * If this is a "perf" channel, update the hv_numa_map[] masks so that
0461      * init_vp_index() can (re-)use the CPU.
0462      */
0463     if (hv_is_perf_channel(channel))
0464         hv_clear_allocated_cpu(channel->target_cpu);
0465 
0466     /*
0467      * Upon suspend, an in-use hv_sock channel is marked as "rescinded" and
0468      * the relid is invalidated; after hibernation, when the user-space app
0469      * destroys the channel, the relid is INVALID_RELID, and in this case
0470      * it's unnecessary and unsafe to release the old relid, since the same
0471      * relid can refer to a completely different channel now.
0472      */
0473     if (channel->offermsg.child_relid != INVALID_RELID)
0474         vmbus_release_relid(channel->offermsg.child_relid);
0475 
0476     free_channel(channel);
0477 }
0478 
0479 void vmbus_free_channels(void)
0480 {
0481     struct vmbus_channel *channel, *tmp;
0482 
0483     list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
0484         listentry) {
0485         /* hv_process_channel_removal() needs this */
0486         channel->rescind = true;
0487 
0488         vmbus_device_unregister(channel->device_obj);
0489     }
0490 }
0491 
0492 /* Note: the function can run concurrently for primary/sub channels. */
0493 static void vmbus_add_channel_work(struct work_struct *work)
0494 {
0495     struct vmbus_channel *newchannel =
0496         container_of(work, struct vmbus_channel, add_channel_work);
0497     struct vmbus_channel *primary_channel = newchannel->primary_channel;
0498     int ret;
0499 
0500     /*
0501      * This state is used to indicate a successful open
0502      * so that when we do close the channel normally, we
0503      * can clean up properly.
0504      */
0505     newchannel->state = CHANNEL_OPEN_STATE;
0506 
0507     if (primary_channel != NULL) {
0508         /* newchannel is a sub-channel. */
0509         struct hv_device *dev = primary_channel->device_obj;
0510 
0511         if (vmbus_add_channel_kobj(dev, newchannel))
0512             goto err_deq_chan;
0513 
0514         if (primary_channel->sc_creation_callback != NULL)
0515             primary_channel->sc_creation_callback(newchannel);
0516 
0517         newchannel->probe_done = true;
0518         return;
0519     }
0520 
0521     /*
0522      * Start the process of binding the primary channel to the driver
0523      */
0524     newchannel->device_obj = vmbus_device_create(
0525         &newchannel->offermsg.offer.if_type,
0526         &newchannel->offermsg.offer.if_instance,
0527         newchannel);
0528     if (!newchannel->device_obj)
0529         goto err_deq_chan;
0530 
0531     newchannel->device_obj->device_id = newchannel->device_id;
0532     /*
0533      * Add the new device to the bus. This will kick off device-driver
0534      * binding which eventually invokes the device driver's AddDevice()
0535      * method.
0536      */
0537     ret = vmbus_device_register(newchannel->device_obj);
0538 
0539     if (ret != 0) {
0540         pr_err("unable to add child device object (relid %d)\n",
0541             newchannel->offermsg.child_relid);
0542         kfree(newchannel->device_obj);
0543         goto err_deq_chan;
0544     }
0545 
0546     newchannel->probe_done = true;
0547     return;
0548 
0549 err_deq_chan:
0550     mutex_lock(&vmbus_connection.channel_mutex);
0551 
0552     /*
0553      * We need to set the flag, otherwise
0554      * vmbus_onoffer_rescind() can be blocked.
0555      */
0556     newchannel->probe_done = true;
0557 
0558     if (primary_channel == NULL)
0559         list_del(&newchannel->listentry);
0560     else
0561         list_del(&newchannel->sc_list);
0562 
0563     /* vmbus_process_offer() has mapped the channel. */
0564     vmbus_channel_unmap_relid(newchannel);
0565 
0566     mutex_unlock(&vmbus_connection.channel_mutex);
0567 
0568     vmbus_release_relid(newchannel->offermsg.child_relid);
0569 
0570     free_channel(newchannel);
0571 }
0572 
0573 /*
0574  * vmbus_process_offer - Process the offer by creating a channel/device
0575  * associated with this offer
0576  */
0577 static void vmbus_process_offer(struct vmbus_channel *newchannel)
0578 {
0579     struct vmbus_channel *channel;
0580     struct workqueue_struct *wq;
0581     bool fnew = true;
0582 
0583     /*
0584      * Synchronize vmbus_process_offer() and CPU hotplugging:
0585      *
0586      * CPU1             CPU2
0587      *
0588      * [vmbus_process_offer()]  [Hot removal of the CPU]
0589      *
0590      * CPU_READ_LOCK        CPUS_WRITE_LOCK
0591      * LOAD cpu_online_mask     SEARCH chn_list
0592      * STORE target_cpu     LOAD target_cpu
0593      * INSERT chn_list      STORE cpu_online_mask
0594      * CPUS_READ_UNLOCK     CPUS_WRITE_UNLOCK
0595      *
0596      * Forbids: CPU1's LOAD from *not* seeing CPU2's STORE &&
0597      *              CPU2's SEARCH from *not* seeing CPU1's INSERT
0598      *
0599      * Forbids: CPU2's SEARCH from seeing CPU1's INSERT &&
0600      *              CPU2's LOAD from *not* seeing CPU1's STORE
0601      */
0602     cpus_read_lock();
0603 
0604     /*
0605      * Serializes the modifications of the chn_list list as well as
0606      * the accesses to next_numa_node_id in init_vp_index().
0607      */
0608     mutex_lock(&vmbus_connection.channel_mutex);
0609 
0610     list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
0611         if (guid_equal(&channel->offermsg.offer.if_type,
0612                    &newchannel->offermsg.offer.if_type) &&
0613             guid_equal(&channel->offermsg.offer.if_instance,
0614                    &newchannel->offermsg.offer.if_instance)) {
0615             fnew = false;
0616             newchannel->primary_channel = channel;
0617             break;
0618         }
0619     }
0620 
0621     init_vp_index(newchannel);
0622 
0623     /* Remember the channels that should be cleaned up upon suspend. */
0624     if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel))
0625         atomic_inc(&vmbus_connection.nr_chan_close_on_suspend);
0626 
0627     /*
0628      * Now that we have acquired the channel_mutex,
0629      * we can release the potentially racing rescind thread.
0630      */
0631     atomic_dec(&vmbus_connection.offer_in_progress);
0632 
0633     if (fnew) {
0634         list_add_tail(&newchannel->listentry,
0635                   &vmbus_connection.chn_list);
0636     } else {
0637         /*
0638          * Check to see if this is a valid sub-channel.
0639          */
0640         if (newchannel->offermsg.offer.sub_channel_index == 0) {
0641             mutex_unlock(&vmbus_connection.channel_mutex);
0642             cpus_read_unlock();
0643             /*
0644              * Don't call free_channel(), because newchannel->kobj
0645              * is not initialized yet.
0646              */
0647             kfree(newchannel);
0648             WARN_ON_ONCE(1);
0649             return;
0650         }
0651         /*
0652          * Process the sub-channel.
0653          */
0654         list_add_tail(&newchannel->sc_list, &channel->sc_list);
0655     }
0656 
0657     vmbus_channel_map_relid(newchannel);
0658 
0659     mutex_unlock(&vmbus_connection.channel_mutex);
0660     cpus_read_unlock();
0661 
0662     /*
0663      * vmbus_process_offer() mustn't call channel->sc_creation_callback()
0664      * directly for sub-channels, because sc_creation_callback() ->
0665      * vmbus_open() may never get the host's response to the
0666      * OPEN_CHANNEL message (the host may rescind a channel at any time,
0667      * e.g. in the case of hot removing a NIC), and vmbus_onoffer_rescind()
0668      * may not wake up the vmbus_open() as it's blocked due to a non-zero
0669      * vmbus_connection.offer_in_progress, and finally we have a deadlock.
0670      *
0671      * The above is also true for primary channels, if the related device
0672      * drivers use sync probing mode by default.
0673      *
0674      * And, usually the handling of primary channels and sub-channels can
0675      * depend on each other, so we should offload them to different
0676      * workqueues to avoid possible deadlock, e.g. in sync-probing mode,
0677      * NIC1's netvsc_subchan_work() can race with NIC2's netvsc_probe() ->
0678      * rtnl_lock(), and causes deadlock: the former gets the rtnl_lock
0679      * and waits for all the sub-channels to appear, but the latter
0680      * can't get the rtnl_lock and this blocks the handling of
0681      * sub-channels.
0682      */
0683     INIT_WORK(&newchannel->add_channel_work, vmbus_add_channel_work);
0684     wq = fnew ? vmbus_connection.handle_primary_chan_wq :
0685             vmbus_connection.handle_sub_chan_wq;
0686     queue_work(wq, &newchannel->add_channel_work);
0687 }
0688 
0689 /*
0690  * Check if the given CPU is used by other channels of the same device.
0691  * It should only be called by init_vp_index().
0692  */
0693 static bool hv_cpuself_used(u32 cpu, struct vmbus_channel *chn)
0694 {
0695     struct vmbus_channel *primary = chn->primary_channel;
0696     struct vmbus_channel *sc;
0697 
0698     lockdep_assert_held(&vmbus_connection.channel_mutex);
0699 
0700     if (!primary)
0701         return false;
0702 
0703     if (primary->target_cpu == cpu)
0704         return true;
0705 
0706     list_for_each_entry(sc, &primary->sc_list, sc_list)
0707         if (sc != chn && sc->target_cpu == cpu)
0708             return true;
0709 
0710     return false;
0711 }
0712 
0713 /*
0714  * We use this state to statically distribute the channel interrupt load.
0715  */
0716 static int next_numa_node_id;
0717 
0718 /*
0719  * We can statically distribute the incoming channel interrupt load
0720  * by binding a channel to a VCPU.
0721  *
0722  * For non-performance critical channels we assign the VMBUS_CONNECT_CPU.
0723  * Performance critical channels will be distributed evenly among all
0724  * the available NUMA nodes.  Once the node is assigned, we will assign
0725  * the CPU based on a simple round robin scheme.
0726  */
0727 static void init_vp_index(struct vmbus_channel *channel)
0728 {
0729     bool perf_chn = hv_is_perf_channel(channel);
0730     u32 i, ncpu = num_online_cpus();
0731     cpumask_var_t available_mask;
0732     struct cpumask *allocated_mask;
0733     const struct cpumask *hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ);
0734     u32 target_cpu;
0735     int numa_node;
0736 
0737     if (!perf_chn ||
0738         !alloc_cpumask_var(&available_mask, GFP_KERNEL) ||
0739         cpumask_empty(hk_mask)) {
0740         /*
0741          * If the channel is not a performance critical
0742          * channel, bind it to VMBUS_CONNECT_CPU.
0743          * In case alloc_cpumask_var() fails, bind it to
0744          * VMBUS_CONNECT_CPU.
0745          * If all the cpus are isolated, bind it to
0746          * VMBUS_CONNECT_CPU.
0747          */
0748         channel->target_cpu = VMBUS_CONNECT_CPU;
0749         if (perf_chn)
0750             hv_set_allocated_cpu(VMBUS_CONNECT_CPU);
0751         return;
0752     }
0753 
0754     for (i = 1; i <= ncpu + 1; i++) {
0755         while (true) {
0756             numa_node = next_numa_node_id++;
0757             if (numa_node == nr_node_ids) {
0758                 next_numa_node_id = 0;
0759                 continue;
0760             }
0761             if (cpumask_empty(cpumask_of_node(numa_node)))
0762                 continue;
0763             break;
0764         }
0765         allocated_mask = &hv_context.hv_numa_map[numa_node];
0766 
0767 retry:
0768         cpumask_xor(available_mask, allocated_mask, cpumask_of_node(numa_node));
0769         cpumask_and(available_mask, available_mask, hk_mask);
0770 
0771         if (cpumask_empty(available_mask)) {
0772             /*
0773              * We have cycled through all the CPUs in the node;
0774              * reset the allocated map.
0775              */
0776             cpumask_clear(allocated_mask);
0777             goto retry;
0778         }
0779 
0780         target_cpu = cpumask_first(available_mask);
0781         cpumask_set_cpu(target_cpu, allocated_mask);
0782 
0783         if (channel->offermsg.offer.sub_channel_index >= ncpu ||
0784             i > ncpu || !hv_cpuself_used(target_cpu, channel))
0785             break;
0786     }
0787 
0788     channel->target_cpu = target_cpu;
0789 
0790     free_cpumask_var(available_mask);
0791 }
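/*
 * Illustrative sketch, not part of the original file: the round-robin NUMA
 * node selection used by init_vp_index(), extracted in isolation.  Nodes
 * with no CPUs are skipped and the counter wraps at nr_node_ids; the
 * example_ names are hypothetical stand-ins for next_numa_node_id.
 */
static int example_pick_next_node(int *example_next_node)
{
    int numa_node;

    while (true) {
        numa_node = (*example_next_node)++;
        if (numa_node == nr_node_ids) {
            *example_next_node = 0;
            continue;
        }
        if (cpumask_empty(cpumask_of_node(numa_node)))
            continue;
        return numa_node;
    }
}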
0792 
0793 #define UNLOAD_DELAY_UNIT_MS    10      /* 10 milliseconds */
0794 #define UNLOAD_WAIT_MS      (100*1000)  /* 100 seconds */
0795 #define UNLOAD_WAIT_LOOPS   (UNLOAD_WAIT_MS/UNLOAD_DELAY_UNIT_MS)
0796 #define UNLOAD_MSG_MS       (5*1000)    /* Every 5 seconds */
0797 #define UNLOAD_MSG_LOOPS    (UNLOAD_MSG_MS/UNLOAD_DELAY_UNIT_MS)
0798 
0799 static void vmbus_wait_for_unload(void)
0800 {
0801     int cpu;
0802     void *page_addr;
0803     struct hv_message *msg;
0804     struct vmbus_channel_message_header *hdr;
0805     u32 message_type, i;
0806 
0807     /*
0808      * CHANNELMSG_UNLOAD_RESPONSE is always delivered to the CPU which was
0809      * used for initial contact or to CPU0 depending on host version. When
0810      * we're crashing on a different CPU let's hope that IRQ handler on
0811      * the cpu which receives CHANNELMSG_UNLOAD_RESPONSE is still
0812      * functional and vmbus_unload_response() will complete
0813      * vmbus_connection.unload_event. If not, the last thing we can do is
0814      * read message pages for all CPUs directly.
0815      *
0816      * Wait up to 100 seconds since an Azure host must write back any dirty
0817      * data in its disk cache before the VMbus UNLOAD request will
0818      * complete. This flushing has been empirically observed to take up
0819      * to 50 seconds in cases with a lot of dirty data, so allow additional
0820      * leeway and for inaccuracies in mdelay(). But eventually time out so
0821      * that the panic path can't get hung forever in case the response
0822      * message isn't seen.
0823      */
0824     for (i = 1; i <= UNLOAD_WAIT_LOOPS; i++) {
0825         if (completion_done(&vmbus_connection.unload_event))
0826             goto completed;
0827 
0828         for_each_online_cpu(cpu) {
0829             struct hv_per_cpu_context *hv_cpu
0830                 = per_cpu_ptr(hv_context.cpu_context, cpu);
0831 
0832             page_addr = hv_cpu->synic_message_page;
0833             msg = (struct hv_message *)page_addr
0834                 + VMBUS_MESSAGE_SINT;
0835 
0836             message_type = READ_ONCE(msg->header.message_type);
0837             if (message_type == HVMSG_NONE)
0838                 continue;
0839 
0840             hdr = (struct vmbus_channel_message_header *)
0841                 msg->u.payload;
0842 
0843             if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE)
0844                 complete(&vmbus_connection.unload_event);
0845 
0846             vmbus_signal_eom(msg, message_type);
0847         }
0848 
0849         /*
0850          * Give a notice periodically so someone watching the
0851          * serial output won't think it is completely hung.
0852          */
0853         if (!(i % UNLOAD_MSG_LOOPS))
0854             pr_notice("Waiting for VMBus UNLOAD to complete\n");
0855 
0856         mdelay(UNLOAD_DELAY_UNIT_MS);
0857     }
0858     pr_err("Continuing even though VMBus UNLOAD did not complete\n");
0859 
0860 completed:
0861     /*
0862      * We're crashing and already got the UNLOAD_RESPONSE, cleanup all
0863      * maybe-pending messages on all CPUs to be able to receive new
0864      * messages after we reconnect.
0865      */
0866     for_each_online_cpu(cpu) {
0867         struct hv_per_cpu_context *hv_cpu
0868             = per_cpu_ptr(hv_context.cpu_context, cpu);
0869 
0870         page_addr = hv_cpu->synic_message_page;
0871         msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
0872         msg->header.message_type = HVMSG_NONE;
0873     }
0874 }
0875 
0876 /*
0877  * vmbus_unload_response - Handler for the unload response.
0878  */
0879 static void vmbus_unload_response(struct vmbus_channel_message_header *hdr)
0880 {
0881     /*
0882      * This is a global event; just wake up the waiting thread.
0883      * Once we successfully unload, we can cleanup the monitor state.
0884      *
0885      * NB.  A malicious or compromised Hyper-V could send a spurious
0886      * message of type CHANNELMSG_UNLOAD_RESPONSE, and trigger a call
0887      * of the complete() below.  Make sure that unload_event has been
0888      * initialized by the time this complete() is executed.
0889      */
0890     complete(&vmbus_connection.unload_event);
0891 }
0892 
0893 void vmbus_initiate_unload(bool crash)
0894 {
0895     struct vmbus_channel_message_header hdr;
0896 
0897     if (xchg(&vmbus_connection.conn_state, DISCONNECTED) == DISCONNECTED)
0898         return;
0899 
0900     /* Pre-Win2012R2 hosts don't support reconnect */
0901     if (vmbus_proto_version < VERSION_WIN8_1)
0902         return;
0903 
0904     reinit_completion(&vmbus_connection.unload_event);
0905     memset(&hdr, 0, sizeof(struct vmbus_channel_message_header));
0906     hdr.msgtype = CHANNELMSG_UNLOAD;
0907     vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header),
0908                !crash);
0909 
0910     /*
0911      * vmbus_initiate_unload() is also called on crash and the crash can be
0912      * happening in an interrupt context, where scheduling is impossible.
0913      */
0914     if (!crash)
0915         wait_for_completion(&vmbus_connection.unload_event);
0916     else
0917         vmbus_wait_for_unload();
0918 }
0919 
0920 static void check_ready_for_resume_event(void)
0921 {
0922     /*
0923      * If all the old primary channels have been fixed up, then it's safe
0924      * to resume.
0925      */
0926     if (atomic_dec_and_test(&vmbus_connection.nr_chan_fixup_on_resume))
0927         complete(&vmbus_connection.ready_for_resume_event);
0928 }
0929 
0930 static void vmbus_setup_channel_state(struct vmbus_channel *channel,
0931                       struct vmbus_channel_offer_channel *offer)
0932 {
0933     /*
0934      * Setup state for signalling the host.
0935      */
0936     channel->sig_event = VMBUS_EVENT_CONNECTION_ID;
0937 
0938     channel->is_dedicated_interrupt =
0939             (offer->is_dedicated_interrupt != 0);
0940     channel->sig_event = offer->connection_id;
0941 
0942     memcpy(&channel->offermsg, offer,
0943            sizeof(struct vmbus_channel_offer_channel));
0944     channel->monitor_grp = (u8)offer->monitorid / 32;
0945     channel->monitor_bit = (u8)offer->monitorid % 32;
0946     channel->device_id = hv_get_dev_type(channel);
0947 }
0948 
0949 /*
0950  * find_primary_channel_by_offer - Get the channel object given the new offer.
0951  * This is only used in the resume path of hibernation.
0952  */
0953 static struct vmbus_channel *
0954 find_primary_channel_by_offer(const struct vmbus_channel_offer_channel *offer)
0955 {
0956     struct vmbus_channel *channel = NULL, *iter;
0957     const guid_t *inst1, *inst2;
0958 
0959     /* Ignore sub-channel offers. */
0960     if (offer->offer.sub_channel_index != 0)
0961         return NULL;
0962 
0963     mutex_lock(&vmbus_connection.channel_mutex);
0964 
0965     list_for_each_entry(iter, &vmbus_connection.chn_list, listentry) {
0966         inst1 = &iter->offermsg.offer.if_instance;
0967         inst2 = &offer->offer.if_instance;
0968 
0969         if (guid_equal(inst1, inst2)) {
0970             channel = iter;
0971             break;
0972         }
0973     }
0974 
0975     mutex_unlock(&vmbus_connection.channel_mutex);
0976 
0977     return channel;
0978 }
0979 
0980 static bool vmbus_is_valid_offer(const struct vmbus_channel_offer_channel *offer)
0981 {
0982     const guid_t *guid = &offer->offer.if_type;
0983     u16 i;
0984 
0985     if (!hv_is_isolation_supported())
0986         return true;
0987 
0988     if (is_hvsock_offer(offer))
0989         return true;
0990 
0991     for (i = 0; i < ARRAY_SIZE(vmbus_devs); i++) {
0992         if (guid_equal(guid, &vmbus_devs[i].guid))
0993             return vmbus_devs[i].allowed_in_isolated;
0994     }
0995     return false;
0996 }
0997 
0998 /*
0999  * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
1000  *
1001  */
1002 static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
1003 {
1004     struct vmbus_channel_offer_channel *offer;
1005     struct vmbus_channel *oldchannel, *newchannel;
1006     size_t offer_sz;
1007 
1008     offer = (struct vmbus_channel_offer_channel *)hdr;
1009 
1010     trace_vmbus_onoffer(offer);
1011 
1012     if (!vmbus_is_valid_offer(offer)) {
1013         pr_err_ratelimited("Invalid offer %d from the host supporting isolation\n",
1014                    offer->child_relid);
1015         atomic_dec(&vmbus_connection.offer_in_progress);
1016         return;
1017     }
1018 
1019     oldchannel = find_primary_channel_by_offer(offer);
1020 
1021     if (oldchannel != NULL) {
1022         /*
1023          * We're resuming from hibernation: all the sub-channel and
1024          * hv_sock channels we had before the hibernation should have
1025          * been cleaned up, and now we must be seeing a re-offered
1026          * primary channel that we had before the hibernation.
1027          */
1028 
1029         /*
1030          * { Initially: channel relid = INVALID_RELID,
1031          *      channels[valid_relid] = NULL }
1032          *
1033          * CPU1                 CPU2
1034          *
1035          * [vmbus_onoffer()]            [vmbus_device_release()]
1036          *
1037          * LOCK channel_mutex           LOCK channel_mutex
1038          * STORE channel relid = valid_relid    LOAD r1 = channel relid
1039          * MAP_RELID channel            if (r1 != INVALID_RELID)
1040          * UNLOCK channel_mutex           UNMAP_RELID channel
1041          *                  UNLOCK channel_mutex
1042          *
1043          * Forbids: r1 == valid_relid &&
1044          *              channels[valid_relid] == channel
1045          *
1046          * Note.  r1 can be INVALID_RELID only for an hv_sock channel.
1047          * None of the hv_sock channels which were present before the
1048          * suspend are re-offered upon the resume.  See the WARN_ON()
1049          * in hv_process_channel_removal().
1050          */
1051         mutex_lock(&vmbus_connection.channel_mutex);
1052 
1053         atomic_dec(&vmbus_connection.offer_in_progress);
1054 
1055         WARN_ON(oldchannel->offermsg.child_relid != INVALID_RELID);
1056         /* Fix up the relid. */
1057         oldchannel->offermsg.child_relid = offer->child_relid;
1058 
1059         offer_sz = sizeof(*offer);
1060         if (memcmp(offer, &oldchannel->offermsg, offer_sz) != 0) {
1061             /*
1062              * This is not an error, since the host can also change
1063              * the other field(s) of the offer, e.g. on WS RS5
1064              * (Build 17763), the offer->connection_id of the
1065              * Mellanox VF vmbus device can change when the host
1066              * reoffers the device upon resume.
1067              */
1068             pr_debug("vmbus offer changed: relid=%d\n",
1069                  offer->child_relid);
1070 
1071             print_hex_dump_debug("Old vmbus offer: ",
1072                          DUMP_PREFIX_OFFSET, 16, 4,
1073                          &oldchannel->offermsg, offer_sz,
1074                          false);
1075             print_hex_dump_debug("New vmbus offer: ",
1076                          DUMP_PREFIX_OFFSET, 16, 4,
1077                          offer, offer_sz, false);
1078 
1079             /* Fix up the old channel. */
1080             vmbus_setup_channel_state(oldchannel, offer);
1081         }
1082 
1083         /* Add the channel back to the array of channels. */
1084         vmbus_channel_map_relid(oldchannel);
1085         check_ready_for_resume_event();
1086 
1087         mutex_unlock(&vmbus_connection.channel_mutex);
1088         return;
1089     }
1090 
1091     /* Allocate the channel object and save this offer. */
1092     newchannel = alloc_channel();
1093     if (!newchannel) {
1094         vmbus_release_relid(offer->child_relid);
1095         atomic_dec(&vmbus_connection.offer_in_progress);
1096         pr_err("Unable to allocate channel object\n");
1097         return;
1098     }
1099 
1100     vmbus_setup_channel_state(newchannel, offer);
1101 
1102     vmbus_process_offer(newchannel);
1103 }
1104 
1105 static void check_ready_for_suspend_event(void)
1106 {
1107     /*
1108      * If all the sub-channels or hv_sock channels have been cleaned up,
1109      * then it's safe to suspend.
1110      */
1111     if (atomic_dec_and_test(&vmbus_connection.nr_chan_close_on_suspend))
1112         complete(&vmbus_connection.ready_for_suspend_event);
1113 }
1114 
1115 /*
1116  * vmbus_onoffer_rescind - Rescind offer handler.
1117  *
1118  * We queue a work item to process this offer synchronously
1119  */
1120 static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
1121 {
1122     struct vmbus_channel_rescind_offer *rescind;
1123     struct vmbus_channel *channel;
1124     struct device *dev;
1125     bool clean_up_chan_for_suspend;
1126 
1127     rescind = (struct vmbus_channel_rescind_offer *)hdr;
1128 
1129     trace_vmbus_onoffer_rescind(rescind);
1130 
1131     /*
1132      * The offer msg and the corresponding rescind msg
1133      * from the host are guaranteed to be ordered -
1134      * offer comes in first and then the rescind.
1135      * Since we process these events in work elements,
1136      * and with preemption, we may end up processing
1137      * the events out of order.  We rely on the synchronization
1138      * provided by offer_in_progress and by channel_mutex for
1139      * ordering these events:
1140      *
1141      * { Initially: offer_in_progress = 1 }
1142      *
1143      * CPU1             CPU2
1144      *
1145      * [vmbus_onoffer()]        [vmbus_onoffer_rescind()]
1146      *
1147      * LOCK channel_mutex       WAIT_ON offer_in_progress == 0
1148      * DECREMENT offer_in_progress  LOCK channel_mutex
1149      * STORE channels[]     LOAD channels[]
1150      * UNLOCK channel_mutex     UNLOCK channel_mutex
1151      *
1152      * Forbids: CPU2's LOAD from *not* seeing CPU1's STORE
1153      */
1154 
1155     while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
1156         /*
1157          * Wait here while any channel offer is still being
1158          * processed.
1159          */
1160         msleep(1);
1161     }
1162 
1163     mutex_lock(&vmbus_connection.channel_mutex);
1164     channel = relid2channel(rescind->child_relid);
1165     if (channel != NULL) {
1166         /*
1167          * Guarantee that no other instance of vmbus_onoffer_rescind()
1168          * has got a reference to the channel object.  Synchronize on
1169          * &vmbus_connection.channel_mutex.
1170          */
1171         if (channel->rescind_ref) {
1172             mutex_unlock(&vmbus_connection.channel_mutex);
1173             return;
1174         }
1175         channel->rescind_ref = true;
1176     }
1177     mutex_unlock(&vmbus_connection.channel_mutex);
1178 
1179     if (channel == NULL) {
1180         /*
1181          * We failed in processing the offer message;
1182          * we would have cleaned up the relid in that
1183          * failure path.
1184          */
1185         return;
1186     }
1187 
1188     clean_up_chan_for_suspend = is_hvsock_channel(channel) ||
1189                     is_sub_channel(channel);
1190     /*
1191      * Before setting channel->rescind in vmbus_rescind_cleanup(), we
1192      * should make sure the channel callback is not running any more.
1193      */
1194     vmbus_reset_channel_cb(channel);
1195 
1196     /*
1197      * Now wait for offer handling to complete.
1198      */
1199     vmbus_rescind_cleanup(channel);
1200     while (READ_ONCE(channel->probe_done) == false) {
1201         /*
1202          * Wait here until the offer handling (device probe) for this
1203          * channel has completed.
1204          */
1205         msleep(1);
1206     }
1207 
1208     /*
1209      * At this point, the rescind handling can proceed safely.
1210      */
1211 
1212     if (channel->device_obj) {
1213         if (channel->chn_rescind_callback) {
1214             channel->chn_rescind_callback(channel);
1215 
1216             if (clean_up_chan_for_suspend)
1217                 check_ready_for_suspend_event();
1218 
1219             return;
1220         }
1221         /*
1222          * We will have to unregister this device from the
1223          * driver core.
1224          */
1225         dev = get_device(&channel->device_obj->device);
1226         if (dev) {
1227             vmbus_device_unregister(channel->device_obj);
1228             put_device(dev);
1229         }
1230     } else if (channel->primary_channel != NULL) {
1231         /*
1232          * Sub-channel is being rescinded. Following is the channel
1233          * close sequence when initiated from the driver (refer to
1234          * vmbus_close() for details):
1235          * 1. Close all sub-channels first
1236          * 2. Then close the primary channel.
1237          */
1238         mutex_lock(&vmbus_connection.channel_mutex);
1239         if (channel->state == CHANNEL_OPEN_STATE) {
1240             /*
1241              * The channel is currently not open;
1242              * it is safe for us to clean up the channel.
1243              */
1244             hv_process_channel_removal(channel);
1245         } else {
1246             complete(&channel->rescind_event);
1247         }
1248         mutex_unlock(&vmbus_connection.channel_mutex);
1249     }
1250 
1251     /* The "channel" may have been freed. Do not access it any longer. */
1252 
1253     if (clean_up_chan_for_suspend)
1254         check_ready_for_suspend_event();
1255 }
1256 
1257 void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
1258 {
1259     BUG_ON(!is_hvsock_channel(channel));
1260 
1261     /* We always get a rescind msg when a connection is closed. */
1262     while (!READ_ONCE(channel->probe_done) || !READ_ONCE(channel->rescind))
1263         msleep(1);
1264 
1265     vmbus_device_unregister(channel->device_obj);
1266 }
1267 EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister);
1268 
1269 
1270 /*
1271  * vmbus_onoffers_delivered -
1272  * This is invoked when all offers have been delivered.
1273  *
1274  * Nothing to do here.
1275  */
1276 static void vmbus_onoffers_delivered(
1277             struct vmbus_channel_message_header *hdr)
1278 {
1279 }
1280 
1281 /*
1282  * vmbus_onopen_result - Open result handler.
1283  *
1284  * This is invoked when we receive a response to our channel open request.
1285  * Find the matching request, copy the response and signal the requesting
1286  * thread.
1287  */
1288 static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
1289 {
1290     struct vmbus_channel_open_result *result;
1291     struct vmbus_channel_msginfo *msginfo;
1292     struct vmbus_channel_message_header *requestheader;
1293     struct vmbus_channel_open_channel *openmsg;
1294     unsigned long flags;
1295 
1296     result = (struct vmbus_channel_open_result *)hdr;
1297 
1298     trace_vmbus_onopen_result(result);
1299 
1300     /*
1301      * Find the open msg, copy the result and signal/unblock the wait event
1302      */
1303     spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
1304 
1305     list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
1306                 msglistentry) {
1307         requestheader =
1308             (struct vmbus_channel_message_header *)msginfo->msg;
1309 
1310         if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
1311             openmsg =
1312             (struct vmbus_channel_open_channel *)msginfo->msg;
1313             if (openmsg->child_relid == result->child_relid &&
1314                 openmsg->openid == result->openid) {
1315                 memcpy(&msginfo->response.open_result,
1316                        result,
1317                        sizeof(
1318                     struct vmbus_channel_open_result));
1319                 complete(&msginfo->waitevent);
1320                 break;
1321             }
1322         }
1323     }
1324     spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
1325 }
1326 
1327 /*
1328  * vmbus_ongpadl_created - GPADL created handler.
1329  *
1330  * This is invoked when we receive a response to our gpadl create request.
1331  * Find the matching request, copy the response and signal the requesting
1332  * thread.
1333  */
1334 static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
1335 {
1336     struct vmbus_channel_gpadl_created *gpadlcreated;
1337     struct vmbus_channel_msginfo *msginfo;
1338     struct vmbus_channel_message_header *requestheader;
1339     struct vmbus_channel_gpadl_header *gpadlheader;
1340     unsigned long flags;
1341 
1342     gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;
1343 
1344     trace_vmbus_ongpadl_created(gpadlcreated);
1345 
1346     /*
1347      * Find the establish msg, copy the result and signal/unblock the wait
1348      * event
1349      */
1350     spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
1351 
1352     list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
1353                 msglistentry) {
1354         requestheader =
1355             (struct vmbus_channel_message_header *)msginfo->msg;
1356 
1357         if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
1358             gpadlheader =
1359             (struct vmbus_channel_gpadl_header *)requestheader;
1360 
1361             if ((gpadlcreated->child_relid ==
1362                  gpadlheader->child_relid) &&
1363                 (gpadlcreated->gpadl == gpadlheader->gpadl)) {
1364                 memcpy(&msginfo->response.gpadl_created,
1365                        gpadlcreated,
1366                        sizeof(
1367                     struct vmbus_channel_gpadl_created));
1368                 complete(&msginfo->waitevent);
1369                 break;
1370             }
1371         }
1372     }
1373     spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
1374 }
1375 
1376 /*
1377  * vmbus_onmodifychannel_response - Modify Channel response handler.
1378  *
1379  * This is invoked when we receive a response to our channel modify request.
1380  * Find the matching request, copy the response and signal the requesting thread.
1381  */
1382 static void vmbus_onmodifychannel_response(struct vmbus_channel_message_header *hdr)
1383 {
1384     struct vmbus_channel_modifychannel_response *response;
1385     struct vmbus_channel_msginfo *msginfo;
1386     unsigned long flags;
1387 
1388     response = (struct vmbus_channel_modifychannel_response *)hdr;
1389 
1390     trace_vmbus_onmodifychannel_response(response);
1391 
1392     /*
1393      * Find the modify msg, copy the response and signal/unblock the wait event.
1394      */
1395     spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
1396 
1397     list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, msglistentry) {
1398         struct vmbus_channel_message_header *responseheader =
1399                 (struct vmbus_channel_message_header *)msginfo->msg;
1400 
1401         if (responseheader->msgtype == CHANNELMSG_MODIFYCHANNEL) {
1402             struct vmbus_channel_modifychannel *modifymsg;
1403 
1404             modifymsg = (struct vmbus_channel_modifychannel *)msginfo->msg;
1405             if (modifymsg->child_relid == response->child_relid) {
1406                 memcpy(&msginfo->response.modify_response, response,
1407                        sizeof(*response));
1408                 complete(&msginfo->waitevent);
1409                 break;
1410             }
1411         }
1412     }
1413     spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
1414 }
1415 
1416 /*
1417  * vmbus_ongpadl_torndown - GPADL torndown handler.
1418  *
1419  * This is invoked when we receive a response to our gpadl teardown request.
1420  * Find the matching request, copy the response and signal the requesting
1421  * thread.
1422  */
1423 static void vmbus_ongpadl_torndown(
1424             struct vmbus_channel_message_header *hdr)
1425 {
1426     struct vmbus_channel_gpadl_torndown *gpadl_torndown;
1427     struct vmbus_channel_msginfo *msginfo;
1428     struct vmbus_channel_message_header *requestheader;
1429     struct vmbus_channel_gpadl_teardown *gpadl_teardown;
1430     unsigned long flags;
1431 
1432     gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr;
1433 
1434     trace_vmbus_ongpadl_torndown(gpadl_torndown);
1435 
1436     /*
1437      * Find the teardown msg, copy the result and signal/unblock the wait event
1438      */
1439     spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
1440 
1441     list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
1442                 msglistentry) {
1443         requestheader =
1444             (struct vmbus_channel_message_header *)msginfo->msg;
1445 
1446         if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) {
1447             gpadl_teardown =
1448             (struct vmbus_channel_gpadl_teardown *)requestheader;
1449 
1450             if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) {
1451                 memcpy(&msginfo->response.gpadl_torndown,
1452                        gpadl_torndown,
1453                        sizeof(
1454                     struct vmbus_channel_gpadl_torndown));
1455                 complete(&msginfo->waitevent);
1456                 break;
1457             }
1458         }
1459     }
1460     spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
1461 }
1462 
1463 /*
1464  * vmbus_onversion_response - Version response handler
1465  *
1466  * This is invoked when we receive a response to our initiate contact request.
1467  * Find the matching request, copy the response and signal the requesting
1468  * thread.
1469  */
1470 static void vmbus_onversion_response(
1471         struct vmbus_channel_message_header *hdr)
1472 {
1473     struct vmbus_channel_msginfo *msginfo;
1474     struct vmbus_channel_message_header *requestheader;
1475     struct vmbus_channel_version_response *version_response;
1476     unsigned long flags;
1477 
1478     version_response = (struct vmbus_channel_version_response *)hdr;
1479 
1480     trace_vmbus_onversion_response(version_response);
1481 
1482     spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
1483 
1484     list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
1485                 msglistentry) {
1486         requestheader =
1487             (struct vmbus_channel_message_header *)msginfo->msg;
1488 
1489         if (requestheader->msgtype ==
1490             CHANNELMSG_INITIATE_CONTACT) {
1491             memcpy(&msginfo->response.version_response,
1492                   version_response,
1493                   sizeof(struct vmbus_channel_version_response));
1494             complete(&msginfo->waitevent);
1495         }
1496     }
1497     spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
1498 }
1499 
1500 /* Channel message dispatch table */
1501 const struct vmbus_channel_message_table_entry
1502 channel_message_table[CHANNELMSG_COUNT] = {
1503     { CHANNELMSG_INVALID,           0, NULL, 0},
1504     { CHANNELMSG_OFFERCHANNEL,      0, vmbus_onoffer,
1505         sizeof(struct vmbus_channel_offer_channel)},
1506     { CHANNELMSG_RESCIND_CHANNELOFFER,  0, vmbus_onoffer_rescind,
1507         sizeof(struct vmbus_channel_rescind_offer) },
1508     { CHANNELMSG_REQUESTOFFERS,     0, NULL, 0},
1509     { CHANNELMSG_ALLOFFERS_DELIVERED,   1, vmbus_onoffers_delivered, 0},
1510     { CHANNELMSG_OPENCHANNEL,       0, NULL, 0},
1511     { CHANNELMSG_OPENCHANNEL_RESULT,    1, vmbus_onopen_result,
1512         sizeof(struct vmbus_channel_open_result)},
1513     { CHANNELMSG_CLOSECHANNEL,      0, NULL, 0},
1514     { CHANNELMSG_GPADL_HEADER,      0, NULL, 0},
1515     { CHANNELMSG_GPADL_BODY,        0, NULL, 0},
1516     { CHANNELMSG_GPADL_CREATED,     1, vmbus_ongpadl_created,
1517         sizeof(struct vmbus_channel_gpadl_created)},
1518     { CHANNELMSG_GPADL_TEARDOWN,        0, NULL, 0},
1519     { CHANNELMSG_GPADL_TORNDOWN,        1, vmbus_ongpadl_torndown,
1520         sizeof(struct vmbus_channel_gpadl_torndown) },
1521     { CHANNELMSG_RELID_RELEASED,        0, NULL, 0},
1522     { CHANNELMSG_INITIATE_CONTACT,      0, NULL, 0},
1523     { CHANNELMSG_VERSION_RESPONSE,      1, vmbus_onversion_response,
1524         sizeof(struct vmbus_channel_version_response)},
1525     { CHANNELMSG_UNLOAD,            0, NULL, 0},
1526     { CHANNELMSG_UNLOAD_RESPONSE,       1, vmbus_unload_response, 0},
1527     { CHANNELMSG_18,            0, NULL, 0},
1528     { CHANNELMSG_19,            0, NULL, 0},
1529     { CHANNELMSG_20,            0, NULL, 0},
1530     { CHANNELMSG_TL_CONNECT_REQUEST,    0, NULL, 0},
1531     { CHANNELMSG_MODIFYCHANNEL,     0, NULL, 0},
1532     { CHANNELMSG_TL_CONNECT_RESULT,     0, NULL, 0},
1533     { CHANNELMSG_MODIFYCHANNEL_RESPONSE,    1, vmbus_onmodifychannel_response,
1534         sizeof(struct vmbus_channel_modifychannel_response)},
1535 };
1536 
1537 /*
1538  * vmbus_onmessage - Handler for channel protocol messages.
1539  *
1540  * This is invoked in the vmbus worker thread context.
1541  */
1542 void vmbus_onmessage(struct vmbus_channel_message_header *hdr)
1543 {
1544     trace_vmbus_on_message(hdr);
1545 
1546     /*
1547      * vmbus_on_msg_dpc() makes sure the hdr->msgtype here cannot go
1548      * out of bounds and the message_handler pointer cannot be NULL.
1549      */
1550     channel_message_table[hdr->msgtype].message_handler(hdr);
1551 }
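/*
 * Illustrative sketch, not part of the original file: the kind of check the
 * caller is expected to make, per the comment above.  This approximates the
 * validation done by vmbus_on_msg_dpc() before calling vmbus_onmessage();
 * the example_ name is hypothetical.
 */
static bool example_msg_is_dispatchable(const struct vmbus_channel_message_header *hdr)
{
    return hdr->msgtype < CHANNELMSG_COUNT &&
           channel_message_table[hdr->msgtype].message_handler != NULL;
}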
1552 
1553 /*
1554  * vmbus_request_offers - Send a request to get all our pending offers.
1555  */
1556 int vmbus_request_offers(void)
1557 {
1558     struct vmbus_channel_message_header *msg;
1559     struct vmbus_channel_msginfo *msginfo;
1560     int ret;
1561 
1562     msginfo = kzalloc(sizeof(*msginfo) +
1563               sizeof(struct vmbus_channel_message_header),
1564               GFP_KERNEL);
1565     if (!msginfo)
1566         return -ENOMEM;
1567 
1568     msg = (struct vmbus_channel_message_header *)msginfo->msg;
1569 
1570     msg->msgtype = CHANNELMSG_REQUESTOFFERS;
1571 
1572     ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_message_header),
1573                  true);
1574 
1575     trace_vmbus_request_offers(ret);
1576 
1577     if (ret != 0) {
1578         pr_err("Unable to request offers - %d\n", ret);
1579 
1580         goto cleanup;
1581     }
1582 
1583 cleanup:
1584     kfree(msginfo);
1585 
1586     return ret;
1587 }
1588 
1589 void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
1590                 void (*sc_cr_cb)(struct vmbus_channel *new_sc))
1591 {
1592     primary_channel->sc_creation_callback = sc_cr_cb;
1593 }
1594 EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);
1595 
1596 void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel,
1597         void (*chn_rescind_cb)(struct vmbus_channel *))
1598 {
1599     channel->chn_rescind_callback = chn_rescind_cb;
1600 }
1601 EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback);
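/*
 * Illustrative sketch, not part of the original file: how a VMBus driver
 * might register the two callbacks exported above from its probe() path.
 * The example_ functions are hypothetical driver code.
 */
static void example_sc_created(struct vmbus_channel *new_sc)
{
    pr_info("sub-channel %u created\n",
        new_sc->offermsg.offer.sub_channel_index);
}

static void example_chn_rescinded(struct vmbus_channel *channel)
{
    pr_info("relid %u rescinded\n", channel->offermsg.child_relid);
}

static void example_register_callbacks(struct hv_device *dev)
{
    vmbus_set_sc_create_callback(dev->channel, example_sc_created);
    vmbus_set_chn_rescind_callback(dev->channel, example_chn_rescinded);
}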