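// SPDX-License-Identifier: GPL-2.0-only
/*
 * VMBus ring buffer code: the guest<->host shared ring buffers used by
 * every VMBus channel to send and receive packets.
 *
 * Copyright (c) 2009, Microsoft Corporation.
 */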
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/hyperv.h>
#include <linux/uio.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/prefetch.h>
#include <linux/io.h>
#include <asm/mshyperv.h>

#include "hyperv_vmbus.h"

#define VMBUS_PKT_TRAILER 8

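/*
 * When we write to the ring buffer, check if the host needs to be
 * signaled:
 *
 * 1. While the host is draining the ring buffer, it sets interrupt_mask
 *    to indicate that it does not need to be interrupted when new data
 *    arrives.
 *
 * 2. The host completely drains the ring buffer before exiting the read
 *    loop; once the ring is empty, it clears interrupt_mask and re-checks
 *    whether new data has arrived.
 *
 * Windows hosts throttle guests that signal when no interrupt is
 * expected, so signal only on the transition from empty to non-empty.
 */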
static void hv_signal_on_write(u32 old_write, struct vmbus_channel *channel)
{
	struct hv_ring_buffer_info *rbi = &channel->outbound;

	virt_mb();
	if (READ_ONCE(rbi->ring_buffer->interrupt_mask))
		return;

	/* check interrupt_mask before read_index */
	virt_rmb();

	/*
	 * This is the only case we need to signal: when the ring
	 * transitions from being empty to non-empty.
	 */
	if (old_write == READ_ONCE(rbi->ring_buffer->read_index)) {
		++channel->intr_out_empty;
		vmbus_setevent(channel);
	}
}

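/* Get the next write location for the specified ring buffer. */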
static inline u32
hv_get_next_write_location(struct hv_ring_buffer_info *ring_info)
{
	u32 next = ring_info->ring_buffer->write_index;

	return next;
}

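/* Set the next write location for the specified ring buffer. */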
static inline void
hv_set_next_write_location(struct hv_ring_buffer_info *ring_info,
			   u32 next_write_location)
{
	ring_info->ring_buffer->write_index = next_write_location;
}

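/* Get the usable data size of the ring buffer. */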
static inline u32
hv_get_ring_buffersize(const struct hv_ring_buffer_info *ring_info)
{
	return ring_info->ring_datasize;
}

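/*
 * Get the ring buffer indices packed as a u64 (write index in the
 * upper 32 bits), as written in the trailer that follows each packet.
 */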
static inline u64
hv_get_ring_bufferindices(struct hv_ring_buffer_info *ring_info)
{
	return (u64)ring_info->ring_buffer->write_index << 32;
}

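/*
 * Helper routine to copy from source to ring buffer.
 * Assumes there is enough room. The ring buffer is mapped twice
 * (wraparound mapping), so a single memcpy works even when the write
 * wraps past the end; only the returned offset is wrapped explicitly.
 */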
static u32 hv_copyto_ringbuffer(
	struct hv_ring_buffer_info *ring_info,
	u32 start_write_offset,
	const void *src,
	u32 srclen)
{
	void *ring_buffer = hv_get_ring_buffer(ring_info);
	u32 ring_buffer_size = hv_get_ring_buffersize(ring_info);

	memcpy(ring_buffer + start_write_offset, src, srclen);

	start_write_offset += srclen;
	if (start_write_offset >= ring_buffer_size)
		start_write_offset -= ring_buffer_size;

	return start_write_offset;
}

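/*
 * Get the number of bytes available to read from and to write to
 * for the specified ring buffer.
 */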
static void
hv_get_ringbuffer_availbytes(const struct hv_ring_buffer_info *rbi,
			     u32 *read, u32 *write)
{
	u32 read_loc, write_loc, dsize;

	/* Capture the read/write indices before they change */
	read_loc = READ_ONCE(rbi->ring_buffer->read_index);
	write_loc = READ_ONCE(rbi->ring_buffer->write_index);
	dsize = rbi->ring_datasize;

	*write = write_loc >= read_loc ? dsize - (write_loc - read_loc) :
		read_loc - write_loc;
	*read = dsize - *write;
}

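/* Get various debug metrics for the specified ring buffer. */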
int hv_ringbuffer_get_debuginfo(struct hv_ring_buffer_info *ring_info,
				struct hv_ring_buffer_debug_info *debug_info)
{
	u32 bytes_avail_towrite;
	u32 bytes_avail_toread;

	mutex_lock(&ring_info->ring_buffer_mutex);

	if (!ring_info->ring_buffer) {
		mutex_unlock(&ring_info->ring_buffer_mutex);
		return -EINVAL;
	}

	hv_get_ringbuffer_availbytes(ring_info,
				     &bytes_avail_toread,
				     &bytes_avail_towrite);
	debug_info->bytes_avail_toread = bytes_avail_toread;
	debug_info->bytes_avail_towrite = bytes_avail_towrite;
	debug_info->current_read_index = ring_info->ring_buffer->read_index;
	debug_info->current_write_index = ring_info->ring_buffer->write_index;
	debug_info->current_interrupt_mask
		= ring_info->ring_buffer->interrupt_mask;
	mutex_unlock(&ring_info->ring_buffer_mutex);

	return 0;
}
EXPORT_SYMBOL_GPL(hv_ringbuffer_get_debuginfo);

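/* Initialize a channel's ring buffer info mutex locks */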
void hv_ringbuffer_pre_init(struct vmbus_channel *channel)
{
	mutex_init(&channel->inbound.ring_buffer_mutex);
	mutex_init(&channel->outbound.ring_buffer_mutex);
}

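/* Initialize the ring buffer. */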
int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
		       struct page *pages, u32 page_cnt, u32 max_pkt_size)
{
	struct page **pages_wraparound;
	unsigned long *pfns_wraparound;
	u64 pfn;
	int i;

	BUILD_BUG_ON((sizeof(struct hv_ring_buffer) != PAGE_SIZE));

	/*
	 * First page holds struct hv_ring_buffer, do wraparound mapping for
	 * the rest.
	 */
	if (hv_isolation_type_snp()) {
		pfn = page_to_pfn(pages) +
			PFN_DOWN(ms_hyperv.shared_gpa_boundary);

		pfns_wraparound = kcalloc(page_cnt * 2 - 1,
					  sizeof(unsigned long), GFP_KERNEL);
		if (!pfns_wraparound)
			return -ENOMEM;

		pfns_wraparound[0] = pfn;
		for (i = 0; i < 2 * (page_cnt - 1); i++)
			pfns_wraparound[i + 1] = pfn + i % (page_cnt - 1) + 1;

		ring_info->ring_buffer = (struct hv_ring_buffer *)
			vmap_pfn(pfns_wraparound, page_cnt * 2 - 1,
				 PAGE_KERNEL);
		kfree(pfns_wraparound);

		if (!ring_info->ring_buffer)
			return -ENOMEM;

		/* Zero ring buffer after setting memory host visible. */
		memset(ring_info->ring_buffer, 0x00, PAGE_SIZE * page_cnt);
	} else {
		pages_wraparound = kcalloc(page_cnt * 2 - 1,
					   sizeof(struct page *),
					   GFP_KERNEL);
		if (!pages_wraparound)
			return -ENOMEM;

		pages_wraparound[0] = pages;
		for (i = 0; i < 2 * (page_cnt - 1); i++)
			pages_wraparound[i + 1] =
				&pages[i % (page_cnt - 1) + 1];

		ring_info->ring_buffer = (struct hv_ring_buffer *)
			vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP,
			     PAGE_KERNEL);

		kfree(pages_wraparound);
		if (!ring_info->ring_buffer)
			return -ENOMEM;
	}

	ring_info->ring_buffer->read_index =
		ring_info->ring_buffer->write_index = 0;

	/* Set the feature bit for enabling flow control. */
	ring_info->ring_buffer->feature_bits.value = 1;

	ring_info->ring_size = page_cnt << PAGE_SHIFT;
	ring_info->ring_size_div10_reciprocal =
		reciprocal_value(ring_info->ring_size / 10);
	ring_info->ring_datasize = ring_info->ring_size -
		sizeof(struct hv_ring_buffer);
	ring_info->priv_read_index = 0;

	/* Initialize buffer that holds copies of incoming packets */
	if (max_pkt_size) {
		ring_info->pkt_buffer = kzalloc(max_pkt_size, GFP_KERNEL);
		if (!ring_info->pkt_buffer)
			return -ENOMEM;
		ring_info->pkt_buffer_size = max_pkt_size;
	}

	spin_lock_init(&ring_info->ring_lock);

	return 0;
}

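/* Cleanup the ring buffer. */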
void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info)
{
	mutex_lock(&ring_info->ring_buffer_mutex);
	vunmap(ring_info->ring_buffer);
	ring_info->ring_buffer = NULL;
	mutex_unlock(&ring_info->ring_buffer_mutex);

	kfree(ring_info->pkt_buffer);
	ring_info->pkt_buffer = NULL;
	ring_info->pkt_buffer_size = 0;
}

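/* Write to the ring buffer. */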
int hv_ringbuffer_write(struct vmbus_channel *channel,
			const struct kvec *kv_list, u32 kv_count,
			u64 requestid, u64 *trans_id)
{
	int i;
	u32 bytes_avail_towrite;
	u32 totalbytes_towrite = sizeof(u64);
	u32 next_write_location;
	u32 old_write;
	u64 prev_indices;
	unsigned long flags;
	struct hv_ring_buffer_info *outring_info = &channel->outbound;
	struct vmpacket_descriptor *desc = kv_list[0].iov_base;
	u64 __trans_id, rqst_id = VMBUS_NO_RQSTOR;

	if (channel->rescind)
		return -ENODEV;

	for (i = 0; i < kv_count; i++)
		totalbytes_towrite += kv_list[i].iov_len;

	spin_lock_irqsave(&outring_info->ring_lock, flags);

	bytes_avail_towrite = hv_get_bytes_to_write(outring_info);

	/*
	 * If there is only room for the packet, assume it is full.
	 * Otherwise, the next time around, we think the ring buffer
	 * is empty since the read index == write index.
	 */
	if (bytes_avail_towrite <= totalbytes_towrite) {
		++channel->out_full_total;

		if (!channel->out_full_flag) {
			++channel->out_full_first;
			channel->out_full_flag = true;
		}

		spin_unlock_irqrestore(&outring_info->ring_lock, flags);
		return -EAGAIN;
	}

	channel->out_full_flag = false;

	/* Write to the ring buffer */
	next_write_location = hv_get_next_write_location(outring_info);

	old_write = next_write_location;

	for (i = 0; i < kv_count; i++) {
		next_write_location = hv_copyto_ringbuffer(outring_info,
							   next_write_location,
							   kv_list[i].iov_base,
							   kv_list[i].iov_len);
	}

	/*
	 * Allocate the request ID after the data has been copied into the
	 * ring buffer.  Once this request ID is allocated, the completion
	 * path could find the data and free it.
	 */
	if (desc->flags == VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED) {
		if (channel->next_request_id_callback != NULL) {
			rqst_id = channel->next_request_id_callback(channel, requestid);
			if (rqst_id == VMBUS_RQST_ERROR) {
				spin_unlock_irqrestore(&outring_info->ring_lock, flags);
				return -EAGAIN;
			}
		}
	}
	desc = hv_get_ring_buffer(outring_info) + old_write;
	__trans_id = (rqst_id == VMBUS_NO_RQSTOR) ? requestid : rqst_id;

	/*
	 * Ensure the compiler doesn't generate code that reads the value of
	 * the transaction ID from the ring buffer, which is shared with the
	 * Hyper-V host and subject to being changed at any time.
	 */
	WRITE_ONCE(desc->trans_id, __trans_id);
	if (trans_id)
		*trans_id = __trans_id;

	/* Set previous packet start */
	prev_indices = hv_get_ring_bufferindices(outring_info);

	next_write_location = hv_copyto_ringbuffer(outring_info,
						   next_write_location,
						   &prev_indices,
						   sizeof(u64));

	/* Issue a full memory barrier before updating the write index */
	virt_mb();

	/* Now, update the write location */
	hv_set_next_write_location(outring_info, next_write_location);

	spin_unlock_irqrestore(&outring_info->ring_lock, flags);

	hv_signal_on_write(old_write, channel);

	if (channel->rescind) {
		if (rqst_id != VMBUS_NO_RQSTOR) {
			/* Reclaim request ID to avoid leak of IDs */
			if (channel->request_addr_callback != NULL)
				channel->request_addr_callback(channel, rqst_id);
		}
		return -ENODEV;
	}

	return 0;
}

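/*
 * Read a packet from the ring buffer: copy the payload (or, in raw mode,
 * the whole packet including its descriptor) into the caller's buffer and
 * advance past it.
 */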
int hv_ringbuffer_read(struct vmbus_channel *channel,
		       void *buffer, u32 buflen, u32 *buffer_actual_len,
		       u64 *requestid, bool raw)
{
	struct vmpacket_descriptor *desc;
	u32 packetlen, offset;

	if (unlikely(buflen == 0))
		return -EINVAL;

	*buffer_actual_len = 0;
	*requestid = 0;

	/* Make sure there is something to read */
	desc = hv_pkt_iter_first(channel);
	if (desc == NULL) {
		/*
		 * No error is returned when the ring is empty; drivers are
		 * expected to check buffer_actual_len.
		 */
		return 0;
	}

	offset = raw ? 0 : (desc->offset8 << 3);
	packetlen = (desc->len8 << 3) - offset;
	*buffer_actual_len = packetlen;
	*requestid = desc->trans_id;

	if (unlikely(packetlen > buflen))
		return -ENOBUFS;

	/* Copy over the packet to the user buffer */
	memcpy(buffer, (const char *)desc + offset, packetlen);

	/* Advance ring index to next packet descriptor */
	__hv_pkt_iter_next(channel, desc);

	/* Notify host of update */
	hv_pkt_iter_close(channel);

	return 0;
}

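/*
 * Determine the number of bytes available in the ring buffer after
 * the current iterator (priv_read_index) location.
 *
 * This is similar to hv_get_bytes_to_read() but uses the private
 * read index instead.
 */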
static u32 hv_pkt_iter_avail(const struct hv_ring_buffer_info *rbi)
{
	u32 priv_read_loc = rbi->priv_read_index;
	u32 write_loc;

	/*
	 * The Hyper-V host writes the packet data, then uses
	 * store_release() to update the write_index.  Use load_acquire()
	 * here to prevent loads of the packet data from being re-ordered
	 * before the read of the write_index and potentially getting
	 * stale data.
	 */
	write_loc = virt_load_acquire(&rbi->ring_buffer->write_index);

	if (write_loc >= priv_read_loc)
		return write_loc - priv_read_loc;
	else
		return (rbi->ring_datasize - priv_read_loc) + write_loc;
}

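/*
 * Get the first vmbus packet following the current read location.
 *
 * If the ring buffer is empty, return NULL; no other action is needed.
 * The packet header is copied into the channel's pkt_buffer so that the
 * host cannot change it after it has been validated.
 */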
struct vmpacket_descriptor *hv_pkt_iter_first(struct vmbus_channel *channel)
{
	struct hv_ring_buffer_info *rbi = &channel->inbound;
	struct vmpacket_descriptor *desc, *desc_copy;
	u32 bytes_avail, pkt_len, pkt_offset;

	hv_debug_delay_test(channel, MESSAGE_DELAY);

	bytes_avail = hv_pkt_iter_avail(rbi);
	if (bytes_avail < sizeof(struct vmpacket_descriptor))
		return NULL;
	bytes_avail = min(rbi->pkt_buffer_size, bytes_avail);

	desc = (struct vmpacket_descriptor *)(hv_get_ring_buffer(rbi) + rbi->priv_read_index);

	/*
	 * Ensure the compiler does not use references to incoming Hyper-V
	 * values (which could change at any moment) when reading local
	 * variables later in the code.
	 */
	pkt_len = READ_ONCE(desc->len8) << 3;
	pkt_offset = READ_ONCE(desc->offset8) << 3;

	/*
	 * If pkt_len is invalid, set it to the smaller of
	 * hv_pkt_iter_avail() and rbi->pkt_buffer_size.
	 */
	if (pkt_len < sizeof(struct vmpacket_descriptor) || pkt_len > bytes_avail)
		pkt_len = bytes_avail;

	/*
	 * If pkt_offset is invalid, arbitrarily set it to
	 * the size of vmpacket_descriptor.
	 */
	if (pkt_offset < sizeof(struct vmpacket_descriptor) || pkt_offset > pkt_len)
		pkt_offset = sizeof(struct vmpacket_descriptor);

	/* Copy the Hyper-V packet out of the ring buffer */
	desc_copy = (struct vmpacket_descriptor *)rbi->pkt_buffer;
	memcpy(desc_copy, desc, pkt_len);

	/*
	 * Hyper-V could still change len8 and offset8 after the earlier read.
	 * Ensure that desc_copy has legal values for len8 and offset8 that
	 * are consistent with the copy we just made.
	 */
	desc_copy->len8 = pkt_len >> 3;
	desc_copy->offset8 = pkt_offset >> 3;

	return desc_copy;
}
EXPORT_SYMBOL_GPL(hv_pkt_iter_first);

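/*
 * Get the next vmbus packet from the ring buffer.
 *
 * Advances the current location (priv_read_index) and checks for more
 * data. If there is no more data, return NULL.
 */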
struct vmpacket_descriptor *
__hv_pkt_iter_next(struct vmbus_channel *channel,
		   const struct vmpacket_descriptor *desc)
{
	struct hv_ring_buffer_info *rbi = &channel->inbound;
	u32 packetlen = desc->len8 << 3;
	u32 dsize = rbi->ring_datasize;

	hv_debug_delay_test(channel, MESSAGE_DELAY);

	/* bump offset to next potential packet */
	rbi->priv_read_index += packetlen + VMBUS_PKT_TRAILER;
	if (rbi->priv_read_index >= dsize)
		rbi->priv_read_index -= dsize;

	/* more data? */
	return hv_pkt_iter_first(channel);
}
EXPORT_SYMBOL_GPL(__hv_pkt_iter_next);

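/* How many bytes were read in this iterator cycle */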
static u32 hv_pkt_iter_bytes_read(const struct hv_ring_buffer_info *rbi,
				  u32 start_read_index)
{
	if (rbi->priv_read_index >= start_read_index)
		return rbi->priv_read_index - start_read_index;
	else
		return rbi->ring_datasize - start_read_index +
			rbi->priv_read_index;
}

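/*
 * Update the host's view of the read index after iterating over packets.
 *
 * If the host has stopped queuing new entries because it found the ring
 * buffer full, and enough space is now being freed up, signal the host.
 * Be careful to signal only when necessary, both for performance and
 * because Hyper-V throttles guests that signal inappropriately.
 *
 * To avoid races, three inputs must be handled in this order:
 *
 * 1. Update the read_index
 * 2. Read pending_send_sz
 * 3. Read the current write_index
 */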
void hv_pkt_iter_close(struct vmbus_channel *channel)
{
	struct hv_ring_buffer_info *rbi = &channel->inbound;
	u32 curr_write_sz, pending_sz, bytes_read, start_read_index;

	/*
	 * Make sure all reads are done before we update the read index since
	 * the writer may start writing to the read area once the read index
	 * is updated.
	 */
	virt_rmb();
	start_read_index = rbi->ring_buffer->read_index;
	rbi->ring_buffer->read_index = rbi->priv_read_index;

	/*
	 * Older Hyper-V versions do not implement pending_send_sz and simply
	 * poll if the host->guest ring buffer is full.  No signaling is
	 * needed or expected.
	 */
	if (!rbi->ring_buffer->feature_bits.feat_pending_send_sz)
		return;

	/*
	 * Issue a full memory barrier after updating read_index to ensure
	 * that the update is visible in the ring buffer header before
	 * pending_send_sz is read.  The host may be updating
	 * pending_send_sz and write_index in parallel, so both sides must
	 * order their accesses for the checks below to be reliable.
	 */
	virt_mb();

	/*
	 * If pending_send_sz is zero, the host is not waiting for space
	 * in the ring buffer and does not need to be signaled.
	 */
	pending_sz = READ_ONCE(rbi->ring_buffer->pending_send_sz);
	if (!pending_sz)
		return;

	/*
	 * Ensure the read of write_index in hv_get_bytes_to_write()
	 * happens after the read of pending_send_sz.
	 */
	virt_rmb();
	curr_write_sz = hv_get_bytes_to_write(rbi);
	bytes_read = hv_pkt_iter_bytes_read(rbi, start_read_index);

	/*
	 * Signal the host only on the transition from "not enough space"
	 * to "enough space": if there was already sufficient room before
	 * this iteration freed bytes_read bytes, the host was not blocked
	 * on us.
	 */
	if (curr_write_sz - bytes_read > pending_sz)
		return;

	/* If the pending write still will not fit, don't give false hope. */
	if (curr_write_sz <= pending_sz)
		return;

	++channel->intr_in_full;
	vmbus_setevent(channel);
}
EXPORT_SYMBOL_GPL(hv_pkt_iter_close);