0001
0002
0003
0004
0005
0006
0007
0008 #include <linux/types.h>
0009 #include <linux/socket.h>
0010 #include <linux/stddef.h>
0011 #include <net/sock.h>
0012
0013 #include "vmci_transport_notify.h"
0014
/* Shorthand for a field of the packet-based notify state embedded in the
 * transport's private data (vmci_trans(vsk)->notify.pkt).
 */
#define PKT_FIELD(vsk, field_name) (vmci_trans(vsk)->notify.pkt.field_name)
0016
/* Return true if the peer has announced it is waiting to write and enough
 * space has been freed in our consume queue to justify sending it a READ
 * notification.  Always true when the waiting-notify optimization is
 * compiled out.
 */
static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	bool retval;
	u64 notify_limit;

	if (!PKT_FIELD(vsk, peer_waiting_write))
		return false;

#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
	/* A peer blocked on write suggests it is producing faster than we
	 * consume.  On first detection, shrink the write-notify window by a
	 * page (never below write_notify_min_window) so the READ
	 * notification is delayed until more of the queue has drained.
	 */
	if (!PKT_FIELD(vsk, peer_waiting_write_detected)) {
		PKT_FIELD(vsk, peer_waiting_write_detected) = true;
		if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) {
			PKT_FIELD(vsk, write_notify_window) =
				PKT_FIELD(vsk, write_notify_min_window);
		} else {
			PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE;
			if (PKT_FIELD(vsk, write_notify_window) <
			    PKT_FIELD(vsk, write_notify_min_window))
				PKT_FIELD(vsk, write_notify_window) =
					PKT_FIELD(vsk, write_notify_min_window);

		}
	}
	notify_limit = vmci_trans(vsk)->consume_size -
		PKT_FIELD(vsk, write_notify_window);
#else
	notify_limit = 0;
#endif

	/* Notify only once free space in the consume queue exceeds
	 * notify_limit (0 when flow control is compiled out, i.e. any free
	 * space at all triggers the notification).
	 */
	retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) >
		notify_limit;
#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
	if (retval) {
		/* Once we decide to notify, the peer is no longer considered
		 * blocked; re-arm detection so the window can shrink again on
		 * the next block.
		 */
		PKT_FIELD(vsk, peer_waiting_write_detected) = false;
	}
#endif
	return retval;
#else
	return true;
#endif
}
0085
/* Return true if the peer has announced it is waiting to read and our
 * produce queue currently holds data it could consume.  The recorded
 * peer_waiting_read_info is not consulted here; any ready data triggers
 * the notification.  Always true when the optimization is compiled out.
 */
static bool vmci_transport_notify_waiting_read(struct vsock_sock *vsk)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	if (!PKT_FIELD(vsk, peer_waiting_read))
		return false;

	return vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) > 0;
#else
	return true;
#endif
}
0103
/* Handle an incoming WAITING_READ packet: remember that the peer is
 * waiting to read (and where, via pkt->u.wait) and, if data is already
 * available, send it a WROTE notification right away.  bottom_half
 * selects the context-appropriate send helper.
 */
static void
vmci_transport_handle_waiting_read(struct sock *sk,
				   struct vmci_transport_packet *pkt,
				   bool bottom_half,
				   struct sockaddr_vm *dst,
				   struct sockaddr_vm *src)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	struct vsock_sock *vsk;

	vsk = vsock_sk(sk);

	PKT_FIELD(vsk, peer_waiting_read) = true;
	memcpy(&PKT_FIELD(vsk, peer_waiting_read_info), &pkt->u.wait,
	       sizeof(PKT_FIELD(vsk, peer_waiting_read_info)));

	if (vmci_transport_notify_waiting_read(vsk)) {
		bool sent;

		if (bottom_half)
			sent = vmci_transport_send_wrote_bh(dst, src) > 0;
		else
			sent = vmci_transport_send_wrote(sk) > 0;

		/* Clear the flag only if the notification actually went out;
		 * otherwise a later opportunity will retry.
		 */
		if (sent)
			PKT_FIELD(vsk, peer_waiting_read) = false;
	}
#endif
}
0133
/* Handle an incoming WAITING_WRITE packet: remember that the peer is
 * waiting for space to write (and where, via pkt->u.wait) and, if enough
 * space is already free, send it a READ notification right away.
 * bottom_half selects the context-appropriate send helper.
 */
static void
vmci_transport_handle_waiting_write(struct sock *sk,
				    struct vmci_transport_packet *pkt,
				    bool bottom_half,
				    struct sockaddr_vm *dst,
				    struct sockaddr_vm *src)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	struct vsock_sock *vsk;

	vsk = vsock_sk(sk);

	PKT_FIELD(vsk, peer_waiting_write) = true;
	memcpy(&PKT_FIELD(vsk, peer_waiting_write_info), &pkt->u.wait,
	       sizeof(PKT_FIELD(vsk, peer_waiting_write_info)));

	if (vmci_transport_notify_waiting_write(vsk)) {
		bool sent;

		if (bottom_half)
			sent = vmci_transport_send_read_bh(dst, src) > 0;
		else
			sent = vmci_transport_send_read(sk) > 0;

		/* Clear the flag only if the notification actually went out;
		 * otherwise a later opportunity will retry.
		 */
		if (sent)
			PKT_FIELD(vsk, peer_waiting_write) = false;
	}
#endif
}
0163
/* Handle an incoming READ notification: the peer consumed data, so local
 * writers may make progress.  Re-arms WAITING_WRITE sending and wakes
 * writers via sk_write_space.  pkt, bottom_half, dst and src are unused
 * for this packet type (signature shared with the other handlers).
 */
static void
vmci_transport_handle_read(struct sock *sk,
			   struct vmci_transport_packet *pkt,
			   bool bottom_half,
			   struct sockaddr_vm *dst, struct sockaddr_vm *src)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	struct vsock_sock *vsk;

	vsk = vsock_sk(sk);
	/* Allow a fresh WAITING_WRITE to be sent the next time we block. */
	PKT_FIELD(vsk, sent_waiting_write) = false;
#endif

	sk->sk_write_space(sk);
}
0179
/* Tell the peer we are waiting to read room_needed bytes: compute the
 * consume-queue position (offset + generation, accounting for wrap past
 * the queue end) at which that much data will be available and send a
 * WAITING_READ packet.  Returns true on success, or immediately if one
 * is already outstanding.  Always true when the optimization is
 * compiled out.
 */
static bool send_waiting_read(struct sock *sk, u64 room_needed)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	struct vsock_sock *vsk;
	struct vmci_transport_waiting_info waiting_info;
	u64 tail;
	u64 head;
	u64 room_left;
	bool ret;

	vsk = vsock_sk(sk);

	if (PKT_FIELD(vsk, sent_waiting_read))
		return true;

	/* We are blocked on reads, so grow the write-notify window by a
	 * page, capped at the consume queue size, to get notified sooner.
	 */
	if (PKT_FIELD(vsk, write_notify_window) <
	    vmci_trans(vsk)->consume_size)
		PKT_FIELD(vsk, write_notify_window) =
		    min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE,
			vmci_trans(vsk)->consume_size);

	vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair, &tail, &head);
	room_left = vmci_trans(vsk)->consume_size - head;
	if (room_needed >= room_left) {
		/* The target position wraps past the end of the queue, so it
		 * falls in the next consume-queue generation.
		 */
		waiting_info.offset = room_needed - room_left;
		waiting_info.generation =
		    PKT_FIELD(vsk, consume_q_generation) + 1;
	} else {
		waiting_info.offset = head + room_needed;
		waiting_info.generation = PKT_FIELD(vsk, consume_q_generation);
	}

	ret = vmci_transport_send_waiting_read(sk, &waiting_info) > 0;
	if (ret)
		PKT_FIELD(vsk, sent_waiting_read) = true;

	return ret;
#else
	return true;
#endif
}
0221
/* Tell the peer we are waiting for room_needed bytes of space: compute
 * the produce-queue position (offset + generation) at which that much
 * space will be free and send a WAITING_WRITE packet.  Returns true on
 * success, or immediately if one is already outstanding.  Always true
 * when the optimization is compiled out.
 *
 * NOTE(review): the computations use room_needed + 1 — presumably one
 * extra slot is reserved to distinguish a full queue from an empty one;
 * confirm against the queue-pair implementation.
 */
static bool send_waiting_write(struct sock *sk, u64 room_needed)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	struct vsock_sock *vsk;
	struct vmci_transport_waiting_info waiting_info;
	u64 tail;
	u64 head;
	u64 room_left;
	bool ret;

	vsk = vsock_sk(sk);

	if (PKT_FIELD(vsk, sent_waiting_write))
		return true;

	vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair, &tail, &head);
	room_left = vmci_trans(vsk)->produce_size - tail;
	if (room_needed + 1 >= room_left) {
		/* Target position wraps past the queue end into the current
		 * generation.
		 */
		waiting_info.offset = room_needed + 1 - room_left;
		waiting_info.generation = PKT_FIELD(vsk, produce_q_generation);
	} else {
		waiting_info.offset = tail + room_needed + 1;
		waiting_info.generation =
		    PKT_FIELD(vsk, produce_q_generation) - 1;
	}

	ret = vmci_transport_send_waiting_write(sk, &waiting_info) > 0;
	if (ret)
		PKT_FIELD(vsk, sent_waiting_write) = true;

	return ret;
#else
	return true;
#endif
}
0258
0259 static int vmci_transport_send_read_notification(struct sock *sk)
0260 {
0261 struct vsock_sock *vsk;
0262 bool sent_read;
0263 unsigned int retries;
0264 int err;
0265
0266 vsk = vsock_sk(sk);
0267 sent_read = false;
0268 retries = 0;
0269 err = 0;
0270
0271 if (vmci_transport_notify_waiting_write(vsk)) {
0272
0273
0274
0275
0276
0277
0278
0279 while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
0280 !sent_read &&
0281 retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
0282 err = vmci_transport_send_read(sk);
0283 if (err >= 0)
0284 sent_read = true;
0285
0286 retries++;
0287 }
0288
0289 if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS)
0290 pr_err("%p unable to send read notify to peer\n", sk);
0291 else
0292 #if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
0293 PKT_FIELD(vsk, peer_waiting_write) = false;
0294 #endif
0295
0296 }
0297 return err;
0298 }
0299
/* Handle an incoming WROTE notification: the peer queued data, so local
 * readers may make progress.  Re-arms WAITING_READ sending and signals
 * data readiness via sk_data_ready.  pkt, bottom_half, dst and src are
 * unused for this packet type.
 */
static void
vmci_transport_handle_wrote(struct sock *sk,
			    struct vmci_transport_packet *pkt,
			    bool bottom_half,
			    struct sockaddr_vm *dst, struct sockaddr_vm *src)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	struct vsock_sock *vsk = vsock_sk(sk);
	/* Allow a fresh WAITING_READ to be sent the next time we block. */
	PKT_FIELD(vsk, sent_waiting_read) = false;
#endif
	sk->sk_data_ready(sk);
}
0312
/* Initialize the per-socket notify state: both notify windows start at a
 * page, all waiting/sent flags are cleared, queue generations start at 0,
 * and the recorded peer wait positions are zeroed.
 */
static void vmci_transport_notify_pkt_socket_init(struct sock *sk)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE;
	PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE;
	PKT_FIELD(vsk, peer_waiting_read) = false;
	PKT_FIELD(vsk, peer_waiting_write) = false;
	PKT_FIELD(vsk, peer_waiting_write_detected) = false;
	PKT_FIELD(vsk, sent_waiting_read) = false;
	PKT_FIELD(vsk, sent_waiting_write) = false;
	PKT_FIELD(vsk, produce_q_generation) = 0;
	PKT_FIELD(vsk, consume_q_generation) = 0;

	memset(&PKT_FIELD(vsk, peer_waiting_read_info), 0,
	       sizeof(PKT_FIELD(vsk, peer_waiting_read_info)));
	memset(&PKT_FIELD(vsk, peer_waiting_write_info), 0,
	       sizeof(PKT_FIELD(vsk, peer_waiting_write_info)));
}
0332
/* This protocol keeps no dynamically allocated notify state, so there is
 * nothing to release at socket destruction.
 */
static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk)
{
}
0336
/* poll() helper for readability.  Sets *data_ready_now; when no data is
 * queued on an established connection, also sends a WAITING_READ so the
 * peer notifies us once at least one byte is available.  Returns 0 on
 * success, -1 if that notification could not be sent.  target is unused.
 */
static int
vmci_transport_notify_pkt_poll_in(struct sock *sk,
				  size_t target, bool *data_ready_now)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	if (vsock_stream_has_data(vsk)) {
		*data_ready_now = true;
	} else {
		/* Nothing to read right now.  Ask to be notified when data
		 * arrives; this only makes sense on an established
		 * connection.
		 */
		if (sk->sk_state == TCP_ESTABLISHED) {
			if (!send_waiting_read(sk, 1))
				return -1;

		}
		*data_ready_now = false;
	}

	return 0;
}
0360
/* poll() helper for writability.  Sets *space_avail_now; when the produce
 * queue is full (free space exactly 0), also sends a WAITING_WRITE so the
 * peer notifies us once space frees up.  Returns 0 on success, -1 if that
 * notification could not be sent.  target is unused.  Note that a
 * negative vsock_stream_has_space() result leaves *space_avail_now
 * untouched and returns 0.
 */
static int
vmci_transport_notify_pkt_poll_out(struct sock *sk,
				   size_t target, bool *space_avail_now)
{
	s64 produce_q_free_space;
	struct vsock_sock *vsk = vsock_sk(sk);

	produce_q_free_space = vsock_stream_has_space(vsk);
	if (produce_q_free_space > 0) {
		*space_avail_now = true;
		return 0;
	} else if (produce_q_free_space == 0) {
		/* The queue is full; ask for a notification when at least one
		 * byte of space becomes available.
		 */
		if (!send_waiting_write(sk, 1))
			return -1;

		*space_avail_now = false;
	}

	return 0;
}
0389
/* Prepare per-receive notify bookkeeping before a dequeue.  With flow
 * control enabled, raises the minimum write-notify window to cover
 * target + 1 bytes; if the current window was below that new minimum,
 * the window is raised too and notify_on_block is set so the peer learns
 * of the larger window at the next blocking point.  Always returns 0.
 */
static int
vmci_transport_notify_pkt_recv_init(
			struct sock *sk,
			size_t target,
			struct vmci_transport_recv_notify_data *data)
{
	struct vsock_sock *vsk = vsock_sk(sk);

#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY
	data->consume_head = 0;
	data->produce_tail = 0;
#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
	data->notify_on_block = false;

	if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) {
		PKT_FIELD(vsk, write_notify_min_window) = target + 1;
		if (PKT_FIELD(vsk, write_notify_window) <
		    PKT_FIELD(vsk, write_notify_min_window)) {
			/* The window is below the new minimum, so grow it now
			 * and flag that the peer must be told (via a READ
			 * notification before blocking) about the change.
			 */
			PKT_FIELD(vsk, write_notify_window) =
			    PKT_FIELD(vsk, write_notify_min_window);
			data->notify_on_block = true;
		}
	}
#endif
#endif

	return 0;
}
0425
/* Called just before the receiver blocks waiting for target bytes.
 * Sends a WAITING_READ to the peer (-EHOSTUNREACH if it cannot be sent)
 * and, if recv_init requested it, flushes the pending notify-on-block
 * READ notification.  Returns 0 on success or a negative error.
 */
static int
vmci_transport_notify_pkt_recv_pre_block(
				struct sock *sk,
				size_t target,
				struct vmci_transport_recv_notify_data *data)
{
	int err = 0;

	/* Notify our peer that we are waiting for data to read. */
	if (!send_waiting_read(sk, target)) {
		err = -EHOSTUNREACH;
		return err;
	}
#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
	if (data->notify_on_block) {
		err = vmci_transport_send_read_notification(sk);
		if (err < 0)
			return err;

		data->notify_on_block = false;
	}
#endif

	return err;
}
0451
/* Snapshot the consume-queue indexes into data before the dequeue, so
 * recv_post_dequeue can tell whether the read wrapped past the end of
 * the queue.  target is unused.  Always returns 0.
 */
static int
vmci_transport_notify_pkt_recv_pre_dequeue(
				struct sock *sk,
				size_t target,
				struct vmci_transport_recv_notify_data *data)
{
	struct vsock_sock *vsk = vsock_sk(sk);

#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair,
				       &data->produce_tail,
				       &data->consume_head);
#endif

	return 0;
}
0471
/* After data has been copied out of the consume queue: if the read
 * wrapped past the end of the queue (copied reaches the space that was
 * left from data->consume_head), advance the consume-queue generation;
 * then send the peer a READ notification.  Returns 0 or a negative send
 * error.  target is unused.
 */
static int
vmci_transport_notify_pkt_recv_post_dequeue(
				struct sock *sk,
				size_t target,
				ssize_t copied,
				bool data_read,
				struct vmci_transport_recv_notify_data *data)
{
	struct vsock_sock *vsk;
	int err;

	vsk = vsock_sk(sk);
	err = 0;

	if (data_read) {
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
		/* Detect a wrap-around using the head snapshot taken in
		 * recv_pre_dequeue.
		 */
		if (copied >=
		    vmci_trans(vsk)->consume_size - data->consume_head)
			PKT_FIELD(vsk, consume_q_generation)++;
#endif

		err = vmci_transport_send_read_notification(sk);
		if (err < 0)
			return err;

	}
	return err;
}
0504
/* Reset the per-send index snapshots before an enqueue operation.
 * Always returns 0.
 */
static int
vmci_transport_notify_pkt_send_init(
			struct sock *sk,
			struct vmci_transport_send_notify_data *data)
{
#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY
	data->consume_head = 0;
	data->produce_tail = 0;
#endif

	return 0;
}
0517
/* Called just before the sender blocks on a full queue: tell the peer we
 * are waiting for at least one byte of space.  Returns 0 on success or
 * -EHOSTUNREACH if the WAITING_WRITE could not be sent.  data is unused.
 */
static int
vmci_transport_notify_pkt_send_pre_block(
				struct sock *sk,
				struct vmci_transport_send_notify_data *data)
{
	/* Notify our peer that we are waiting for room to write. */
	if (!send_waiting_write(sk, 1))
		return -EHOSTUNREACH;

	return 0;
}
0529
/* Snapshot the produce-queue indexes into data before the enqueue, so
 * send_post_enqueue can tell whether the write wrapped past the end of
 * the queue.  Always returns 0.
 */
static int
vmci_transport_notify_pkt_send_pre_enqueue(
				struct sock *sk,
				struct vmci_transport_send_notify_data *data)
{
	struct vsock_sock *vsk = vsock_sk(sk);

#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair,
				       &data->produce_tail,
				       &data->consume_head);
#endif

	return 0;
}
0545
/* After data has been written into the produce queue: if the write
 * wrapped past the end of the queue, advance the produce-queue
 * generation; then, if the peer is waiting to read, send it a WROTE
 * notification, retrying up to VMCI_TRANSPORT_MAX_DGRAM_RESENDS times
 * unless the peer has shut down its receive side.  Returns 0 or the
 * last (negative) send error.
 */
static int
vmci_transport_notify_pkt_send_post_enqueue(
				struct sock *sk,
				ssize_t written,
				struct vmci_transport_send_notify_data *data)
{
	int err = 0;
	struct vsock_sock *vsk;
	bool sent_wrote = false;
	int retries = 0;

	vsk = vsock_sk(sk);

#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	/* Detect a wrap-around using the tail snapshot taken in
	 * send_pre_enqueue.
	 */
	if (written >= vmci_trans(vsk)->produce_size - data->produce_tail)
		PKT_FIELD(vsk, produce_q_generation)++;

#endif

	if (vmci_transport_notify_waiting_read(vsk)) {
		/* Notify the peer that we have written, retrying the send on
		 * failure up to our maximum.
		 */
		while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
		       !sent_wrote &&
		       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
			err = vmci_transport_send_wrote(sk);
			if (err >= 0)
				sent_wrote = true;

			retries++;
		}

		if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
			pr_err("%p unable to send wrote notify to peer\n", sk);
			return err;
		} else {
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
			/* Notification delivered; the peer is no longer
			 * considered waiting.
			 */
			PKT_FIELD(vsk, peer_waiting_read) = false;
#endif
		}
	}
	return err;
}
0596
0597 static void
0598 vmci_transport_notify_pkt_handle_pkt(
0599 struct sock *sk,
0600 struct vmci_transport_packet *pkt,
0601 bool bottom_half,
0602 struct sockaddr_vm *dst,
0603 struct sockaddr_vm *src, bool *pkt_processed)
0604 {
0605 bool processed = false;
0606
0607 switch (pkt->type) {
0608 case VMCI_TRANSPORT_PACKET_TYPE_WROTE:
0609 vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src);
0610 processed = true;
0611 break;
0612 case VMCI_TRANSPORT_PACKET_TYPE_READ:
0613 vmci_transport_handle_read(sk, pkt, bottom_half, dst, src);
0614 processed = true;
0615 break;
0616 case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE:
0617 vmci_transport_handle_waiting_write(sk, pkt, bottom_half,
0618 dst, src);
0619 processed = true;
0620 break;
0621
0622 case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ:
0623 vmci_transport_handle_waiting_read(sk, pkt, bottom_half,
0624 dst, src);
0625 processed = true;
0626 break;
0627 }
0628
0629 if (pkt_processed)
0630 *pkt_processed = processed;
0631 }
0632
0633 static void vmci_transport_notify_pkt_process_request(struct sock *sk)
0634 {
0635 struct vsock_sock *vsk = vsock_sk(sk);
0636
0637 PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
0638 if (vmci_trans(vsk)->consume_size <
0639 PKT_FIELD(vsk, write_notify_min_window))
0640 PKT_FIELD(vsk, write_notify_min_window) =
0641 vmci_trans(vsk)->consume_size;
0642 }
0643
0644 static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk)
0645 {
0646 struct vsock_sock *vsk = vsock_sk(sk);
0647
0648 PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
0649 if (vmci_trans(vsk)->consume_size <
0650 PKT_FIELD(vsk, write_notify_min_window))
0651 PKT_FIELD(vsk, write_notify_min_window) =
0652 vmci_trans(vsk)->consume_size;
0653 }
0654
0655
/* Ops table for the packet-based (WAITING_READ/WAITING_WRITE/WROTE/READ)
 * vsock VMCI notification protocol implemented in this file.
 */
const struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = {
	.socket_init = vmci_transport_notify_pkt_socket_init,
	.socket_destruct = vmci_transport_notify_pkt_socket_destruct,
	.poll_in = vmci_transport_notify_pkt_poll_in,
	.poll_out = vmci_transport_notify_pkt_poll_out,
	.handle_notify_pkt = vmci_transport_notify_pkt_handle_pkt,
	.recv_init = vmci_transport_notify_pkt_recv_init,
	.recv_pre_block = vmci_transport_notify_pkt_recv_pre_block,
	.recv_pre_dequeue = vmci_transport_notify_pkt_recv_pre_dequeue,
	.recv_post_dequeue = vmci_transport_notify_pkt_recv_post_dequeue,
	.send_init = vmci_transport_notify_pkt_send_init,
	.send_pre_block = vmci_transport_notify_pkt_send_pre_block,
	.send_pre_enqueue = vmci_transport_notify_pkt_send_pre_enqueue,
	.send_post_enqueue = vmci_transport_notify_pkt_send_post_enqueue,
	.process_request = vmci_transport_notify_pkt_process_request,
	.process_negotiate = vmci_transport_notify_pkt_process_negotiate,
};