Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * VMware vSockets Driver
0004  *
0005  * Copyright (C) 2009-2013 VMware, Inc. All rights reserved.
0006  */
0007 
0008 #include <linux/types.h>
0009 #include <linux/socket.h>
0010 #include <linux/stddef.h>
0011 #include <net/sock.h>
0012 
0013 #include "vmci_transport_notify.h"
0014 
/* Shorthand accessor for a field of this socket's queue-pair-state ("QState")
 * notification bookkeeping, stored at vmci_trans(vsk)->notify.pkt_q_state.
 */
#define PKT_FIELD(vsk, field_name) \
    (vmci_trans(vsk)->notify.pkt_q_state.field_name)
/* Decide whether the peer, which has signalled that it is waiting to write,
 * should be sent a READ notification now.
 *
 * Returns false immediately if no peer writer is waiting.  Otherwise applies
 * the flow-control heuristic described in the comment below: on first
 * detection of a blocked writer the write_notify_window is shrunk (never
 * below write_notify_min_window), and the notification is held back until
 * the consume queue's free space exceeds consume_size - write_notify_window.
 *
 * Returns true when the READ notification should be sent.
 */
static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk)
{
    bool retval;
    u64 notify_limit;

    if (!PKT_FIELD(vsk, peer_waiting_write))
        return false;

    /* When the sender blocks, we take that as a sign that the sender is
     * faster than the receiver. To reduce the transmit rate of the sender,
     * we delay the sending of the read notification by decreasing the
     * write_notify_window. The notification is delayed until the number of
     * bytes used in the queue drops below the write_notify_window.
     */

    if (!PKT_FIELD(vsk, peer_waiting_write_detected)) {
        PKT_FIELD(vsk, peer_waiting_write_detected) = true;
        /* Shrink the window by one page, clamping at the minimum window. */
        if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) {
            PKT_FIELD(vsk, write_notify_window) =
                PKT_FIELD(vsk, write_notify_min_window);
        } else {
            PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE;
            if (PKT_FIELD(vsk, write_notify_window) <
                PKT_FIELD(vsk, write_notify_min_window))
                PKT_FIELD(vsk, write_notify_window) =
                    PKT_FIELD(vsk, write_notify_min_window);

        }
    }
    notify_limit = vmci_trans(vsk)->consume_size -
        PKT_FIELD(vsk, write_notify_window);

    /* The notify_limit is used to delay notifications in the case where
     * flow control is enabled. Below the test is expressed in terms of
     * free space in the queue: if free_space > ConsumeSize -
     * write_notify_window then notify An alternate way of expressing this
     * is to rewrite the expression to use the data ready in the receive
     * queue: if write_notify_window > bufferReady then notify as
     * free_space == ConsumeSize - bufferReady.
     */

    retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) >
        notify_limit;

    if (retval) {
        /* Once we notify the peer, we reset the detected flag so the
         * next wait will again cause a decrease in the window size.
         */

        PKT_FIELD(vsk, peer_waiting_write_detected) = false;
    }
    return retval;
}
0071 
/* Handle an incoming READ control packet: the peer has consumed data from
 * the queue we produce into, so wake any writers blocked on this socket.
 * pkt/bottom_half/dst/src are unused by the QState protocol.
 */
static void
vmci_transport_handle_read(struct sock *sk,
               struct vmci_transport_packet *pkt,
               bool bottom_half,
               struct sockaddr_vm *dst, struct sockaddr_vm *src)
{
    sk->sk_write_space(sk);
}
0080 
/* Handle an incoming WROTE control packet: the peer has produced data into
 * the queue we consume from, so wake any readers blocked on this socket.
 * pkt/bottom_half/dst/src are unused by the QState protocol.
 */
static void
vmci_transport_handle_wrote(struct sock *sk,
                struct vmci_transport_packet *pkt,
                bool bottom_half,
                struct sockaddr_vm *dst, struct sockaddr_vm *src)
{
    sk->sk_data_ready(sk);
}
0089 
0090 static void vsock_block_update_write_window(struct sock *sk)
0091 {
0092     struct vsock_sock *vsk = vsock_sk(sk);
0093 
0094     if (PKT_FIELD(vsk, write_notify_window) < vmci_trans(vsk)->consume_size)
0095         PKT_FIELD(vsk, write_notify_window) =
0096             min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE,
0097             vmci_trans(vsk)->consume_size);
0098 }
0099 
0100 static int vmci_transport_send_read_notification(struct sock *sk)
0101 {
0102     struct vsock_sock *vsk;
0103     bool sent_read;
0104     unsigned int retries;
0105     int err;
0106 
0107     vsk = vsock_sk(sk);
0108     sent_read = false;
0109     retries = 0;
0110     err = 0;
0111 
0112     if (vmci_transport_notify_waiting_write(vsk)) {
0113         /* Notify the peer that we have read, retrying the send on
0114          * failure up to our maximum value.  XXX For now we just log
0115          * the failure, but later we should schedule a work item to
0116          * handle the resend until it succeeds.  That would require
0117          * keeping track of work items in the vsk and cleaning them up
0118          * upon socket close.
0119          */
0120         while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
0121                !sent_read &&
0122                retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
0123             err = vmci_transport_send_read(sk);
0124             if (err >= 0)
0125                 sent_read = true;
0126 
0127             retries++;
0128         }
0129 
0130         if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_read)
0131             pr_err("%p unable to send read notification to peer\n",
0132                    sk);
0133         else
0134             PKT_FIELD(vsk, peer_waiting_write) = false;
0135 
0136     }
0137     return err;
0138 }
0139 
0140 static void vmci_transport_notify_pkt_socket_init(struct sock *sk)
0141 {
0142     struct vsock_sock *vsk = vsock_sk(sk);
0143 
0144     PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE;
0145     PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE;
0146     PKT_FIELD(vsk, peer_waiting_write) = false;
0147     PKT_FIELD(vsk, peer_waiting_write_detected) = false;
0148 }
0149 
0150 static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk)
0151 {
0152     PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE;
0153     PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE;
0154     PKT_FIELD(vsk, peer_waiting_write) = false;
0155     PKT_FIELD(vsk, peer_waiting_write_detected) = false;
0156 }
0157 
0158 static int
0159 vmci_transport_notify_pkt_poll_in(struct sock *sk,
0160                   size_t target, bool *data_ready_now)
0161 {
0162     struct vsock_sock *vsk = vsock_sk(sk);
0163 
0164     if (vsock_stream_has_data(vsk)) {
0165         *data_ready_now = true;
0166     } else {
0167         /* We can't read right now because there is nothing in the
0168          * queue. Ask for notifications when there is something to
0169          * read.
0170          */
0171         if (sk->sk_state == TCP_ESTABLISHED)
0172             vsock_block_update_write_window(sk);
0173         *data_ready_now = false;
0174     }
0175 
0176     return 0;
0177 }
0178 
0179 static int
0180 vmci_transport_notify_pkt_poll_out(struct sock *sk,
0181                    size_t target, bool *space_avail_now)
0182 {
0183     s64 produce_q_free_space;
0184     struct vsock_sock *vsk = vsock_sk(sk);
0185 
0186     produce_q_free_space = vsock_stream_has_space(vsk);
0187     if (produce_q_free_space > 0) {
0188         *space_avail_now = true;
0189         return 0;
0190     } else if (produce_q_free_space == 0) {
0191         /* This is a connected socket but we can't currently send data.
0192          * Nothing else to do.
0193          */
0194         *space_avail_now = false;
0195     }
0196 
0197     return 0;
0198 }
0199 
0200 static int
0201 vmci_transport_notify_pkt_recv_init(
0202                 struct sock *sk,
0203                 size_t target,
0204                 struct vmci_transport_recv_notify_data *data)
0205 {
0206     struct vsock_sock *vsk = vsock_sk(sk);
0207 
0208     data->consume_head = 0;
0209     data->produce_tail = 0;
0210     data->notify_on_block = false;
0211 
0212     if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) {
0213         PKT_FIELD(vsk, write_notify_min_window) = target + 1;
0214         if (PKT_FIELD(vsk, write_notify_window) <
0215             PKT_FIELD(vsk, write_notify_min_window)) {
0216             /* If the current window is smaller than the new
0217              * minimal window size, we need to reevaluate whether
0218              * we need to notify the sender. If the number of ready
0219              * bytes are smaller than the new window, we need to
0220              * send a notification to the sender before we block.
0221              */
0222 
0223             PKT_FIELD(vsk, write_notify_window) =
0224                 PKT_FIELD(vsk, write_notify_min_window);
0225             data->notify_on_block = true;
0226         }
0227     }
0228 
0229     return 0;
0230 }
0231 
0232 static int
0233 vmci_transport_notify_pkt_recv_pre_block(
0234                 struct sock *sk,
0235                 size_t target,
0236                 struct vmci_transport_recv_notify_data *data)
0237 {
0238     int err = 0;
0239 
0240     vsock_block_update_write_window(sk);
0241 
0242     if (data->notify_on_block) {
0243         err = vmci_transport_send_read_notification(sk);
0244         if (err < 0)
0245             return err;
0246         data->notify_on_block = false;
0247     }
0248 
0249     return err;
0250 }
0251 
/* Called after data has been dequeued from the consume queue.  If anything
 * was read, detects whether the queue was completely full before this read
 * (in which case the peer's writer may be blocked and is marked as waiting),
 * sends a READ notification when owed, and re-signals local readers.
 * Returns 0 or the negative error from sending the notification.
 */
static int
vmci_transport_notify_pkt_recv_post_dequeue(
                struct sock *sk,
                size_t target,
                ssize_t copied,
                bool data_read,
                struct vmci_transport_recv_notify_data *data)
{
    struct vsock_sock *vsk;
    int err;
    bool was_full = false;
    u64 free_space;

    vsk = vsock_sk(sk);
    err = 0;

    if (data_read) {
        /* Full barrier so the free-space read below observes the
         * dequeue we just completed (pairs with the barrier in the
         * enqueue path).
         */
        smp_mb();

        free_space =
            vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair);
        /* free_space == copied means free space was zero before this
         * read, i.e. the queue was full and the peer may be blocked
         * waiting for room to write.
         */
        was_full = free_space == copied;

        if (was_full)
            PKT_FIELD(vsk, peer_waiting_write) = true;

        err = vmci_transport_send_read_notification(sk);
        if (err < 0)
            return err;

        /* See the comment in
         * vmci_transport_notify_pkt_send_post_enqueue().
         */
        sk->sk_data_ready(sk);
    }

    return err;
}
0290 
0291 static int
0292 vmci_transport_notify_pkt_send_init(
0293                 struct sock *sk,
0294                 struct vmci_transport_send_notify_data *data)
0295 {
0296     data->consume_head = 0;
0297     data->produce_tail = 0;
0298 
0299     return 0;
0300 }
0301 
/* Called after data has been enqueued into the produce queue.  If the
 * queue was empty before this write, the peer may be blocked waiting for
 * data, so send it a WROTE notification, retrying up to
 * VMCI_TRANSPORT_MAX_DGRAM_RESENDS times; repeated failure is logged.
 * Returns the result of the last send attempt (0 if none was needed).
 */
static int
vmci_transport_notify_pkt_send_post_enqueue(
                struct sock *sk,
                ssize_t written,
                struct vmci_transport_send_notify_data *data)
{
    int err = 0;
    struct vsock_sock *vsk;
    bool sent_wrote = false;
    bool was_empty;
    int retries = 0;

    vsk = vsock_sk(sk);

    /* Full barrier so the buf-ready read below observes the enqueue we
     * just completed (pairs with the barrier in the dequeue path).
     */
    smp_mb();

    /* ready == written means the queue held nothing before this write,
     * i.e. it was empty and the peer may be blocked waiting for data.
     */
    was_empty =
        vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) == written;
    if (was_empty) {
        while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
               !sent_wrote &&
               retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
            err = vmci_transport_send_wrote(sk);
            if (err >= 0)
                sent_wrote = true;

            retries++;
        }
    }

    /* Only reachable with retries > 0 when was_empty; logs exhaustion. */
    if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_wrote) {
        pr_err("%p unable to send wrote notification to peer\n",
               sk);
        return err;
    }

    return err;
}
0340 
0341 static void
0342 vmci_transport_notify_pkt_handle_pkt(
0343                 struct sock *sk,
0344                 struct vmci_transport_packet *pkt,
0345                 bool bottom_half,
0346                 struct sockaddr_vm *dst,
0347                 struct sockaddr_vm *src, bool *pkt_processed)
0348 {
0349     bool processed = false;
0350 
0351     switch (pkt->type) {
0352     case VMCI_TRANSPORT_PACKET_TYPE_WROTE:
0353         vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src);
0354         processed = true;
0355         break;
0356     case VMCI_TRANSPORT_PACKET_TYPE_READ:
0357         vmci_transport_handle_read(sk, pkt, bottom_half, dst, src);
0358         processed = true;
0359         break;
0360     }
0361 
0362     if (pkt_processed)
0363         *pkt_processed = processed;
0364 }
0365 
0366 static void vmci_transport_notify_pkt_process_request(struct sock *sk)
0367 {
0368     struct vsock_sock *vsk = vsock_sk(sk);
0369 
0370     PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
0371     if (vmci_trans(vsk)->consume_size <
0372         PKT_FIELD(vsk, write_notify_min_window))
0373         PKT_FIELD(vsk, write_notify_min_window) =
0374             vmci_trans(vsk)->consume_size;
0375 }
0376 
0377 static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk)
0378 {
0379     struct vsock_sock *vsk = vsock_sk(sk);
0380 
0381     PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
0382     if (vmci_trans(vsk)->consume_size <
0383         PKT_FIELD(vsk, write_notify_min_window))
0384         PKT_FIELD(vsk, write_notify_min_window) =
0385             vmci_trans(vsk)->consume_size;
0386 }
0387 
0388 static int
0389 vmci_transport_notify_pkt_recv_pre_dequeue(
0390                 struct sock *sk,
0391                 size_t target,
0392                 struct vmci_transport_recv_notify_data *data)
0393 {
0394     return 0; /* NOP for QState. */
0395 }
0396 
/* The QState protocol needs no work before a sender blocks; succeed
 * trivially.
 */
static int
vmci_transport_notify_pkt_send_pre_block(
                struct sock *sk,
                struct vmci_transport_send_notify_data *data)
{
    return 0;
}
0404 
/* The QState protocol needs no work before an enqueue; succeed trivially. */
static int
vmci_transport_notify_pkt_send_pre_enqueue(
                struct sock *sk,
                struct vmci_transport_send_notify_data *data)
{
    return 0;
}
0412 
/* Socket always on control packet based operations. */
/* Ops table wiring the QState notification protocol into the VMCI vsock
 * transport.
 */
const struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = {
    /* Socket lifetime */
    .socket_init = vmci_transport_notify_pkt_socket_init,
    .socket_destruct = vmci_transport_notify_pkt_socket_destruct,
    /* poll() support */
    .poll_in = vmci_transport_notify_pkt_poll_in,
    .poll_out = vmci_transport_notify_pkt_poll_out,
    /* Incoming control packets */
    .handle_notify_pkt = vmci_transport_notify_pkt_handle_pkt,
    /* Receive path hooks */
    .recv_init = vmci_transport_notify_pkt_recv_init,
    .recv_pre_block = vmci_transport_notify_pkt_recv_pre_block,
    .recv_pre_dequeue = vmci_transport_notify_pkt_recv_pre_dequeue,
    .recv_post_dequeue = vmci_transport_notify_pkt_recv_post_dequeue,
    /* Send path hooks */
    .send_init = vmci_transport_notify_pkt_send_init,
    .send_pre_block = vmci_transport_notify_pkt_send_pre_block,
    .send_pre_enqueue = vmci_transport_notify_pkt_send_pre_enqueue,
    .send_post_enqueue = vmci_transport_notify_pkt_send_post_enqueue,
    /* Queue pair size negotiation */
    .process_request = vmci_transport_notify_pkt_process_request,
    .process_negotiate = vmci_transport_notify_pkt_process_negotiate,
};