block/drbd/drbd_req.h

0001 /* SPDX-License-Identifier: GPL-2.0-or-later */
0002 /*
0003    drbd_req.h
0004
0005    This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
0006
0007    Copyright (C) 2006-2008, LINBIT Information Technologies GmbH.
0008    Copyright (C) 2006-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
0009    Copyright (C) 2006-2008, Philipp Reisner <philipp.reisner@linbit.com>.
0010
0011  */
0012
0013 #ifndef _DRBD_REQ_H
0014 #define _DRBD_REQ_H
0015
0016 #include <linux/module.h>
0017
0018 #include <linux/slab.h>
0019 #include <linux/drbd.h>
0020 #include "drbd_int.h"
0021
0022 /* The request callbacks will be called in irq context by the IDE drivers,
0023    and in Softirqs/Tasklets/BH context by the SCSI drivers,
0024    and by the receiver and worker in kernel-thread context.
0025    Try to get the locking right :) */
0026
0027 /*
0028  * Objects of type struct drbd_request do only exist on a R_PRIMARY node, and are
0029  * associated with IO requests originating from the block layer above us.
0030  *
0031  * There are quite a few things that may happen to a drbd request
0032  * during its lifetime.
0033  *
0034  *  It will be created.
0035  *  It will be marked with the intention to be
0036  *    submitted to local disk and/or
0037  *    sent via the network.
0038  *
0039  *  It has to be placed on the transfer log and other housekeeping lists,
0040  *  in case we have a network connection.
0041  *
0042  *  It may be identified as a concurrent (write) request
0043  *    and be handled accordingly.
0044  *
0045  *  It may be handed over to the local disk subsystem.
0046  *  It may be completed by the local disk subsystem,
0047  *    either successfully or with io-error.
0048  *  In case it is a READ request, and it failed locally,
0049  *    it may be retried remotely.
0050  *
0051  *  It may be queued for sending.
0052  *  It may be handed over to the network stack,
0053  *    which may fail.
0054  *  It may be acknowledged by the "peer" according to the wire_protocol in use.
0055  *    this may be a negative ack.
0056  *  It may receive a faked ack when the network connection is lost and the
0057  *  transfer log is cleaned up.
0058  *  Sending may be canceled due to network connection loss.
0059  *  When it finally has outlived its time,
0060  *    corresponding dirty bits in the resync-bitmap may be cleared or set,
0061  *    it will be destroyed,
0062  *    and completion will be signalled to the originator,
0063  *      with or without "success".
0064  */
0065
/* Events (and, for the QUEUE_* values, actions) that can be applied to a
 * struct drbd_request.  One of these is passed as the 'what' argument of
 * __req_mod(), _req_mod(), req_mod(), tl_restart() and _tl_restart().
 * Values are assigned implicitly, in declaration order, starting at 0. */
0066 enum drbd_req_event {
0067     CREATED,
0068     TO_BE_SENT,
0069     TO_BE_SUBMITTED,
0070
0071     /* XXX yes, now I am inconsistent...
0072      * these are not "events" but "actions"
0073      * oh, well... */
0074     QUEUE_FOR_NET_WRITE,
0075     QUEUE_FOR_NET_READ,
0076     QUEUE_FOR_SEND_OOS,
0077
0078     /* An empty flush is queued as P_BARRIER,
0079      * which will cause it to complete "successfully",
0080      * even if the local disk flush failed.
0081      *
0082      * Just like "real" requests, empty flushes (blkdev_issue_flush()) will
0083      * only see an error if neither local nor remote data is reachable. */
0084     QUEUE_AS_DRBD_BARRIER,
0085
    /* NOTE(review): the group below looks like network-side outcomes
     * (send result, acks from the peer) -- confirm against __req_mod(). */
0086     SEND_CANCELED,
0087     SEND_FAILED,
0088     HANDED_OVER_TO_NETWORK,
0089     OOS_HANDED_TO_NETWORK,
0090     CONNECTION_LOST_WHILE_PENDING,
0091     READ_RETRY_REMOTE_CANCELED,
0092     RECV_ACKED_BY_PEER,
0093     WRITE_ACKED_BY_PEER,
0094     WRITE_ACKED_BY_PEER_AND_SIS, /* and set_in_sync */
0095     CONFLICT_RESOLVED,
0096     POSTPONE_WRITE,
0097     NEG_ACKED,
0098     BARRIER_ACKED, /* in protocol A and B */
0099     DATA_RECEIVED, /* (remote read) */
0100
    /* NOTE(review): presumably the local disk completion results --
     * confirm against the callers that raise them. */
0101     COMPLETED_OK,
0102     READ_COMPLETED_WITH_ERROR,
0103     READ_AHEAD_COMPLETED_WITH_ERROR,
0104     WRITE_COMPLETED_WITH_ERROR,
0105     DISCARD_COMPLETED_NOTSUPP,
0106     DISCARD_COMPLETED_WITH_ERROR,
0107
0108     ABORT_DISK_IO,
0109     RESEND,
0110     FAIL_FROZEN_DISK_IO,
0111     RESTART_FROZEN_DISK_IO,
0112     NOTHING,
0113 };
0114
0115 /* encoding of request states for now.  we don't actually need that many bits.
0116  * we don't need to do atomic bit operations either, since most of the time we
0117  * need to look at the connection state and/or manipulate some lists at the
0118  * same time, so we should hold the request lock anyways.
 *
 * Each enumerator below is a bit NUMBER, not a mask: the RQ_* macros that
 * follow this enum turn it into a mask with (1UL << __RQ_xxx).  The order
 * of the enumerators therefore defines the bit layout.
0119  */
0120 enum drbd_req_state_bits {
0121     /* 3210
     * (bit 0 = __RQ_LOCAL_PENDING, 1 = __RQ_LOCAL_COMPLETED,
     *  2 = __RQ_LOCAL_OK, 3 = __RQ_LOCAL_ABORTED)
0122      * 0000: no local possible
0123      * 0001: to be submitted
0124      *    UNUSED, we could map: 011: submitted, completion still pending
0125      * 0110: completed ok
0126      * 0010: completed with error
0127      * 1001: Aborted (before completion)
0128      * 1x10: Aborted and completed -> free
0129      */
0130     __RQ_LOCAL_PENDING,
0131     __RQ_LOCAL_COMPLETED,
0132     __RQ_LOCAL_OK,
0133     __RQ_LOCAL_ABORTED,
0134
0135     /* 87654
     * (bit 4 = __RQ_NET_PENDING, 5 = __RQ_NET_QUEUED, 6 = __RQ_NET_SENT,
     *  7 = __RQ_NET_DONE, 8 = __RQ_NET_OK; bit 9, __RQ_NET_SIS, is not
     *  part of this table)
0136      * 00000: no network possible
0137      * 00001: to be sent
0138      * 00011: to be sent, on worker queue
0139      * 00101: sent, expecting recv_ack (B) or write_ack (C)
0140      * 11101: sent,
0141      *        recv_ack (B) or implicit "ack" (A),
0142      *        still waiting for the barrier ack.
0143      *        master_bio may already be completed and invalidated.
0144      * 11100: write acked (C),
0145      *        data received (for remote read, any protocol)
0146      *        or finally the barrier ack has arrived (B,A)...
0147      *        request can be freed
0148      * 01100: neg-acked (write, protocol C)
0149      *        or neg-d-acked (read, any protocol)
0150      *        or killed from the transfer log
0151      *        during cleanup after connection loss
0152      *        request can be freed
0153      * 01000: canceled or send failed...
0154      *        request can be freed
0155      */
0156
0157     /* if "SENT" is not set, yet, this can still fail or be canceled.
0158      * if "SENT" is set already, we still wait for an Ack packet.
0159      * when cleared, the master_bio may be completed.
0160      * in (B,A) the request object may still linger on the transaction log
0161      * until the corresponding barrier ack comes in */
0162     __RQ_NET_PENDING,
0163
0164     /* If it is QUEUED, and it is a WRITE, it is also registered in the
0165      * transfer log. Currently we need this flag to avoid conflicts between
0166      * worker canceling the request and tl_clear_barrier killing it from
0167      * transfer log.  We should restructure the code so this conflict does
0168      * no longer occur. */
0169     __RQ_NET_QUEUED,
0170
0171     /* well, actually only "handed over to the network stack".
0172      *
0173      * TODO can potentially be dropped because of the similar meaning
0174      * of RQ_NET_SENT and ~RQ_NET_QUEUED.
0175      * however it is not exactly the same. before we drop it
0176      * we must ensure that we can tell a request with network part
0177      * from a request without, regardless of what happens to it. */
0178     __RQ_NET_SENT,
0179
0180     /* when set, the request may be freed (if RQ_NET_QUEUED is clear).
0181      * basically this means the corresponding P_BARRIER_ACK was received */
0182     __RQ_NET_DONE,
0183
0184     /* whether or not we know (C) or pretend (B,A) that the write
0185      * was successfully written on the peer.
0186      */
0187     __RQ_NET_OK,
0188
0189     /* peer called drbd_set_in_sync() for this write */
0190     __RQ_NET_SIS,
0191
0192     /* keep this last, it's for the RQ_NET_MASK:
     * not a flag itself, only the first bit number past the net bits */
0193     __RQ_NET_MAX,
0194
0195     /* Set when this is a write, clear for a read */
0196     __RQ_WRITE,
    /* NOTE(review): presumably mark write-same, unmap/discard and
     * write-zeroes requests respectively -- confirm against the users
     * of RQ_WSAME / RQ_UNMAP / RQ_ZEROES. */
0197     __RQ_WSAME,
0198     __RQ_UNMAP,
0199     __RQ_ZEROES,
0200
0201     /* Should call drbd_al_complete_io() for this request... */
0202     __RQ_IN_ACT_LOG,
0203
0204     /* This was the most recent request during some blk_finish_plug()
0205      * or its implicit from-schedule equivalent.
0206      * We may use it as hint to send a P_UNPLUG_REMOTE */
0207     __RQ_UNPLUG,
0208
0209     /* The peer has sent a retry ACK */
0210     __RQ_POSTPONED,
0211
0212     /* would have been completed,
0213      * but was not, because of drbd_suspended() */
0214     __RQ_COMPLETION_SUSP,
0215
0216     /* We expect a receive ACK (wire proto B) */
0217     __RQ_EXP_RECEIVE_ACK,
0218
0219     /* We expect a write ACK (wire proto C) */
0220     __RQ_EXP_WRITE_ACK,
0221
0222     /* waiting for a barrier ack, did an extra kref_get */
0223     __RQ_EXP_BARR_ACK,
0224 };
0225
/* Bit masks for the bit numbers declared in enum drbd_req_state_bits:
 * every RQ_xxx is simply (1UL << __RQ_xxx). */
0226 #define RQ_LOCAL_PENDING   (1UL << __RQ_LOCAL_PENDING)
0227 #define RQ_LOCAL_COMPLETED (1UL << __RQ_LOCAL_COMPLETED)
0228 #define RQ_LOCAL_OK        (1UL << __RQ_LOCAL_OK)
0229 #define RQ_LOCAL_ABORTED   (1UL << __RQ_LOCAL_ABORTED)
0230
/* Every bit from bit 0 up to and including RQ_LOCAL_ABORTED,
 * i.e. all four RQ_LOCAL_* flags. */
0231 #define RQ_LOCAL_MASK      ((RQ_LOCAL_ABORTED << 1)-1)
0232
0233 #define RQ_NET_PENDING     (1UL << __RQ_NET_PENDING)
0234 #define RQ_NET_QUEUED      (1UL << __RQ_NET_QUEUED)
0235 #define RQ_NET_SENT        (1UL << __RQ_NET_SENT)
0236 #define RQ_NET_DONE        (1UL << __RQ_NET_DONE)
0237 #define RQ_NET_OK          (1UL << __RQ_NET_OK)
0238 #define RQ_NET_SIS         (1UL << __RQ_NET_SIS)
0239
/* Every bit below __RQ_NET_MAX that is not part of RQ_LOCAL_MASK,
 * i.e. RQ_NET_PENDING through RQ_NET_SIS. */
0240 #define RQ_NET_MASK        (((1UL << __RQ_NET_MAX)-1) & ~RQ_LOCAL_MASK)
0241
/* Flags above the net range; none of them is covered by either mask. */
0242 #define RQ_WRITE           (1UL << __RQ_WRITE)
0243 #define RQ_WSAME           (1UL << __RQ_WSAME)
0244 #define RQ_UNMAP           (1UL << __RQ_UNMAP)
0245 #define RQ_ZEROES          (1UL << __RQ_ZEROES)
0246 #define RQ_IN_ACT_LOG      (1UL << __RQ_IN_ACT_LOG)
0247 #define RQ_UNPLUG          (1UL << __RQ_UNPLUG)
0248 #define RQ_POSTPONED       (1UL << __RQ_POSTPONED)
0249 #define RQ_COMPLETION_SUSP (1UL << __RQ_COMPLETION_SUSP)
0250 #define RQ_EXP_RECEIVE_ACK (1UL << __RQ_EXP_RECEIVE_ACK)
0251 #define RQ_EXP_WRITE_ACK   (1UL << __RQ_EXP_WRITE_ACK)
0252 #define RQ_EXP_BARR_ACK    (1UL << __RQ_EXP_BARR_ACK)
0253
0254 /* For waking up the frozen transfer log, mod_req() has to return whether the
0255    request should be counted in the epoch object.
   NOTE(review): no mod_req() is declared in this header; this presumably
   refers to the return value of __req_mod() -- confirm. */
0256 #define MR_WRITE       1
0257 #define MR_READ        2
0258
0259 /* Short lived temporary struct on the stack.
0260  * We could squirrel the error to be returned into
0261  * bio->bi_iter.bi_size, or similar. But that would be too ugly.
 *
 * _req_mod() and req_mod() pass the address of one of these to __req_mod()
 * and, if ->bio is non-NULL afterwards, hand it on to
 * complete_master_bio(). */
0262 struct bio_and_error {
0263     struct bio *bio;   /* checked for non-NULL before complete_master_bio() */
0264     int error;         /* the error to be returned along with that bio */
0265 };
0266
/* Prototypes of functions that are only declared, not defined, in this
 * header.  Function declarations have external linkage by default, so the
 * 'extern' keyword is left out. */
void start_new_tl_epoch(struct drbd_connection *connection);
void drbd_req_destroy(struct kref *kref);
void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m);
int __req_mod(struct drbd_request *req, enum drbd_req_event what,
              struct bio_and_error *m);
void complete_master_bio(struct drbd_device *device, struct bio_and_error *m);
void request_timer_fn(struct timer_list *t);
void tl_restart(struct drbd_connection *connection, enum drbd_req_event what);
void _tl_restart(struct drbd_connection *connection, enum drbd_req_event what);
void tl_abort_disk_io(struct drbd_device *device);

/* this is in drbd_main.c */
void drbd_restart_request(struct drbd_request *req);
0282
0283 /* use this if you don't want to deal with calling complete_master_bio()
0284  * outside the spinlock, e.g. when walking some list on cleanup. */
0285 static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what)
0286 {
0287     struct drbd_device *device = req->device;
0288     struct bio_and_error m;
0289     int rv;
0290
0291     /* __req_mod possibly frees req, do not touch req after that! */
0292     rv = __req_mod(req, what, &m);
0293     if (m.bio)
0294         complete_master_bio(device, &m);
0295
0296     return rv;
0297 }
0298
0299 /* completion of master bio is outside of our spinlock.
0300  * We still may or may not be inside some irqs disabled section
0301  * of the lower level driver completion callback, so we need to
0302  * spin_lock_irqsave here. */
0303 static inline int req_mod(struct drbd_request *req,
0304         enum drbd_req_event what)
0305 {
0306     unsigned long flags;
0307     struct drbd_device *device = req->device;
0308     struct bio_and_error m;
0309     int rv;
0310
0311     spin_lock_irqsave(&device->resource->req_lock, flags);
0312     rv = __req_mod(req, what, &m);
0313     spin_unlock_irqrestore(&device->resource->req_lock, flags);
0314
0315     if (m.bio)
0316         complete_master_bio(device, &m);
0317
0318     return rv;
0319 }
0320
/* Declared here, defined elsewhere; takes the device state by value. */
bool drbd_should_do_remote(union drbd_dev_state state);
0322
0323 #endif