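/*
 * drbd_req.c
 *
 * Request handling for DRBD: allocation and life cycle of struct drbd_request,
 * the request state machine driven by __req_mod(), read balancing, the
 * activity-log aware submit path, and request timeout handling.
 */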
0014 #include <linux/module.h>
0015
0016 #include <linux/slab.h>
0017 #include <linux/drbd.h>
0018 #include "drbd_int.h"
0019 #include "drbd_req.h"
0020
0021
0022 static bool drbd_may_do_local_read(struct drbd_device *device, sector_t sector, int size);
0023
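/* Allocate a drbd_request for @bio_src and clone the bio for submission to the
 * local backing device; the master bio itself stays owned by the caller.
 * Returns NULL if allocation from the request mempool fails. */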
0024 static struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio *bio_src)
0025 {
0026 struct drbd_request *req;
0027
0028 req = mempool_alloc(&drbd_request_mempool, GFP_NOIO);
0029 if (!req)
0030 return NULL;
0031 memset(req, 0, sizeof(*req));
0032
0033 req->private_bio = bio_alloc_clone(device->ldev->backing_bdev, bio_src,
0034 GFP_NOIO, &drbd_io_bio_set);
0035 req->private_bio->bi_private = req;
0036 req->private_bio->bi_end_io = drbd_request_endio;
0037
0038 req->rq_state = (bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0)
0039 | (bio_op(bio_src) == REQ_OP_WRITE_ZEROES ? RQ_ZEROES : 0)
0040 | (bio_op(bio_src) == REQ_OP_DISCARD ? RQ_UNMAP : 0);
0041 req->device = device;
0042 req->master_bio = bio_src;
0043 req->epoch = 0;
0044
0045 drbd_clear_interval(&req->i);
0046 req->i.sector = bio_src->bi_iter.bi_sector;
0047 req->i.size = bio_src->bi_iter.bi_size;
0048 req->i.local = true;
0049 req->i.waiting = false;
0050
0051 INIT_LIST_HEAD(&req->tl_requests);
0052 INIT_LIST_HEAD(&req->w.list);
0053 INIT_LIST_HEAD(&req->req_pending_master_completion);
0054 INIT_LIST_HEAD(&req->req_pending_local);
0055
0056
0057 atomic_set(&req->completion_ref, 1);
0058
0059 kref_init(&req->kref);
0060 return req;
0061 }
0062
0063 static void drbd_remove_request_interval(struct rb_root *root,
0064 struct drbd_request *req)
0065 {
0066 struct drbd_device *device = req->device;
0067 struct drbd_interval *i = &req->i;
0068
0069 drbd_remove_interval(root, i);
0070
0071
0072 if (i->waiting)
0073 wake_up(&device->misc_wait);
0074 }
0075
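/* Final kref release function of a drbd_request: sanity check the request
 * state, take it off the transfer log and the interval tree, update bitmap and
 * activity log state for writes, and give the request back to the mempool. */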
0076 void drbd_req_destroy(struct kref *kref)
0077 {
0078 struct drbd_request *req = container_of(kref, struct drbd_request, kref);
0079 struct drbd_device *device = req->device;
0080 const unsigned s = req->rq_state;
0081
0082 if ((req->master_bio && !(s & RQ_POSTPONED)) ||
0083 atomic_read(&req->completion_ref) ||
0084 (s & RQ_LOCAL_PENDING) ||
0085 ((s & RQ_NET_MASK) && !(s & RQ_NET_DONE))) {
0086 drbd_err(device, "drbd_req_destroy: Logic BUG rq_state = 0x%x, completion_ref = %d\n",
0087 s, atomic_read(&req->completion_ref));
0088 return;
0089 }
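
	/* remove it from the transfer log */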
0099 list_del_init(&req->tl_requests);
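
	/* take it out of the write_requests resp. read_requests interval tree */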
0103 if (!drbd_interval_empty(&req->i)) {
0104 struct rb_root *root;
0105
0106 if (s & RQ_WRITE)
0107 root = &device->write_requests;
0108 else
0109 root = &device->read_requests;
0110 drbd_remove_request_interval(root, req);
0111 } else if (s & (RQ_NET_MASK & ~RQ_NET_DONE) && req->i.size != 0)
0112 drbd_err(device, "drbd_req_destroy: Logic BUG: interval empty, but: rq_state=0x%x, sect=%llu, size=%u\n",
0113 s, (unsigned long long)req->i.sector, req->i.size);
0114
0115
0116
0117
0118 if (s & RQ_WRITE) {
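		/* A write that was merely postponed, and never submitted
		 * locally nor sent to the peer, must not touch the bitmap.
		 * Otherwise, mark the area out-of-sync unless it is OK both
		 * locally and on the peer; if the peer additionally reported
		 * "set in sync" (RQ_NET_SIS), mark it in sync. */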
0130 if ((s & (RQ_POSTPONED|RQ_LOCAL_MASK|RQ_NET_MASK)) != RQ_POSTPONED) {
0131 if (!(s & RQ_NET_OK) || !(s & RQ_LOCAL_OK))
0132 drbd_set_out_of_sync(device, req->i.sector, req->i.size);
0133
0134 if ((s & RQ_NET_OK) && (s & RQ_LOCAL_OK) && (s & RQ_NET_SIS))
0135 drbd_set_in_sync(device, req->i.sector, req->i.size);
0136 }
0137
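		/* Release the activity log extent only now, not already in the
		 * local completion handler: for a mirrored write the extent
		 * must stay in the AL until the request is done on the peer as
		 * well, or a crash in between could make us forget to resync
		 * the corresponding extent. */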
0148 if (s & RQ_IN_ACT_LOG) {
0149 if (get_ldev_if_state(device, D_FAILED)) {
0150 drbd_al_complete_io(device, &req->i);
0151 put_ldev(device);
0152 } else if (__ratelimit(&drbd_ratelimit_state)) {
0153 drbd_warn(device, "Should have called drbd_al_complete_io(, %llu, %u), "
0154 "but my Disk seems to have failed :(\n",
0155 (unsigned long long) req->i.sector, req->i.size);
0156 }
0157 }
0158 }
0159
0160 mempool_free(req, &drbd_request_mempool);
0161 }
0162
0163 static void wake_all_senders(struct drbd_connection *connection)
0164 {
0165 wake_up(&connection->sender_work.q_wait);
0166 }
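
/* must hold resource->req_lock */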
0169 void start_new_tl_epoch(struct drbd_connection *connection)
0170 {
0171
0172 if (connection->current_tle_writes == 0)
0173 return;
0174
0175 connection->current_tle_writes = 0;
0176 atomic_inc(&connection->current_tle_nr);
0177 wake_all_senders(connection);
0178 }
0179
0180 void complete_master_bio(struct drbd_device *device,
0181 struct bio_and_error *m)
0182 {
0183 if (unlikely(m->error))
0184 m->bio->bi_status = errno_to_blk_status(m->error);
0185 bio_endio(m->bio);
0186 dec_ap_bio(device);
0187 }
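
/* Helper for __req_mod(): set m->bio to the master bio if the request is fit
 * to be completed towards the upper layers, or leave m alone (it is
 * initialized to NULL by __req_mod()) if it cannot be completed yet.
 * If m->bio is set, the error to be reported is placed in m->error. */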
0196 static
0197 void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m)
0198 {
0199 const unsigned s = req->rq_state;
0200 struct drbd_device *device = req->device;
0201 int error, ok;
0202
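	/* The master bio must not be completed while the request is still being
	 * processed locally, is queued for or pending on the network, or while
	 * completion is suspended; those state bits get cleared in any order by
	 * the worker, the receiver and the bio_endio callbacks. */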
0212 if ((s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED)) ||
0213 (s & RQ_NET_QUEUED) || (s & RQ_NET_PENDING) ||
0214 (s & RQ_COMPLETION_SUSP)) {
0215 drbd_err(device, "drbd_req_complete: Logic BUG rq_state = 0x%x\n", s);
0216 return;
0217 }
0218
0219 if (!req->master_bio) {
0220 drbd_err(device, "drbd_req_complete: Logic BUG, master_bio == NULL!\n");
0221 return;
0222 }
0223
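	/* Report success if at least one of the local or network operations
	 * succeeded; a local completion error, if any, has been stored as an
	 * ERR_PTR in private_bio by drbd_request_endio. */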
0237 ok = (s & RQ_LOCAL_OK) || (s & RQ_NET_OK);
0238 error = PTR_ERR(req->private_bio);
0239
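	/* Before signalling completion of a write to the upper layers, close
	 * the current transfer log epoch if this request still belongs to it. */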
0247 if (op_is_write(bio_op(req->master_bio)) &&
0248 req->epoch == atomic_read(&first_peer_device(device)->connection->current_tle_nr))
0249 start_new_tl_epoch(first_peer_device(device)->connection);
0250
0251
0252 bio_end_io_acct(req->master_bio, req->start_jif);
0253
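	/* A failed READ (but not read-ahead) that made it into the transfer log
	 * is only postponed: it re-enters __drbd_make_request() via the retry
	 * machinery and may be re-assigned to another local or remote path. */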
0268 if (!ok &&
0269 bio_op(req->master_bio) == REQ_OP_READ &&
0270 !(req->master_bio->bi_opf & REQ_RAHEAD) &&
0271 !list_empty(&req->tl_requests))
0272 req->rq_state |= RQ_POSTPONED;
0273
0274 if (!(req->rq_state & RQ_POSTPONED)) {
0275 m->error = ok ? 0 : (error ?: -EIO);
0276 m->bio = req->master_bio;
0277 req->master_bio = NULL;
0278
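		/* Keep the interval in the tree for now, but mark it completed
		 * so that new conflicting writes no longer wait for it. */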
0282 req->i.completed = true;
0283 }
0284
0285 if (req->i.waiting)
0286 wake_up(&device->misc_wait);
0287
0288
0289
0290
0291
0292 list_del_init(&req->req_pending_master_completion);
0293 }
0294
0295
0296 static void drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_error *m, int put)
0297 {
0298 struct drbd_device *device = req->device;
0299 D_ASSERT(device, m || (req->rq_state & RQ_POSTPONED));
0300
0301 if (!put)
0302 return;
0303
0304 if (!atomic_sub_and_test(put, &req->completion_ref))
0305 return;
0306
0307 drbd_req_complete(req, m);
0308
0309
0310
0311 if (req->rq_state & RQ_LOCAL_ABORTED)
0312 return;
0313
0314 if (req->rq_state & RQ_POSTPONED) {
0315
0316
0317 drbd_restart_request(req);
0318 return;
0319 }
0320
0321 kref_put(&req->kref, drbd_req_destroy);
0322 }
0323
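/* Helpers to maintain the per-connection "oldest request" pointers (req_next,
 * req_ack_pending, req_not_net_done) used by the sender and by the request
 * timeout logic; all of them run under the resource req_lock. */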
0324 static void set_if_null_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req)
0325 {
0326 struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
0327 if (!connection)
0328 return;
0329 if (connection->req_next == NULL)
0330 connection->req_next = req;
0331 }
0332
0333 static void advance_conn_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req)
0334 {
0335 struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
0336 struct drbd_request *iter = req;
0337 if (!connection)
0338 return;
0339 if (connection->req_next != req)
0340 return;
0341
0342 req = NULL;
0343 list_for_each_entry_continue(iter, &connection->transfer_log, tl_requests) {
0344 const unsigned int s = iter->rq_state;
0345
0346 if (s & RQ_NET_QUEUED) {
0347 req = iter;
0348 break;
0349 }
0350 }
0351 connection->req_next = req;
0352 }
0353
0354 static void set_if_null_req_ack_pending(struct drbd_peer_device *peer_device, struct drbd_request *req)
0355 {
0356 struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
0357 if (!connection)
0358 return;
0359 if (connection->req_ack_pending == NULL)
0360 connection->req_ack_pending = req;
0361 }
0362
0363 static void advance_conn_req_ack_pending(struct drbd_peer_device *peer_device, struct drbd_request *req)
0364 {
0365 struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
0366 struct drbd_request *iter = req;
0367 if (!connection)
0368 return;
0369 if (connection->req_ack_pending != req)
0370 return;
0371
0372 req = NULL;
0373 list_for_each_entry_continue(iter, &connection->transfer_log, tl_requests) {
0374 const unsigned int s = iter->rq_state;
0375
0376 if ((s & RQ_NET_SENT) && (s & RQ_NET_PENDING)) {
0377 req = iter;
0378 break;
0379 }
0380 }
0381 connection->req_ack_pending = req;
0382 }
0383
0384 static void set_if_null_req_not_net_done(struct drbd_peer_device *peer_device, struct drbd_request *req)
0385 {
0386 struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
0387 if (!connection)
0388 return;
0389 if (connection->req_not_net_done == NULL)
0390 connection->req_not_net_done = req;
0391 }
0392
0393 static void advance_conn_req_not_net_done(struct drbd_peer_device *peer_device, struct drbd_request *req)
0394 {
0395 struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
0396 struct drbd_request *iter = req;
0397 if (!connection)
0398 return;
0399 if (connection->req_not_net_done != req)
0400 return;
0401
0402 req = NULL;
0403 list_for_each_entry_continue(iter, &connection->transfer_log, tl_requests) {
0404 const unsigned int s = iter->rq_state;
0405
0406 if ((s & RQ_NET_SENT) && !(s & RQ_NET_DONE)) {
0407 req = iter;
0408 break;
0409 }
0410 }
0411 connection->req_not_net_done = req;
0412 }
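
/* Apply the requested rq_state changes and adjust completion_ref and kref
 * accordingly; this is intended to be the only place that manipulates
 * req->completion_ref and req->kref. */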
0416 static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
0417 int clear, int set)
0418 {
0419 struct drbd_device *device = req->device;
0420 struct drbd_peer_device *peer_device = first_peer_device(device);
0421 unsigned s = req->rq_state;
0422 int c_put = 0;
0423
0424 if (drbd_suspended(device) && !((s | clear) & RQ_COMPLETION_SUSP))
0425 set |= RQ_COMPLETION_SUSP;
0426
0427
0428
0429 req->rq_state &= ~clear;
0430 req->rq_state |= set;
0431
0432
0433 if (req->rq_state == s)
0434 return;
0435
0436
0437
0438 kref_get(&req->kref);
0439
0440 if (!(s & RQ_LOCAL_PENDING) && (set & RQ_LOCAL_PENDING))
0441 atomic_inc(&req->completion_ref);
0442
0443 if (!(s & RQ_NET_PENDING) && (set & RQ_NET_PENDING)) {
0444 inc_ap_pending(device);
0445 atomic_inc(&req->completion_ref);
0446 }
0447
0448 if (!(s & RQ_NET_QUEUED) && (set & RQ_NET_QUEUED)) {
0449 atomic_inc(&req->completion_ref);
0450 set_if_null_req_next(peer_device, req);
0451 }
0452
0453 if (!(s & RQ_EXP_BARR_ACK) && (set & RQ_EXP_BARR_ACK))
0454 kref_get(&req->kref);
0455
0456 if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT)) {
0457
0458 if (!(s & RQ_NET_DONE)) {
0459 atomic_add(req->i.size >> 9, &device->ap_in_flight);
0460 set_if_null_req_not_net_done(peer_device, req);
0461 }
0462 if (req->rq_state & RQ_NET_PENDING)
0463 set_if_null_req_ack_pending(peer_device, req);
0464 }
0465
0466 if (!(s & RQ_COMPLETION_SUSP) && (set & RQ_COMPLETION_SUSP))
0467 atomic_inc(&req->completion_ref);
0468
0469
0470
0471 if ((s & RQ_COMPLETION_SUSP) && (clear & RQ_COMPLETION_SUSP))
0472 ++c_put;
0473
0474 if (!(s & RQ_LOCAL_ABORTED) && (set & RQ_LOCAL_ABORTED)) {
0475 D_ASSERT(device, req->rq_state & RQ_LOCAL_PENDING);
0476 ++c_put;
0477 }
0478
0479 if ((s & RQ_LOCAL_PENDING) && (clear & RQ_LOCAL_PENDING)) {
0480 if (req->rq_state & RQ_LOCAL_ABORTED)
0481 kref_put(&req->kref, drbd_req_destroy);
0482 else
0483 ++c_put;
0484 list_del_init(&req->req_pending_local);
0485 }
0486
0487 if ((s & RQ_NET_PENDING) && (clear & RQ_NET_PENDING)) {
0488 dec_ap_pending(device);
0489 ++c_put;
0490 req->acked_jif = jiffies;
0491 advance_conn_req_ack_pending(peer_device, req);
0492 }
0493
0494 if ((s & RQ_NET_QUEUED) && (clear & RQ_NET_QUEUED)) {
0495 ++c_put;
0496 advance_conn_req_next(peer_device, req);
0497 }
0498
0499 if (!(s & RQ_NET_DONE) && (set & RQ_NET_DONE)) {
0500 if (s & RQ_NET_SENT)
0501 atomic_sub(req->i.size >> 9, &device->ap_in_flight);
0502 if (s & RQ_EXP_BARR_ACK)
0503 kref_put(&req->kref, drbd_req_destroy);
0504 req->net_done_jif = jiffies;
0505
0506
0507
0508
0509 advance_conn_req_next(peer_device, req);
0510 advance_conn_req_ack_pending(peer_device, req);
0511 advance_conn_req_not_net_done(peer_device, req);
0512 }
0513
0514
0515
0516
0517 if (req->i.waiting)
0518 wake_up(&device->misc_wait);
0519
0520 drbd_req_put_completion_ref(req, m, c_put);
0521 kref_put(&req->kref, drbd_req_destroy);
0522 }
0523
0524 static void drbd_report_io_error(struct drbd_device *device, struct drbd_request *req)
0525 {
0526 if (!__ratelimit(&drbd_ratelimit_state))
0527 return;
0528
0529 drbd_warn(device, "local %s IO error sector %llu+%u on %pg\n",
0530 (req->rq_state & RQ_WRITE) ? "WRITE" : "READ",
0531 (unsigned long long)req->i.sector,
0532 req->i.size >> 9,
0533 device->ldev->backing_bdev);
0534 }
0535
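/* Helper for HANDED_OVER_TO_NETWORK: is this a still pending protocol A write
 * (neither write ack nor receive ack expected)?  If a neg-ack was faster,
 * RQ_NET_PENDING is already cleared and RQ_NET_OK must not be set. */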
0542 static inline bool is_pending_write_protocol_A(struct drbd_request *req)
0543 {
0544 return (req->rq_state &
0545 (RQ_WRITE|RQ_NET_PENDING|RQ_EXP_WRITE_ACK|RQ_EXP_RECEIVE_ACK))
0546 == (RQ_WRITE|RQ_NET_PENDING);
0547 }
0548
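/* __req_mod() is the central request state machine: it translates a
 * drbd_req_event into rq_state transitions via mod_rq_state() and reports a
 * master bio to be completed (if any) through @m.  Called with the resource
 * req_lock held. */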
0561 int __req_mod(struct drbd_request *req, enum drbd_req_event what,
0562 struct bio_and_error *m)
0563 {
0564 struct drbd_device *const device = req->device;
0565 struct drbd_peer_device *const peer_device = first_peer_device(device);
0566 struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
0567 struct net_conf *nc;
0568 int p, rv = 0;
0569
0570 if (m)
0571 m->bio = NULL;
0572
0573 switch (what) {
0574 default:
0575 drbd_err(device, "LOGIC BUG in %s:%u\n", __FILE__ , __LINE__);
0576 break;
0577
0578
0579
0580
0581
0582
0583
0584 case TO_BE_SENT:
0585
0586
0587 D_ASSERT(device, !(req->rq_state & RQ_NET_MASK));
0588 rcu_read_lock();
0589 nc = rcu_dereference(connection->net_conf);
0590 p = nc->wire_protocol;
0591 rcu_read_unlock();
0592 req->rq_state |=
0593 p == DRBD_PROT_C ? RQ_EXP_WRITE_ACK :
0594 p == DRBD_PROT_B ? RQ_EXP_RECEIVE_ACK : 0;
0595 mod_rq_state(req, m, 0, RQ_NET_PENDING);
0596 break;
0597
0598 case TO_BE_SUBMITTED:
0599
0600 D_ASSERT(device, !(req->rq_state & RQ_LOCAL_MASK));
0601 mod_rq_state(req, m, 0, RQ_LOCAL_PENDING);
0602 break;
0603
0604 case COMPLETED_OK:
0605 if (req->rq_state & RQ_WRITE)
0606 device->writ_cnt += req->i.size >> 9;
0607 else
0608 device->read_cnt += req->i.size >> 9;
0609
0610 mod_rq_state(req, m, RQ_LOCAL_PENDING,
0611 RQ_LOCAL_COMPLETED|RQ_LOCAL_OK);
0612 break;
0613
0614 case ABORT_DISK_IO:
0615 mod_rq_state(req, m, 0, RQ_LOCAL_ABORTED);
0616 break;
0617
0618 case WRITE_COMPLETED_WITH_ERROR:
0619 drbd_report_io_error(device, req);
0620 __drbd_chk_io_error(device, DRBD_WRITE_ERROR);
0621 mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED);
0622 break;
0623
0624 case READ_COMPLETED_WITH_ERROR:
0625 drbd_set_out_of_sync(device, req->i.sector, req->i.size);
0626 drbd_report_io_error(device, req);
0627 __drbd_chk_io_error(device, DRBD_READ_ERROR);
0628 fallthrough;
0629 case READ_AHEAD_COMPLETED_WITH_ERROR:
0630
0631 mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED);
0632 break;
0633
0634 case DISCARD_COMPLETED_NOTSUPP:
0635 case DISCARD_COMPLETED_WITH_ERROR:
0636
0637
0638 mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED);
0639 break;
0640
0641 case QUEUE_FOR_NET_READ:
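		/* A READ that cannot be served from the local disk (no disk,
		 * target area out of sync, or a local io-error).  Register the
		 * interval so the peer's reply can be matched; the
		 * corresponding removal is in drbd_req_complete(). */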
0652 D_ASSERT(device, drbd_interval_empty(&req->i));
0653 drbd_insert_interval(&device->read_requests, &req->i);
0654
0655 set_bit(UNPLUG_REMOTE, &device->flags);
0656
0657 D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
0658 D_ASSERT(device, (req->rq_state & RQ_LOCAL_MASK) == 0);
0659 mod_rq_state(req, m, 0, RQ_NET_QUEUED);
0660 req->w.cb = w_send_read_req;
0661 drbd_queue_work(&connection->sender_work,
0662 &req->w);
0663 break;
0664
0665 case QUEUE_FOR_NET_WRITE:
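		/* Register the write in the conflict detection tree; the
		 * corresponding removal is in drbd_req_complete(). */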
0671 D_ASSERT(device, drbd_interval_empty(&req->i));
0672 drbd_insert_interval(&device->write_requests, &req->i);
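
		/* Remember to tell the peer to unplug, so the remote
		 * io-scheduler gets to see the request; then queue the write
		 * for the sender, and close the current epoch if it already
		 * reached max_epoch_size. */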
0691 set_bit(UNPLUG_REMOTE, &device->flags);
0692
0693
0694 D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
0695 mod_rq_state(req, m, 0, RQ_NET_QUEUED|RQ_EXP_BARR_ACK);
0696 req->w.cb = w_send_dblock;
0697 drbd_queue_work(&connection->sender_work,
0698 &req->w);
0699
0700
0701 rcu_read_lock();
0702 nc = rcu_dereference(connection->net_conf);
0703 p = nc->max_epoch_size;
0704 rcu_read_unlock();
0705 if (connection->current_tle_writes >= p)
0706 start_new_tl_epoch(connection);
0707
0708 break;
0709
0710 case QUEUE_FOR_SEND_OOS:
0711 mod_rq_state(req, m, 0, RQ_NET_QUEUED);
0712 req->w.cb = w_send_out_of_sync;
0713 drbd_queue_work(&connection->sender_work,
0714 &req->w);
0715 break;
0716
0717 case READ_RETRY_REMOTE_CANCELED:
0718 case SEND_CANCELED:
0719 case SEND_FAILED:
0720
0721
0722 mod_rq_state(req, m, RQ_NET_QUEUED, 0);
0723 break;
0724
0725 case HANDED_OVER_TO_NETWORK:
0726
0727 if (is_pending_write_protocol_A(req))
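			/* this is what is dangerous about protocol A:
			 * pretend it was successfully written on the peer. */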
0730 mod_rq_state(req, m, RQ_NET_QUEUED|RQ_NET_PENDING,
0731 RQ_NET_SENT|RQ_NET_OK);
0732 else
0733 mod_rq_state(req, m, RQ_NET_QUEUED, RQ_NET_SENT);
0734
0735
0736
0737 break;
0738
0739 case OOS_HANDED_TO_NETWORK:
0740
0741
0742 mod_rq_state(req, m, RQ_NET_QUEUED, RQ_NET_DONE);
0743 break;
0744
0745 case CONNECTION_LOST_WHILE_PENDING:
0746
0747 mod_rq_state(req, m,
0748 RQ_NET_OK|RQ_NET_PENDING|RQ_COMPLETION_SUSP,
0749 RQ_NET_DONE);
0750 break;
0751
0752 case CONFLICT_RESOLVED:
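		/* A superseded write in a multiple-primary conflict: nothing
		 * needs to stay in the transfer log, potential node crashes are
		 * covered by the activity log.  If the request was marked
		 * RQ_POSTPONED, it is restarted from the retry worker instead
		 * of being completed. */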
0760 D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
0761 D_ASSERT(device, req->rq_state & RQ_EXP_WRITE_ACK);
0762 mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_DONE|RQ_NET_OK);
0763 break;
0764
0765 case WRITE_ACKED_BY_PEER_AND_SIS:
0766 req->rq_state |= RQ_NET_SIS;
0767 fallthrough;
0768 case WRITE_ACKED_BY_PEER:
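		/* Protocol C: successfully written on the peer.  During resync
		 * an explicit write ack is requested even for other protocols.
		 * Keep the request in the transfer log to cater for volatile
		 * write-back caches on lower level devices. */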
0776 goto ack_common;
0777 case RECV_ACKED_BY_PEER:
0778 D_ASSERT(device, req->rq_state & RQ_EXP_RECEIVE_ACK);
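		/* Protocol B: pretend the write has been successfully written
		 * on the peer; see also the note at HANDED_OVER_TO_NETWORK. */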
0782 ack_common:
0783 mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK);
0784 break;
0785
0786 case POSTPONE_WRITE:
0787 D_ASSERT(device, req->rq_state & RQ_EXP_WRITE_ACK);
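		/* If this node has already detected the write conflict, the
		 * worker is waiting on misc_wait; wake it up once this request
		 * has completed locally. */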
0792 D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
0793 req->rq_state |= RQ_POSTPONED;
0794 if (req->i.waiting)
0795 wake_up(&device->misc_wait);
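		/* Do not clear RQ_NET_PENDING: this request makes further
		 * progress via the resolution of the postponed write. */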
0799 break;
0800
0801 case NEG_ACKED:
0802 mod_rq_state(req, m, RQ_NET_OK|RQ_NET_PENDING, 0);
0803 break;
0804
0805 case FAIL_FROZEN_DISK_IO:
0806 if (!(req->rq_state & RQ_LOCAL_COMPLETED))
0807 break;
0808 mod_rq_state(req, m, RQ_COMPLETION_SUSP, 0);
0809 break;
0810
0811 case RESTART_FROZEN_DISK_IO:
0812 if (!(req->rq_state & RQ_LOCAL_COMPLETED))
0813 break;
0814
0815 mod_rq_state(req, m,
0816 RQ_COMPLETION_SUSP|RQ_LOCAL_COMPLETED,
0817 RQ_LOCAL_PENDING);
0818
0819 rv = MR_READ;
0820 if (bio_data_dir(req->master_bio) == WRITE)
0821 rv = MR_WRITE;
0822
0823 get_ldev(device);
0824 req->w.cb = w_restart_disk_io;
0825 drbd_queue_work(&connection->sender_work,
0826 &req->w);
0827 break;
0828
0829 case RESEND:
0830
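		/* Simply complete (local only) READs. */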
0831 if (!(req->rq_state & RQ_WRITE) && !req->w.cb) {
0832 mod_rq_state(req, m, RQ_COMPLETION_SUSP, 0);
0833 break;
0834 }
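
		/* If RQ_NET_OK is already set, the peer acknowledged the write
		 * before the connection was lost and only the barrier ack is
		 * still missing: fall through and treat it like BARRIER_ACKED.
		 * Otherwise re-queue the request for the sender. */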
0841 if (!(req->rq_state & RQ_NET_OK)) {
0842
0843
0844
0845 mod_rq_state(req, m, RQ_COMPLETION_SUSP, RQ_NET_QUEUED|RQ_NET_PENDING);
0846 if (req->w.cb) {
0847
0848 drbd_queue_work(&connection->sender_work,
0849 &req->w);
0850 rv = req->rq_state & RQ_WRITE ? MR_WRITE : MR_READ;
0851 }
0852 break;
0853 }
0854 fallthrough;
0855
0856 case BARRIER_ACKED:
0857
0858 if (!(req->rq_state & RQ_WRITE))
0859 break;
0860
0861 if (req->rq_state & RQ_NET_PENDING) {
0862
0863
0864
0865 drbd_err(device, "FIXME (BARRIER_ACKED but pending)\n");
0866 }
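
		/* This is called for all requests of a matching epoch, so only
		 * set RQ_NET_DONE for those that have actually been on the
		 * wire; completing is allowed even while suspended. */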
0871 mod_rq_state(req, m, RQ_COMPLETION_SUSP,
0872 (req->rq_state & RQ_NET_MASK) ? RQ_NET_DONE : 0);
0873 break;
0874
0875 case DATA_RECEIVED:
0876 D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
0877 mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK|RQ_NET_DONE);
0878 break;
0879
0880 case QUEUE_AS_DRBD_BARRIER:
0881 start_new_tl_epoch(connection);
0882 mod_rq_state(req, m, 0, RQ_NET_OK|RQ_NET_DONE);
0883 break;
0884 }
0885
0886 return rv;
0887 }
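
/* We may serve a read locally if the disk is D_UP_TO_DATE, or if it is
 * D_INCONSISTENT but the whole requested area is already in sync; since the
 * request may span several bitmap bits, all of them must be clean. */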
0896 static bool drbd_may_do_local_read(struct drbd_device *device, sector_t sector, int size)
0897 {
0898 unsigned long sbnr, ebnr;
0899 sector_t esector, nr_sectors;
0900
0901 if (device->state.disk == D_UP_TO_DATE)
0902 return true;
0903 if (device->state.disk != D_INCONSISTENT)
0904 return false;
0905 esector = sector + (size >> 9) - 1;
0906 nr_sectors = get_capacity(device->vdisk);
0907 D_ASSERT(device, sector < nr_sectors);
0908 D_ASSERT(device, esector < nr_sectors);
0909
0910 sbnr = BM_SECT_TO_BIT(sector);
0911 ebnr = BM_SECT_TO_BIT(esector);
0912
0913 return drbd_bm_count_bits(device, sbnr, ebnr) == 0;
0914 }
0915
0916 static bool remote_due_to_read_balancing(struct drbd_device *device, sector_t sector,
0917 enum drbd_read_balancing rbm)
0918 {
0919 int stripe_shift;
0920
0921 switch (rbm) {
0922 case RB_CONGESTED_REMOTE:
0923 return false;
0924 case RB_LEAST_PENDING:
0925 return atomic_read(&device->local_cnt) >
0926 atomic_read(&device->ap_pending_cnt) + atomic_read(&device->rs_pending_cnt);
0927 case RB_32K_STRIPING:
0928 case RB_64K_STRIPING:
0929 case RB_128K_STRIPING:
0930 case RB_256K_STRIPING:
0931 case RB_512K_STRIPING:
0932 case RB_1M_STRIPING:
0933 stripe_shift = (rbm - RB_32K_STRIPING + 15);
0934 return (sector >> (stripe_shift - 9)) & 1;
0935 case RB_ROUND_ROBIN:
0936 return test_and_change_bit(READ_BALANCE_RR, &device->flags);
0937 case RB_PREFER_REMOTE:
0938 return true;
0939 case RB_PREFER_LOCAL:
0940 default:
0941 return false;
0942 }
0943 }
0944
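/*
 * complete_conflicting_writes()  -  wait for conflicting write requests
 *
 * The write_requests tree contains all active write requests we currently know
 * about; sleep on misc_wait (dropping the req_lock while sleeping) until no
 * uncompleted request overlaps with the new one.
 */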
0954 static void complete_conflicting_writes(struct drbd_request *req)
0955 {
0956 DEFINE_WAIT(wait);
0957 struct drbd_device *device = req->device;
0958 struct drbd_interval *i;
0959 sector_t sector = req->i.sector;
0960 int size = req->i.size;
0961
0962 for (;;) {
0963 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
0964
0965 if (i->completed)
0966 continue;
0967
0968
0969 break;
0970 }
0971 if (!i)
0972 break;
0973
0974
0975 prepare_to_wait(&device->misc_wait, &wait, TASK_UNINTERRUPTIBLE);
0976 i->waiting = true;
0977 spin_unlock_irq(&device->resource->req_lock);
0978 schedule();
0979 spin_lock_irq(&device->resource->req_lock);
0980 }
0981 finish_wait(&device->misc_wait, &wait);
0982 }
0983
0984
0985 static void maybe_pull_ahead(struct drbd_device *device)
0986 {
0987 struct drbd_connection *connection = first_peer_device(device)->connection;
0988 struct net_conf *nc;
0989 bool congested = false;
0990 enum drbd_on_congestion on_congestion;
0991
0992 rcu_read_lock();
0993 nc = rcu_dereference(connection->net_conf);
0994 on_congestion = nc ? nc->on_congestion : OC_BLOCK;
0995 rcu_read_unlock();
0996 if (on_congestion == OC_BLOCK ||
0997 connection->agreed_pro_version < 96)
0998 return;
0999
1000 if (on_congestion == OC_PULL_AHEAD && device->state.conn == C_AHEAD)
1001 return;
1002
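	/* Without good local storage there is no point in pulling ahead of the
	 * peer; the local reference also keeps device->act_log valid. */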
1007 if (!get_ldev_if_state(device, D_UP_TO_DATE))
1008 return;
1009
1010 if (nc->cong_fill &&
1011 atomic_read(&device->ap_in_flight) >= nc->cong_fill) {
1012 drbd_info(device, "Congestion-fill threshold reached\n");
1013 congested = true;
1014 }
1015
1016 if (device->act_log->used >= nc->cong_extents) {
1017 drbd_info(device, "Congestion-extents threshold reached\n");
1018 congested = true;
1019 }
1020
1021 if (congested) {
1022
1023 start_new_tl_epoch(first_peer_device(device)->connection);
1024
1025 if (on_congestion == OC_PULL_AHEAD)
1026 _drbd_set_state(_NS(device, conn, C_AHEAD), 0, NULL);
1027 else
1028 _drbd_set_state(_NS(device, conn, C_DISCONNECTING), 0, NULL);
1029 }
1030 put_ldev(device);
1031 }
1032
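/* Decide whether a read should be served by the peer.  Returns true if the
 * request should be sent to the peer; returns false with req->private_bio
 * still set if it should be submitted locally, and false with private_bio ==
 * NULL if we have no access to good data at all. */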
1042 static bool do_remote_read(struct drbd_request *req)
1043 {
1044 struct drbd_device *device = req->device;
1045 enum drbd_read_balancing rbm;
1046
1047 if (req->private_bio) {
1048 if (!drbd_may_do_local_read(device,
1049 req->i.sector, req->i.size)) {
1050 bio_put(req->private_bio);
1051 req->private_bio = NULL;
1052 put_ldev(device);
1053 }
1054 }
1055
1056 if (device->state.pdsk != D_UP_TO_DATE)
1057 return false;
1058
1059 if (req->private_bio == NULL)
1060 return true;
1061
1062
1063
1064
1065 rcu_read_lock();
1066 rbm = rcu_dereference(device->ldev->disk_conf)->read_balancing;
1067 rcu_read_unlock();
1068
1069 if (rbm == RB_PREFER_LOCAL && req->private_bio)
1070 return false;
1071
1072 if (remote_due_to_read_balancing(device, req->i.sector, rbm)) {
1073 if (req->private_bio) {
1074 bio_put(req->private_bio);
1075 req->private_bio = NULL;
1076 put_ldev(device);
1077 }
1078 return true;
1079 }
1080
1081 return false;
1082 }
1083
1084 bool drbd_should_do_remote(union drbd_dev_state s)
1085 {
1086 return s.pdsk == D_UP_TO_DATE ||
1087 (s.pdsk >= D_INCONSISTENT &&
1088 s.conn >= C_WF_BITMAP_T &&
1089 s.conn < C_AHEAD);
1090
1091
1092
1093 }
1094
1095 static bool drbd_should_send_out_of_sync(union drbd_dev_state s)
1096 {
1097 return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S;
1098
1099
1100 }
1101
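/* Returns the number of peers (0 or 1 in drbd 8.4) expected to actually write
 * this data; a peer we only send out-of-sync information to does not count. */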
1105 static int drbd_process_write_request(struct drbd_request *req)
1106 {
1107 struct drbd_device *device = req->device;
1108 int remote, send_oos;
1109
1110 remote = drbd_should_do_remote(device->state);
1111 send_oos = drbd_should_send_out_of_sync(device->state);
1112
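	/* Writes have to be replicated, except for an empty flush, which is
	 * better mapped to a DRBD P_BARRIER packet: in that case just close
	 * the current epoch (if we are replicating at all). */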
1119 if (unlikely(req->i.size == 0)) {
1120
1121 D_ASSERT(device, req->master_bio->bi_opf & REQ_PREFLUSH);
1122 if (remote)
1123 _req_mod(req, QUEUE_AS_DRBD_BARRIER);
1124 return remote;
1125 }
1126
1127 if (!remote && !send_oos)
1128 return 0;
1129
1130 D_ASSERT(device, !(remote && send_oos));
1131
1132 if (remote) {
1133 _req_mod(req, TO_BE_SENT);
1134 _req_mod(req, QUEUE_FOR_NET_WRITE);
1135 } else if (drbd_set_out_of_sync(device, req->i.sector, req->i.size))
1136 _req_mod(req, QUEUE_FOR_SEND_OOS);
1137
1138 return remote;
1139 }
1140
1141 static void drbd_process_discard_or_zeroes_req(struct drbd_request *req, int flags)
1142 {
1143 int err = drbd_issue_discard_or_zero_out(req->device,
1144 req->i.sector, req->i.size >> 9, flags);
1145 if (err)
1146 req->private_bio->bi_status = BLK_STS_IOERR;
1147 bio_endio(req->private_bio);
1148 }
1149
1150 static void
1151 drbd_submit_req_private_bio(struct drbd_request *req)
1152 {
1153 struct drbd_device *device = req->device;
1154 struct bio *bio = req->private_bio;
1155 unsigned int type;
1156
1157 if (bio_op(bio) != REQ_OP_READ)
1158 type = DRBD_FAULT_DT_WR;
1159 else if (bio->bi_opf & REQ_RAHEAD)
1160 type = DRBD_FAULT_DT_RA;
1161 else
1162 type = DRBD_FAULT_DT_RD;
1163
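	/* The disk state may have changed since the reference on ->ldev was
	 * taken; double check via get_ldev() and short-circuit to an io-error
	 * completion if the local disk is gone. */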
1169 if (get_ldev(device)) {
1170 if (drbd_insert_fault(device, type))
1171 bio_io_error(bio);
1172 else if (bio_op(bio) == REQ_OP_WRITE_ZEROES)
1173 drbd_process_discard_or_zeroes_req(req, EE_ZEROOUT |
1174 ((bio->bi_opf & REQ_NOUNMAP) ? 0 : EE_TRIM));
1175 else if (bio_op(bio) == REQ_OP_DISCARD)
1176 drbd_process_discard_or_zeroes_req(req, EE_TRIM);
1177 else
1178 submit_bio_noacct(bio);
1179 put_ldev(device);
1180 } else
1181 bio_io_error(bio);
1182 }
1183
1184 static void drbd_queue_write(struct drbd_device *device, struct drbd_request *req)
1185 {
1186 spin_lock_irq(&device->resource->req_lock);
1187 list_add_tail(&req->tl_requests, &device->submit.writes);
	list_add_tail(&req->req_pending_master_completion,
			&device->pending_master_completion[1 /* WRITE */]);
1190 spin_unlock_irq(&device->resource->req_lock);
1191 queue_work(device->submit.wq, &device->submit.worker);
1192
1193 wake_up(&device->al_wait);
1194 }
1195
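
/* Returns the new drbd_request if the caller is expected to call
 * drbd_send_and_submit() itself (to save latency), NULL if the request was
 * queued to the submitter thread, or ERR_PTR(-ENOMEM) on allocation failure. */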
1201 static struct drbd_request *
1202 drbd_request_prepare(struct drbd_device *device, struct bio *bio)
1203 {
1204 const int rw = bio_data_dir(bio);
1205 struct drbd_request *req;
1206
1207
1208 req = drbd_req_new(device, bio);
1209 if (!req) {
1210 dec_ap_bio(device);
1211
1212
1213 drbd_err(device, "could not kmalloc() req\n");
1214 bio->bi_status = BLK_STS_RESOURCE;
1215 bio_endio(bio);
1216 return ERR_PTR(-ENOMEM);
1217 }
1218
1219
1220 req->start_jif = bio_start_io_acct(req->master_bio);
1221
1222 if (!get_ldev(device)) {
1223 bio_put(req->private_bio);
1224 req->private_bio = NULL;
1225 }
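
	/* always process discards and write-zeroes from the submitter thread */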
1228 if (bio_op(bio) == REQ_OP_WRITE_ZEROES ||
1229 bio_op(bio) == REQ_OP_DISCARD)
1230 goto queue_for_submitter_thread;
1231
1232 if (rw == WRITE && req->private_bio && req->i.size
1233 && !test_bit(AL_SUSPENDED, &device->flags)) {
1234 if (!drbd_al_begin_io_fastpath(device, &req->i))
1235 goto queue_for_submitter_thread;
1236 req->rq_state |= RQ_IN_ACT_LOG;
1237 req->in_actlog_jif = jiffies;
1238 }
1239 return req;
1240
1241 queue_for_submitter_thread:
1242 atomic_inc(&device->ap_actlog_cnt);
1243 drbd_queue_write(device, req);
1244 return NULL;
1245 }
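
/* Require at least one path to current data for writes: allowing writes when
 * neither the local disk nor the peer's disk is D_UP_TO_DATE would cause data
 * divergence.  Requests failing this check are completed with an error, or
 * suspended and retried later, depending on the configured policy. */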
1257 static bool may_do_writes(struct drbd_device *device)
1258 {
1259 const union drbd_dev_state s = device->state;
1260 return s.disk == D_UP_TO_DATE || s.pdsk == D_UP_TO_DATE;
1261 }
1262
1263 struct drbd_plug_cb {
1264 struct blk_plug_cb cb;
1265 struct drbd_request *most_recent_req;
1266
1267 };
1268
1269 static void drbd_unplug(struct blk_plug_cb *cb, bool from_schedule)
1270 {
1271 struct drbd_plug_cb *plug = container_of(cb, struct drbd_plug_cb, cb);
1272 struct drbd_resource *resource = plug->cb.data;
1273 struct drbd_request *req = plug->most_recent_req;
1274
1275 kfree(cb);
1276 if (!req)
1277 return;
1278
1279 spin_lock_irq(&resource->req_lock);
1280
1281
1282 req->rq_state |= RQ_UNPLUG;
1283
1284 drbd_queue_unplug(req->device);
1285 kref_put(&req->kref, drbd_req_destroy);
1286 spin_unlock_irq(&resource->req_lock);
1287 }
1288
1289 static struct drbd_plug_cb* drbd_check_plugged(struct drbd_resource *resource)
1290 {
1291
1292
1293 struct drbd_plug_cb *plug;
1294 struct blk_plug_cb *cb = blk_check_plugged(drbd_unplug, resource, sizeof(*plug));
1295
1296 if (cb)
1297 plug = container_of(cb, struct drbd_plug_cb, cb);
1298 else
1299 plug = NULL;
1300 return plug;
1301 }
1302
1303 static void drbd_update_plug(struct drbd_plug_cb *plug, struct drbd_request *req)
1304 {
1305 struct drbd_request *tmp = plug->most_recent_req;
1306
1307
1308 kref_get(&req->kref);
1309 plug->most_recent_req = req;
1310 if (tmp)
1311 kref_put(&tmp->kref, drbd_req_destroy);
1312 }
1313
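/* Take the req_lock, decide whether the request is served by the local disk,
 * the peer, or both, add it to the transfer log, and finally submit the
 * private bio and/or complete the master bio outside the lock. */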
1314 static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request *req)
1315 {
1316 struct drbd_resource *resource = device->resource;
1317 const int rw = bio_data_dir(req->master_bio);
1318 struct bio_and_error m = { NULL, };
1319 bool no_remote = false;
1320 bool submit_private_bio = false;
1321
1322 spin_lock_irq(&resource->req_lock);
1323 if (rw == WRITE) {
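		/* This may temporarily give up the req_lock, but re-acquires it
		 * before returning; it needs to run before the
		 * drbd_suspended() check below. */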
1327 complete_conflicting_writes(req);
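
		/* Check for congestion: possibly stop sending full data updates
		 * and only send out-of-sync ("dirty bit") information instead. */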
1332 maybe_pull_ahead(device);
1333 }
1334
1335
1336 if (drbd_suspended(device)) {
1337
1338 req->rq_state |= RQ_POSTPONED;
1339 if (req->private_bio) {
1340 bio_put(req->private_bio);
1341 req->private_bio = NULL;
1342 put_ldev(device);
1343 }
1344 goto out;
1345 }
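
	/* Fail a READ early if it can be served neither locally nor remotely;
	 * this must happen before the request is put on any list, or
	 * drbd_req_complete() would queue the failed READ for retry. */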
1350 if (rw != WRITE) {
1351 if (!do_remote_read(req) && !req->private_bio)
1352 goto nodata;
1353 }
1354
1355
1356 req->epoch = atomic_read(&first_peer_device(device)->connection->current_tle_nr);
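
	/* No point in adding empty flushes to the transfer log; they are
	 * mapped to DRBD barriers already. */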
1360 if (likely(req->i.size!=0)) {
1361 if (rw == WRITE)
1362 first_peer_device(device)->connection->current_tle_writes++;
1363
1364 list_add_tail(&req->tl_requests, &first_peer_device(device)->connection->transfer_log);
1365 }
1366
1367 if (rw == WRITE) {
1368 if (req->private_bio && !may_do_writes(device)) {
1369 bio_put(req->private_bio);
1370 req->private_bio = NULL;
1371 put_ldev(device);
1372 goto nodata;
1373 }
1374 if (!drbd_process_write_request(req))
1375 no_remote = true;
1376 } else {
1377
1378
1379 if (req->private_bio == NULL) {
1380 _req_mod(req, TO_BE_SENT);
1381 _req_mod(req, QUEUE_FOR_NET_READ);
1382 } else
1383 no_remote = true;
1384 }
1385
1386 if (no_remote == false) {
1387 struct drbd_plug_cb *plug = drbd_check_plugged(resource);
1388 if (plug)
1389 drbd_update_plug(plug, req);
1390 }
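
	/* If the request took the fast path in drbd_request_prepare(), add it
	 * to the pending master completion list here; the submitter-thread
	 * path has added it already. */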
1394 if (list_empty(&req->req_pending_master_completion))
1395 list_add_tail(&req->req_pending_master_completion,
1396 &device->pending_master_completion[rw == WRITE]);
1397 if (req->private_bio) {
1398
1399 req->pre_submit_jif = jiffies;
1400 list_add_tail(&req->req_pending_local,
1401 &device->pending_completion[rw == WRITE]);
1402 _req_mod(req, TO_BE_SUBMITTED);
1403
1404 submit_private_bio = true;
1405 } else if (no_remote) {
1406 nodata:
1407 if (__ratelimit(&drbd_ratelimit_state))
1408 drbd_err(device, "IO ERROR: neither local nor remote data, sector %llu+%u\n",
1409 (unsigned long long)req->i.sector, req->i.size >> 9);
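
		/* A write may still have been queued for send_oos, so do not
		 * free the request here; the completion reference put below
		 * takes care of it. */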
1412 }
1413
1414 out:
1415 drbd_req_put_completion_ref(req, &m, 1);
1416 spin_unlock_irq(&resource->req_lock);
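
	/* As long as the private bio still needs to be submitted we hold a
	 * completion reference, so the request cannot disappear; if there was
	 * neither local nor network work left, it may already be gone by now. */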
1424 if (submit_private_bio)
1425 drbd_submit_req_private_bio(req);
1426 if (m.bio)
1427 complete_master_bio(device, &m);
1428 }
1429
1430 void __drbd_make_request(struct drbd_device *device, struct bio *bio)
1431 {
1432 struct drbd_request *req = drbd_request_prepare(device, bio);
1433 if (IS_ERR_OR_NULL(req))
1434 return;
1435 drbd_send_and_submit(device, req);
1436 }
1437
1438 static void submit_fast_path(struct drbd_device *device, struct list_head *incoming)
1439 {
1440 struct blk_plug plug;
1441 struct drbd_request *req, *tmp;
1442
1443 blk_start_plug(&plug);
1444 list_for_each_entry_safe(req, tmp, incoming, tl_requests) {
1445 const int rw = bio_data_dir(req->master_bio);
1446
1447 if (rw == WRITE
1448 && req->private_bio && req->i.size
1449 && !test_bit(AL_SUSPENDED, &device->flags)) {
1450 if (!drbd_al_begin_io_fastpath(device, &req->i))
1451 continue;
1452
1453 req->rq_state |= RQ_IN_ACT_LOG;
1454 req->in_actlog_jif = jiffies;
1455 atomic_dec(&device->ap_actlog_cnt);
1456 }
1457
1458 list_del_init(&req->tl_requests);
1459 drbd_send_and_submit(device, req);
1460 }
1461 blk_finish_plug(&plug);
1462 }
1463
1464 static bool prepare_al_transaction_nonblock(struct drbd_device *device,
1465 struct list_head *incoming,
1466 struct list_head *pending,
1467 struct list_head *later)
1468 {
1469 struct drbd_request *req;
1470 int wake = 0;
1471 int err;
1472
1473 spin_lock_irq(&device->al_lock);
1474 while ((req = list_first_entry_or_null(incoming, struct drbd_request, tl_requests))) {
1475 err = drbd_al_begin_io_nonblock(device, &req->i);
1476 if (err == -ENOBUFS)
1477 break;
1478 if (err == -EBUSY)
1479 wake = 1;
1480 if (err)
1481 list_move_tail(&req->tl_requests, later);
1482 else
1483 list_move_tail(&req->tl_requests, pending);
1484 }
1485 spin_unlock_irq(&device->al_lock);
1486 if (wake)
1487 wake_up(&device->al_wait);
1488 return !list_empty(pending);
1489 }
1490
1491 static void send_and_submit_pending(struct drbd_device *device, struct list_head *pending)
1492 {
1493 struct blk_plug plug;
1494 struct drbd_request *req;
1495
1496 blk_start_plug(&plug);
1497 while ((req = list_first_entry_or_null(pending, struct drbd_request, tl_requests))) {
1498 req->rq_state |= RQ_IN_ACT_LOG;
1499 req->in_actlog_jif = jiffies;
1500 atomic_dec(&device->ap_actlog_cnt);
1501 list_del_init(&req->tl_requests);
1502 drbd_send_and_submit(device, req);
1503 }
1504 blk_finish_plug(&plug);
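
/* Submitter-thread work function: pull queued writes from
 * device->submit.writes, get their activity log references (batched into AL
 * transactions when the fast path does not apply) and send/submit them. */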
1507 void do_submit(struct work_struct *ws)
1508 {
1509 struct drbd_device *device = container_of(ws, struct drbd_device, submit.worker);
1510 LIST_HEAD(incoming);
1511 LIST_HEAD(pending);
1512 LIST_HEAD(busy);
1513
1514
1515 spin_lock_irq(&device->resource->req_lock);
1516 list_splice_tail_init(&device->submit.writes, &incoming);
1517 spin_unlock_irq(&device->resource->req_lock);
1518
1519 for (;;) {
1520 DEFINE_WAIT(wait);
1521
1522
1523 list_splice_init(&busy, &incoming);
1524 submit_fast_path(device, &incoming);
1525 if (list_empty(&incoming))
1526 break;
1527
1528 for (;;) {
1529 prepare_to_wait(&device->al_wait, &wait, TASK_UNINTERRUPTIBLE);
1530
1531 list_splice_init(&busy, &incoming);
1532 prepare_al_transaction_nonblock(device, &incoming, &pending, &busy);
1533 if (!list_empty(&pending))
1534 break;
1535
1536 schedule();
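
			/* Anything still left on &incoming did not get an
			 * activity log update slot: the AL is "starving".
			 * Retry without pulling in new requests, so that the
			 * hot extents can make progress first. */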
1549 if (!list_empty(&incoming))
1550 continue;
1551
1552
1553
1554
1555 spin_lock_irq(&device->resource->req_lock);
1556 list_splice_tail_init(&device->submit.writes, &incoming);
1557 spin_unlock_irq(&device->resource->req_lock);
1558 }
1559 finish_wait(&device->al_wait, &wait);
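
		/* Everything that got an AL reference is on &pending now.
		 * Opportunistically try to stuff more incoming requests
		 * (queued while the transaction was prepared) into the same AL
		 * transaction, strictly non-blocking, and commit as soon as no
		 * further progress is made. */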
1577 while (list_empty(&incoming)) {
1578 LIST_HEAD(more_pending);
1579 LIST_HEAD(more_incoming);
1580 bool made_progress;
1581
1582
1583
1584 if (list_empty(&device->submit.writes))
1585 break;
1586
1587 spin_lock_irq(&device->resource->req_lock);
1588 list_splice_tail_init(&device->submit.writes, &more_incoming);
1589 spin_unlock_irq(&device->resource->req_lock);
1590
1591 if (list_empty(&more_incoming))
1592 break;
1593
1594 made_progress = prepare_al_transaction_nonblock(device, &more_incoming, &more_pending, &busy);
1595
1596 list_splice_tail_init(&more_pending, &pending);
1597 list_splice_tail_init(&more_incoming, &incoming);
1598 if (!made_progress)
1599 break;
1600 }
1601
1602 drbd_al_begin_io_commit(device);
1603 send_and_submit_pending(device, &pending);
1604 }
1605 }
1606
1607 void drbd_submit_bio(struct bio *bio)
1608 {
1609 struct drbd_device *device = bio->bi_bdev->bd_disk->private_data;
1610
	bio = bio_split_to_limits(bio);
	/* bio_split_to_limits() returns NULL if it already completed (errored)
	 * the bio; nothing left for us to do in that case. */
	if (!bio)
		return;

	/* we rely on bio sizes being a multiple of 512 bytes */
	D_ASSERT(device, IS_ALIGNED(bio->bi_iter.bi_size, 512));
1617
1618 inc_ap_bio(device);
1619 __drbd_make_request(device, bio);
1620 }
1621
1622 static bool net_timeout_reached(struct drbd_request *net_req,
1623 struct drbd_connection *connection,
1624 unsigned long now, unsigned long ent,
1625 unsigned int ko_count, unsigned int timeout)
1626 {
1627 struct drbd_device *device = net_req->device;
1628
1629 if (!time_after(now, net_req->pre_send_jif + ent))
1630 return false;
1631
1632 if (time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent))
1633 return false;
1634
1635 if (net_req->rq_state & RQ_NET_PENDING) {
1636 drbd_warn(device, "Remote failed to finish a request within %ums > ko-count (%u) * timeout (%u * 0.1s)\n",
1637 jiffies_to_msecs(now - net_req->pre_send_jif), ko_count, timeout);
1638 return true;
1639 }
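
	/* We already received the ACK (or run protocol A) and only wait for the
	 * epoch closing barrier ack; do not blame the peer for being
	 * unresponsive if we have not even sent the P_BARRIER yet. */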
1645 if (net_req->epoch == connection->send.current_epoch_nr) {
1646 drbd_warn(device,
1647 "We did not send a P_BARRIER for %ums > ko-count (%u) * timeout (%u * 0.1s); drbd kernel thread blocked?\n",
1648 jiffies_to_msecs(now - net_req->pre_send_jif), ko_count, timeout);
1649 return false;
1650 }
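
	/* The request timestamp may be much older than the still unacknowledged
	 * barrier packet, so judge responsiveness by the time the last
	 * P_BARRIER was sent instead; as long as application requests still
	 * complete, the connection may yet recover. */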
1668 if (time_after(now, connection->send.last_sent_barrier_jif + ent)) {
1669 drbd_warn(device, "Remote failed to answer a P_BARRIER (sent at %lu jif; now=%lu jif) within %ums > ko-count (%u) * timeout (%u * 0.1s)\n",
1670 connection->send.last_sent_barrier_jif, now,
1671 jiffies_to_msecs(now - connection->send.last_sent_barrier_jif), ko_count, timeout);
1672 return true;
1673 }
1674 return false;
1675 }
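
/* A request is considered timed out if
 *  - an effective timeout is configured (ko-count * network timeout, or the
 *    disk-timeout),
 *  - the oldest request has been waiting on the network resp. the local disk
 *    for longer than that timeout, and
 *  - the connection resp. the attached disk has already existed for longer
 *    than the timeout (the time_in_range() checks also catch jiffies
 *    wrap-around). */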
1694 void request_timer_fn(struct timer_list *t)
1695 {
1696 struct drbd_device *device = from_timer(device, t, request_timer);
1697 struct drbd_connection *connection = first_peer_device(device)->connection;
1698 struct drbd_request *req_read, *req_write, *req_peer;
1699 struct net_conf *nc;
1700 unsigned long oldest_submit_jif;
1701 unsigned long ent = 0, dt = 0, et, nt;
1702 unsigned long now;
1703 unsigned int ko_count = 0, timeout = 0;
1704
1705 rcu_read_lock();
1706 nc = rcu_dereference(connection->net_conf);
1707 if (nc && device->state.conn >= C_WF_REPORT_PARAMS) {
1708 ko_count = nc->ko_count;
1709 timeout = nc->timeout;
1710 }
1711
1712 if (get_ldev(device)) {
1713 dt = rcu_dereference(device->ldev->disk_conf)->disk_timeout * HZ / 10;
1714 put_ldev(device);
1715 }
1716 rcu_read_unlock();
1717
1718
1719 ent = timeout * HZ/10 * ko_count;
1720 et = min_not_zero(dt, ent);
1721
1722 if (!et)
1723 return;
1724
1725 now = jiffies;
1726 nt = now + et;
1727
1728 spin_lock_irq(&device->resource->req_lock);
1729 req_read = list_first_entry_or_null(&device->pending_completion[0], struct drbd_request, req_pending_local);
1730 req_write = list_first_entry_or_null(&device->pending_completion[1], struct drbd_request, req_pending_local);
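
	/* Look at the oldest request we successfully sent but which still waits
	 * for an ACK; a request stuck in tcp sendmsg is covered by the socket
	 * send timeout and ko-count handling instead. */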
1740 req_peer = connection->req_ack_pending;
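
	/* If there is no such request (e.g. protocol A), look at the oldest
	 * request still waiting for its epoch closing barrier ack. */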
1745 if (!req_peer)
1746 req_peer = connection->req_not_net_done;
1747
1748
1749 if (req_peer && req_peer->device != device)
1750 req_peer = NULL;
1751
1752
1753 if (req_peer == NULL && req_write == NULL && req_read == NULL)
1754 goto out;
1755
1756 oldest_submit_jif =
1757 (req_write && req_read)
1758 ? ( time_before(req_write->pre_submit_jif, req_read->pre_submit_jif)
1759 ? req_write->pre_submit_jif : req_read->pre_submit_jif )
1760 : req_write ? req_write->pre_submit_jif
1761 : req_read ? req_read->pre_submit_jif : now;
1762
1763 if (ent && req_peer && net_timeout_reached(req_peer, connection, now, ent, ko_count, timeout))
1764 _conn_request_state(connection, NS(conn, C_TIMEOUT), CS_VERBOSE | CS_HARD);
1765
1766 if (dt && oldest_submit_jif != now &&
1767 time_after(now, oldest_submit_jif + dt) &&
1768 !time_in_range(now, device->last_reattach_jif, device->last_reattach_jif + dt)) {
1769 drbd_warn(device, "Local backing device failed to meet the disk-timeout\n");
1770 __drbd_chk_io_error(device, DRBD_FORCE_DETACH);
1771 }
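
	/* Reschedule the timer for the nearest not yet expired timeout; fall
	 * back to now + min(effective network timeout, disk timeout). */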
1775 ent = (ent && req_peer && time_before(now, req_peer->pre_send_jif + ent))
1776 ? req_peer->pre_send_jif + ent : now + et;
1777 dt = (dt && oldest_submit_jif != now && time_before(now, oldest_submit_jif + dt))
1778 ? oldest_submit_jif + dt : now + et;
1779 nt = time_before(ent, dt) ? ent : dt;
1780 out:
1781 spin_unlock_irq(&device->resource->req_lock);
1782 mod_timer(&device->request_timer, nt);
1783 }