/*
 * Network block device - make block devices work over TCP.
 */

0014 #define pr_fmt(fmt) "nbd: " fmt
0015
0016 #include <linux/major.h>
0017
0018 #include <linux/blkdev.h>
0019 #include <linux/module.h>
0020 #include <linux/init.h>
0021 #include <linux/sched.h>
0022 #include <linux/sched/mm.h>
0023 #include <linux/fs.h>
0024 #include <linux/bio.h>
0025 #include <linux/stat.h>
0026 #include <linux/errno.h>
0027 #include <linux/file.h>
0028 #include <linux/ioctl.h>
0029 #include <linux/mutex.h>
0030 #include <linux/compiler.h>
0031 #include <linux/completion.h>
0032 #include <linux/err.h>
0033 #include <linux/kernel.h>
0034 #include <linux/slab.h>
0035 #include <net/sock.h>
0036 #include <linux/net.h>
0037 #include <linux/kthread.h>
0038 #include <linux/types.h>
0039 #include <linux/debugfs.h>
0040 #include <linux/blk-mq.h>
0041
0042 #include <linux/uaccess.h>
0043 #include <asm/types.h>
0044
0045 #include <linux/nbd.h>
0046 #include <linux/nbd-netlink.h>
0047 #include <net/genetlink.h>
0048
0049 #define CREATE_TRACE_POINTS
0050 #include <trace/events/nbd.h>
0051
0052 static DEFINE_IDR(nbd_index_idr);
0053 static DEFINE_MUTEX(nbd_index_mutex);
0054 static struct workqueue_struct *nbd_del_wq;
0055 static int nbd_total_devices = 0;
0056
0057 struct nbd_sock {
0058 struct socket *sock;
0059 struct mutex tx_lock;
0060 struct request *pending;
0061 int sent;
0062 bool dead;
0063 int fallback_index;
0064 int cookie;
0065 };
0066
0067 struct recv_thread_args {
0068 struct work_struct work;
0069 struct nbd_device *nbd;
0070 int index;
0071 };
0072
0073 struct link_dead_args {
0074 struct work_struct work;
0075 int index;
0076 };
0077
0078 #define NBD_RT_TIMEDOUT 0
0079 #define NBD_RT_DISCONNECT_REQUESTED 1
0080 #define NBD_RT_DISCONNECTED 2
0081 #define NBD_RT_HAS_PID_FILE 3
0082 #define NBD_RT_HAS_CONFIG_REF 4
0083 #define NBD_RT_BOUND 5
0084 #define NBD_RT_DISCONNECT_ON_CLOSE 6
0085 #define NBD_RT_HAS_BACKEND_FILE 7
0086
0087 #define NBD_DESTROY_ON_DISCONNECT 0
0088 #define NBD_DISCONNECT_REQUESTED 1
0089
0090 struct nbd_config {
0091 u32 flags;
0092 unsigned long runtime_flags;
0093 u64 dead_conn_timeout;
0094
0095 struct nbd_sock **socks;
0096 int num_connections;
0097 atomic_t live_connections;
0098 wait_queue_head_t conn_wait;
0099
0100 atomic_t recv_threads;
0101 wait_queue_head_t recv_wq;
0102 unsigned int blksize_bits;
0103 loff_t bytesize;
0104 #if IS_ENABLED(CONFIG_DEBUG_FS)
0105 struct dentry *dbg_dir;
0106 #endif
0107 };
0108
0109 static inline unsigned int nbd_blksize(struct nbd_config *config)
0110 {
0111 return 1u << config->blksize_bits;
0112 }
0113
0114 struct nbd_device {
0115 struct blk_mq_tag_set tag_set;
0116
0117 int index;
0118 refcount_t config_refs;
0119 refcount_t refs;
0120 struct nbd_config *config;
0121 struct mutex config_lock;
0122 struct gendisk *disk;
0123 struct workqueue_struct *recv_workq;
0124 struct work_struct remove_work;
0125
0126 struct list_head list;
0127 struct task_struct *task_setup;
0128
0129 unsigned long flags;
0130 pid_t pid;
0131
0132 char *backend;
0133 };
0134
0135 #define NBD_CMD_REQUEUED 1
/*
 * This flag will be set if nbd_queue_rq() succeeds, and will be checked and
 * cleared in completion. Both setting and clearing of the flag are done
 * under cmd->lock.
 */
0141 #define NBD_CMD_INFLIGHT 2
0142
0143 struct nbd_cmd {
0144 struct nbd_device *nbd;
0145 struct mutex lock;
0146 int index;
0147 int cookie;
0148 int retries;
0149 blk_status_t status;
0150 unsigned long flags;
0151 u32 cmd_cookie;
0152 };
0153
0154 #if IS_ENABLED(CONFIG_DEBUG_FS)
0155 static struct dentry *nbd_dbg_dir;
0156 #endif
0157
0158 #define nbd_name(nbd) ((nbd)->disk->disk_name)
0159
0160 #define NBD_MAGIC 0x68797548
0161
0162 #define NBD_DEF_BLKSIZE_BITS 10
0163
0164 static unsigned int nbds_max = 16;
0165 static int max_part = 16;
0166 static int part_shift;
0167
0168 static int nbd_dev_dbg_init(struct nbd_device *nbd);
0169 static void nbd_dev_dbg_close(struct nbd_device *nbd);
0170 static void nbd_config_put(struct nbd_device *nbd);
0171 static void nbd_connect_reply(struct genl_info *info, int index);
0172 static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info);
0173 static void nbd_dead_link_work(struct work_struct *work);
0174 static void nbd_disconnect_and_put(struct nbd_device *nbd);
0175
0176 static inline struct device *nbd_to_dev(struct nbd_device *nbd)
0177 {
0178 return disk_to_dev(nbd->disk);
0179 }
0180
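/*
 * Requeue a command at most once; NBD_CMD_REQUEUED is cleared again in
 * nbd_queue_rq() before the command is handled.
 */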
0181 static void nbd_requeue_cmd(struct nbd_cmd *cmd)
0182 {
0183 struct request *req = blk_mq_rq_from_pdu(cmd);
0184
0185 if (!test_and_set_bit(NBD_CMD_REQUEUED, &cmd->flags))
0186 blk_mq_requeue_request(req, true);
0187 }
0188
0189 #define NBD_COOKIE_BITS 32
0190
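/*
 * A request handle packs the per-command cookie into the upper 32 bits and
 * the unique blk-mq tag into the lower 32 bits, so replies belonging to a
 * previous incarnation of a request can be detected and rejected.
 */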
0191 static u64 nbd_cmd_handle(struct nbd_cmd *cmd)
0192 {
0193 struct request *req = blk_mq_rq_from_pdu(cmd);
0194 u32 tag = blk_mq_unique_tag(req);
0195 u64 cookie = cmd->cmd_cookie;
0196
0197 return (cookie << NBD_COOKIE_BITS) | tag;
0198 }
0199
0200 static u32 nbd_handle_to_tag(u64 handle)
0201 {
0202 return (u32)handle;
0203 }
0204
0205 static u32 nbd_handle_to_cookie(u64 handle)
0206 {
0207 return (u32)(handle >> NBD_COOKIE_BITS);
0208 }
0209
0210 static const char *nbdcmd_to_ascii(int cmd)
0211 {
0212 switch (cmd) {
0213 case NBD_CMD_READ: return "read";
0214 case NBD_CMD_WRITE: return "write";
0215 case NBD_CMD_DISC: return "disconnect";
0216 case NBD_CMD_FLUSH: return "flush";
0217 case NBD_CMD_TRIM: return "trim/discard";
0218 }
0219 return "invalid";
0220 }
0221
0222 static ssize_t pid_show(struct device *dev,
0223 struct device_attribute *attr, char *buf)
0224 {
0225 struct gendisk *disk = dev_to_disk(dev);
0226 struct nbd_device *nbd = (struct nbd_device *)disk->private_data;
0227
0228 return sprintf(buf, "%d\n", nbd->pid);
0229 }
0230
0231 static const struct device_attribute pid_attr = {
0232 .attr = { .name = "pid", .mode = 0444},
0233 .show = pid_show,
0234 };
0235
0236 static ssize_t backend_show(struct device *dev,
0237 struct device_attribute *attr, char *buf)
0238 {
0239 struct gendisk *disk = dev_to_disk(dev);
0240 struct nbd_device *nbd = (struct nbd_device *)disk->private_data;
0241
0242 return sprintf(buf, "%s\n", nbd->backend ?: "");
0243 }
0244
0245 static const struct device_attribute backend_attr = {
0246 .attr = { .name = "backend", .mode = 0444},
0247 .show = backend_show,
0248 };
0249
0250 static void nbd_dev_remove(struct nbd_device *nbd)
0251 {
0252 struct gendisk *disk = nbd->disk;
0253
0254 del_gendisk(disk);
0255 put_disk(disk);
0256 blk_mq_free_tag_set(&nbd->tag_set);

/*
 * Remove from the idr only after del_gendisk() has finished, so that a
 * later add_disk() for the same index can succeed.
 */
0262 mutex_lock(&nbd_index_mutex);
0263 idr_remove(&nbd_index_idr, nbd->index);
0264 mutex_unlock(&nbd_index_mutex);
0265 destroy_workqueue(nbd->recv_workq);
0266 kfree(nbd);
0267 }
0268
0269 static void nbd_dev_remove_work(struct work_struct *work)
0270 {
0271 nbd_dev_remove(container_of(work, struct nbd_device, remove_work));
0272 }
0273
0274 static void nbd_put(struct nbd_device *nbd)
0275 {
0276 if (!refcount_dec_and_test(&nbd->refs))
0277 return;

/* Defer removal to nbd_del_wq for devices destroyed on disconnect. */
0280 if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags))
0281 queue_work(nbd_del_wq, &nbd->remove_work);
0282 else
0283 nbd_dev_remove(nbd);
0284 }
0285
0286 static int nbd_disconnected(struct nbd_config *config)
0287 {
0288 return test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags) ||
0289 test_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags);
0290 }
0291
0292 static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
0293 int notify)
0294 {
0295 if (!nsock->dead && notify && !nbd_disconnected(nbd->config)) {
0296 struct link_dead_args *args;
0297 args = kmalloc(sizeof(struct link_dead_args), GFP_NOIO);
0298 if (args) {
0299 INIT_WORK(&args->work, nbd_dead_link_work);
0300 args->index = nbd->index;
0301 queue_work(system_wq, &args->work);
0302 }
0303 }
0304 if (!nsock->dead) {
0305 kernel_sock_shutdown(nsock->sock, SHUT_RDWR);
0306 if (atomic_dec_return(&nbd->config->live_connections) == 0) {
0307 if (test_and_clear_bit(NBD_RT_DISCONNECT_REQUESTED,
0308 &nbd->config->runtime_flags)) {
0309 set_bit(NBD_RT_DISCONNECTED,
0310 &nbd->config->runtime_flags);
0311 dev_info(nbd_to_dev(nbd),
0312 "Disconnected due to user request.\n");
0313 }
0314 }
0315 }
0316 nsock->dead = true;
0317 nsock->pending = NULL;
0318 nsock->sent = 0;
0319 }
0320
0321 static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
0322 loff_t blksize)
0323 {
0324 if (!blksize)
0325 blksize = 1u << NBD_DEF_BLKSIZE_BITS;
0326
0327 if (blk_validate_block_size(blksize))
0328 return -EINVAL;
0329
0330 nbd->config->bytesize = bytesize;
0331 nbd->config->blksize_bits = __ffs(blksize);
0332
0333 if (!nbd->pid)
0334 return 0;
0335
0336 if (nbd->config->flags & NBD_FLAG_SEND_TRIM) {
0337 nbd->disk->queue->limits.discard_granularity = blksize;
0338 blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
0339 }
0340 blk_queue_logical_block_size(nbd->disk->queue, blksize);
0341 blk_queue_physical_block_size(nbd->disk->queue, blksize);
0342
0343 if (max_part)
0344 set_bit(GD_NEED_PART_SCAN, &nbd->disk->state);
0345 if (!set_capacity_and_notify(nbd->disk, bytesize >> 9))
0346 kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
0347 return 0;
0348 }
0349
0350 static void nbd_complete_rq(struct request *req)
0351 {
0352 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
0353
0354 dev_dbg(nbd_to_dev(cmd->nbd), "request %p: %s\n", req,
0355 cmd->status ? "failed" : "done");
0356
0357 blk_mq_end_request(req, cmd->status);
0358 }
0359
/*
 * Forcibly shut down all sockets, causing any pending receivers to error out.
 */
0363 static void sock_shutdown(struct nbd_device *nbd)
0364 {
0365 struct nbd_config *config = nbd->config;
0366 int i;
0367
0368 if (config->num_connections == 0)
0369 return;
0370 if (test_and_set_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
0371 return;
0372
0373 for (i = 0; i < config->num_connections; i++) {
0374 struct nbd_sock *nsock = config->socks[i];
0375 mutex_lock(&nsock->tx_lock);
0376 nbd_mark_nsock_dead(nbd, nsock, 0);
0377 mutex_unlock(&nsock->tx_lock);
0378 }
0379 dev_warn(disk_to_dev(nbd->disk), "shutting down sockets\n");
0380 }
0381
0382 static u32 req_to_nbd_cmd_type(struct request *req)
0383 {
0384 switch (req_op(req)) {
0385 case REQ_OP_DISCARD:
0386 return NBD_CMD_TRIM;
0387 case REQ_OP_FLUSH:
0388 return NBD_CMD_FLUSH;
0389 case REQ_OP_WRITE:
0390 return NBD_CMD_WRITE;
0391 case REQ_OP_READ:
0392 return NBD_CMD_READ;
0393 default:
0394 return U32_MAX;
0395 }
0396 }
0397
0398 static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req)
0399 {
0400 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
0401 struct nbd_device *nbd = cmd->nbd;
0402 struct nbd_config *config;
0403
0404 if (!mutex_trylock(&cmd->lock))
0405 return BLK_EH_RESET_TIMER;
0406
0407 if (!test_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
0408 mutex_unlock(&cmd->lock);
0409 return BLK_EH_DONE;
0410 }
0411
0412 if (!refcount_inc_not_zero(&nbd->config_refs)) {
0413 cmd->status = BLK_STS_TIMEOUT;
0414 __clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
0415 mutex_unlock(&cmd->lock);
0416 goto done;
0417 }
0418 config = nbd->config;
0419
0420 if (config->num_connections > 1 ||
0421 (config->num_connections == 1 && nbd->tag_set.timeout)) {
0422 dev_err_ratelimited(nbd_to_dev(nbd),
0423 "Connection timed out, retrying (%d/%d alive)\n",
0424 atomic_read(&config->live_connections),
0425 config->num_connections);

/*
 * Either more connections are available or the user configured an
 * explicit timeout: requeue this I/O so the submit path can put it on
 * a live socket, or wait for a reconnect / the dead connection timeout.
 */
0432 if (config->socks) {
0433 if (cmd->index < config->num_connections) {
0434 struct nbd_sock *nsock =
0435 config->socks[cmd->index];
0436 mutex_lock(&nsock->tx_lock);
/*
 * We can have multiple outstanding requests, so only mark this socket
 * dead if the command was sent on its current incarnation (same
 * cookie); a socket that has since been reconnected must not be
 * killed by a stale timeout.
 */
0443 if (cmd->cookie == nsock->cookie)
0444 nbd_mark_nsock_dead(nbd, nsock, 1);
0445 mutex_unlock(&nsock->tx_lock);
0446 }
0447 mutex_unlock(&cmd->lock);
0448 nbd_requeue_cmd(cmd);
0449 nbd_config_put(nbd);
0450 return BLK_EH_DONE;
0451 }
0452 }
0453
0454 if (!nbd->tag_set.timeout) {
/*
 * Userspace set timeout=0 to disable timeout-based failures, so just
 * log the possibly stuck request and keep the timer running.
 */
0459 struct nbd_sock *nsock = config->socks[cmd->index];
0460 cmd->retries++;
0461 dev_info(nbd_to_dev(nbd), "Possible stuck request %p: control (%s@%llu,%uB). Runtime %u seconds\n",
0462 req, nbdcmd_to_ascii(req_to_nbd_cmd_type(req)),
0463 (unsigned long long)blk_rq_pos(req) << 9,
0464 blk_rq_bytes(req), (req->timeout / HZ) * cmd->retries);
0465
0466 mutex_lock(&nsock->tx_lock);
0467 if (cmd->cookie != nsock->cookie) {
0468 nbd_requeue_cmd(cmd);
0469 mutex_unlock(&nsock->tx_lock);
0470 mutex_unlock(&cmd->lock);
0471 nbd_config_put(nbd);
0472 return BLK_EH_DONE;
0473 }
0474 mutex_unlock(&nsock->tx_lock);
0475 mutex_unlock(&cmd->lock);
0476 nbd_config_put(nbd);
0477 return BLK_EH_RESET_TIMER;
0478 }
0479
0480 dev_err_ratelimited(nbd_to_dev(nbd), "Connection timed out\n");
0481 set_bit(NBD_RT_TIMEDOUT, &config->runtime_flags);
0482 cmd->status = BLK_STS_IOERR;
0483 __clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
0484 mutex_unlock(&cmd->lock);
0485 sock_shutdown(nbd);
0486 nbd_config_put(nbd);
0487 done:
0488 blk_mq_complete_request(req);
0489 return BLK_EH_DONE;
0490 }

/*
 * Send or receive a packet on the connection at @index. Returns a positive
 * byte count on success and a negative errno on failure; never returns 0.
 */
0496 static int sock_xmit(struct nbd_device *nbd, int index, int send,
0497 struct iov_iter *iter, int msg_flags, int *sent)
0498 {
0499 struct nbd_config *config = nbd->config;
0500 struct socket *sock = config->socks[index]->sock;
0501 int result;
0502 struct msghdr msg;
0503 unsigned int noreclaim_flag;
0504
0505 if (unlikely(!sock)) {
0506 dev_err_ratelimited(disk_to_dev(nbd->disk),
0507 "Attempted %s on closed socket in sock_xmit\n",
0508 (send ? "send" : "recv"));
0509 return -EINVAL;
0510 }
0511
0512 msg.msg_iter = *iter;
0513
0514 noreclaim_flag = memalloc_noreclaim_save();
0515 do {
0516 sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC;
0517 msg.msg_name = NULL;
0518 msg.msg_namelen = 0;
0519 msg.msg_control = NULL;
0520 msg.msg_controllen = 0;
0521 msg.msg_flags = msg_flags | MSG_NOSIGNAL;
0522
0523 if (send)
0524 result = sock_sendmsg(sock, &msg);
0525 else
0526 result = sock_recvmsg(sock, &msg, msg.msg_flags);
0527
0528 if (result <= 0) {
0529 if (result == 0)
0530 result = -EPIPE;
0531 break;
0532 }
0533 if (sent)
0534 *sent += result;
0535 } while (msg_data_left(&msg));
0536
0537 memalloc_noreclaim_restore(noreclaim_flag);
0538
0539 return result;
0540 }
0541
/*
 * A send that is interrupted by a signal can come back as either
 * -ERESTARTSYS or -EINTR; treat both the same way.
 */
0546 static inline int was_interrupted(int result)
0547 {
0548 return result == -ERESTARTSYS || result == -EINTR;
0549 }

/* Always called with the socket's tx_lock held. */
0552 static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
0553 {
0554 struct request *req = blk_mq_rq_from_pdu(cmd);
0555 struct nbd_config *config = nbd->config;
0556 struct nbd_sock *nsock = config->socks[index];
0557 int result;
0558 struct nbd_request request = {.magic = htonl(NBD_REQUEST_MAGIC)};
0559 struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)};
0560 struct iov_iter from;
0561 unsigned long size = blk_rq_bytes(req);
0562 struct bio *bio;
0563 u64 handle;
0564 u32 type;
0565 u32 nbd_cmd_flags = 0;
0566 int sent = nsock->sent, skip = 0;
0567
0568 iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request));
0569
0570 type = req_to_nbd_cmd_type(req);
0571 if (type == U32_MAX)
0572 return -EIO;
0573
0574 if (rq_data_dir(req) == WRITE &&
0575 (config->flags & NBD_FLAG_READ_ONLY)) {
0576 dev_err_ratelimited(disk_to_dev(nbd->disk),
0577 "Write on read-only\n");
0578 return -EIO;
0579 }
0580
0581 if (req->cmd_flags & REQ_FUA)
0582 nbd_cmd_flags |= NBD_CMD_FLAG_FUA;
0583
/*
 * If a previous send was cut short but at least the whole request header
 * went out, skip straight to sending the remaining page data.
 */
0588 if (sent) {
0589 if (sent >= sizeof(request)) {
0590 skip = sent - sizeof(request);

/* initialize handle for tracing purposes */
0593 handle = nbd_cmd_handle(cmd);
0594
0595 goto send_pages;
0596 }
0597 iov_iter_advance(&from, sent);
0598 } else {
0599 cmd->cmd_cookie++;
0600 }
0601 cmd->index = index;
0602 cmd->cookie = nsock->cookie;
0603 cmd->retries = 0;
0604 request.type = htonl(type | nbd_cmd_flags);
0605 if (type != NBD_CMD_FLUSH) {
0606 request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
0607 request.len = htonl(size);
0608 }
0609 handle = nbd_cmd_handle(cmd);
0610 memcpy(request.handle, &handle, sizeof(handle));
0611
0612 trace_nbd_send_request(&request, nbd->index, blk_mq_rq_from_pdu(cmd));
0613
0614 dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
0615 req, nbdcmd_to_ascii(type),
0616 (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
0617 result = sock_xmit(nbd, index, 1, &from,
0618 (type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent);
0619 trace_nbd_header_sent(req, handle);
0620 if (result < 0) {
0621 if (was_interrupted(result)) {
/*
 * If nothing was sent we can simply report BUSY; if part of the header
 * went out, remember the partial send so only this request is retried
 * on this socket until it is fully transmitted.
 */
0627 if (sent) {
0628 nsock->pending = req;
0629 nsock->sent = sent;
0630 }
0631 set_bit(NBD_CMD_REQUEUED, &cmd->flags);
0632 return BLK_STS_RESOURCE;
0633 }
0634 dev_err_ratelimited(disk_to_dev(nbd->disk),
0635 "Send control failed (result %d)\n", result);
0636 return -EAGAIN;
0637 }
0638 send_pages:
0639 if (type != NBD_CMD_WRITE)
0640 goto out;
0641
0642 bio = req->bio;
0643 while (bio) {
0644 struct bio *next = bio->bi_next;
0645 struct bvec_iter iter;
0646 struct bio_vec bvec;
0647
0648 bio_for_each_segment(bvec, bio, iter) {
0649 bool is_last = !next && bio_iter_last(bvec, iter);
0650 int flags = is_last ? 0 : MSG_MORE;
0651
0652 dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n",
0653 req, bvec.bv_len);
0654 iov_iter_bvec(&from, WRITE, &bvec, 1, bvec.bv_len);
0655 if (skip) {
0656 if (skip >= iov_iter_count(&from)) {
0657 skip -= iov_iter_count(&from);
0658 continue;
0659 }
0660 iov_iter_advance(&from, skip);
0661 skip = 0;
0662 }
0663 result = sock_xmit(nbd, index, 1, &from, flags, &sent);
0664 if (result < 0) {
0665 if (was_interrupted(result)) {
/*
 * The header has already been sent, so we have no choice but to
 * record the partial send and report BUSY.
 */
0670 nsock->pending = req;
0671 nsock->sent = sent;
0672 set_bit(NBD_CMD_REQUEUED, &cmd->flags);
0673 return BLK_STS_RESOURCE;
0674 }
0675 dev_err(disk_to_dev(nbd->disk),
0676 "Send data failed (result %d)\n",
0677 result);
0678 return -EAGAIN;
0679 }

/*
 * The completion might already have come in, so break out on the
 * last segment instead of letting the iterator advance again; this
 * avoids touching a bio that may already be gone.
 */
0686 if (is_last)
0687 break;
0688 }
0689 bio = next;
0690 }
0691 out:
0692 trace_nbd_payload_sent(req, handle);
0693 nsock->pending = NULL;
0694 nsock->sent = 0;
0695 return 0;
0696 }
0697
0698 static int nbd_read_reply(struct nbd_device *nbd, int index,
0699 struct nbd_reply *reply)
0700 {
0701 struct kvec iov = {.iov_base = reply, .iov_len = sizeof(*reply)};
0702 struct iov_iter to;
0703 int result;
0704
0705 reply->magic = 0;
0706 iov_iter_kvec(&to, READ, &iov, 1, sizeof(*reply));
0707 result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
0708 if (result < 0) {
0709 if (!nbd_disconnected(nbd->config))
0710 dev_err(disk_to_dev(nbd->disk),
0711 "Receive control failed (result %d)\n", result);
0712 return result;
0713 }
0714
0715 if (ntohl(reply->magic) != NBD_REPLY_MAGIC) {
0716 dev_err(disk_to_dev(nbd->disk), "Wrong magic (0x%lx)\n",
0717 (unsigned long)ntohl(reply->magic));
0718 return -EPROTO;
0719 }
0720
0721 return 0;
0722 }

/* Match a reply to its inflight request; returns an ERR_PTR() on any mismatch. */
0725 static struct nbd_cmd *nbd_handle_reply(struct nbd_device *nbd, int index,
0726 struct nbd_reply *reply)
0727 {
0728 int result;
0729 struct nbd_cmd *cmd;
0730 struct request *req = NULL;
0731 u64 handle;
0732 u16 hwq;
0733 u32 tag;
0734 int ret = 0;
0735
0736 memcpy(&handle, reply->handle, sizeof(handle));
0737 tag = nbd_handle_to_tag(handle);
0738 hwq = blk_mq_unique_tag_to_hwq(tag);
0739 if (hwq < nbd->tag_set.nr_hw_queues)
0740 req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq],
0741 blk_mq_unique_tag_to_tag(tag));
0742 if (!req || !blk_mq_request_started(req)) {
0743 dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%d) %p\n",
0744 tag, req);
0745 return ERR_PTR(-ENOENT);
0746 }
0747 trace_nbd_header_received(req, handle);
0748 cmd = blk_mq_rq_to_pdu(req);
0749
0750 mutex_lock(&cmd->lock);
0751 if (!test_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
0752 dev_err(disk_to_dev(nbd->disk), "Suspicious reply %d (status %u flags %lu)",
0753 tag, cmd->status, cmd->flags);
0754 ret = -ENOENT;
0755 goto out;
0756 }
0757 if (cmd->index != index) {
0758 dev_err(disk_to_dev(nbd->disk), "Unexpected reply %d from different sock %d (expected %d)",
0759 tag, index, cmd->index);
0760 ret = -ENOENT;
0761 goto out;
0762 }
0763 if (cmd->cmd_cookie != nbd_handle_to_cookie(handle)) {
0764 dev_err(disk_to_dev(nbd->disk), "Double reply on req %p, cmd_cookie %u, handle cookie %u\n",
0765 req, cmd->cmd_cookie, nbd_handle_to_cookie(handle));
0766 ret = -ENOENT;
0767 goto out;
0768 }
0769 if (cmd->status != BLK_STS_OK) {
0770 dev_err(disk_to_dev(nbd->disk), "Command already handled %p\n",
0771 req);
0772 ret = -ENOENT;
0773 goto out;
0774 }
0775 if (test_bit(NBD_CMD_REQUEUED, &cmd->flags)) {
0776 dev_err(disk_to_dev(nbd->disk), "Raced with timeout on req %p\n",
0777 req);
0778 ret = -ENOENT;
0779 goto out;
0780 }
0781 if (ntohl(reply->error)) {
0782 dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
0783 ntohl(reply->error));
0784 cmd->status = BLK_STS_IOERR;
0785 goto out;
0786 }
0787
0788 dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", req);
0789 if (rq_data_dir(req) != WRITE) {
0790 struct req_iterator iter;
0791 struct bio_vec bvec;
0792 struct iov_iter to;
0793
0794 rq_for_each_segment(bvec, req, iter) {
0795 iov_iter_bvec(&to, READ, &bvec, 1, bvec.bv_len);
0796 result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
0797 if (result < 0) {
0798 dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
0799 result);

/*
 * If we have been disconnected, fail the request here so it still
 * completes; otherwise return an error and let the timeout handling
 * retry it on another connection.
 */
0806 if (nbd_disconnected(nbd->config)) {
0807 cmd->status = BLK_STS_IOERR;
0808 goto out;
0809 }
0810 ret = -EIO;
0811 goto out;
0812 }
0813 dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n",
0814 req, bvec.bv_len);
0815 }
0816 }
0817 out:
0818 trace_nbd_payload_received(req, handle);
0819 mutex_unlock(&cmd->lock);
0820 return ret ? ERR_PTR(ret) : cmd;
0821 }
0822
0823 static void recv_work(struct work_struct *work)
0824 {
0825 struct recv_thread_args *args = container_of(work,
0826 struct recv_thread_args,
0827 work);
0828 struct nbd_device *nbd = args->nbd;
0829 struct nbd_config *config = nbd->config;
0830 struct request_queue *q = nbd->disk->queue;
0831 struct nbd_sock *nsock;
0832 struct nbd_cmd *cmd;
0833 struct request *rq;
0834
0835 while (1) {
0836 struct nbd_reply reply;
0837
0838 if (nbd_read_reply(nbd, args->index, &reply))
0839 break;

/*
 * Grab .q_usage_counter so the request pool won't go away and no
 * request use-after-free is possible during nbd_handle_reply(). If
 * the queue is frozen there are no inflight requests, so the incoming
 * message must be garbage and we can stop.
 */
0847 if (!percpu_ref_tryget(&q->q_usage_counter)) {
0848 dev_err(disk_to_dev(nbd->disk), "%s: no io inflight\n",
0849 __func__);
0850 break;
0851 }
0852
0853 cmd = nbd_handle_reply(nbd, args->index, &reply);
0854 if (IS_ERR(cmd)) {
0855 percpu_ref_put(&q->q_usage_counter);
0856 break;
0857 }
0858
0859 rq = blk_mq_rq_from_pdu(cmd);
0860 if (likely(!blk_should_fake_timeout(rq->q))) {
0861 bool complete;
0862
0863 mutex_lock(&cmd->lock);
0864 complete = __test_and_clear_bit(NBD_CMD_INFLIGHT,
0865 &cmd->flags);
0866 mutex_unlock(&cmd->lock);
0867 if (complete)
0868 blk_mq_complete_request(rq);
0869 }
0870 percpu_ref_put(&q->q_usage_counter);
0871 }
0872
0873 nsock = config->socks[args->index];
0874 mutex_lock(&nsock->tx_lock);
0875 nbd_mark_nsock_dead(nbd, nsock, 1);
0876 mutex_unlock(&nsock->tx_lock);
0877
0878 nbd_config_put(nbd);
0879 atomic_dec(&config->recv_threads);
0880 wake_up(&config->recv_wq);
0881 kfree(args);
0882 }
0883
0884 static bool nbd_clear_req(struct request *req, void *data)
0885 {
0886 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);

/* Don't abort requests that have already completed. */
0889 if (blk_mq_request_completed(req))
0890 return true;
0891
0892 mutex_lock(&cmd->lock);
0893 if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
0894 mutex_unlock(&cmd->lock);
0895 return true;
0896 }
0897 cmd->status = BLK_STS_IOERR;
0898 mutex_unlock(&cmd->lock);
0899
0900 blk_mq_complete_request(req);
0901 return true;
0902 }
0903
0904 static void nbd_clear_que(struct nbd_device *nbd)
0905 {
0906 blk_mq_quiesce_queue(nbd->disk->queue);
0907 blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL);
0908 blk_mq_unquiesce_queue(nbd->disk->queue);
0909 dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n");
0910 }
0911
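/*
 * Pick another live connection to use when the socket at @index has died.
 * Returns the replacement index, or -1 if no usable connection remains.
 */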
0912 static int find_fallback(struct nbd_device *nbd, int index)
0913 {
0914 struct nbd_config *config = nbd->config;
0915 int new_index = -1;
0916 struct nbd_sock *nsock = config->socks[index];
0917 int fallback = nsock->fallback_index;
0918
0919 if (test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
0920 return new_index;
0921
0922 if (config->num_connections <= 1) {
0923 dev_err_ratelimited(disk_to_dev(nbd->disk),
0924 "Dead connection, failed to find a fallback\n");
0925 return new_index;
0926 }
0927
0928 if (fallback >= 0 && fallback < config->num_connections &&
0929 !config->socks[fallback]->dead)
0930 return fallback;
0931
0932 if (nsock->fallback_index < 0 ||
0933 nsock->fallback_index >= config->num_connections ||
0934 config->socks[nsock->fallback_index]->dead) {
0935 int i;
0936 for (i = 0; i < config->num_connections; i++) {
0937 if (i == index)
0938 continue;
0939 if (!config->socks[i]->dead) {
0940 new_index = i;
0941 break;
0942 }
0943 }
0944 nsock->fallback_index = new_index;
0945 if (new_index < 0) {
0946 dev_err_ratelimited(disk_to_dev(nbd->disk),
0947 "Dead connection, failed to find a fallback\n");
0948 return new_index;
0949 }
0950 }
0951 new_index = nsock->fallback_index;
0952 return new_index;
0953 }
0954
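/*
 * Wait up to dead_conn_timeout for a connection to come back. Returns
 * non-zero if a live connection appeared; 0 if no timeout is configured,
 * the wait timed out, or the device was disconnected.
 */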
0955 static int wait_for_reconnect(struct nbd_device *nbd)
0956 {
0957 struct nbd_config *config = nbd->config;
0958 if (!config->dead_conn_timeout)
0959 return 0;
0960
0961 if (!wait_event_timeout(config->conn_wait,
0962 test_bit(NBD_RT_DISCONNECTED,
0963 &config->runtime_flags) ||
0964 atomic_read(&config->live_connections) > 0,
0965 config->dead_conn_timeout))
0966 return 0;
0967
0968 return !test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags);
0969 }
0970
0971 static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
0972 {
0973 struct request *req = blk_mq_rq_from_pdu(cmd);
0974 struct nbd_device *nbd = cmd->nbd;
0975 struct nbd_config *config;
0976 struct nbd_sock *nsock;
0977 int ret;
0978
0979 if (!refcount_inc_not_zero(&nbd->config_refs)) {
0980 dev_err_ratelimited(disk_to_dev(nbd->disk),
0981 "Socks array is empty\n");
0982 return -EINVAL;
0983 }
0984 config = nbd->config;
0985
0986 if (index >= config->num_connections) {
0987 dev_err_ratelimited(disk_to_dev(nbd->disk),
0988 "Attempted send on invalid socket\n");
0989 nbd_config_put(nbd);
0990 return -EINVAL;
0991 }
0992 cmd->status = BLK_STS_OK;
0993 again:
0994 nsock = config->socks[index];
0995 mutex_lock(&nsock->tx_lock);
0996 if (nsock->dead) {
0997 int old_index = index;
0998 index = find_fallback(nbd, index);
0999 mutex_unlock(&nsock->tx_lock);
1000 if (index < 0) {
1001 if (wait_for_reconnect(nbd)) {
1002 index = old_index;
1003 goto again;
1004 }

/*
 * All the sockets should already be down at this point; make sure
 * DISCONNECTED is set so requests that were waiting on the reconnect
 * timer error out instead of re-arming it.
 */
1011 sock_shutdown(nbd);
1012 nbd_config_put(nbd);
1013 return -EIO;
1014 }
1015 goto again;
1016 }
1017
/*
 * Handle the case of a partially transmitted pending request that has
 * to be serviced first: requeue everything else so it lands after the
 * request already on the dispatch list.
 */
1023 blk_mq_start_request(req);
1024 if (unlikely(nsock->pending && nsock->pending != req)) {
1025 nbd_requeue_cmd(cmd);
1026 ret = 0;
1027 goto out;
1028 }
/*
 * Some failures are related to the link going down, so anything that
 * returns EAGAIN can be retried on a different socket.
 */
1033 ret = nbd_send_cmd(nbd, cmd, index);
/*
 * NBD_CMD_INFLIGHT is protected by cmd->lock, so it is safe to set it
 * only after nbd_send_cmd() has succeeded.
 */
1038 if (!ret)
1039 __set_bit(NBD_CMD_INFLIGHT, &cmd->flags);
1040 else if (ret == -EAGAIN) {
1041 dev_err_ratelimited(disk_to_dev(nbd->disk),
1042 "Request send failed, requeueing\n");
1043 nbd_mark_nsock_dead(nbd, nsock, 1);
1044 nbd_requeue_cmd(cmd);
1045 ret = 0;
1046 }
1047 out:
1048 mutex_unlock(&nsock->tx_lock);
1049 nbd_config_put(nbd);
1050 return ret;
1051 }
1052
1053 static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
1054 const struct blk_mq_queue_data *bd)
1055 {
1056 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
1057 int ret;
1058

/*
 * The send path walks the request's bios, so the completion side must not
 * finish this request while we are still transmitting it. Taking cmd->lock
 * here serializes queueing against completion and against stale or
 * duplicate replies from the server, and protects the REQUEUED and
 * INFLIGHT bits below.
 */
1068 mutex_lock(&cmd->lock);
1069 clear_bit(NBD_CMD_REQUEUED, &cmd->flags);

/*
 * We can be called directly from the submitting process, which may have
 * signals pending, so the sendmsg can fail with -EINTR/-ERESTARTSYS; in
 * that case BLK_STS_RESOURCE is returned so the request is retried
 * rather than failed.
 */
1076 ret = nbd_handle_cmd(cmd, hctx->queue_num);
1077 if (ret < 0)
1078 ret = BLK_STS_IOERR;
1079 else if (!ret)
1080 ret = BLK_STS_OK;
1081 mutex_unlock(&cmd->lock);
1082
1083 return ret;
1084 }
1085
1086 static struct socket *nbd_get_socket(struct nbd_device *nbd, unsigned long fd,
1087 int *err)
1088 {
1089 struct socket *sock;
1090
1091 *err = 0;
1092 sock = sockfd_lookup(fd, err);
1093 if (!sock)
1094 return NULL;
1095
1096 if (sock->ops->shutdown == sock_no_shutdown) {
1097 dev_err(disk_to_dev(nbd->disk), "Unsupported socket: shutdown callout must be supported.\n");
1098 *err = -EINVAL;
1099 sockfd_put(sock);
1100 return NULL;
1101 }
1102
1103 return sock;
1104 }
1105
1106 static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
1107 bool netlink)
1108 {
1109 struct nbd_config *config = nbd->config;
1110 struct socket *sock;
1111 struct nbd_sock **socks;
1112 struct nbd_sock *nsock;
1113 int err;
1114
1115 sock = nbd_get_socket(nbd, arg, &err);
1116 if (!sock)
1117 return err;
1118
/*
 * Freeze the queue so no requests are issued while we reallocate the
 * ->socks array.
 */
1123 blk_mq_freeze_queue(nbd->disk->queue);
1124
1125 if (!netlink && !nbd->task_setup &&
1126 !test_bit(NBD_RT_BOUND, &config->runtime_flags))
1127 nbd->task_setup = current;
1128
1129 if (!netlink &&
1130 (nbd->task_setup != current ||
1131 test_bit(NBD_RT_BOUND, &config->runtime_flags))) {
1132 dev_err(disk_to_dev(nbd->disk),
1133 "Device being setup by another task");
1134 err = -EBUSY;
1135 goto put_socket;
1136 }
1137
1138 nsock = kzalloc(sizeof(*nsock), GFP_KERNEL);
1139 if (!nsock) {
1140 err = -ENOMEM;
1141 goto put_socket;
1142 }
1143
1144 socks = krealloc(config->socks, (config->num_connections + 1) *
1145 sizeof(struct nbd_sock *), GFP_KERNEL);
1146 if (!socks) {
1147 kfree(nsock);
1148 err = -ENOMEM;
1149 goto put_socket;
1150 }
1151
1152 config->socks = socks;
1153
1154 nsock->fallback_index = -1;
1155 nsock->dead = false;
1156 mutex_init(&nsock->tx_lock);
1157 nsock->sock = sock;
1158 nsock->pending = NULL;
1159 nsock->sent = 0;
1160 nsock->cookie = 0;
1161 socks[config->num_connections++] = nsock;
1162 atomic_inc(&config->live_connections);
1163 blk_mq_unfreeze_queue(nbd->disk->queue);
1164
1165 return 0;
1166
1167 put_socket:
1168 blk_mq_unfreeze_queue(nbd->disk->queue);
1169 sockfd_put(sock);
1170 return err;
1171 }
1172
1173 static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
1174 {
1175 struct nbd_config *config = nbd->config;
1176 struct socket *sock, *old;
1177 struct recv_thread_args *args;
1178 int i;
1179 int err;
1180
1181 sock = nbd_get_socket(nbd, arg, &err);
1182 if (!sock)
1183 return err;
1184
1185 args = kzalloc(sizeof(*args), GFP_KERNEL);
1186 if (!args) {
1187 sockfd_put(sock);
1188 return -ENOMEM;
1189 }
1190
1191 for (i = 0; i < config->num_connections; i++) {
1192 struct nbd_sock *nsock = config->socks[i];
1193
1194 if (!nsock->dead)
1195 continue;
1196
1197 mutex_lock(&nsock->tx_lock);
1198 if (!nsock->dead) {
1199 mutex_unlock(&nsock->tx_lock);
1200 continue;
1201 }
1202 sk_set_memalloc(sock->sk);
1203 if (nbd->tag_set.timeout)
1204 sock->sk->sk_sndtimeo = nbd->tag_set.timeout;
1205 atomic_inc(&config->recv_threads);
1206 refcount_inc(&nbd->config_refs);
1207 old = nsock->sock;
1208 nsock->fallback_index = -1;
1209 nsock->sock = sock;
1210 nsock->dead = false;
1211 INIT_WORK(&args->work, recv_work);
1212 args->index = i;
1213 args->nbd = nbd;
1214 nsock->cookie++;
1215 mutex_unlock(&nsock->tx_lock);
1216 sockfd_put(old);
1217
1218 clear_bit(NBD_RT_DISCONNECTED, &config->runtime_flags);

/*
 * recv_work takes tx_lock in its error path, so the work must be
 * queued only after tx_lock has been dropped.
 */
1223 queue_work(nbd->recv_workq, &args->work);
1224
1225 atomic_inc(&config->live_connections);
1226 wake_up(&config->conn_wait);
1227 return 0;
1228 }
1229 sockfd_put(sock);
1230 kfree(args);
1231 return -ENOSPC;
1232 }
1233
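/* Reset the device size, but only if we are the last opener of the disk. */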
1234 static void nbd_bdev_reset(struct nbd_device *nbd)
1235 {
1236 if (disk_openers(nbd->disk) > 1)
1237 return;
1238 set_capacity(nbd->disk, 0);
1239 }
1240
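/* Translate the server's transmission flags into block layer settings. */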
1241 static void nbd_parse_flags(struct nbd_device *nbd)
1242 {
1243 struct nbd_config *config = nbd->config;
1244 if (config->flags & NBD_FLAG_READ_ONLY)
1245 set_disk_ro(nbd->disk, true);
1246 else
1247 set_disk_ro(nbd->disk, false);
1248 if (config->flags & NBD_FLAG_SEND_FLUSH) {
1249 if (config->flags & NBD_FLAG_SEND_FUA)
1250 blk_queue_write_cache(nbd->disk->queue, true, true);
1251 else
1252 blk_queue_write_cache(nbd->disk->queue, true, false);
1253 }
1254 else
1255 blk_queue_write_cache(nbd->disk->queue, false, false);
1256 }
1257
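/* Send NBD_CMD_DISC on every connection, asking the server to close them. */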
1258 static void send_disconnects(struct nbd_device *nbd)
1259 {
1260 struct nbd_config *config = nbd->config;
1261 struct nbd_request request = {
1262 .magic = htonl(NBD_REQUEST_MAGIC),
1263 .type = htonl(NBD_CMD_DISC),
1264 };
1265 struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)};
1266 struct iov_iter from;
1267 int i, ret;
1268
1269 for (i = 0; i < config->num_connections; i++) {
1270 struct nbd_sock *nsock = config->socks[i];
1271
1272 iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request));
1273 mutex_lock(&nsock->tx_lock);
1274 ret = sock_xmit(nbd, i, 1, &from, 0, NULL);
1275 if (ret < 0)
1276 dev_err(disk_to_dev(nbd->disk),
1277 "Send disconnect failed %d\n", ret);
1278 mutex_unlock(&nsock->tx_lock);
1279 }
1280 }
1281
1282 static int nbd_disconnect(struct nbd_device *nbd)
1283 {
1284 struct nbd_config *config = nbd->config;
1285
1286 dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
1287 set_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags);
1288 set_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags);
1289 send_disconnects(nbd);
1290 return 0;
1291 }
1292
1293 static void nbd_clear_sock(struct nbd_device *nbd)
1294 {
1295 sock_shutdown(nbd);
1296 nbd_clear_que(nbd);
1297 nbd->task_setup = NULL;
1298 }
1299
1300 static void nbd_config_put(struct nbd_device *nbd)
1301 {
1302 if (refcount_dec_and_mutex_lock(&nbd->config_refs,
1303 &nbd->config_lock)) {
1304 struct nbd_config *config = nbd->config;
1305 nbd_dev_dbg_close(nbd);
1306 invalidate_disk(nbd->disk);
1307 if (nbd->config->bytesize)
1308 kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
1309 if (test_and_clear_bit(NBD_RT_HAS_PID_FILE,
1310 &config->runtime_flags))
1311 device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
1312 nbd->pid = 0;
1313 if (test_and_clear_bit(NBD_RT_HAS_BACKEND_FILE,
1314 &config->runtime_flags)) {
1315 device_remove_file(disk_to_dev(nbd->disk), &backend_attr);
1316 kfree(nbd->backend);
1317 nbd->backend = NULL;
1318 }
1319 nbd_clear_sock(nbd);
1320 if (config->num_connections) {
1321 int i;
1322 for (i = 0; i < config->num_connections; i++) {
1323 sockfd_put(config->socks[i]->sock);
1324 kfree(config->socks[i]);
1325 }
1326 kfree(config->socks);
1327 }
1328 kfree(nbd->config);
1329 nbd->config = NULL;
1330
1331 nbd->tag_set.timeout = 0;
1332 nbd->disk->queue->limits.discard_granularity = 0;
1333 blk_queue_max_discard_sectors(nbd->disk->queue, 0);
1334
1335 mutex_unlock(&nbd->config_lock);
1336 nbd_put(nbd);
1337 module_put(THIS_MODULE);
1338 }
1339 }
1340
1341 static int nbd_start_device(struct nbd_device *nbd)
1342 {
1343 struct nbd_config *config = nbd->config;
1344 int num_connections = config->num_connections;
1345 int error = 0, i;
1346
1347 if (nbd->pid)
1348 return -EBUSY;
1349 if (!config->socks)
1350 return -EINVAL;
1351 if (num_connections > 1 &&
1352 !(config->flags & NBD_FLAG_CAN_MULTI_CONN)) {
1353 dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n");
1354 return -EINVAL;
1355 }
1356
1357 blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections);
1358 nbd->pid = task_pid_nr(current);
1359
1360 nbd_parse_flags(nbd);
1361
1362 error = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
1363 if (error) {
1364 dev_err(disk_to_dev(nbd->disk), "device_create_file failed for pid!\n");
1365 return error;
1366 }
1367 set_bit(NBD_RT_HAS_PID_FILE, &config->runtime_flags);
1368
1369 nbd_dev_dbg_init(nbd);
1370 for (i = 0; i < num_connections; i++) {
1371 struct recv_thread_args *args;
1372
1373 args = kzalloc(sizeof(*args), GFP_KERNEL);
1374 if (!args) {
1375 sock_shutdown(nbd);

/*
 * If this is not the first connection, receive threads for the earlier
 * sockets are already running. Flush them here so they drop their
 * extra config references before we return with an error.
 */
1384 if (i)
1385 flush_workqueue(nbd->recv_workq);
1386 return -ENOMEM;
1387 }
1388 sk_set_memalloc(config->socks[i]->sock->sk);
1389 if (nbd->tag_set.timeout)
1390 config->socks[i]->sock->sk->sk_sndtimeo =
1391 nbd->tag_set.timeout;
1392 atomic_inc(&config->recv_threads);
1393 refcount_inc(&nbd->config_refs);
1394 INIT_WORK(&args->work, recv_work);
1395 args->nbd = nbd;
1396 args->index = i;
1397 queue_work(nbd->recv_workq, &args->work);
1398 }
1399 return nbd_set_size(nbd, config->bytesize, nbd_blksize(config));
1400 }
1401
1402 static int nbd_start_device_ioctl(struct nbd_device *nbd)
1403 {
1404 struct nbd_config *config = nbd->config;
1405 int ret;
1406
1407 ret = nbd_start_device(nbd);
1408 if (ret)
1409 return ret;
1410
1411 if (max_part)
1412 set_bit(GD_NEED_PART_SCAN, &nbd->disk->state);
1413 mutex_unlock(&nbd->config_lock);
1414 ret = wait_event_interruptible(config->recv_wq,
1415 atomic_read(&config->recv_threads) == 0);
1416 if (ret)
1417 sock_shutdown(nbd);
1418 flush_workqueue(nbd->recv_workq);
1419
1420 mutex_lock(&nbd->config_lock);
1421 nbd_bdev_reset(nbd);
1422
1423 if (test_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags))
1424 ret = 0;
1425 if (test_bit(NBD_RT_TIMEDOUT, &config->runtime_flags))
1426 ret = -ETIMEDOUT;
1427 return ret;
1428 }
1429
1430 static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
1431 struct block_device *bdev)
1432 {
1433 nbd_clear_sock(nbd);
1434 __invalidate_device(bdev, true);
1435 nbd_bdev_reset(nbd);
1436 if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
1437 &nbd->config->runtime_flags))
1438 nbd_config_put(nbd);
1439 }
1440
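/*
 * A timeout of 0 disables the connection-failure handling; the queue still
 * gets a default 30 second block-layer timeout so stuck requests get logged.
 */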
1441 static void nbd_set_cmd_timeout(struct nbd_device *nbd, u64 timeout)
1442 {
1443 nbd->tag_set.timeout = timeout * HZ;
1444 if (timeout)
1445 blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ);
1446 else
1447 blk_queue_rq_timeout(nbd->disk->queue, 30 * HZ);
1448 }
1449
/* Must be called with config_lock held. */
1451 static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
1452 unsigned int cmd, unsigned long arg)
1453 {
1454 struct nbd_config *config = nbd->config;
1455 loff_t bytesize;
1456
1457 switch (cmd) {
1458 case NBD_DISCONNECT:
1459 return nbd_disconnect(nbd);
1460 case NBD_CLEAR_SOCK:
1461 nbd_clear_sock_ioctl(nbd, bdev);
1462 return 0;
1463 case NBD_SET_SOCK:
1464 return nbd_add_socket(nbd, arg, false);
1465 case NBD_SET_BLKSIZE:
1466 return nbd_set_size(nbd, config->bytesize, arg);
1467 case NBD_SET_SIZE:
1468 return nbd_set_size(nbd, arg, nbd_blksize(config));
1469 case NBD_SET_SIZE_BLOCKS:
1470 if (check_shl_overflow(arg, config->blksize_bits, &bytesize))
1471 return -EINVAL;
1472 return nbd_set_size(nbd, bytesize, nbd_blksize(config));
1473 case NBD_SET_TIMEOUT:
1474 nbd_set_cmd_timeout(nbd, arg);
1475 return 0;
1476
1477 case NBD_SET_FLAGS:
1478 config->flags = arg;
1479 return 0;
1480 case NBD_DO_IT:
1481 return nbd_start_device_ioctl(nbd);
1482 case NBD_CLEAR_QUE:
/*
 * Kept for compatibility only: the queue is always cleared by
 * NBD_DO_IT or NBD_CLEAR_SOCK.
 */
1487 return 0;
1488 case NBD_PRINT_DEBUG:
/*
 * Kept for compatibility only: we no longer keep a list of
 * outstanding requests to print.
 */
1493 return 0;
1494 }
1495 return -ENOTTY;
1496 }
1497
1498 static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
1499 unsigned int cmd, unsigned long arg)
1500 {
1501 struct nbd_device *nbd = bdev->bd_disk->private_data;
1502 struct nbd_config *config = nbd->config;
1503 int error = -EINVAL;
1504
1505 if (!capable(CAP_SYS_ADMIN))
1506 return -EPERM;

/*
 * The block layer can pass through ioctls that are not ours; only
 * handle commands in the NBD ioctl range (type 0xab).
 */
1511 if (_IOC_TYPE(cmd) != 0xab)
1512 return -EINVAL;
1513
1514 mutex_lock(&nbd->config_lock);
/*
 * Don't allow ioctl configuration of a device that is controlled via
 * netlink, except for NBD_DISCONNECT and NBD_CLEAR_SOCK.
 */
1519 if (!test_bit(NBD_RT_BOUND, &config->runtime_flags) ||
1520 (cmd == NBD_DISCONNECT || cmd == NBD_CLEAR_SOCK))
1521 error = __nbd_ioctl(bdev, nbd, cmd, arg);
1522 else
1523 dev_err(nbd_to_dev(nbd), "Cannot use ioctl interface on a netlink controlled device.\n");
1524 mutex_unlock(&nbd->config_lock);
1525 return error;
1526 }
1527
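/*
 * Allocate a fresh runtime config and take a module reference; the reference
 * is dropped again in nbd_config_put() when the last config ref goes away.
 */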
1528 static struct nbd_config *nbd_alloc_config(void)
1529 {
1530 struct nbd_config *config;
1531
1532 if (!try_module_get(THIS_MODULE))
1533 return ERR_PTR(-ENODEV);
1534
1535 config = kzalloc(sizeof(struct nbd_config), GFP_NOFS);
1536 if (!config) {
1537 module_put(THIS_MODULE);
1538 return ERR_PTR(-ENOMEM);
1539 }
1540
1541 atomic_set(&config->recv_threads, 0);
1542 init_waitqueue_head(&config->recv_wq);
1543 init_waitqueue_head(&config->conn_wait);
1544 config->blksize_bits = NBD_DEF_BLKSIZE_BITS;
1545 atomic_set(&config->live_connections, 0);
1546 return config;
1547 }
1548
1549 static int nbd_open(struct block_device *bdev, fmode_t mode)
1550 {
1551 struct nbd_device *nbd;
1552 int ret = 0;
1553
1554 mutex_lock(&nbd_index_mutex);
1555 nbd = bdev->bd_disk->private_data;
1556 if (!nbd) {
1557 ret = -ENXIO;
1558 goto out;
1559 }
1560 if (!refcount_inc_not_zero(&nbd->refs)) {
1561 ret = -ENXIO;
1562 goto out;
1563 }
1564 if (!refcount_inc_not_zero(&nbd->config_refs)) {
1565 struct nbd_config *config;
1566
1567 mutex_lock(&nbd->config_lock);
1568 if (refcount_inc_not_zero(&nbd->config_refs)) {
1569 mutex_unlock(&nbd->config_lock);
1570 goto out;
1571 }
1572 config = nbd_alloc_config();
1573 if (IS_ERR(config)) {
1574 ret = PTR_ERR(config);
1575 mutex_unlock(&nbd->config_lock);
1576 goto out;
1577 }
1578 nbd->config = config;
1579 refcount_set(&nbd->config_refs, 1);
1580 refcount_inc(&nbd->refs);
1581 mutex_unlock(&nbd->config_lock);
1582 if (max_part)
1583 set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
1584 } else if (nbd_disconnected(nbd->config)) {
1585 if (max_part)
1586 set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
1587 }
1588 out:
1589 mutex_unlock(&nbd_index_mutex);
1590 return ret;
1591 }
1592
1593 static void nbd_release(struct gendisk *disk, fmode_t mode)
1594 {
1595 struct nbd_device *nbd = disk->private_data;
1596
1597 if (test_bit(NBD_RT_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
1598 disk_openers(disk) == 0)
1599 nbd_disconnect_and_put(nbd);
1600
1601 nbd_config_put(nbd);
1602 nbd_put(nbd);
1603 }
1604
1605 static const struct block_device_operations nbd_fops =
1606 {
1607 .owner = THIS_MODULE,
1608 .open = nbd_open,
1609 .release = nbd_release,
1610 .ioctl = nbd_ioctl,
1611 .compat_ioctl = nbd_ioctl,
1612 };
1613
1614 #if IS_ENABLED(CONFIG_DEBUG_FS)
1615
1616 static int nbd_dbg_tasks_show(struct seq_file *s, void *unused)
1617 {
1618 struct nbd_device *nbd = s->private;
1619
1620 if (nbd->pid)
1621 seq_printf(s, "recv: %d\n", nbd->pid);
1622
1623 return 0;
1624 }
1625
1626 DEFINE_SHOW_ATTRIBUTE(nbd_dbg_tasks);
1627
1628 static int nbd_dbg_flags_show(struct seq_file *s, void *unused)
1629 {
1630 struct nbd_device *nbd = s->private;
1631 u32 flags = nbd->config->flags;
1632
1633 seq_printf(s, "Hex: 0x%08x\n\n", flags);
1634
1635 seq_puts(s, "Known flags:\n");
1636
1637 if (flags & NBD_FLAG_HAS_FLAGS)
1638 seq_puts(s, "NBD_FLAG_HAS_FLAGS\n");
1639 if (flags & NBD_FLAG_READ_ONLY)
1640 seq_puts(s, "NBD_FLAG_READ_ONLY\n");
1641 if (flags & NBD_FLAG_SEND_FLUSH)
1642 seq_puts(s, "NBD_FLAG_SEND_FLUSH\n");
1643 if (flags & NBD_FLAG_SEND_FUA)
1644 seq_puts(s, "NBD_FLAG_SEND_FUA\n");
1645 if (flags & NBD_FLAG_SEND_TRIM)
1646 seq_puts(s, "NBD_FLAG_SEND_TRIM\n");
1647
1648 return 0;
1649 }
1650
1651 DEFINE_SHOW_ATTRIBUTE(nbd_dbg_flags);
1652
1653 static int nbd_dev_dbg_init(struct nbd_device *nbd)
1654 {
1655 struct dentry *dir;
1656 struct nbd_config *config = nbd->config;
1657
1658 if (!nbd_dbg_dir)
1659 return -EIO;
1660
1661 dir = debugfs_create_dir(nbd_name(nbd), nbd_dbg_dir);
1662 if (!dir) {
1663 dev_err(nbd_to_dev(nbd), "Failed to create debugfs dir for '%s'\n",
1664 nbd_name(nbd));
1665 return -EIO;
1666 }
1667 config->dbg_dir = dir;
1668
1669 debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_fops);
1670 debugfs_create_u64("size_bytes", 0444, dir, &config->bytesize);
1671 debugfs_create_u32("timeout", 0444, dir, &nbd->tag_set.timeout);
1672 debugfs_create_u32("blocksize_bits", 0444, dir, &config->blksize_bits);
1673 debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_fops);
1674
1675 return 0;
1676 }
1677
1678 static void nbd_dev_dbg_close(struct nbd_device *nbd)
1679 {
1680 debugfs_remove_recursive(nbd->config->dbg_dir);
1681 }
1682
1683 static int nbd_dbg_init(void)
1684 {
1685 struct dentry *dbg_dir;
1686
1687 dbg_dir = debugfs_create_dir("nbd", NULL);
1688 if (!dbg_dir)
1689 return -EIO;
1690
1691 nbd_dbg_dir = dbg_dir;
1692
1693 return 0;
1694 }
1695
1696 static void nbd_dbg_close(void)
1697 {
1698 debugfs_remove_recursive(nbd_dbg_dir);
1699 }
1700
1701 #else
1702
1703 static int nbd_dev_dbg_init(struct nbd_device *nbd)
1704 {
1705 return 0;
1706 }
1707
1708 static void nbd_dev_dbg_close(struct nbd_device *nbd)
1709 {
1710 }
1711
1712 static int nbd_dbg_init(void)
1713 {
1714 return 0;
1715 }
1716
1717 static void nbd_dbg_close(void)
1718 {
1719 }
1720
1721 #endif
1722
1723 static int nbd_init_request(struct blk_mq_tag_set *set, struct request *rq,
1724 unsigned int hctx_idx, unsigned int numa_node)
1725 {
1726 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq);
1727 cmd->nbd = set->driver_data;
1728 cmd->flags = 0;
1729 mutex_init(&cmd->lock);
1730 return 0;
1731 }
1732
1733 static const struct blk_mq_ops nbd_mq_ops = {
1734 .queue_rq = nbd_queue_rq,
1735 .complete = nbd_complete_rq,
1736 .init_request = nbd_init_request,
1737 .timeout = nbd_xmit_timeout,
1738 };
1739
1740 static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
1741 {
1742 struct nbd_device *nbd;
1743 struct gendisk *disk;
1744 int err = -ENOMEM;
1745
1746 nbd = kzalloc(sizeof(struct nbd_device), GFP_KERNEL);
1747 if (!nbd)
1748 goto out;
1749
1750 nbd->tag_set.ops = &nbd_mq_ops;
1751 nbd->tag_set.nr_hw_queues = 1;
1752 nbd->tag_set.queue_depth = 128;
1753 nbd->tag_set.numa_node = NUMA_NO_NODE;
1754 nbd->tag_set.cmd_size = sizeof(struct nbd_cmd);
1755 nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
1756 BLK_MQ_F_BLOCKING;
1757 nbd->tag_set.driver_data = nbd;
1758 INIT_WORK(&nbd->remove_work, nbd_dev_remove_work);
1759 nbd->backend = NULL;
1760
1761 err = blk_mq_alloc_tag_set(&nbd->tag_set);
1762 if (err)
1763 goto out_free_nbd;
1764
1765 mutex_lock(&nbd_index_mutex);
1766 if (index >= 0) {
1767 err = idr_alloc(&nbd_index_idr, nbd, index, index + 1,
1768 GFP_KERNEL);
1769 if (err == -ENOSPC)
1770 err = -EEXIST;
1771 } else {
1772 err = idr_alloc(&nbd_index_idr, nbd, 0, 0, GFP_KERNEL);
1773 if (err >= 0)
1774 index = err;
1775 }
1776 nbd->index = index;
1777 mutex_unlock(&nbd_index_mutex);
1778 if (err < 0)
1779 goto out_free_tags;
1780
1781 disk = blk_mq_alloc_disk(&nbd->tag_set, NULL);
1782 if (IS_ERR(disk)) {
1783 err = PTR_ERR(disk);
1784 goto out_free_idr;
1785 }
1786 nbd->disk = disk;
1787
1788 nbd->recv_workq = alloc_workqueue("nbd%d-recv",
1789 WQ_MEM_RECLAIM | WQ_HIGHPRI |
1790 WQ_UNBOUND, 0, nbd->index);
1791 if (!nbd->recv_workq) {
1792 dev_err(disk_to_dev(nbd->disk), "Could not allocate knbd recv work queue.\n");
1793 err = -ENOMEM;
1794 goto out_err_disk;
1795 }
1796
/*
 * Tell the block layer this is not a rotational device and does not
 * contribute entropy.
 */
1800 blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
1801 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
1802 disk->queue->limits.discard_granularity = 0;
1803 blk_queue_max_discard_sectors(disk->queue, 0);
1804 blk_queue_max_segment_size(disk->queue, UINT_MAX);
1805 blk_queue_max_segments(disk->queue, USHRT_MAX);
1806 blk_queue_max_hw_sectors(disk->queue, 65536);
1807 disk->queue->limits.max_sectors = 256;
1808
1809 mutex_init(&nbd->config_lock);
1810 refcount_set(&nbd->config_refs, 0);

/*
 * Start with zero references so other threads can't use this device
 * until it is fully initialized.
 */
1815 refcount_set(&nbd->refs, 0);
1816 INIT_LIST_HEAD(&nbd->list);
1817 disk->major = NBD_MAJOR;
1818 disk->first_minor = index << part_shift;
1819 disk->minors = 1 << part_shift;
1820 disk->fops = &nbd_fops;
1821 disk->private_data = nbd;
1822 sprintf(disk->disk_name, "nbd%d", index);
1823 err = add_disk(disk);
1824 if (err)
1825 goto out_free_work;

/*
 * Now publish the device.
 */
1830 refcount_set(&nbd->refs, refs);
1831 nbd_total_devices++;
1832 return nbd;
1833
1834 out_free_work:
1835 destroy_workqueue(nbd->recv_workq);
1836 out_err_disk:
1837 put_disk(disk);
1838 out_free_idr:
1839 mutex_lock(&nbd_index_mutex);
1840 idr_remove(&nbd_index_idr, index);
1841 mutex_unlock(&nbd_index_mutex);
1842 out_free_tags:
1843 blk_mq_free_tag_set(&nbd->tag_set);
1844 out_free_nbd:
1845 kfree(nbd);
1846 out:
1847 return ERR_PTR(err);
1848 }
1849
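/*
 * Find an idle device (no active config, not marked for destruction) and take
 * a reference on it; used when a netlink connect does not specify an index.
 */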
1850 static struct nbd_device *nbd_find_get_unused(void)
1851 {
1852 struct nbd_device *nbd;
1853 int id;
1854
1855 lockdep_assert_held(&nbd_index_mutex);
1856
1857 idr_for_each_entry(&nbd_index_idr, nbd, id) {
1858 if (refcount_read(&nbd->config_refs) ||
1859 test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags))
1860 continue;
1861 if (refcount_inc_not_zero(&nbd->refs))
1862 return nbd;
1863 }
1864
1865 return NULL;
1866 }
1867
1868
1869 static const struct nla_policy nbd_attr_policy[NBD_ATTR_MAX + 1] = {
1870 [NBD_ATTR_INDEX] = { .type = NLA_U32 },
1871 [NBD_ATTR_SIZE_BYTES] = { .type = NLA_U64 },
1872 [NBD_ATTR_BLOCK_SIZE_BYTES] = { .type = NLA_U64 },
1873 [NBD_ATTR_TIMEOUT] = { .type = NLA_U64 },
1874 [NBD_ATTR_SERVER_FLAGS] = { .type = NLA_U64 },
1875 [NBD_ATTR_CLIENT_FLAGS] = { .type = NLA_U64 },
1876 [NBD_ATTR_SOCKETS] = { .type = NLA_NESTED},
1877 [NBD_ATTR_DEAD_CONN_TIMEOUT] = { .type = NLA_U64 },
1878 [NBD_ATTR_DEVICE_LIST] = { .type = NLA_NESTED},
1879 [NBD_ATTR_BACKEND_IDENTIFIER] = { .type = NLA_STRING},
1880 };
1881
1882 static const struct nla_policy nbd_sock_policy[NBD_SOCK_MAX + 1] = {
1883 [NBD_SOCK_FD] = { .type = NLA_U32 },
1884 };
1885
/*
 * Not parsed by the kernel right now, but kept so userspace knows what
 * device-list attributes to expect.
 */
1889 static const struct nla_policy __attribute__((unused))
1890 nbd_device_policy[NBD_DEVICE_ATTR_MAX + 1] = {
1891 [NBD_DEVICE_INDEX] = { .type = NLA_U32 },
1892 [NBD_DEVICE_CONNECTED] = { .type = NLA_U8 },
1893 };
1894
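/* Apply NBD_ATTR_SIZE_BYTES / NBD_ATTR_BLOCK_SIZE_BYTES from a netlink message. */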
1895 static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd)
1896 {
1897 struct nbd_config *config = nbd->config;
1898 u64 bsize = nbd_blksize(config);
1899 u64 bytes = config->bytesize;
1900
1901 if (info->attrs[NBD_ATTR_SIZE_BYTES])
1902 bytes = nla_get_u64(info->attrs[NBD_ATTR_SIZE_BYTES]);
1903
1904 if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES])
1905 bsize = nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]);
1906
1907 if (bytes != config->bytesize || bsize != nbd_blksize(config))
1908 return nbd_set_size(nbd, bytes, bsize);
1909 return 0;
1910 }
1911
1912 static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
1913 {
1914 struct nbd_device *nbd;
1915 struct nbd_config *config;
1916 int index = -1;
1917 int ret;
1918 bool put_dev = false;
1919
1920 if (!netlink_capable(skb, CAP_SYS_ADMIN))
1921 return -EPERM;
1922
1923 if (info->attrs[NBD_ATTR_INDEX]) {
1924 index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);

/*
 * Reject overly large indexes: index << part_shift must not overflow
 * the minor number space (MKDEV() only has 20 bits of minor).
 */
1931 if (index < 0 || index > MINORMASK >> part_shift) {
1932 pr_err("illegal input index %d\n", index);
1933 return -EINVAL;
1934 }
1935 }
1936 if (!info->attrs[NBD_ATTR_SOCKETS]) {
1937 pr_err("must specify at least one socket\n");
1938 return -EINVAL;
1939 }
1940 if (!info->attrs[NBD_ATTR_SIZE_BYTES]) {
1941 pr_err("must specify a size in bytes for the device\n");
1942 return -EINVAL;
1943 }
1944 again:
1945 mutex_lock(&nbd_index_mutex);
1946 if (index == -1) {
1947 nbd = nbd_find_get_unused();
1948 } else {
1949 nbd = idr_find(&nbd_index_idr, index);
1950 if (nbd) {
1951 if ((test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) &&
1952 test_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags)) ||
1953 !refcount_inc_not_zero(&nbd->refs)) {
1954 mutex_unlock(&nbd_index_mutex);
1955 pr_err("device at index %d is going down\n",
1956 index);
1957 return -EINVAL;
1958 }
1959 }
1960 }
1961 mutex_unlock(&nbd_index_mutex);
1962
1963 if (!nbd) {
1964 nbd = nbd_dev_add(index, 2);
1965 if (IS_ERR(nbd)) {
1966 pr_err("failed to add new device\n");
1967 return PTR_ERR(nbd);
1968 }
1969 }
1970
1971 mutex_lock(&nbd->config_lock);
1972 if (refcount_read(&nbd->config_refs)) {
1973 mutex_unlock(&nbd->config_lock);
1974 nbd_put(nbd);
1975 if (index == -1)
1976 goto again;
1977 pr_err("nbd%d already in use\n", index);
1978 return -EBUSY;
1979 }
1980 if (WARN_ON(nbd->config)) {
1981 mutex_unlock(&nbd->config_lock);
1982 nbd_put(nbd);
1983 return -EINVAL;
1984 }
1985 config = nbd_alloc_config();
1986 if (IS_ERR(config)) {
1987 mutex_unlock(&nbd->config_lock);
1988 nbd_put(nbd);
1989 pr_err("couldn't allocate config\n");
1990 return PTR_ERR(config);
1991 }
1992 nbd->config = config;
1993 refcount_set(&nbd->config_refs, 1);
1994 set_bit(NBD_RT_BOUND, &config->runtime_flags);
1995
1996 ret = nbd_genl_size_set(info, nbd);
1997 if (ret)
1998 goto out;
1999
2000 if (info->attrs[NBD_ATTR_TIMEOUT])
2001 nbd_set_cmd_timeout(nbd,
2002 nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]));
2003 if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) {
2004 config->dead_conn_timeout =
2005 nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]);
2006 config->dead_conn_timeout *= HZ;
2007 }
2008 if (info->attrs[NBD_ATTR_SERVER_FLAGS])
2009 config->flags =
2010 nla_get_u64(info->attrs[NBD_ATTR_SERVER_FLAGS]);
2011 if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
2012 u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
2013 if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {

/*
 * One reference keeps the device around and one belongs to this
 * operation and will be inherited by the config. If
 * NBD_DESTROY_ON_DISCONNECT was already set, that extra reference is
 * not held, so the later put_dev must be skipped.
 */
2022 if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT,
2023 &nbd->flags))
2024 put_dev = true;
2025 } else {
2026 if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT,
2027 &nbd->flags))
2028 refcount_inc(&nbd->refs);
2029 }
2030 if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
2031 set_bit(NBD_RT_DISCONNECT_ON_CLOSE,
2032 &config->runtime_flags);
2033 }
2034 }
2035
2036 if (info->attrs[NBD_ATTR_SOCKETS]) {
2037 struct nlattr *attr;
2038 int rem, fd;
2039
2040 nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS],
2041 rem) {
2042 struct nlattr *socks[NBD_SOCK_MAX+1];
2043
2044 if (nla_type(attr) != NBD_SOCK_ITEM) {
2045 pr_err("socks must be embedded in a SOCK_ITEM attr\n");
2046 ret = -EINVAL;
2047 goto out;
2048 }
2049 ret = nla_parse_nested_deprecated(socks, NBD_SOCK_MAX,
2050 attr,
2051 nbd_sock_policy,
2052 info->extack);
2053 if (ret != 0) {
2054 pr_err("error processing sock list\n");
2055 ret = -EINVAL;
2056 goto out;
2057 }
2058 if (!socks[NBD_SOCK_FD])
2059 continue;
2060 fd = (int)nla_get_u32(socks[NBD_SOCK_FD]);
2061 ret = nbd_add_socket(nbd, fd, true);
2062 if (ret)
2063 goto out;
2064 }
2065 }
2066 ret = nbd_start_device(nbd);
2067 if (ret)
2068 goto out;
2069 if (info->attrs[NBD_ATTR_BACKEND_IDENTIFIER]) {
2070 nbd->backend = nla_strdup(info->attrs[NBD_ATTR_BACKEND_IDENTIFIER],
2071 GFP_KERNEL);
2072 if (!nbd->backend) {
2073 ret = -ENOMEM;
2074 goto out;
2075 }
2076 }
2077 ret = device_create_file(disk_to_dev(nbd->disk), &backend_attr);
2078 if (ret) {
2079 dev_err(disk_to_dev(nbd->disk),
2080 "device_create_file failed for backend!\n");
2081 goto out;
2082 }
2083 set_bit(NBD_RT_HAS_BACKEND_FILE, &config->runtime_flags);
2084 out:
2085 mutex_unlock(&nbd->config_lock);
2086 if (!ret) {
2087 set_bit(NBD_RT_HAS_CONFIG_REF, &config->runtime_flags);
2088 refcount_inc(&nbd->config_refs);
2089 nbd_connect_reply(info, nbd->index);
2090 }
2091 nbd_config_put(nbd);
2092 if (put_dev)
2093 nbd_put(nbd);
2094 return ret;
2095 }
2096
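/*
 * Tear down the device's sockets and queue; used by netlink disconnect and by
 * the last close of a device configured with NBD_CFLAG_DISCONNECT_ON_CLOSE.
 */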
2097 static void nbd_disconnect_and_put(struct nbd_device *nbd)
2098 {
2099 mutex_lock(&nbd->config_lock);
2100 nbd_disconnect(nbd);
2101 sock_shutdown(nbd);
2102 wake_up(&nbd->config->conn_wait);

/*
 * Make sure the receive threads have finished so they do not drop the
 * last config reference from inside the workqueue itself.
 */
2107 flush_workqueue(nbd->recv_workq);
2108 nbd_clear_que(nbd);
2109 nbd->task_setup = NULL;
2110 mutex_unlock(&nbd->config_lock);
2111
2112 if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
2113 &nbd->config->runtime_flags))
2114 nbd_config_put(nbd);
2115 }
2116
2117 static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
2118 {
2119 struct nbd_device *nbd;
2120 int index;
2121
2122 if (!netlink_capable(skb, CAP_SYS_ADMIN))
2123 return -EPERM;
2124
2125 if (!info->attrs[NBD_ATTR_INDEX]) {
2126 pr_err("must specify an index to disconnect\n");
2127 return -EINVAL;
2128 }
2129 index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
2130 mutex_lock(&nbd_index_mutex);
2131 nbd = idr_find(&nbd_index_idr, index);
2132 if (!nbd) {
2133 mutex_unlock(&nbd_index_mutex);
2134 pr_err("couldn't find device at index %d\n", index);
2135 return -EINVAL;
2136 }
2137 if (!refcount_inc_not_zero(&nbd->refs)) {
2138 mutex_unlock(&nbd_index_mutex);
2139 pr_err("device at index %d is going down\n", index);
2140 return -EINVAL;
2141 }
2142 mutex_unlock(&nbd_index_mutex);
2143 if (!refcount_inc_not_zero(&nbd->config_refs))
2144 goto put_nbd;
2145 nbd_disconnect_and_put(nbd);
2146 nbd_config_put(nbd);
2147 put_nbd:
2148 nbd_put(nbd);
2149 return 0;
2150 }
2151
2152 static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
2153 {
2154 struct nbd_device *nbd = NULL;
2155 struct nbd_config *config;
2156 int index;
2157 int ret = 0;
2158 bool put_dev = false;
2159
2160 if (!netlink_capable(skb, CAP_SYS_ADMIN))
2161 return -EPERM;
2162
2163 if (!info->attrs[NBD_ATTR_INDEX]) {
2164 pr_err("must specify a device to reconfigure\n");
2165 return -EINVAL;
2166 }
2167 index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
2168 mutex_lock(&nbd_index_mutex);
2169 nbd = idr_find(&nbd_index_idr, index);
2170 if (!nbd) {
2171 mutex_unlock(&nbd_index_mutex);
2172 pr_err("couldn't find a device at index %d\n", index);
2173 return -EINVAL;
2174 }
2175 if (nbd->backend) {
2176 if (info->attrs[NBD_ATTR_BACKEND_IDENTIFIER]) {
2177 if (nla_strcmp(info->attrs[NBD_ATTR_BACKEND_IDENTIFIER],
2178 nbd->backend)) {
2179 mutex_unlock(&nbd_index_mutex);
2180 dev_err(nbd_to_dev(nbd),
2181 "backend image doesn't match with %s\n",
2182 nbd->backend);
2183 return -EINVAL;
2184 }
2185 } else {
2186 mutex_unlock(&nbd_index_mutex);
2187 dev_err(nbd_to_dev(nbd), "must specify backend\n");
2188 return -EINVAL;
2189 }
2190 }
2191 if (!refcount_inc_not_zero(&nbd->refs)) {
2192 mutex_unlock(&nbd_index_mutex);
2193 pr_err("device at index %d is going down\n", index);
2194 return -EINVAL;
2195 }
2196 mutex_unlock(&nbd_index_mutex);
2197
2198 if (!refcount_inc_not_zero(&nbd->config_refs)) {
2199 dev_err(nbd_to_dev(nbd),
2200 "not configured, cannot reconfigure\n");
2201 nbd_put(nbd);
2202 return -EINVAL;
2203 }
2204
2205 mutex_lock(&nbd->config_lock);
2206 config = nbd->config;
2207 if (!test_bit(NBD_RT_BOUND, &config->runtime_flags) ||
2208 !nbd->pid) {
2209 dev_err(nbd_to_dev(nbd),
2210 "not configured, cannot reconfigure\n");
2211 ret = -EINVAL;
2212 goto out;
2213 }
2214
2215 ret = nbd_genl_size_set(info, nbd);
2216 if (ret)
2217 goto out;
2218
2219 if (info->attrs[NBD_ATTR_TIMEOUT])
2220 nbd_set_cmd_timeout(nbd,
2221 nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]));
2222 if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) {
2223 config->dead_conn_timeout =
2224 nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]);
2225 config->dead_conn_timeout *= HZ;
2226 }
2227 if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
2228 u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
2229 if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
2230 if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT,
2231 &nbd->flags))
2232 put_dev = true;
2233 } else {
2234 if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT,
2235 &nbd->flags))
2236 refcount_inc(&nbd->refs);
2237 }
2238
2239 if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
2240 set_bit(NBD_RT_DISCONNECT_ON_CLOSE,
2241 &config->runtime_flags);
2242 } else {
2243 clear_bit(NBD_RT_DISCONNECT_ON_CLOSE,
2244 &config->runtime_flags);
2245 }
2246 }
2247
2248 if (info->attrs[NBD_ATTR_SOCKETS]) {
2249 struct nlattr *attr;
2250 int rem, fd;
2251
2252 nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS],
2253 rem) {
2254 struct nlattr *socks[NBD_SOCK_MAX+1];
2255
2256 if (nla_type(attr) != NBD_SOCK_ITEM) {
2257 pr_err("socks must be embedded in a SOCK_ITEM attr\n");
2258 ret = -EINVAL;
2259 goto out;
2260 }
2261 ret = nla_parse_nested_deprecated(socks, NBD_SOCK_MAX,
2262 attr,
2263 nbd_sock_policy,
2264 info->extack);
2265 if (ret != 0) {
2266 pr_err("error processing sock list\n");
2267 ret = -EINVAL;
2268 goto out;
2269 }
2270 if (!socks[NBD_SOCK_FD])
2271 continue;
2272 fd = (int)nla_get_u32(socks[NBD_SOCK_FD]);
2273 ret = nbd_reconnect_socket(nbd, fd);
2274 if (ret) {
2275 if (ret == -ENOSPC)
2276 ret = 0;
2277 goto out;
2278 }
2279 dev_info(nbd_to_dev(nbd), "reconnected socket\n");
2280 }
2281 }
2282 out:
2283 mutex_unlock(&nbd->config_lock);
2284 nbd_config_put(nbd);
2285 nbd_put(nbd);
2286 if (put_dev)
2287 nbd_put(nbd);
2288 return ret;
2289 }
2290
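/* Generic netlink command table for the nbd control interface. */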
2291 static const struct genl_small_ops nbd_connect_genl_ops[] = {
2292 {
2293 .cmd = NBD_CMD_CONNECT,
2294 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2295 .doit = nbd_genl_connect,
2296 },
2297 {
2298 .cmd = NBD_CMD_DISCONNECT,
2299 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2300 .doit = nbd_genl_disconnect,
2301 },
2302 {
2303 .cmd = NBD_CMD_RECONFIGURE,
2304 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2305 .doit = nbd_genl_reconfigure,
2306 },
2307 {
2308 .cmd = NBD_CMD_STATUS,
2309 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2310 .doit = nbd_genl_status,
2311 },
2312 };
2313
2314 static const struct genl_multicast_group nbd_mcast_grps[] = {
2315 { .name = NBD_GENL_MCAST_GROUP_NAME, },
2316 };
2317
2318 static struct genl_family nbd_genl_family __ro_after_init = {
2319 .hdrsize = 0,
2320 .name = NBD_GENL_FAMILY_NAME,
2321 .version = NBD_GENL_VERSION,
2322 .module = THIS_MODULE,
2323 .small_ops = nbd_connect_genl_ops,
2324 .n_small_ops = ARRAY_SIZE(nbd_connect_genl_ops),
2325 .maxattr = NBD_ATTR_MAX,
2326 .policy = nbd_attr_policy,
2327 .mcgrps = nbd_mcast_grps,
2328 .n_mcgrps = ARRAY_SIZE(nbd_mcast_grps),
2329 };
2330
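/*
 * Append one NBD_DEVICE_ITEM (device index plus connected flag) to a
 * status reply.
 */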
2331 static int populate_nbd_status(struct nbd_device *nbd, struct sk_buff *reply)
2332 {
2333 struct nlattr *dev_opt;
2334 u8 connected = 0;
2335 int ret;
2336
/*
 * This is a little racey, but for status it's ok.  The reason we don't
 * take a config ref here is that in the index == -1 case we would have
 * to take it under nbd_index_mutex, which could deadlock if the device
 * is configured to remove itself once it is disconnected.
 */
2344 if (refcount_read(&nbd->config_refs))
2345 connected = 1;
2346 dev_opt = nla_nest_start_noflag(reply, NBD_DEVICE_ITEM);
2347 if (!dev_opt)
2348 return -EMSGSIZE;
2349 ret = nla_put_u32(reply, NBD_DEVICE_INDEX, nbd->index);
2350 if (ret)
2351 return -EMSGSIZE;
2352 ret = nla_put_u8(reply, NBD_DEVICE_CONNECTED,
2353 connected);
2354 if (ret)
2355 return -EMSGSIZE;
2356 nla_nest_end(reply, dev_opt);
2357 return 0;
2358 }
2359
2360 static int status_cb(int id, void *ptr, void *data)
2361 {
2362 struct nbd_device *nbd = ptr;
2363 return populate_nbd_status(nbd, (struct sk_buff *)data);
2364 }
2365
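/*
 * Netlink NBD_CMD_STATUS handler: report either a single device (if
 * NBD_ATTR_INDEX is given) or every device currently in the index IDR.
 */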
2366 static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info)
2367 {
2368 struct nlattr *dev_list;
2369 struct sk_buff *reply;
2370 void *reply_head;
2371 size_t msg_size;
2372 int index = -1;
2373 int ret = -ENOMEM;
2374
2375 if (info->attrs[NBD_ATTR_INDEX])
2376 index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
2377
2378 mutex_lock(&nbd_index_mutex);
2379
2380 msg_size = nla_total_size(nla_attr_size(sizeof(u32)) +
2381 nla_attr_size(sizeof(u8)));
2382 msg_size *= (index == -1) ? nbd_total_devices : 1;
2383
2384 reply = genlmsg_new(msg_size, GFP_KERNEL);
2385 if (!reply)
2386 goto out;
2387 reply_head = genlmsg_put_reply(reply, info, &nbd_genl_family, 0,
2388 NBD_CMD_STATUS);
2389 if (!reply_head) {
2390 nlmsg_free(reply);
2391 goto out;
2392 }
2393
2394 dev_list = nla_nest_start_noflag(reply, NBD_ATTR_DEVICE_LIST);
2395 if (index == -1) {
2396 ret = idr_for_each(&nbd_index_idr, &status_cb, reply);
2397 if (ret) {
2398 nlmsg_free(reply);
2399 goto out;
2400 }
2401 } else {
2402 struct nbd_device *nbd;
2403 nbd = idr_find(&nbd_index_idr, index);
2404 if (nbd) {
2405 ret = populate_nbd_status(nbd, reply);
2406 if (ret) {
2407 nlmsg_free(reply);
2408 goto out;
2409 }
2410 }
2411 }
2412 nla_nest_end(reply, dev_list);
2413 genlmsg_end(reply, reply_head);
2414 ret = genlmsg_reply(reply, info);
2415 out:
2416 mutex_unlock(&nbd_index_mutex);
2417 return ret;
2418 }
2419
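/* Send the allocated device index back to the task that issued NBD_CMD_CONNECT. */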
2420 static void nbd_connect_reply(struct genl_info *info, int index)
2421 {
2422 struct sk_buff *skb;
2423 void *msg_head;
2424 int ret;
2425
2426 skb = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL);
2427 if (!skb)
2428 return;
2429 msg_head = genlmsg_put_reply(skb, info, &nbd_genl_family, 0,
2430 NBD_CMD_CONNECT);
2431 if (!msg_head) {
2432 nlmsg_free(skb);
2433 return;
2434 }
2435 ret = nla_put_u32(skb, NBD_ATTR_INDEX, index);
2436 if (ret) {
2437 nlmsg_free(skb);
2438 return;
2439 }
2440 genlmsg_end(skb, msg_head);
2441 genlmsg_reply(skb, info);
2442 }
2443
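/*
 * Broadcast an NBD_CMD_LINK_DEAD notification so userspace can react to a
 * dead connection, e.g. by supplying a replacement socket via reconfigure.
 */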
2444 static void nbd_mcast_index(int index)
2445 {
2446 struct sk_buff *skb;
2447 void *msg_head;
2448 int ret;
2449
2450 skb = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL);
2451 if (!skb)
2452 return;
2453 msg_head = genlmsg_put(skb, 0, 0, &nbd_genl_family, 0,
2454 NBD_CMD_LINK_DEAD);
2455 if (!msg_head) {
2456 nlmsg_free(skb);
2457 return;
2458 }
2459 ret = nla_put_u32(skb, NBD_ATTR_INDEX, index);
2460 if (ret) {
2461 nlmsg_free(skb);
2462 return;
2463 }
2464 genlmsg_end(skb, msg_head);
2465 genlmsg_multicast(&nbd_genl_family, skb, 0, 0, GFP_KERNEL);
2466 }
2467
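/* Deferred work: send the dead-link multicast from process context, then free the args. */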
2468 static void nbd_dead_link_work(struct work_struct *work)
2469 {
2470 struct link_dead_args *args = container_of(work, struct link_dead_args,
2471 work);
2472 nbd_mcast_index(args->index);
2473 kfree(args);
2474 }
2475
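/*
 * Module init: validate the max_part/nbds_max parameters, register the
 * block major, the deletion workqueue and the netlink family, then
 * pre-create nbds_max devices.
 */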
2476 static int __init nbd_init(void)
2477 {
2478 int i;
2479
2480 BUILD_BUG_ON(sizeof(struct nbd_request) != 28);
2481
2482 if (max_part < 0) {
2483 pr_err("max_part must be >= 0\n");
2484 return -EINVAL;
2485 }
2486
2487 part_shift = 0;
2488 if (max_part > 0) {
2489 part_shift = fls(max_part);

/*
 * Adjust max_part according to part_shift as it is exported
 * to user space so that user can know the max number of
 * partitions the kernel should be able to manage.
 *
 * Note that -1 is required because partition 0 is reserved
 * for the whole disk.
 */
2499 max_part = (1UL << part_shift) - 1;
2500 }
2501
2502 if ((1UL << part_shift) > DISK_MAX_PARTS)
2503 return -EINVAL;
2504
2505 if (nbds_max > 1UL << (MINORBITS - part_shift))
2506 return -EINVAL;
2507
2508 if (register_blkdev(NBD_MAJOR, "nbd"))
2509 return -EIO;
2510
2511 nbd_del_wq = alloc_workqueue("nbd-del", WQ_UNBOUND, 0);
2512 if (!nbd_del_wq) {
2513 unregister_blkdev(NBD_MAJOR, "nbd");
2514 return -ENOMEM;
2515 }
2516
2517 if (genl_register_family(&nbd_genl_family)) {
2518 destroy_workqueue(nbd_del_wq);
2519 unregister_blkdev(NBD_MAJOR, "nbd");
2520 return -EINVAL;
2521 }
2522 nbd_dbg_init();
2523
2524 for (i = 0; i < nbds_max; i++)
2525 nbd_dev_add(i, 1);
2526 return 0;
2527 }
2528
2529 static int nbd_exit_cb(int id, void *ptr, void *data)
2530 {
2531 struct list_head *list = (struct list_head *)data;
2532 struct nbd_device *nbd = ptr;
2533
/* Skip devices that are already being removed asynchronously */
2535 if (refcount_read(&nbd->refs))
2536 list_add_tail(&nbd->list, list);
2537
2538 return 0;
2539 }
2540
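/*
 * Module exit: unregister the netlink family first so no new commands race
 * with teardown, then drop the initial reference on every remaining device.
 */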
2541 static void __exit nbd_cleanup(void)
2542 {
2543 struct nbd_device *nbd;
2544 LIST_HEAD(del_list);
2545
/*
 * Unregister the netlink interface prior to waiting
 * for the completion of netlink commands.
 */
2550 genl_unregister_family(&nbd_genl_family);
2551
2552 nbd_dbg_close();
2553
2554 mutex_lock(&nbd_index_mutex);
2555 idr_for_each(&nbd_index_idr, &nbd_exit_cb, &del_list);
2556 mutex_unlock(&nbd_index_mutex);
2557
2558 while (!list_empty(&del_list)) {
2559 nbd = list_first_entry(&del_list, struct nbd_device, list);
2560 list_del_init(&nbd->list);
2561 if (refcount_read(&nbd->config_refs))
2562 pr_err("possibly leaking nbd_config (ref %d)\n",
2563 refcount_read(&nbd->config_refs));
2564 if (refcount_read(&nbd->refs) != 1)
2565 pr_err("possibly leaking a device\n");
2566 nbd_put(nbd);
2567 }
2568
/* Also wait for any queued device-removal work on nbd_del_wq to complete */
2570 destroy_workqueue(nbd_del_wq);
2571
2572 idr_destroy(&nbd_index_idr);
2573 unregister_blkdev(NBD_MAJOR, "nbd");
2574 }
2575
2576 module_init(nbd_init);
2577 module_exit(nbd_cleanup);
2578
2579 MODULE_DESCRIPTION("Network Block Device");
2580 MODULE_LICENSE("GPL");
2581
2582 module_param(nbds_max, int, 0444);
2583 MODULE_PARM_DESC(nbds_max, "number of network block devices to initialize (default: 16)");
2584 module_param(max_part, int, 0444);
2585 MODULE_PARM_DESC(max_part, "number of partitions per device (default: 16)");
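
/*
 * Example (illustrative values): load the module with four pre-created
 * devices and up to eight partitions per device:
 *
 *   modprobe nbd nbds_max=4 max_part=8
 *
 * A userspace client (e.g. nbd-client) then hands connected sockets to the
 * driver through the ioctl or netlink interfaces above.
 */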