0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012 #include <linux/module.h>
0013 #include <linux/moduleparam.h>
0014 #include <linux/sched.h>
0015 #include <linux/fs.h>
0016 #include <linux/pagemap.h>
0017 #include <linux/file.h>
0018 #include <linux/stat.h>
0019 #include <linux/errno.h>
0020 #include <linux/major.h>
0021 #include <linux/wait.h>
0022 #include <linux/blkdev.h>
0023 #include <linux/init.h>
0024 #include <linux/swap.h>
0025 #include <linux/slab.h>
0026 #include <linux/compat.h>
0027 #include <linux/mutex.h>
0028 #include <linux/writeback.h>
0029 #include <linux/completion.h>
0030 #include <linux/highmem.h>
0031 #include <linux/sysfs.h>
0032 #include <linux/miscdevice.h>
0033 #include <linux/falloc.h>
0034 #include <linux/uio.h>
0035 #include <linux/ioprio.h>
0036 #include <linux/sched/mm.h>
0037 #include <linux/uaccess.h>
0038 #include <linux/cdev.h>
0039 #include <linux/io_uring.h>
0040 #include <linux/blk-mq.h>
0041 #include <linux/delay.h>
0042 #include <linux/mm.h>
0043 #include <asm/page.h>
0044 #include <linux/task_work.h>
0045 #include <uapi/linux/ublk_cmd.h>
0046
0047 #define UBLK_MINORS (1U << MINORBITS)
0048
0049
0050 #define UBLK_F_ALL (UBLK_F_SUPPORT_ZERO_COPY \
0051 | UBLK_F_URING_CMD_COMP_IN_TASK \
0052 | UBLK_F_NEED_GET_DATA)
0053
0054
0055 #define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD)
0056
/* Per-request driver data, embedded in each blk-mq request PDU. */
struct ublk_rq_data {
	/* task_work item used to run io handling in the queue daemon task */
	struct callback_head work;
};
0060
/* Driver-private data stored in an io_uring command's pdu area. */
struct ublk_uring_cmd_pdu {
	struct request *req;	/* the request this uring cmd is handling */
};
0064
0065
0066
0067
0068
0069
0070
0071
0072
0073
0074 #define UBLK_IO_FLAG_ACTIVE 0x01
0075
0076
0077
0078
0079
0080
0081
0082
0083 #define UBLK_IO_FLAG_OWNED_BY_SRV 0x02
0084
0085
0086
0087
0088
0089
0090
0091
0092 #define UBLK_IO_FLAG_ABORTED 0x04
0093
0094
0095
0096
0097
0098
0099
0100
0101 #define UBLK_IO_FLAG_NEED_GET_DATA 0x08
0102
/* Per-tag io slot, shared between the blk-mq side and the server side. */
struct ublk_io {
	/* userspace buffer address from the last io cmd for this tag */
	__u64 addr;
	unsigned int flags;	/* UBLK_IO_FLAG_* */
	int res;		/* io result committed back by the server */

	/* uring cmd kept around so the io can be completed to userspace */
	struct io_uring_cmd *cmd;
};
0111
0112 struct ublk_queue {
0113 int q_id;
0114 int q_depth;
0115
0116 unsigned long flags;
0117 struct task_struct *ubq_daemon;
0118 char *io_cmd_buf;
0119
0120 unsigned long io_addr;
0121 unsigned int max_io_sz;
0122 bool abort_work_pending;
0123 unsigned short nr_io_ready;
0124 struct ublk_device *dev;
0125 struct ublk_io ios[0];
0126 };
0127
0128 #define UBLK_DAEMON_MONITOR_PERIOD (5 * HZ)
0129
/*
 * Per-device state.  Lifetime is tied to the char device (cdev_dev);
 * the gendisk only exists while the device is live.
 */
struct ublk_device {
	struct gendisk *ub_disk;	/* NULL unless state is DEV_LIVE */

	/* flat array of per-queue allocations, queue_size bytes each */
	char *__queues;

	unsigned short queue_size;
	struct ublksrv_ctrl_dev_info dev_info;

	struct blk_mq_tag_set tag_set;

	struct cdev cdev;
	struct device cdev_dev;

#define UB_STATE_OPEN 0
#define UB_STATE_USED 1
	unsigned long state;		/* UB_STATE_* bits */
	int ub_number;			/* idr-allocated device index */

	struct mutex mutex;		/* serializes start/stop/ready */

	/* only one mm is allowed to mmap the cmd buffers */
	spinlock_t mm_lock;
	struct mm_struct *mm;

	struct ublk_params params;

	/* signalled once every queue has all ios fetched */
	struct completion completion;
	unsigned int nr_queues_ready;
	atomic_t nr_aborted_queues;

	/* periodically checks whether the queue daemons are dying */
	struct delayed_work monitor_work;
	struct work_struct stop_work;
};
0166
0167
/* Leading header of the userspace params blob: length + type bitmask. */
struct ublk_params_header {
	__u32 len;
	__u32 types;
};
0172
0173 static dev_t ublk_chr_devt;
0174 static struct class *ublk_chr_class;
0175
0176 static DEFINE_IDR(ublk_index_idr);
0177 static DEFINE_SPINLOCK(ublk_idr_lock);
0178 static wait_queue_head_t ublk_idr_wq;
0179
0180 static DEFINE_MUTEX(ublk_ctl_mutex);
0181
0182 static struct miscdevice ublk_misc;
0183
/* Apply the validated basic parameters to the request queue and disk. */
static void ublk_dev_param_basic_apply(struct ublk_device *ub)
{
	struct request_queue *q = ub->ub_disk->queue;
	const struct ublk_param_basic *p = &ub->params.basic;

	blk_queue_logical_block_size(q, 1 << p->logical_bs_shift);
	blk_queue_physical_block_size(q, 1 << p->physical_bs_shift);
	blk_queue_io_min(q, 1 << p->io_min_shift);
	blk_queue_io_opt(q, 1 << p->io_opt_shift);

	/* write cache / FUA support is whatever the server advertised */
	blk_queue_write_cache(q, p->attrs & UBLK_ATTR_VOLATILE_CACHE,
			p->attrs & UBLK_ATTR_FUA);
	if (p->attrs & UBLK_ATTR_ROTATIONAL)
		blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
	else
		blk_queue_flag_set(QUEUE_FLAG_NONROT, q);

	blk_queue_max_hw_sectors(q, p->max_sectors);
	blk_queue_chunk_sectors(q, p->chunk_sectors);
	blk_queue_virt_boundary(q, p->virt_boundary_mask);

	if (p->attrs & UBLK_ATTR_READ_ONLY)
		set_disk_ro(ub->ub_disk, true);

	set_capacity(ub->ub_disk, p->dev_sectors);
}
0210
/* Apply the validated discard/write-zeroes limits to the request queue. */
static void ublk_dev_param_discard_apply(struct ublk_device *ub)
{
	struct request_queue *q = ub->ub_disk->queue;
	const struct ublk_param_discard *p = &ub->params.discard;

	q->limits.discard_alignment = p->discard_alignment;
	q->limits.discard_granularity = p->discard_granularity;
	blk_queue_max_discard_sectors(q, p->max_discard_sectors);
	blk_queue_max_write_zeroes_sectors(q,
			p->max_write_zeroes_sectors);
	blk_queue_max_discard_segments(q, p->max_discard_segments);
}
0223
0224 static int ublk_validate_params(const struct ublk_device *ub)
0225 {
0226
0227 if (ub->params.types & UBLK_PARAM_TYPE_BASIC) {
0228 const struct ublk_param_basic *p = &ub->params.basic;
0229
0230 if (p->logical_bs_shift > PAGE_SHIFT)
0231 return -EINVAL;
0232
0233 if (p->logical_bs_shift > p->physical_bs_shift)
0234 return -EINVAL;
0235
0236 if (p->max_sectors > (ub->dev_info.max_io_buf_bytes >> 9))
0237 return -EINVAL;
0238 } else
0239 return -EINVAL;
0240
0241 if (ub->params.types & UBLK_PARAM_TYPE_DISCARD) {
0242 const struct ublk_param_discard *p = &ub->params.discard;
0243
0244
0245 if (p->max_discard_sectors && p->max_discard_segments != 1)
0246 return -EINVAL;
0247
0248 if (!p->discard_granularity)
0249 return -EINVAL;
0250 }
0251
0252 return 0;
0253 }
0254
0255 static int ublk_apply_params(struct ublk_device *ub)
0256 {
0257 if (!(ub->params.types & UBLK_PARAM_TYPE_BASIC))
0258 return -EINVAL;
0259
0260 ublk_dev_param_basic_apply(ub);
0261
0262 if (ub->params.types & UBLK_PARAM_TYPE_DISCARD)
0263 ublk_dev_param_discard_apply(ub);
0264
0265 return 0;
0266 }
0267
0268 static inline bool ublk_can_use_task_work(const struct ublk_queue *ubq)
0269 {
0270 if (IS_BUILTIN(CONFIG_BLK_DEV_UBLK) &&
0271 !(ubq->flags & UBLK_F_URING_CMD_COMP_IN_TASK))
0272 return true;
0273 return false;
0274 }
0275
0276 static inline bool ublk_need_get_data(const struct ublk_queue *ubq)
0277 {
0278 if (ubq->flags & UBLK_F_NEED_GET_DATA)
0279 return true;
0280 return false;
0281 }
0282
/* Grab a reference on the device; returns NULL if it is already dying. */
static struct ublk_device *ublk_get_device(struct ublk_device *ub)
{
	if (kobject_get_unless_zero(&ub->cdev_dev.kobj))
		return ub;
	return NULL;
}
0289
/* Drop a reference taken via ublk_get_device(). */
static void ublk_put_device(struct ublk_device *ub)
{
	put_device(&ub->cdev_dev);
}
0294
/* Return queue qid from the flat per-queue allocation array. */
static inline struct ublk_queue *ublk_get_queue(struct ublk_device *dev,
		int qid)
{
	return (struct ublk_queue *)&(dev->__queues[qid * dev->queue_size]);
}
0300
/* True when the request carries a payload that needs copying. */
static inline bool ublk_rq_has_data(const struct request *rq)
{
	return rq->bio && bio_has_data(rq->bio);
}
0305
/* Return the io descriptor slot for @tag inside the queue's cmd buffer. */
static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq,
		int tag)
{
	return (struct ublksrv_io_desc *)
		&(ubq->io_cmd_buf[tag * sizeof(struct ublksrv_io_desc)]);
}
0312
/* The io descriptor buffer that gets mmap'ed to the server for @q_id. */
static inline char *ublk_queue_cmd_buf(struct ublk_device *ub, int q_id)
{
	return ublk_get_queue(ub, q_id)->io_cmd_buf;
}
0317
/* Size of the per-queue io descriptor buffer, page-aligned for mmap. */
static inline int ublk_queue_cmd_buf_size(struct ublk_device *ub, int q_id)
{
	struct ublk_queue *ubq = ublk_get_queue(ub, q_id);

	return round_up(ubq->q_depth * sizeof(struct ublksrv_io_desc),
			PAGE_SIZE);
}
0325
0326 static void ublk_free_disk(struct gendisk *disk)
0327 {
0328 struct ublk_device *ub = disk->private_data;
0329
0330 clear_bit(UB_STATE_USED, &ub->state);
0331 put_device(&ub->cdev_dev);
0332 }
0333
/* block_device_operations for the ublk block device (ublkb*). */
static const struct block_device_operations ub_fops = {
	.owner =	THIS_MODULE,
	.free_disk =	ublk_free_disk,
};
0338
0339 #define UBLK_MAX_PIN_PAGES 32
0340
/* Context for copying between a request's bios and the server buffer. */
struct ublk_map_data {
	const struct ublk_queue *ubq;
	const struct request *rq;
	const struct ublk_io *io;	/* provides the user buffer address */
	unsigned max_bytes;		/* remaining bytes to copy */
};
0347
/* Iteration state over a batch of pinned user pages and the bio chain. */
struct ublk_io_iter {
	struct page *pages[UBLK_MAX_PIN_PAGES];	/* current pinned batch */
	unsigned pg_off;	/* offset into the current page */
	int nr_pages;		/* pages pinned in this batch */
	struct bio *bio;	/* current bio in the request chain */
	struct bvec_iter iter;	/* position inside the current bio */
};
0355
/*
 * Copy between the pinned user pages in @data and the request's bio
 * vectors, advancing both sides in lockstep.  @to_vm selects direction:
 * true copies bio data into the user pages, false the other way.
 * Returns the number of bytes actually copied.
 */
static inline unsigned ublk_copy_io_pages(struct ublk_io_iter *data,
		unsigned max_bytes, bool to_vm)
{
	/* cap at what the pinned batch can hold, accounting for pg_off */
	const unsigned total = min_t(unsigned, max_bytes,
			PAGE_SIZE - data->pg_off +
			((data->nr_pages - 1) << PAGE_SHIFT));
	unsigned done = 0;
	unsigned pg_idx = 0;

	while (done < total) {
		struct bio_vec bv = bio_iter_iovec(data->bio, data->iter);
		/* copy no more than one bvec / one page-remainder at a time */
		const unsigned int bytes = min3(bv.bv_len, total - done,
				(unsigned)(PAGE_SIZE - data->pg_off));
		void *bv_buf = bvec_kmap_local(&bv);
		void *pg_buf = kmap_local_page(data->pages[pg_idx]);

		if (to_vm)
			memcpy(pg_buf + data->pg_off, bv_buf, bytes);
		else
			memcpy(bv_buf, pg_buf + data->pg_off, bytes);

		kunmap_local(pg_buf);
		kunmap_local(bv_buf);

		/* advance the user-page side */
		data->pg_off += bytes;
		if (data->pg_off == PAGE_SIZE) {
			pg_idx += 1;
			data->pg_off = 0;
		}

		done += bytes;

		/* advance the bio side, moving to the next chained bio */
		bio_advance_iter_single(data->bio, &data->iter, bytes);
		if (!data->iter.bi_size) {
			data->bio = data->bio->bi_next;
			if (data->bio == NULL)
				break;
			data->iter = data->bio->bi_iter;
		}
	}

	return done;
}
0401
/*
 * Pin the server's user buffer in batches of up to UBLK_MAX_PIN_PAGES
 * pages and copy request data to (@to_vm == true) or from it.  Returns
 * the number of pages processed, or a negative value if the very first
 * pin attempt fails.  data->max_bytes is decremented by what was copied.
 */
static inline int ublk_copy_user_pages(struct ublk_map_data *data,
		bool to_vm)
{
	/* when writing into the vm we need FOLL_WRITE on the pages */
	const unsigned int gup_flags = to_vm ? FOLL_WRITE : 0;
	const unsigned long start_vm = data->io->addr;
	unsigned int done = 0;
	struct ublk_io_iter iter = {
		.pg_off	= start_vm & (PAGE_SIZE - 1),
		.bio	= data->rq->bio,
		.iter	= data->rq->bio->bi_iter,
	};
	/* total pages spanned by the (possibly unaligned) buffer */
	const unsigned int nr_pages = round_up(data->max_bytes +
			(start_vm & (PAGE_SIZE - 1)), PAGE_SIZE) >> PAGE_SHIFT;

	while (done < nr_pages) {
		const unsigned to_pin = min_t(unsigned, UBLK_MAX_PIN_PAGES,
				nr_pages - done);
		unsigned i, len;

		iter.nr_pages = get_user_pages_fast(start_vm +
				(done << PAGE_SHIFT), to_pin, gup_flags,
				iter.pages);
		if (iter.nr_pages <= 0)
			return done == 0 ? iter.nr_pages : done;
		len = ublk_copy_io_pages(&iter, data->max_bytes, to_vm);
		for (i = 0; i < iter.nr_pages; i++) {
			/* pages we wrote into must be marked dirty */
			if (to_vm)
				set_page_dirty(iter.pages[i]);
			put_page(iter.pages[i]);
		}
		data->max_bytes -= len;
		done += iter.nr_pages;
	}

	return done;
}
0438
/*
 * Copy request data to the server's buffer before the io is handed over.
 * Only WRITE (and FLUSH, for the NEED_GET_DATA flow) need this; other
 * ops either carry no payload or are filled in by ublk_unmap_io() on
 * completion.  Returns the number of bytes made available to the server.
 */
static int ublk_map_io(const struct ublk_queue *ubq, const struct request *req,
		struct ublk_io *io)
{
	const unsigned int rq_bytes = blk_rq_bytes(req);

	/* no data to push to the server for non-write ops */
	if (req_op(req) != REQ_OP_WRITE && req_op(req) != REQ_OP_FLUSH)
		return rq_bytes;

	if (ublk_rq_has_data(req)) {
		struct ublk_map_data data = {
			.ubq	= ubq,
			.rq	= req,
			.io	= io,
			.max_bytes = rq_bytes,
		};

		ublk_copy_user_pages(&data, true);

		/* max_bytes holds whatever could not be copied */
		return rq_bytes - data.max_bytes;
	}
	return rq_bytes;
}
0465
/*
 * Copy READ payload back from the server's buffer into the request on
 * completion.  io->res (the server's byte count) bounds the copy; it is
 * expected never to exceed the request size.  Returns bytes transferred.
 */
static int ublk_unmap_io(const struct ublk_queue *ubq,
		const struct request *req,
		struct ublk_io *io)
{
	const unsigned int rq_bytes = blk_rq_bytes(req);

	if (req_op(req) == REQ_OP_READ && ublk_rq_has_data(req)) {
		struct ublk_map_data data = {
			.ubq	= ubq,
			.rq	= req,
			.io	= io,
			.max_bytes = io->res,
		};

		WARN_ON_ONCE(io->res > rq_bytes);

		ublk_copy_user_pages(&data, false);

		return io->res - data.max_bytes;
	}
	return rq_bytes;
}
0488
0489 static inline unsigned int ublk_req_build_flags(struct request *req)
0490 {
0491 unsigned flags = 0;
0492
0493 if (req->cmd_flags & REQ_FAILFAST_DEV)
0494 flags |= UBLK_IO_F_FAILFAST_DEV;
0495
0496 if (req->cmd_flags & REQ_FAILFAST_TRANSPORT)
0497 flags |= UBLK_IO_F_FAILFAST_TRANSPORT;
0498
0499 if (req->cmd_flags & REQ_FAILFAST_DRIVER)
0500 flags |= UBLK_IO_F_FAILFAST_DRIVER;
0501
0502 if (req->cmd_flags & REQ_META)
0503 flags |= UBLK_IO_F_META;
0504
0505 if (req->cmd_flags & REQ_FUA)
0506 flags |= UBLK_IO_F_FUA;
0507
0508 if (req->cmd_flags & REQ_NOUNMAP)
0509 flags |= UBLK_IO_F_NOUNMAP;
0510
0511 if (req->cmd_flags & REQ_SWAP)
0512 flags |= UBLK_IO_F_SWAP;
0513
0514 return flags;
0515 }
0516
0517 static blk_status_t ublk_setup_iod(struct ublk_queue *ubq, struct request *req)
0518 {
0519 struct ublksrv_io_desc *iod = ublk_get_iod(ubq, req->tag);
0520 struct ublk_io *io = &ubq->ios[req->tag];
0521 u32 ublk_op;
0522
0523 switch (req_op(req)) {
0524 case REQ_OP_READ:
0525 ublk_op = UBLK_IO_OP_READ;
0526 break;
0527 case REQ_OP_WRITE:
0528 ublk_op = UBLK_IO_OP_WRITE;
0529 break;
0530 case REQ_OP_FLUSH:
0531 ublk_op = UBLK_IO_OP_FLUSH;
0532 break;
0533 case REQ_OP_DISCARD:
0534 ublk_op = UBLK_IO_OP_DISCARD;
0535 break;
0536 case REQ_OP_WRITE_ZEROES:
0537 ublk_op = UBLK_IO_OP_WRITE_ZEROES;
0538 break;
0539 default:
0540 return BLK_STS_IOERR;
0541 }
0542
0543
0544 iod->op_flags = ublk_op | ublk_req_build_flags(req);
0545 iod->nr_sectors = blk_rq_sectors(req);
0546 iod->start_sector = blk_rq_pos(req);
0547 iod->addr = io->addr;
0548
0549 return BLK_STS_OK;
0550 }
0551
/* Access the driver-private pdu area embedded in a uring cmd. */
static inline struct ublk_uring_cmd_pdu *ublk_get_uring_cmd_pdu(
		struct io_uring_cmd *ioucmd)
{
	return (struct ublk_uring_cmd_pdu *)&ioucmd->pdu;
}
0557
0558 static inline bool ubq_daemon_is_dying(struct ublk_queue *ubq)
0559 {
0560 return ubq->ubq_daemon->flags & PF_EXITING;
0561 }
0562
0563
/* Complete the blk-mq request after the server committed its result. */
static void ublk_complete_rq(struct request *req)
{
	struct ublk_queue *ubq = req->mq_hctx->driver_data;
	struct ublk_io *io = &ubq->ios[req->tag];
	unsigned int unmapped_bytes;

	/* a READ that transferred nothing is treated as failed */
	if (!io->res && req_op(req) == REQ_OP_READ)
		io->res = -EIO;

	if (io->res < 0) {
		blk_mq_end_request(req, errno_to_blk_status(io->res));
		return;
	}

	/*
	 * Only READ/WRITE carry payload; everything else with a
	 * non-negative result completes successfully right away.
	 */
	if (req_op(req) != REQ_OP_READ && req_op(req) != REQ_OP_WRITE) {
		blk_mq_end_request(req, BLK_STS_OK);
		return;
	}

	/* for READ, copy the server's data back into the request */
	unmapped_bytes = ublk_unmap_io(ubq, req, io);

	/*
	 * Clamp the result to what was actually copied back, so a partial
	 * completion below requeues only the untransferred remainder.
	 */
	if (unlikely(unmapped_bytes < io->res))
		io->res = unmapped_bytes;

	if (blk_update_request(req, BLK_STS_OK, io->res))
		blk_mq_requeue_request(req, true);
	else
		__blk_mq_end_request(req, BLK_STS_OK);
}
0606
0607
0608
0609
0610
0611
0612
0613
0614
/*
 * Fail one request during queue abort.  The ABORTED flag guards against
 * ending the same request twice; only non-ACTIVE ios may reach here.
 */
static void __ublk_fail_req(struct ublk_io *io, struct request *req)
{
	WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_ACTIVE);

	if (!(io->flags & UBLK_IO_FLAG_ABORTED)) {
		io->flags |= UBLK_IO_FLAG_ABORTED;
		blk_mq_end_request(req, BLK_STS_IOERR);
	}
}
0624
/* Hand the io over to the server by completing its pending uring cmd. */
static void ubq_complete_io_cmd(struct ublk_io *io, int res)
{
	/* mark this io as owned by the server from now on */
	io->flags |= UBLK_IO_FLAG_OWNED_BY_SRV;

	/*
	 * Clear ACTIVE: the uring cmd is being consumed now, and a new
	 * one must be issued before this tag can be used again.
	 */
	io->flags &= ~UBLK_IO_FLAG_ACTIVE;

	/* tell the server the io is ready (or needs data / is aborted) */
	io_uring_cmd_done(io->cmd, res, 0);
}
0639
0640 #define UBLK_REQUEUE_DELAY_MS 3
0641
/*
 * Run io handling for one request in the queue daemon's task context:
 * optionally run the NEED_GET_DATA round trip, map WRITE data into the
 * server buffer, then complete the pending uring cmd so the server can
 * process the io.
 */
static inline void __ublk_rq_task_work(struct request *req)
{
	struct ublk_queue *ubq = req->mq_hctx->driver_data;
	struct ublk_device *ub = ubq->dev;
	int tag = req->tag;
	struct ublk_io *io = &ubq->ios[tag];
	/* we must be running in the daemon task, and it must be alive */
	bool task_exiting = current != ubq->ubq_daemon || ubq_daemon_is_dying(ubq);
	unsigned int mapped_bytes;

	pr_devel("%s: complete: op %d, qid %d tag %d io_flags %x addr %llx\n",
			__func__, io->cmd->cmd_op, ubq->q_id, req->tag, io->flags,
			ublk_get_iod(ubq, req->tag)->addr);

	if (unlikely(task_exiting)) {
		/* fail the io and let the monitor clean up the queue */
		blk_mq_end_request(req, BLK_STS_IOERR);
		mod_delayed_work(system_wq, &ub->monitor_work, 0);
		return;
	}

	if (ublk_need_get_data(ubq) &&
			(req_op(req) == REQ_OP_WRITE ||
			req_op(req) == REQ_OP_FLUSH)) {
		/*
		 * First pass: ask the server for a data buffer via
		 * UBLK_IO_RES_NEED_GET_DATA; we come back here once it
		 * issues UBLK_IO_NEED_GET_DATA with the buffer address.
		 */
		if (!(io->flags & UBLK_IO_FLAG_NEED_GET_DATA)) {
			io->flags |= UBLK_IO_FLAG_NEED_GET_DATA;
			pr_devel("%s: need get data. op %d, qid %d tag %d io_flags %x\n",
					__func__, io->cmd->cmd_op, ubq->q_id,
					req->tag, io->flags);
			ubq_complete_io_cmd(io, UBLK_IO_RES_NEED_GET_DATA);
			return;
		}

		/*
		 * Second pass: the server provided a buffer; publish the
		 * (possibly updated) address in the io descriptor.
		 */
		io->flags &= ~UBLK_IO_FLAG_NEED_GET_DATA;

		ublk_get_iod(ubq, req->tag)->addr = io->addr;
		pr_devel("%s: update iod->addr: op %d, qid %d tag %d io_flags %x addr %llx\n",
				__func__, io->cmd->cmd_op, ubq->q_id, req->tag, io->flags,
				ublk_get_iod(ubq, req->tag)->addr);
	}

	mapped_bytes = ublk_map_io(ubq, req, io);

	/* partially mapped, update io descriptor */
	if (unlikely(mapped_bytes != blk_rq_bytes(req))) {
		/*
		 * Nothing mapped usually means the server buffer was
		 * unpinned/unavailable momentarily; requeue and retry
		 * shortly instead of failing the io.
		 */
		if (unlikely(!mapped_bytes)) {
			blk_mq_requeue_request(req, false);
			blk_mq_delay_kick_requeue_list(req->q,
					UBLK_REQUEUE_DELAY_MS);
			return;
		}

		/* shrink the io so the server only sees mapped bytes */
		ublk_get_iod(ubq, req->tag)->nr_sectors =
			mapped_bytes >> 9;
	}

	ubq_complete_io_cmd(io, UBLK_IO_RES_OK);
}
0714
/* io_uring task-context callback path into __ublk_rq_task_work(). */
static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd)
{
	struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);

	__ublk_rq_task_work(pdu->req);
}
0721
/* task_work path into __ublk_rq_task_work() (built-in driver only). */
static void ublk_rq_task_work_fn(struct callback_head *work)
{
	struct ublk_rq_data *data = container_of(work,
			struct ublk_rq_data, work);
	struct request *req = blk_mq_rq_from_pdu(data);

	__ublk_rq_task_work(req);
}
0730
/*
 * blk-mq ->queue_rq: publish the io descriptor, then kick handling over
 * to the queue daemon task, either via task_work (built-in driver) or
 * by completing the pending uring cmd in task context.
 */
static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
		const struct blk_mq_queue_data *bd)
{
	struct ublk_queue *ubq = hctx->driver_data;
	struct request *rq = bd->rq;
	blk_status_t res;

	/* fill iod to slot in io cmd buffer */
	res = ublk_setup_iod(ubq, rq);
	if (unlikely(res != BLK_STS_OK))
		return BLK_STS_IOERR;

	blk_mq_start_request(bd->rq);

	if (unlikely(ubq_daemon_is_dying(ubq))) {
 fail:
		/* daemon is gone: fail fast and wake the monitor */
		mod_delayed_work(system_wq, &ubq->dev->monitor_work, 0);
		return BLK_STS_IOERR;
	}

	if (ublk_can_use_task_work(ubq)) {
		struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
		/* only signal the daemon on the last request of a batch */
		enum task_work_notify_mode notify_mode = bd->last ?
			TWA_SIGNAL_NO_IPI : TWA_NONE;

		if (task_work_add(ubq->ubq_daemon, &data->work, notify_mode))
			goto fail;
	} else {
		struct ublk_io *io = &ubq->ios[rq->tag];
		struct io_uring_cmd *cmd = io->cmd;
		struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);

		/*
		 * If the io's uring cmd was already aborted, the cmd can't
		 * be reused to reach the daemon task; fail the request.
		 */
		if ((io->flags & UBLK_IO_FLAG_ABORTED))
			goto fail;

		pdu->req = rq;
		io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb);
	}

	return BLK_STS_OK;
}
0784
/*
 * blk-mq ->commit_rqs: wake the daemon for task_work queued with
 * TWA_NONE when the batch ends without a bd->last request.
 */
static void ublk_commit_rqs(struct blk_mq_hw_ctx *hctx)
{
	struct ublk_queue *ubq = hctx->driver_data;

	if (ublk_can_use_task_work(ubq))
		__set_notify_signal(ubq->ubq_daemon);
}
0792
/* blk-mq ->init_hctx: link the hw context to its ublk queue. */
static int ublk_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
		unsigned int hctx_idx)
{
	struct ublk_device *ub = driver_data;
	struct ublk_queue *ubq = ublk_get_queue(ub, hctx->queue_num);

	hctx->driver_data = ubq;
	return 0;
}
0802
/* blk-mq ->init_request: prepare the per-request task_work item. */
static int ublk_init_rq(struct blk_mq_tag_set *set, struct request *req,
		unsigned int hctx_idx, unsigned int numa_node)
{
	struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);

	init_task_work(&data->work, ublk_rq_task_work_fn);
	return 0;
}
0811
/* blk-mq operations for the ublk block device. */
static const struct blk_mq_ops ublk_mq_ops = {
	.queue_rq       = ublk_queue_rq,
	.commit_rqs     = ublk_commit_rqs,
	.init_hctx	= ublk_init_hctx,
	.init_request   = ublk_init_rq,
};
0818
/* Char device open: only one opener (the server) is allowed at a time. */
static int ublk_ch_open(struct inode *inode, struct file *filp)
{
	struct ublk_device *ub = container_of(inode->i_cdev,
			struct ublk_device, cdev);

	if (test_and_set_bit(UB_STATE_OPEN, &ub->state))
		return -EBUSY;
	filp->private_data = ub;
	return 0;
}
0829
/* Char device release: allow a new server to open the device. */
static int ublk_ch_release(struct inode *inode, struct file *filp)
{
	struct ublk_device *ub = filp->private_data;

	clear_bit(UB_STATE_OPEN, &ub->state);
	return 0;
}
0837
0838
/*
 * Map the read-only io descriptor buffer of one queue into the server.
 * The vm_pgoff encodes which queue is requested, with queues laid out at
 * a fixed max-depth stride starting at UBLKSRV_CMD_BUF_OFFSET.  Only one
 * mm (the first to mmap) may ever map these buffers.
 */
static int ublk_ch_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct ublk_device *ub = filp->private_data;
	size_t sz = vma->vm_end - vma->vm_start;
	/* per-queue stride uses the max depth, not the actual depth */
	unsigned max_sz = UBLK_MAX_QUEUE_DEPTH * sizeof(struct ublksrv_io_desc);
	unsigned long pfn, end, phys_off = vma->vm_pgoff << PAGE_SHIFT;
	int q_id, ret = 0;

	spin_lock(&ub->mm_lock);
	if (!ub->mm)
		ub->mm = current->mm;
	if (current->mm != ub->mm)
		ret = -EINVAL;
	spin_unlock(&ub->mm_lock);

	if (ret)
		return ret;

	/* the descriptor buffer is read-only to the server */
	if (vma->vm_flags & VM_WRITE)
		return -EPERM;

	end = UBLKSRV_CMD_BUF_OFFSET + ub->dev_info.nr_hw_queues * max_sz;
	if (phys_off < UBLKSRV_CMD_BUF_OFFSET || phys_off >= end)
		return -EINVAL;

	q_id = (phys_off - UBLKSRV_CMD_BUF_OFFSET) / max_sz;
	pr_devel("%s: qid %d, pid %d, addr %lx pg_off %lx sz %lu\n",
			__func__, q_id, current->pid, vma->vm_start,
			phys_off, (unsigned long)sz);

	/* must map exactly the queue's whole cmd buffer */
	if (sz != ublk_queue_cmd_buf_size(ub, q_id))
		return -EINVAL;

	pfn = virt_to_phys(ublk_queue_cmd_buf(ub, q_id)) >> PAGE_SHIFT;
	return remap_pfn_range(vma, vma->vm_start, pfn, sz, vma->vm_page_prot);
}
0875
/* Record the server's io result and complete the matching request. */
static void ublk_commit_completion(struct ublk_device *ub,
		struct ublksrv_io_cmd *ub_cmd)
{
	u32 qid = ub_cmd->q_id, tag = ub_cmd->tag;
	struct ublk_queue *ubq = ublk_get_queue(ub, qid);
	struct ublk_io *io = &ubq->ios[tag];
	struct request *req;

	/* the server is done with this io; take ownership back */
	io->flags &= ~UBLK_IO_FLAG_OWNED_BY_SRV;
	io->res = ub_cmd->result;

	/* find the io request and complete */
	req = blk_mq_tag_to_rq(ub->tag_set.tags[qid], tag);

	if (req && likely(!blk_should_fake_timeout(req->q)))
		ublk_complete_rq(req);
}
0894
0895
0896
0897
0898
0899
/*
 * Abort all ios of a queue whose daemon is dying: every io that is not
 * ACTIVE (i.e. has no uring cmd pending to cancel) is failed if it has a
 * started request.  Holds a device reference across the scan.
 */
static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq)
{
	int i;

	if (!ublk_get_device(ub))
		return;

	for (i = 0; i < ubq->q_depth; i++) {
		struct ublk_io *io = &ubq->ios[i];

		if (!(io->flags & UBLK_IO_FLAG_ACTIVE)) {
			struct request *rq;

			/*
			 * Either this io never got a fetched cmd, or its
			 * cmd was consumed already; fail the request if
			 * one is in flight for this tag.
			 */
			rq = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], i);
			if (rq)
				__ublk_fail_req(io, rq);
		}
	}
	ublk_put_device(ub);
}
0924
/*
 * Periodic watchdog: if any queue daemon is dying, schedule device stop
 * and abort that queue's ios.  Re-arms itself while the device is not
 * dead; ublk_stop_dev() cancels it after marking the device DEAD.
 */
static void ublk_daemon_monitor_work(struct work_struct *work)
{
	struct ublk_device *ub =
		container_of(work, struct ublk_device, monitor_work.work);
	int i;

	for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
		struct ublk_queue *ubq = ublk_get_queue(ub, i);

		if (ubq_daemon_is_dying(ubq)) {
			schedule_work(&ub->stop_work);

			/* abort queue immediately for making forward progress */
			ublk_abort_queue(ub, ubq);
		}
	}

	/* keep monitoring until the device has been fully stopped */
	if (ub->dev_info.state != UBLK_S_DEV_DEAD)
		schedule_delayed_work(&ub->monitor_work,
				UBLK_DAEMON_MONITOR_PERIOD);
}
0952
0953 static inline bool ublk_queue_ready(struct ublk_queue *ubq)
0954 {
0955 return ubq->nr_io_ready == ubq->q_depth;
0956 }
0957
/* Cancel every pending (ACTIVE) uring cmd of a ready queue. */
static void ublk_cancel_queue(struct ublk_queue *ubq)
{
	int i;

	if (!ublk_queue_ready(ubq))
		return;

	for (i = 0; i < ubq->q_depth; i++) {
		struct ublk_io *io = &ubq->ios[i];

		if (io->flags & UBLK_IO_FLAG_ACTIVE)
			io_uring_cmd_done(io->cmd, UBLK_IO_RES_ABORT, 0);
	}

	/* all io commands are canceled: queue needs a fresh FETCH round */
	ubq->nr_io_ready = 0;
}
0975
0976
/* Cancel all pending io commands on every queue of the device. */
static void ublk_cancel_dev(struct ublk_device *ub)
{
	int i;

	for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
		ublk_cancel_queue(ublk_get_queue(ub, i));
}
0984
/*
 * Tear down the live disk, mark the device dead, cancel outstanding io
 * commands and stop the monitor.  Safe to call when already stopped.
 */
static void ublk_stop_dev(struct ublk_device *ub)
{
	mutex_lock(&ub->mutex);
	if (ub->dev_info.state != UBLK_S_DEV_LIVE)
		goto unlock;

	del_gendisk(ub->ub_disk);
	ub->dev_info.state = UBLK_S_DEV_DEAD;
	ub->dev_info.ublksrv_pid = -1;
	put_disk(ub->ub_disk);
	ub->ub_disk = NULL;
 unlock:
	/* cancel cmds even if the disk was already gone */
	ublk_cancel_dev(ub);
	mutex_unlock(&ub->mutex);
	cancel_delayed_work_sync(&ub->monitor_work);
}
1001
1002
/*
 * Account one fetched io; when the queue becomes fully ready, pin the
 * current task as its daemon, and once all queues are ready, wake up
 * the waiter in the START_DEV path.
 */
static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
{
	mutex_lock(&ub->mutex);
	ubq->nr_io_ready++;
	if (ublk_queue_ready(ubq)) {
		ubq->ubq_daemon = current;
		get_task_struct(ubq->ubq_daemon);
		ub->nr_queues_ready++;
	}
	if (ub->nr_queues_ready == ub->dev_info.nr_hw_queues)
		complete_all(&ub->completion);
	mutex_unlock(&ub->mutex);
}
1016
/*
 * Resume handling of a request after the server answered the
 * NEED_GET_DATA round trip with a buffer, re-entering
 * __ublk_rq_task_work() in the daemon task context.
 */
static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id,
		int tag, struct io_uring_cmd *cmd)
{
	struct ublk_queue *ubq = ublk_get_queue(ub, q_id);
	struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag);

	if (ublk_can_use_task_work(ubq)) {
		struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);

		/* should not fail since we call it just in ubq->ubq_daemon */
		task_work_add(ubq->ubq_daemon, &data->work, TWA_SIGNAL_NO_IPI);
	} else {
		struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);

		pdu->req = req;
		io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb);
	}
}
1035
1036 static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
1037 {
1038 struct ublksrv_io_cmd *ub_cmd = (struct ublksrv_io_cmd *)cmd->cmd;
1039 struct ublk_device *ub = cmd->file->private_data;
1040 struct ublk_queue *ubq;
1041 struct ublk_io *io;
1042 u32 cmd_op = cmd->cmd_op;
1043 unsigned tag = ub_cmd->tag;
1044 int ret = -EINVAL;
1045
1046 pr_devel("%s: received: cmd op %d queue %d tag %d result %d\n",
1047 __func__, cmd->cmd_op, ub_cmd->q_id, tag,
1048 ub_cmd->result);
1049
1050 if (!(issue_flags & IO_URING_F_SQE128))
1051 goto out;
1052
1053 if (ub_cmd->q_id >= ub->dev_info.nr_hw_queues)
1054 goto out;
1055
1056 ubq = ublk_get_queue(ub, ub_cmd->q_id);
1057 if (!ubq || ub_cmd->q_id != ubq->q_id)
1058 goto out;
1059
1060 if (ubq->ubq_daemon && ubq->ubq_daemon != current)
1061 goto out;
1062
1063 if (tag >= ubq->q_depth)
1064 goto out;
1065
1066 io = &ubq->ios[tag];
1067
1068
1069 if (io->flags & UBLK_IO_FLAG_ACTIVE) {
1070 ret = -EBUSY;
1071 goto out;
1072 }
1073
1074
1075
1076
1077
1078 if ((!!(io->flags & UBLK_IO_FLAG_NEED_GET_DATA))
1079 ^ (cmd_op == UBLK_IO_NEED_GET_DATA))
1080 goto out;
1081
1082 switch (cmd_op) {
1083 case UBLK_IO_FETCH_REQ:
1084
1085 if (ublk_queue_ready(ubq)) {
1086 ret = -EBUSY;
1087 goto out;
1088 }
1089
1090
1091
1092
1093 if (io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)
1094 goto out;
1095
1096 if (!ub_cmd->addr)
1097 goto out;
1098 io->cmd = cmd;
1099 io->flags |= UBLK_IO_FLAG_ACTIVE;
1100 io->addr = ub_cmd->addr;
1101
1102 ublk_mark_io_ready(ub, ubq);
1103 break;
1104 case UBLK_IO_COMMIT_AND_FETCH_REQ:
1105
1106 if (!ub_cmd->addr)
1107 goto out;
1108 if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
1109 goto out;
1110 io->addr = ub_cmd->addr;
1111 io->flags |= UBLK_IO_FLAG_ACTIVE;
1112 io->cmd = cmd;
1113 ublk_commit_completion(ub, ub_cmd);
1114 break;
1115 case UBLK_IO_NEED_GET_DATA:
1116 if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
1117 goto out;
1118 io->addr = ub_cmd->addr;
1119 io->cmd = cmd;
1120 io->flags |= UBLK_IO_FLAG_ACTIVE;
1121 ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag, cmd);
1122 break;
1123 default:
1124 goto out;
1125 }
1126 return -EIOCBQUEUED;
1127
1128 out:
1129 io_uring_cmd_done(cmd, ret, 0);
1130 pr_devel("%s: complete: cmd op %d, tag %d ret %x io_flags %x\n",
1131 __func__, cmd_op, tag, ret, io->flags);
1132 return -EIOCBQUEUED;
1133 }
1134
/* File operations for the ublk char device (ublkc*). */
static const struct file_operations ublk_ch_fops = {
	.owner = THIS_MODULE,
	.open = ublk_ch_open,
	.release = ublk_ch_release,
	.llseek = no_llseek,
	.uring_cmd = ublk_ch_uring_cmd,
	.mmap = ublk_ch_mmap,
};
1143
/* Release one queue: drop the daemon task ref and free the cmd buffer. */
static void ublk_deinit_queue(struct ublk_device *ub, int q_id)
{
	int size = ublk_queue_cmd_buf_size(ub, q_id);
	struct ublk_queue *ubq = ublk_get_queue(ub, q_id);

	if (ubq->ubq_daemon)
		put_task_struct(ubq->ubq_daemon);
	if (ubq->io_cmd_buf)
		free_pages((unsigned long)ubq->io_cmd_buf, get_order(size));
}
1154
/*
 * Initialize one queue: copy flags/depth from dev_info and allocate the
 * zeroed, page-aligned io descriptor buffer that will be mmap'ed to the
 * server.  Returns 0 or -ENOMEM.
 */
static int ublk_init_queue(struct ublk_device *ub, int q_id)
{
	struct ublk_queue *ubq = ublk_get_queue(ub, q_id);
	gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO;
	void *ptr;
	int size;

	ubq->flags = ub->dev_info.flags;
	ubq->q_id = q_id;
	ubq->q_depth = ub->dev_info.queue_depth;
	size = ublk_queue_cmd_buf_size(ub, q_id);

	ptr = (void *) __get_free_pages(gfp_flags, get_order(size));
	if (!ptr)
		return -ENOMEM;

	ubq->io_cmd_buf = ptr;
	ubq->dev = ub;
	return 0;
}
1175
/* Tear down all queues and free the flat queue allocation. */
static void ublk_deinit_queues(struct ublk_device *ub)
{
	int nr_queues = ub->dev_info.nr_hw_queues;
	int i;

	if (!ub->__queues)
		return;

	for (i = 0; i < nr_queues; i++)
		ublk_deinit_queue(ub, i);
	kfree(ub->__queues);
}
1188
/*
 * Allocate and initialize all queues.  Each queue allocation is the
 * struct plus a depth-sized ios[] tail; queue_size records the stride
 * used by ublk_get_queue().  Returns 0 or -ENOMEM.
 */
static int ublk_init_queues(struct ublk_device *ub)
{
	int nr_queues = ub->dev_info.nr_hw_queues;
	int depth = ub->dev_info.queue_depth;
	int ubq_size = sizeof(struct ublk_queue) + depth * sizeof(struct ublk_io);
	int i, ret = -ENOMEM;

	ub->queue_size = ubq_size;
	ub->__queues = kcalloc(nr_queues, ubq_size, GFP_KERNEL);
	if (!ub->__queues)
		return ret;

	for (i = 0; i < nr_queues; i++) {
		if (ublk_init_queue(ub, i))
			goto fail;
	}

	init_completion(&ub->completion);
	return 0;

 fail:
	ublk_deinit_queues(ub);
	return ret;
}
1213
/*
 * Allocate a device index via the idr: a non-negative @idx requests
 * exactly that index (-EEXIST when taken), a negative one means "any".
 * On success stores the number in ub and returns it.
 */
static int ublk_alloc_dev_number(struct ublk_device *ub, int idx)
{
	int i = idx;
	int err;

	spin_lock(&ublk_idr_lock);
	/* allocate id, if @id >= 0, we're requesting that specific id */
	if (i >= 0) {
		err = idr_alloc(&ublk_index_idr, ub, i, i + 1, GFP_NOWAIT);
		if (err == -ENOSPC)
			err = -EEXIST;
	} else {
		err = idr_alloc(&ublk_index_idr, ub, 0, 0, GFP_NOWAIT);
	}
	spin_unlock(&ublk_idr_lock);

	if (err >= 0)
		ub->ub_number = err;

	return err;
}
1235
/* Return the device index to the idr and wake anyone waiting on it. */
static void ublk_free_dev_number(struct ublk_device *ub)
{
	spin_lock(&ublk_idr_lock);
	idr_remove(&ublk_index_idr, ub->ub_number);
	wake_up_all(&ublk_idr_wq);
	spin_unlock(&ublk_idr_lock);
}
1243
/* Device release callback: final teardown once the last ref is dropped. */
static void ublk_cdev_rel(struct device *dev)
{
	struct ublk_device *ub = container_of(dev, struct ublk_device, cdev_dev);

	blk_mq_free_tag_set(&ub->tag_set);
	ublk_deinit_queues(ub);
	ublk_free_dev_number(ub);
	mutex_destroy(&ub->mutex);
	kfree(ub);
}
1254
/*
 * Register the per-device char device (ublkc<N>).  On failure the
 * put_device() releases everything via ublk_cdev_rel().
 */
static int ublk_add_chdev(struct ublk_device *ub)
{
	struct device *dev = &ub->cdev_dev;
	int minor = ub->ub_number;
	int ret;

	dev->parent = ublk_misc.this_device;
	dev->devt = MKDEV(MAJOR(ublk_chr_devt), minor);
	dev->class = ublk_chr_class;
	dev->release = ublk_cdev_rel;
	device_initialize(dev);

	ret = dev_set_name(dev, "ublkc%d", minor);
	if (ret)
		goto fail;

	cdev_init(&ub->cdev, &ublk_ch_fops);
	ret = cdev_device_add(&ub->cdev, dev);
	if (ret)
		goto fail;
	return 0;
 fail:
	put_device(dev);
	return ret;
}
1280
/* Deferred device stop, scheduled by the daemon monitor. */
static void ublk_stop_work_fn(struct work_struct *work)
{
	struct ublk_device *ub =
		container_of(work, struct ublk_device, stop_work);

	ublk_stop_dev(ub);
}
1288
1289
1290 static void ublk_align_max_io_size(struct ublk_device *ub)
1291 {
1292 unsigned int max_io_bytes = ub->dev_info.max_io_buf_bytes;
1293
1294 ub->dev_info.max_io_buf_bytes =
1295 round_down(max_io_bytes, PAGE_SIZE);
1296 }
1297
1298 static int ublk_add_tag_set(struct ublk_device *ub)
1299 {
1300 ub->tag_set.ops = &ublk_mq_ops;
1301 ub->tag_set.nr_hw_queues = ub->dev_info.nr_hw_queues;
1302 ub->tag_set.queue_depth = ub->dev_info.queue_depth;
1303 ub->tag_set.numa_node = NUMA_NO_NODE;
1304 ub->tag_set.cmd_size = sizeof(struct ublk_rq_data);
1305 ub->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
1306 ub->tag_set.driver_data = ub;
1307 return blk_mq_alloc_tag_set(&ub->tag_set);
1308 }
1309
/*
 * Tear down one device: stop it, make sure any queued stop work has
 * finished, unregister the char device, then drop the reference that
 * keeps the device structure alive (frees it via ublk_cdev_rel() once
 * the refcount hits zero).
 */
static void ublk_remove(struct ublk_device *ub)
{
	ublk_stop_dev(ub);
	cancel_work_sync(&ub->stop_work);
	cdev_device_del(&ub->cdev, &ub->cdev_dev);
	put_device(&ub->cdev_dev);
}
1317
1318 static struct ublk_device *ublk_get_device_from_id(int idx)
1319 {
1320 struct ublk_device *ub = NULL;
1321
1322 if (idx < 0)
1323 return NULL;
1324
1325 spin_lock(&ublk_idr_lock);
1326 ub = idr_find(&ublk_index_idr, idx);
1327 if (ub)
1328 ub = ublk_get_device(ub);
1329 spin_unlock(&ublk_idr_lock);
1330
1331 return ub;
1332 }
1333
1334 static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd)
1335 {
1336 struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
1337 int ublksrv_pid = (int)header->data[0];
1338 struct ublk_device *ub;
1339 struct gendisk *disk;
1340 int ret = -EINVAL;
1341
1342 if (ublksrv_pid <= 0)
1343 return -EINVAL;
1344
1345 ub = ublk_get_device_from_id(header->dev_id);
1346 if (!ub)
1347 return -EINVAL;
1348
1349 wait_for_completion_interruptible(&ub->completion);
1350
1351 schedule_delayed_work(&ub->monitor_work, UBLK_DAEMON_MONITOR_PERIOD);
1352
1353 mutex_lock(&ub->mutex);
1354 if (ub->dev_info.state == UBLK_S_DEV_LIVE ||
1355 test_bit(UB_STATE_USED, &ub->state)) {
1356 ret = -EEXIST;
1357 goto out_unlock;
1358 }
1359
1360 disk = blk_mq_alloc_disk(&ub->tag_set, ub);
1361 if (IS_ERR(disk)) {
1362 ret = PTR_ERR(disk);
1363 goto out_unlock;
1364 }
1365 sprintf(disk->disk_name, "ublkb%d", ub->ub_number);
1366 disk->fops = &ub_fops;
1367 disk->private_data = ub;
1368
1369 ub->dev_info.ublksrv_pid = ublksrv_pid;
1370 ub->ub_disk = disk;
1371
1372 ret = ublk_apply_params(ub);
1373 if (ret)
1374 goto out_put_disk;
1375
1376 get_device(&ub->cdev_dev);
1377 ret = add_disk(disk);
1378 if (ret) {
1379
1380
1381
1382
1383 ublk_put_device(ub);
1384 goto out_put_disk;
1385 }
1386 set_bit(UB_STATE_USED, &ub->state);
1387 ub->dev_info.state = UBLK_S_DEV_LIVE;
1388 out_put_disk:
1389 if (ret)
1390 put_disk(disk);
1391 out_unlock:
1392 mutex_unlock(&ub->mutex);
1393 ublk_put_device(ub);
1394 return ret;
1395 }
1396
/*
 * UBLK_CMD_GET_QUEUE_AFFINITY: copy to userspace a cpumask describing
 * which CPUs blk-mq mapped to the hw queue given in header->data[0].
 */
static int ublk_ctrl_get_queue_affinity(struct io_uring_cmd *cmd)
{
	struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
	void __user *argp = (void __user *)(unsigned long)header->addr;
	struct ublk_device *ub;
	cpumask_var_t cpumask;
	unsigned long queue;
	unsigned int retlen;
	unsigned int i;
	int ret = -EINVAL;

	/* user buffer must hold at least one bit per possible CPU ... */
	if (header->len * BITS_PER_BYTE < nr_cpu_ids)
		return -EINVAL;
	/* ... and be a whole number of longs, like a kernel cpumask */
	if (header->len & (sizeof(unsigned long)-1))
		return -EINVAL;
	if (!header->addr)
		return -EINVAL;

	ub = ublk_get_device_from_id(header->dev_id);
	if (!ub)
		return -EINVAL;

	queue = header->data[0];
	if (queue >= ub->dev_info.nr_hw_queues)
		goto out_put_device;

	ret = -ENOMEM;
	if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL))
		goto out_put_device;

	/* collect every CPU routed to this hw queue by the default map */
	for_each_possible_cpu(i) {
		if (ub->tag_set.map[HCTX_TYPE_DEFAULT].mq_map[i] == queue)
			cpumask_set_cpu(i, cpumask);
	}

	ret = -EFAULT;
	retlen = min_t(unsigned short, header->len, cpumask_size());
	if (copy_to_user(argp, cpumask, retlen))
		goto out_free_cpumask;
	/* zero the tail of an over-sized user buffer */
	if (retlen != header->len &&
	    clear_user(argp + retlen, header->len - retlen))
		goto out_free_cpumask;

	ret = 0;
out_free_cpumask:
	free_cpumask_var(cpumask);
out_put_device:
	ublk_put_device(ub);
	return ret;
}
1447
/* Debug helper: log the key fields of an incoming ADD_DEV request. */
static inline void ublk_dump_dev_info(struct ublksrv_ctrl_dev_info *info)
{
	pr_devel("%s: dev id %d flags %llx\n", __func__,
			info->dev_id, info->flags);
	pr_devel("\t nr_hw_queues %d queue_depth %d\n",
			info->nr_hw_queues, info->queue_depth);
}
1455
/*
 * UBLK_CMD_ADD_DEV: allocate a new ublk device from the userspace
 * ublksrv_ctrl_dev_info, register its char device /dev/ublkcN and copy
 * the (possibly adjusted) dev_info back to userspace.  The block device
 * itself is only created later, by UBLK_CMD_START_DEV.
 */
static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
{
	struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
	void __user *argp = (void __user *)(unsigned long)header->addr;
	struct ublksrv_ctrl_dev_info info;
	struct ublk_device *ub;
	int ret = -EINVAL;

	if (header->len < sizeof(info) || !header->addr)
		return -EINVAL;
	/* ADD_DEV is a per-device command; queue_id must be unset (-1) */
	if (header->queue_id != (u16)-1) {
		pr_warn("%s: queue_id is wrong %x\n",
			__func__, header->queue_id);
		return -EINVAL;
	}
	if (copy_from_user(&info, argp, sizeof(info)))
		return -EFAULT;
	ublk_dump_dev_info(&info);
	if (header->dev_id != info.dev_id) {
		pr_warn("%s: dev id not match %u %u\n",
			__func__, header->dev_id, info.dev_id);
		return -EINVAL;
	}

	ret = mutex_lock_killable(&ublk_ctl_mutex);
	if (ret)
		return ret;

	ret = -ENOMEM;
	ub = kzalloc(sizeof(*ub), GFP_KERNEL);
	if (!ub)
		goto out_unlock;
	mutex_init(&ub->mutex);
	spin_lock_init(&ub->mm_lock);
	INIT_WORK(&ub->stop_work, ublk_stop_work_fn);
	INIT_DELAYED_WORK(&ub->monitor_work, ublk_daemon_monitor_work);

	/* a negative header->dev_id means "pick any free number" */
	ret = ublk_alloc_dev_number(ub, header->dev_id);
	if (ret < 0)
		goto out_free_ub;

	memcpy(&ub->dev_info, &info, sizeof(info));

	/* update device id: userspace learns it from the copied-back info */
	ub->dev_info.dev_id = ub->ub_number;

	/* only keep feature flags this kernel actually knows about */
	ub->dev_info.flags &= UBLK_F_ALL;

	/* zero copy is not implemented, so never advertise it */
	ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY;

	/* more hw queues than possible CPUs buys nothing */
	ub->dev_info.nr_hw_queues = min_t(unsigned int,
			ub->dev_info.nr_hw_queues, nr_cpu_ids);
	ublk_align_max_io_size(ub);

	ret = ublk_init_queues(ub);
	if (ret)
		goto out_free_dev_number;

	ret = ublk_add_tag_set(ub);
	if (ret)
		goto out_deinit_queues;

	ret = -EFAULT;
	if (copy_to_user(argp, &ub->dev_info, sizeof(info)))
		goto out_free_tag_set;

	/*
	 * Once the char device is added, everything above is released
	 * through the cdev_dev release path (ublk_cdev_rel) rather than
	 * the error labels below — so jump straight to out_unlock.
	 */
	ret = ublk_add_chdev(ub);
	goto out_unlock;

out_free_tag_set:
	blk_mq_free_tag_set(&ub->tag_set);
out_deinit_queues:
	ublk_deinit_queues(ub);
out_free_dev_number:
	ublk_free_dev_number(ub);
out_free_ub:
	mutex_destroy(&ub->mutex);
	kfree(ub);
out_unlock:
	mutex_unlock(&ublk_ctl_mutex);
	return ret;
}
1549
1550 static inline bool ublk_idr_freed(int id)
1551 {
1552 void *ptr;
1553
1554 spin_lock(&ublk_idr_lock);
1555 ptr = idr_find(&ublk_index_idr, id);
1556 spin_unlock(&ublk_idr_lock);
1557
1558 return ptr == NULL;
1559 }
1560
/*
 * UBLK_CMD_DEL_DEV: remove the device at index @idx and wait until its
 * number is actually free again, so userspace can immediately recreate
 * a device with the same index.
 */
static int ublk_ctrl_del_dev(int idx)
{
	struct ublk_device *ub;
	int ret;

	ret = mutex_lock_killable(&ublk_ctl_mutex);
	if (ret)
		return ret;

	ub = ublk_get_device_from_id(idx);
	if (ub) {
		ublk_remove(ub);
		ublk_put_device(ub);
		ret = 0;
	} else {
		ret = -ENODEV;
	}

	/*
	 * Wait until the idr slot is released by the final device put
	 * (ublk_free_dev_number() wakes ublk_idr_wq); otherwise a
	 * back-to-back ADD_DEV on the same index could race with this
	 * teardown.
	 */
	if (!ret)
		wait_event(ublk_idr_wq, ublk_idr_freed(idx));
	mutex_unlock(&ublk_ctl_mutex);

	return ret;
}
1589
/* Debug helper: log every control command as it arrives. */
static inline void ublk_ctrl_cmd_dump(struct io_uring_cmd *cmd)
{
	struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;

	pr_devel("%s: cmd_op %x, dev id %d qid %d data %llx buf %llx len %u\n",
			__func__, cmd->cmd_op, header->dev_id, header->queue_id,
			header->data[0], header->addr, header->len);
}
1598
/*
 * UBLK_CMD_STOP_DEV: stop the device via ublk_stop_dev(); the char
 * device stays registered so the device can still be deleted later.
 */
static int ublk_ctrl_stop_dev(struct io_uring_cmd *cmd)
{
	struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
	struct ublk_device *ub;

	ub = ublk_get_device_from_id(header->dev_id);
	if (!ub)
		return -EINVAL;

	ublk_stop_dev(ub);
	/* also flush a stop_work that may have been queued concurrently */
	cancel_work_sync(&ub->stop_work);

	ublk_put_device(ub);
	return 0;
}
1614
1615 static int ublk_ctrl_get_dev_info(struct io_uring_cmd *cmd)
1616 {
1617 struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
1618 void __user *argp = (void __user *)(unsigned long)header->addr;
1619 struct ublk_device *ub;
1620 int ret = 0;
1621
1622 if (header->len < sizeof(struct ublksrv_ctrl_dev_info) || !header->addr)
1623 return -EINVAL;
1624
1625 ub = ublk_get_device_from_id(header->dev_id);
1626 if (!ub)
1627 return -EINVAL;
1628
1629 if (copy_to_user(argp, &ub->dev_info, sizeof(ub->dev_info)))
1630 ret = -EFAULT;
1631 ublk_put_device(ub);
1632
1633 return ret;
1634 }
1635
1636 static int ublk_ctrl_get_params(struct io_uring_cmd *cmd)
1637 {
1638 struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
1639 void __user *argp = (void __user *)(unsigned long)header->addr;
1640 struct ublk_params_header ph;
1641 struct ublk_device *ub;
1642 int ret;
1643
1644 if (header->len <= sizeof(ph) || !header->addr)
1645 return -EINVAL;
1646
1647 if (copy_from_user(&ph, argp, sizeof(ph)))
1648 return -EFAULT;
1649
1650 if (ph.len > header->len || !ph.len)
1651 return -EINVAL;
1652
1653 if (ph.len > sizeof(struct ublk_params))
1654 ph.len = sizeof(struct ublk_params);
1655
1656 ub = ublk_get_device_from_id(header->dev_id);
1657 if (!ub)
1658 return -EINVAL;
1659
1660 mutex_lock(&ub->mutex);
1661 if (copy_to_user(argp, &ub->params, ph.len))
1662 ret = -EFAULT;
1663 else
1664 ret = 0;
1665 mutex_unlock(&ub->mutex);
1666
1667 ublk_put_device(ub);
1668 return ret;
1669 }
1670
/*
 * UBLK_CMD_SET_PARAMS: stage device parameters from userspace.  Only
 * allowed before the device goes live; the staged parameters are
 * applied by ublk_apply_params() during START_DEV.
 */
static int ublk_ctrl_set_params(struct io_uring_cmd *cmd)
{
	struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
	void __user *argp = (void __user *)(unsigned long)header->addr;
	struct ublk_params_header ph;
	struct ublk_device *ub;
	int ret = -EFAULT;

	if (header->len <= sizeof(ph) || !header->addr)
		return -EINVAL;

	if (copy_from_user(&ph, argp, sizeof(ph)))
		return -EFAULT;

	/* claimed length must be non-zero, typed, and fit the buffer */
	if (ph.len > header->len || !ph.len || !ph.types)
		return -EINVAL;

	/* never read more than the kernel-side struct can hold */
	if (ph.len > sizeof(struct ublk_params))
		ph.len = sizeof(struct ublk_params);

	ub = ublk_get_device_from_id(header->dev_id);
	if (!ub)
		return -EINVAL;

	/* parameters can only be changed while the device isn't live */
	mutex_lock(&ub->mutex);
	if (ub->dev_info.state == UBLK_S_DEV_LIVE) {
		ret = -EACCES;
	} else if (copy_from_user(&ub->params, argp, ph.len)) {
		ret = -EFAULT;
	} else {
		/* drop any parameter types this kernel doesn't support */
		ub->params.types &= UBLK_PARAM_TYPE_ALL;
		ret = ublk_validate_params(ub);
	}
	mutex_unlock(&ub->mutex);
	ublk_put_device(ub);

	return ret;
}
1711
/*
 * Dispatcher for all /dev/ublk-control uring commands.  Requires a
 * 128-byte SQE (the ublksrv_ctrl_cmd payload lives there) and
 * CAP_SYS_ADMIN.  The command is always completed right here, so the
 * function returns -EIOCBQUEUED to io_uring.
 */
static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
		unsigned int issue_flags)
{
	struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
	int ret = -EINVAL;

	ublk_ctrl_cmd_dump(cmd);

	/* the control payload only fits in a big SQE */
	if (!(issue_flags & IO_URING_F_SQE128))
		goto out;

	ret = -EPERM;
	if (!capable(CAP_SYS_ADMIN))
		goto out;

	ret = -ENODEV;
	switch (cmd->cmd_op) {
	case UBLK_CMD_START_DEV:
		ret = ublk_ctrl_start_dev(cmd);
		break;
	case UBLK_CMD_STOP_DEV:
		ret = ublk_ctrl_stop_dev(cmd);
		break;
	case UBLK_CMD_GET_DEV_INFO:
		ret = ublk_ctrl_get_dev_info(cmd);
		break;
	case UBLK_CMD_ADD_DEV:
		ret = ublk_ctrl_add_dev(cmd);
		break;
	case UBLK_CMD_DEL_DEV:
		ret = ublk_ctrl_del_dev(header->dev_id);
		break;
	case UBLK_CMD_GET_QUEUE_AFFINITY:
		ret = ublk_ctrl_get_queue_affinity(cmd);
		break;
	case UBLK_CMD_GET_PARAMS:
		ret = ublk_ctrl_get_params(cmd);
		break;
	case UBLK_CMD_SET_PARAMS:
		ret = ublk_ctrl_set_params(cmd);
		break;
	default:
		/* unknown op: complete with the -ENODEV set above */
		break;
	}
out:
	io_uring_cmd_done(cmd, ret, 0);
	pr_devel("%s: cmd done ret %d cmd_op %x, dev id %d qid %d\n",
			__func__, ret, cmd->cmd_op, header->dev_id, header->queue_id);
	return -EIOCBQUEUED;
}
1762
1763 static const struct file_operations ublk_ctl_fops = {
1764 .open = nonseekable_open,
1765 .uring_cmd = ublk_ctrl_uring_cmd,
1766 .owner = THIS_MODULE,
1767 .llseek = noop_llseek,
1768 };
1769
1770 static struct miscdevice ublk_misc = {
1771 .minor = MISC_DYNAMIC_MINOR,
1772 .name = "ublk-control",
1773 .fops = &ublk_ctl_fops,
1774 };
1775
1776 static int __init ublk_init(void)
1777 {
1778 int ret;
1779
1780 init_waitqueue_head(&ublk_idr_wq);
1781
1782 ret = misc_register(&ublk_misc);
1783 if (ret)
1784 return ret;
1785
1786 ret = alloc_chrdev_region(&ublk_chr_devt, 0, UBLK_MINORS, "ublk-char");
1787 if (ret)
1788 goto unregister_mis;
1789
1790 ublk_chr_class = class_create(THIS_MODULE, "ublk-char");
1791 if (IS_ERR(ublk_chr_class)) {
1792 ret = PTR_ERR(ublk_chr_class);
1793 goto free_chrdev_region;
1794 }
1795 return 0;
1796
1797 free_chrdev_region:
1798 unregister_chrdev_region(ublk_chr_devt, UBLK_MINORS);
1799 unregister_mis:
1800 misc_deregister(&ublk_misc);
1801 return ret;
1802 }
1803
1804 static void __exit ublk_exit(void)
1805 {
1806 struct ublk_device *ub;
1807 int id;
1808
1809 class_destroy(ublk_chr_class);
1810
1811 misc_deregister(&ublk_misc);
1812
1813 idr_for_each_entry(&ublk_index_idr, ub, id)
1814 ublk_remove(ub);
1815
1816 idr_destroy(&ublk_index_idr);
1817 unregister_chrdev_region(ublk_chr_devt, UBLK_MINORS);
1818 }
1819
1820 module_init(ublk_init);
1821 module_exit(ublk_exit);
1822
1823 MODULE_AUTHOR("Ming Lei <ming.lei@redhat.com>");
1824 MODULE_LICENSE("GPL");