0001
0002
0003
0004
0005
0006
0007 #include <linux/hdreg.h>
0008 #include <linux/blk-mq.h>
0009 #include <linux/netdevice.h>
0010 #include <linux/delay.h>
0011 #include <linux/slab.h>
0012 #include <linux/bitmap.h>
0013 #include <linux/kdev_t.h>
0014 #include <linux/moduleparam.h>
0015 #include <linux/string.h>
0016 #include "aoe.h"
0017
0018 static void freetgt(struct aoedev *d, struct aoetgt *t);
0019 static void skbpoolfree(struct aoedev *d);
0020
0021 static int aoe_dyndevs = 1;
0022 module_param(aoe_dyndevs, int, 0644);
0023 MODULE_PARM_DESC(aoe_dyndevs, "Use dynamic minor numbers for devices.");
0024
0025 static struct aoedev *devlist;
0026 static DEFINE_SPINLOCK(devlist_lock);
0027
0028
0029
0030
0031
0032
0033
0034
0035 #define N_DEVS ((1U<<MINORBITS)/AOE_PARTITIONS)
0036
0037 static DEFINE_SPINLOCK(used_minors_lock);
0038 static DECLARE_BITMAP(used_minors, N_DEVS);
0039
static int
minor_get_dyn(ulong *sysminor)
{
	ulong flags;
	ulong n;
	int error = 0;

	spin_lock_irqsave(&used_minors_lock, flags);
	n = find_first_zero_bit(used_minors, N_DEVS);
	if (n < N_DEVS)
		set_bit(n, used_minors);
	else
		error = -1;
	spin_unlock_irqrestore(&used_minors_lock, flags);

	*sysminor = n * AOE_PARTITIONS;
	return error;
}
0058
0059 static int
0060 minor_get_static(ulong *sysminor, ulong aoemaj, int aoemin)
0061 {
0062 ulong flags;
0063 ulong n;
0064 int error = 0;
0065 enum {
0066
0067
0068 NPERSHELF = 16,
0069 };
0070
0071 if (aoemin >= NPERSHELF) {
0072 pr_err("aoe: %s %d slots per shelf\n",
0073 "static minor device numbers support only",
0074 NPERSHELF);
0075 error = -1;
0076 goto out;
0077 }
0078
0079 n = aoemaj * NPERSHELF + aoemin;
0080 if (n >= N_DEVS) {
0081 pr_err("aoe: %s with e%ld.%d\n",
0082 "cannot use static minor device numbers",
0083 aoemaj, aoemin);
0084 error = -1;
0085 goto out;
0086 }
0087
0088 spin_lock_irqsave(&used_minors_lock, flags);
0089 if (test_bit(n, used_minors)) {
0090 pr_err("aoe: %s %lu\n",
0091 "existing device already has static minor number",
0092 n);
0093 error = -1;
0094 } else
0095 set_bit(n, used_minors);
0096 spin_unlock_irqrestore(&used_minors_lock, flags);
0097 *sysminor = n * AOE_PARTITIONS;
0098 out:
0099 return error;
0100 }
0101
0102 static int
0103 minor_get(ulong *sysminor, ulong aoemaj, int aoemin)
0104 {
0105 if (aoe_dyndevs)
0106 return minor_get_dyn(sysminor);
0107 else
0108 return minor_get_static(sysminor, aoemaj, aoemin);
0109 }
0110
/* Return a sysminor obtained from minor_get() to the free pool. */
static void
minor_free(ulong minor)
{
	ulong flags;

	minor /= AOE_PARTITIONS;	/* back to the whole-disk slot index */
	BUG_ON(minor >= N_DEVS);

	spin_lock_irqsave(&used_minors_lock, flags);
	BUG_ON(!test_bit(minor, used_minors));	/* must have been allocated */
	clear_bit(minor, used_minors);
	spin_unlock_irqrestore(&used_minors_lock, flags);
}
0124
0125
0126
0127
0128
0129
0130
0131
0132
0133
0134
0135
0136
/*
 * Drop a reference taken by aoedev_by_aoeaddr().  d->ref is guarded by
 * devlist_lock; flush() skips devices whose ref is still nonzero.
 */
void
aoedev_put(struct aoedev *d)
{
	ulong flags;

	spin_lock_irqsave(&devlist_lock, flags);
	d->ref--;
	spin_unlock_irqrestore(&devlist_lock, flags);
}
0146
0147 static void
0148 dummy_timer(struct timer_list *t)
0149 {
0150 struct aoedev *d;
0151
0152 d = from_timer(d, t, timer);
0153 if (d->flags & DEVFL_TKILL)
0154 return;
0155 d->timer.expires = jiffies + HZ;
0156 add_timer(&d->timer);
0157 }
0158
/*
 * Fail the request tracked as in-process in d->ip: fail the active
 * buf, mark every not-yet-issued bio in the chain with an I/O error,
 * and complete the request once no bios remain outstanding.
 */
static void
aoe_failip(struct aoedev *d)
{
	struct request *rq;
	struct aoe_req *req;
	struct bio *bio;

	aoe_failbuf(d, d->ip.buf);
	rq = d->ip.rq;
	if (rq == NULL)		/* no request in process */
		return;

	/* walk the chain of pending bios, erroring each one */
	req = blk_mq_rq_to_pdu(rq);
	while ((bio = d->ip.nxbio)) {
		bio->bi_status = BLK_STS_IOERR;
		d->ip.nxbio = bio->bi_next;
		req->nr_bios--;
	}

	/* complete only when no other frames still reference the request */
	if (!req->nr_bios)
		aoe_end_request(d, rq, 0);
}
0181
/*
 * Unlink one queued frame and fail it: drop its buffer's
 * outstanding-frame count, fail the buffer, and recycle the frame.
 */
static void
downdev_frame(struct list_head *pos)
{
	struct frame *f;

	f = list_entry(pos, struct frame, head);
	list_del(pos);
	if (f->buf) {
		f->buf->nframesout--;
		aoe_failbuf(f->t->d, f->buf);
	}
	aoe_freetframe(f);
}
0195
/*
 * Take a device down: clear DEVFL_UP, fail every queued frame and the
 * in-process request, reset each target's window, and cycle the blk-mq
 * queue so pending I/O is errored promptly.
 */
void
aoedev_downdev(struct aoedev *d)
{
	struct aoetgt *t, **tt, **te;
	struct list_head *head, *pos, *nx;
	int i;

	d->flags &= ~DEVFL_UP;

	/* fail frames on the active and retransmit queues */
	for (i = 0; i < NFACTIVE; i++) {
		head = &d->factive[i];
		list_for_each_safe(pos, nx, head)
			downdev_frame(pos);
	}
	head = &d->rexmitq;
	list_for_each_safe(pos, nx, head)
		downdev_frame(pos);

	/* reset window dressings: no frames are outstanding any more */
	tt = d->targets;
	te = tt + d->ntargets;
	for (; tt < te && (t = *tt); tt++) {
		aoecmd_wreset(t);
		t->nout = 0;
	}

	/* clean out the in-process request (if any) */
	aoe_failip(d);

	/* fast fail all pending I/O */
	if (d->blkq) {
		/* UP is cleared, so freeze+quiesce to ensure all are errored */
		blk_mq_freeze_queue(d->blkq);
		blk_mq_quiesce_queue(d->blkq);
		blk_mq_unquiesce_queue(d->blkq);
		blk_mq_unfreeze_queue(d->blkq);
	}

	if (d->gd)
		set_capacity(d->gd, 0);
}
0238
0239
0240
0241
0242 static int
0243 user_req(char *s, size_t slen, struct aoedev *d)
0244 {
0245 const char *p;
0246 size_t lim;
0247
0248 if (!d->gd)
0249 return 0;
0250 p = kbasename(d->gd->disk_name);
0251 lim = sizeof(d->gd->disk_name);
0252 lim -= p - d->gd->disk_name;
0253 if (slen < lim)
0254 lim = slen;
0255
0256 return !strncmp(s, p, lim);
0257 }
0258
/*
 * Release the resources of a device marked DEVFL_TKILL.  DEVFL_FREEING
 * ensures exactly one caller performs the teardown; DEVFL_FREED is set
 * at the end so flush() knows the struct may be unlinked and kfree'd.
 * May sleep (del_timer_sync, del_gendisk).
 */
static void
freedev(struct aoedev *d)
{
	struct aoetgt **t, **e;
	int freeing = 0;
	unsigned long flags;

	/* race to be the single caller that does the free */
	spin_lock_irqsave(&d->lock, flags);
	if (d->flags & DEVFL_TKILL
	&& !(d->flags & DEVFL_FREEING)) {
		d->flags |= DEVFL_FREEING;
		freeing = 1;
	}
	spin_unlock_irqrestore(&d->lock, flags);
	if (!freeing)
		return;

	del_timer_sync(&d->timer);
	if (d->gd) {
		aoedisk_rm_debugfs(d);
		del_gendisk(d->gd);
		put_disk(d->gd);
		blk_mq_free_tag_set(&d->tag_set);
	}
	/* free every target up to the first empty slot */
	t = d->targets;
	e = t + d->ntargets;
	for (; t < e && *t; t++)
		freetgt(d, *t);

	mempool_destroy(d->bufpool);
	skbpoolfree(d);
	minor_free(d->sysminor);

	/* tell flush() this aoedev can be unlinked and kfree'd */
	spin_lock_irqsave(&d->lock, flags);
	d->flags |= DEVFL_FREED;
	spin_unlock_irqrestore(&d->lock, flags);
}
0296
/* values for the "exiting" argument of flush() */
enum flush_parms {
	NOT_EXITING = 0,
	EXITING = 1,
};
0301
/*
 * Remove aoe devices.  Called for the user "flush" command
 * (exiting == NOT_EXITING) and at module unload (EXITING).  With the
 * user string "all", every down device is flushed regardless of UP
 * state; any other string names a single device; with no usable
 * string only devices that are down, closed, and unreferenced go.
 */
static int
flush(const char __user *str, size_t cnt, int exiting)
{
	ulong flags;
	struct aoedev *d, **dd;
	char buf[16];
	int all = 0;
	int specified = 0;
	unsigned int skipflags;

	/* devices in these states are never auto-flushed */
	skipflags = DEVFL_GDALLOC | DEVFL_NEWSIZE | DEVFL_TKILL;

	if (!exiting && cnt >= 3) {
		if (cnt > sizeof buf)
			cnt = sizeof buf;
		if (copy_from_user(buf, str, cnt))
			return -EFAULT;
		all = !strncmp(buf, "all", 3);
		if (!all)
			specified = 1;
	}

	flush_workqueue(aoe_wq);
	/* pass one: select victims, take each down, and mark it with
	 * DEVFL_TKILL; restart the scan after each victim because both
	 * locks are dropped to call aoedev_downdev
	 */
restart1:
	spin_lock_irqsave(&devlist_lock, flags);
	for (d = devlist; d; d = d->next) {
		spin_lock(&d->lock);
		if (d->flags & DEVFL_TKILL)
			goto cont;

		if (exiting) {
			/* unconditionally take each device down */
		} else if (specified) {
			if (!user_req(buf, cnt, d))
				goto cont;
		} else if ((!all && (d->flags & DEVFL_UP))
		|| d->flags & skipflags
		|| d->nopen
		|| d->ref)
			goto cont;

		spin_unlock(&d->lock);
		spin_unlock_irqrestore(&devlist_lock, flags);
		aoedev_downdev(d);
		d->flags |= DEVFL_TKILL;
		goto restart1;
cont:
		spin_unlock(&d->lock);
	}
	spin_unlock_irqrestore(&devlist_lock, flags);

	/* pass two: call freedev, which might sleep, for aoedevs marked
	 * with DEVFL_TKILL; restart after each free since the locks
	 * were dropped
	 */
restart2:
	spin_lock_irqsave(&devlist_lock, flags);
	for (d = devlist; d; d = d->next) {
		spin_lock(&d->lock);
		if (d->flags & DEVFL_TKILL
		&& !(d->flags & DEVFL_FREEING)) {
			spin_unlock(&d->lock);
			spin_unlock_irqrestore(&devlist_lock, flags);
			freedev(d);
			goto restart2;
		}
		spin_unlock(&d->lock);
	}

	/* pass three: unlink and free aoedevs marked with DEVFL_FREED
	 * (devlist_lock is still held from the final restart2 scan)
	 */
	for (dd = &devlist, d = *dd; d; d = *dd) {
		struct aoedev *doomed = NULL;

		spin_lock(&d->lock);
		if (d->flags & DEVFL_FREED) {
			*dd = d->next;
			doomed = d;
		} else {
			dd = &d->next;
		}
		spin_unlock(&d->lock);
		if (doomed)
			kfree(doomed->targets);
		kfree(doomed);	/* kfree(NULL) is a no-op */
	}
	spin_unlock_irqrestore(&devlist_lock, flags);

	return 0;
}
0391
/* user-triggered entry point: flush devices named by the user string */
int
aoedev_flush(const char __user *str, size_t cnt)
{
	return flush(str, cnt, NOT_EXITING);
}
0397
0398
0399
0400
0401
0402
/*
 * Free an skb, first waiting (up to Tms total, polling every Sms) for
 * the network layer to drop any extra data references.  On timeout the
 * skb is deliberately leaked rather than freed while a NIC may still
 * be using it.  May sleep.
 */
static void
skbfree(struct sk_buff *skb)
{
	enum { Sms = 250, Tms = 30 * 1000};
	int i = Tms / Sms;

	if (skb == NULL)
		return;
	/* i < 0 after the loop exactly when the wait timed out */
	while (atomic_read(&skb_shinfo(skb)->dataref) != 1 && i-- > 0)
		msleep(Sms);
	if (i < 0) {
		printk(KERN_ERR
			"aoe: %s holds ref: %s\n",
			skb->dev ? skb->dev->name : "netif",
			"cannot free skb -- memory leaked.");
		return;
	}
	/* strip any paged data so only the linear skb is freed */
	skb->truesize -= skb->data_len;
	skb_shinfo(skb)->nr_frags = skb->data_len = 0;
	skb_trim(skb, 0);
	dev_kfree_skb(skb);
}
0425
0426 static void
0427 skbpoolfree(struct aoedev *d)
0428 {
0429 struct sk_buff *skb, *tmp;
0430
0431 skb_queue_walk_safe(&d->skbpool, skb, tmp)
0432 skbfree(skb);
0433
0434 __skb_queue_head_init(&d->skbpool);
0435 }
0436
0437
/*
 * Look up the device with AoE address (maj, min), taking a reference
 * on it.  When absent and do_alloc is set, allocate and initialize a
 * new device (GFP_ATOMIC: runs under devlist_lock) and link it onto
 * devlist.  Returns NULL if the device is being killed, lookup fails
 * without do_alloc, no minor is available, or allocation fails.
 */
struct aoedev *
aoedev_by_aoeaddr(ulong maj, int min, int do_alloc)
{
	struct aoedev *d;
	int i;
	ulong flags;
	ulong sysminor = 0;

	spin_lock_irqsave(&devlist_lock, flags);

	for (d=devlist; d; d=d->next)
		if (d->aoemajor == maj && d->aoeminor == min) {
			spin_lock(&d->lock);
			if (d->flags & DEVFL_TKILL) {
				/* device is being removed; don't hand it out */
				spin_unlock(&d->lock);
				d = NULL;
				goto out;
			}
			d->ref++;
			spin_unlock(&d->lock);
			break;
		}
	if (d || !do_alloc || minor_get(&sysminor, maj, min) < 0)
		goto out;
	d = kcalloc(1, sizeof *d, GFP_ATOMIC);
	if (!d)
		goto out;
	d->targets = kcalloc(NTARGETS, sizeof(*d->targets), GFP_ATOMIC);
	if (!d->targets) {
		kfree(d);
		d = NULL;
		goto out;
	}
	d->ntargets = NTARGETS;
	INIT_WORK(&d->work, aoecmd_sleepwork);
	spin_lock_init(&d->lock);
	INIT_LIST_HEAD(&d->rq_list);
	skb_queue_head_init(&d->skbpool);
	timer_setup(&d->timer, dummy_timer, 0);
	d->timer.expires = jiffies + HZ;
	add_timer(&d->timer);
	d->bufpool = NULL;	/* allocated elsewhere; destroyed in freedev() */
	d->tgt = d->targets;
	d->ref = 1;		/* caller's reference */
	for (i = 0; i < NFACTIVE; i++)
		INIT_LIST_HEAD(&d->factive[i]);
	INIT_LIST_HEAD(&d->rexmitq);
	d->sysminor = sysminor;
	d->aoemajor = maj;
	d->aoeminor = min;
	d->rttavg = RTTAVG_INIT;
	d->rttdev = RTTDEV_INIT;
	d->next = devlist;
	devlist = d;
out:
	spin_unlock_irqrestore(&devlist_lock, flags);
	return d;
}
0496
0497 static void
0498 freetgt(struct aoedev *d, struct aoetgt *t)
0499 {
0500 struct frame *f;
0501 struct list_head *pos, *nx, *head;
0502 struct aoeif *ifp;
0503
0504 for (ifp = t->ifs; ifp < &t->ifs[NAOEIFS]; ++ifp) {
0505 if (!ifp->nd)
0506 break;
0507 dev_put(ifp->nd);
0508 }
0509
0510 head = &t->ffree;
0511 list_for_each_safe(pos, nx, head) {
0512 list_del(pos);
0513 f = list_entry(pos, struct frame, head);
0514 skbfree(f->skb);
0515 kfree(f);
0516 }
0517 kfree(t);
0518 }
0519
/* module-exit teardown: drain the workqueue, then force all devices down */
void
aoedev_exit(void)
{
	flush_workqueue(aoe_wq);
	flush(NULL, 0, EXITING);
}
0526
/* module-init hook; no per-module device state to set up currently */
int __init
aoedev_init(void)
{
	return 0;
}