/* Copyright (c) 2013 Coraid, Inc.  See COPYING for GPL terms. */
/*
 * aoedev.c
 * AoE device utility functions; maintains device list.
 */

#include <linux/hdreg.h>
#include <linux/blk-mq.h>
#include <linux/netdevice.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/bitmap.h>
#include <linux/kdev_t.h>
#include <linux/moduleparam.h>
#include <linux/string.h>
#include "aoe.h"

static void freetgt(struct aoedev *d, struct aoetgt *t);
static void skbpoolfree(struct aoedev *d);

static int aoe_dyndevs = 1;
module_param(aoe_dyndevs, int, 0644);
MODULE_PARM_DESC(aoe_dyndevs, "Use dynamic minor numbers for devices.");

static struct aoedev *devlist;
static DEFINE_SPINLOCK(devlist_lock);

/* Because some systems will have one, many, or no
 *   - partitions,
 *   - slots per shelf,
 *   - or shelves,
 * we need some flexibility in the way the minor numbers
 * are allocated.  So they are dynamic.
 */
#define N_DEVS ((1U<<MINORBITS)/AOE_PARTITIONS)

static DEFINE_SPINLOCK(used_minors_lock);
static DECLARE_BITMAP(used_minors, N_DEVS);
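
/* A worked example of the arithmetic above, assuming the values in this
 * tree (MINORBITS = 20 from kdev_t.h, AOE_PARTITIONS = 16 from aoe.h):
 *
 *    N_DEVS = (1U << 20) / 16 = 65536
 *
 * i.e. one bit in used_minors per whole device, with each device
 * reserving AOE_PARTITIONS consecutive system minor numbers.
 */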

/* Allocate the lowest free device slot; on success, *sysminor receives
 * that slot's base system minor number.
 */
static int
minor_get_dyn(ulong *sysminor)
{
    ulong flags;
    ulong n;
    int error = 0;

    spin_lock_irqsave(&used_minors_lock, flags);
    n = find_first_zero_bit(used_minors, N_DEVS);
    if (n < N_DEVS)
        set_bit(n, used_minors);
    else
        error = -1;
    spin_unlock_irqrestore(&used_minors_lock, flags);

    *sysminor = n * AOE_PARTITIONS;
    return error;
}

static int
minor_get_static(ulong *sysminor, ulong aoemaj, int aoemin)
{
    ulong flags;
    ulong n;
    int error = 0;
    enum {
        /* for backwards compatibility when !aoe_dyndevs,
         * a static number of supported slots per shelf */
        NPERSHELF = 16,
    };

    if (aoemin >= NPERSHELF) {
        pr_err("aoe: %s %d slots per shelf\n",
            "static minor device numbers support only",
            NPERSHELF);
        error = -1;
        goto out;
    }

    n = aoemaj * NPERSHELF + aoemin;
    if (n >= N_DEVS) {
        pr_err("aoe: %s with e%ld.%d\n",
            "cannot use static minor device numbers",
            aoemaj, aoemin);
        error = -1;
        goto out;
    }

    spin_lock_irqsave(&used_minors_lock, flags);
    if (test_bit(n, used_minors)) {
        pr_err("aoe: %s %lu\n",
            "existing device already has static minor number",
            n);
        error = -1;
    } else
        set_bit(n, used_minors);
    spin_unlock_irqrestore(&used_minors_lock, flags);
    *sysminor = n * AOE_PARTITIONS;
out:
    return error;
}
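
/* A worked example of the static mapping above, assuming the values in
 * this tree (NPERSHELF = 16, AOE_PARTITIONS = 16): shelf address e1.2
 * (aoemaj == 1, aoemin == 2) gives n = 1 * 16 + 2 = 18, so the device
 * gets base system minor 18 * 16 = 288.  Slot numbers 16 and up are
 * rejected outright.
 */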

static int
minor_get(ulong *sysminor, ulong aoemaj, int aoemin)
{
    if (aoe_dyndevs)
        return minor_get_dyn(sysminor);
    else
        return minor_get_static(sysminor, aoemaj, aoemin);
}

/* Return a device's slot to the bitmap.  The argument is the base
 * system minor, as handed out by minor_get.
 */
static void
minor_free(ulong minor)
{
    ulong flags;

    minor /= AOE_PARTITIONS;
    BUG_ON(minor >= N_DEVS);

    spin_lock_irqsave(&used_minors_lock, flags);
    BUG_ON(!test_bit(minor, used_minors));
    clear_bit(minor, used_minors);
    spin_unlock_irqrestore(&used_minors_lock, flags);
}

/*
 * Users who grab a pointer to the device with aoedev_by_aoeaddr
 * automatically get a reference count and are responsible for
 * performing an aoedev_put.  With the addition of async
 * kthread processing I'm no longer confident that we can
 * guarantee consistency in the face of device flushes.
 *
 * For the time being, we only bother to add extra references for
 * frames sitting on the iocq.  When the kthreads finish processing
 * these frames, they will aoedev_put the device.
 */

void
aoedev_put(struct aoedev *d)
{
    ulong flags;

    spin_lock_irqsave(&devlist_lock, flags);
    d->ref--;
    spin_unlock_irqrestore(&devlist_lock, flags);
}
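
/* A sketch of the get/put pairing described above (hypothetical caller;
 * the real users live in aoecmd.c):
 *
 *    struct aoedev *d = aoedev_by_aoeaddr(maj, min, 0);
 *    if (d) {
 *        // while d->ref is elevated, flush() skips this device
 *        ... use d ...
 *        aoedev_put(d);
 *    }
 */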

/* Self-rearming no-op timer; it keeps ticking once a second until
 * DEVFL_TKILL is set on the device.
 */
static void
dummy_timer(struct timer_list *t)
{
    struct aoedev *d;

    d = from_timer(d, t, timer);
    if (d->flags & DEVFL_TKILL)
        return;
    d->timer.expires = jiffies + HZ;
    add_timer(&d->timer);
}

/* Fail the in-process request: error its remaining bios and, once no
 * bios are outstanding, complete the request itself.
 */
static void
aoe_failip(struct aoedev *d)
{
    struct request *rq;
    struct aoe_req *req;
    struct bio *bio;

    aoe_failbuf(d, d->ip.buf);
    rq = d->ip.rq;
    if (rq == NULL)
        return;

    req = blk_mq_rq_to_pdu(rq);
    while ((bio = d->ip.nxbio)) {
        bio->bi_status = BLK_STS_IOERR;
        d->ip.nxbio = bio->bi_next;
        req->nr_bios--;
    }

    if (!req->nr_bios)
        aoe_end_request(d, rq, 0);
}

/* Unlink one outstanding frame, failing its buffer if it has one. */
static void
downdev_frame(struct list_head *pos)
{
    struct frame *f;

    f = list_entry(pos, struct frame, head);
    list_del(pos);
    if (f->buf) {
        f->buf->nframesout--;
        aoe_failbuf(f->t->d, f->buf);
    }
    aoe_freetframe(f);
}

void
aoedev_downdev(struct aoedev *d)
{
    struct aoetgt *t, **tt, **te;
    struct list_head *head, *pos, *nx;
    int i;

    d->flags &= ~DEVFL_UP;

    /* clean out active and to-be-retransmitted buffers */
    for (i = 0; i < NFACTIVE; i++) {
        head = &d->factive[i];
        list_for_each_safe(pos, nx, head)
            downdev_frame(pos);
    }
    head = &d->rexmitq;
    list_for_each_safe(pos, nx, head)
        downdev_frame(pos);

    /* reset window dressings */
    tt = d->targets;
    te = tt + d->ntargets;
    for (; tt < te && (t = *tt); tt++) {
        aoecmd_wreset(t);
        t->nout = 0;
    }

    /* clean out the in-process request (if any) */
    aoe_failip(d);

    /* fast fail all pending I/O */
    if (d->blkq) {
        /* UP is cleared, freeze+quiesce to ensure all are errored */
        blk_mq_freeze_queue(d->blkq);
        blk_mq_quiesce_queue(d->blkq);
        blk_mq_unquiesce_queue(d->blkq);
        blk_mq_unfreeze_queue(d->blkq);
    }

    if (d->gd)
        set_capacity(d->gd, 0);
}
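
/* A note on the freeze/quiesce sequence above: blk_mq_freeze_queue()
 * waits for in-flight requests to drain, and the quiesce/unquiesce pair
 * ensures that no ->queue_rq() invocation is still running.  Because
 * DEVFL_UP was cleared first, anything dispatched afterward is failed
 * fast instead of being queued against the dead device.
 */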

/* return whether the user asked for this particular
 * device to be flushed
 */
static int
user_req(char *s, size_t slen, struct aoedev *d)
{
    const char *p;
    size_t lim;

    if (!d->gd)
        return 0;
    p = kbasename(d->gd->disk_name);
    lim = sizeof(d->gd->disk_name);
    lim -= p - d->gd->disk_name;
    if (slen < lim)
        lim = slen;

    return !strncmp(s, p, lim);
}
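
/* Example: aoe disks are named "etherd/e<shelf>.<slot>", so for shelf 1,
 * slot 2, kbasename() yields "e1.2", and a user writing "e1.2" to the
 * flush interface matches exactly that device.
 */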

/* Free a dying device's resources.  DEVFL_FREEING guarantees that only
 * one caller performs the teardown; DEVFL_FREED marks it done.
 */
static void
freedev(struct aoedev *d)
{
    struct aoetgt **t, **e;
    int freeing = 0;
    unsigned long flags;

    spin_lock_irqsave(&d->lock, flags);
    if (d->flags & DEVFL_TKILL
    && !(d->flags & DEVFL_FREEING)) {
        d->flags |= DEVFL_FREEING;
        freeing = 1;
    }
    spin_unlock_irqrestore(&d->lock, flags);
    if (!freeing)
        return;

    del_timer_sync(&d->timer);
    if (d->gd) {
        aoedisk_rm_debugfs(d);
        del_gendisk(d->gd);
        put_disk(d->gd);
        blk_mq_free_tag_set(&d->tag_set);
    }
    t = d->targets;
    e = t + d->ntargets;
    for (; t < e && *t; t++)
        freetgt(d, *t);

    mempool_destroy(d->bufpool);
    skbpoolfree(d);
    minor_free(d->sysminor);

    spin_lock_irqsave(&d->lock, flags);
    d->flags |= DEVFL_FREED;
    spin_unlock_irqrestore(&d->lock, flags);
}

enum flush_parms {
    NOT_EXITING = 0,
    EXITING = 1,
};

static int
flush(const char __user *str, size_t cnt, int exiting)
{
    ulong flags;
    struct aoedev *d, **dd;
    char buf[16];
    int all = 0;
    int specified = 0;  /* flush a specific device */
    unsigned int skipflags;

    skipflags = DEVFL_GDALLOC | DEVFL_NEWSIZE | DEVFL_TKILL;

    if (!exiting && cnt >= 3) {
        if (cnt > sizeof buf)
            cnt = sizeof buf;
        if (copy_from_user(buf, str, cnt))
            return -EFAULT;
        all = !strncmp(buf, "all", 3);
        if (!all)
            specified = 1;
    }

    flush_workqueue(aoe_wq);
    /* pass one: do aoedev_downdev, which might sleep */
restart1:
    spin_lock_irqsave(&devlist_lock, flags);
    for (d = devlist; d; d = d->next) {
        spin_lock(&d->lock);
        if (d->flags & DEVFL_TKILL)
            goto cont;

        if (exiting) {
            /* unconditionally take each device down */
        } else if (specified) {
            if (!user_req(buf, cnt, d))
                goto cont;
        } else if ((!all && (d->flags & DEVFL_UP))
        || d->flags & skipflags
        || d->nopen
        || d->ref)
            goto cont;

        spin_unlock(&d->lock);
        spin_unlock_irqrestore(&devlist_lock, flags);
        aoedev_downdev(d);
        d->flags |= DEVFL_TKILL;
        goto restart1;
cont:
        spin_unlock(&d->lock);
    }
    spin_unlock_irqrestore(&devlist_lock, flags);

    /* pass two: call freedev, which might sleep,
     * for aoedevs marked with DEVFL_TKILL
     */
restart2:
    spin_lock_irqsave(&devlist_lock, flags);
    for (d = devlist; d; d = d->next) {
        spin_lock(&d->lock);
        if (d->flags & DEVFL_TKILL
        && !(d->flags & DEVFL_FREEING)) {
            spin_unlock(&d->lock);
            spin_unlock_irqrestore(&devlist_lock, flags);
            freedev(d);
            goto restart2;
        }
        spin_unlock(&d->lock);
    }

    /* pass three: remove aoedevs marked with DEVFL_FREED */
    for (dd = &devlist, d = *dd; d; d = *dd) {
        struct aoedev *doomed = NULL;

        spin_lock(&d->lock);
        if (d->flags & DEVFL_FREED) {
            *dd = d->next;
            doomed = d;
        } else {
            dd = &d->next;
        }
        spin_unlock(&d->lock);
        if (doomed)
            kfree(doomed->targets);
        kfree(doomed);
    }
    spin_unlock_irqrestore(&devlist_lock, flags);

    return 0;
}
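
/* Why three passes: aoedev_downdev() and freedev() can sleep, so neither
 * may run under devlist_lock.  The first two passes therefore drop the
 * locks, do the sleeping work, and restart from the list head, since the
 * list may have changed in the meantime; DEVFL_TKILL and DEVFL_FREEING
 * mark devices already handled so the restarts terminate.  Only pass
 * three, which merely unlinks and kfrees, runs fully under the lock.
 */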

int
aoedev_flush(const char __user *str, size_t cnt)
{
    return flush(str, cnt, NOT_EXITING);
}

/* An skb can remain referenced this long: it has been confirmed to
 * happen once, with Tms=3*1000, due to a driver changing link state and
 * not processing its transmit ring.  The problem is hard enough to solve
 * by returning an error that I'm still punting on "solving" this.
 */
static void
skbfree(struct sk_buff *skb)
{
    enum { Sms = 250, Tms = 30 * 1000};
    int i = Tms / Sms;

    if (skb == NULL)
        return;
    /* poll every Sms ms, for at most Tms ms (120 tries), until we
     * hold the only reference to the skb's data */
    while (atomic_read(&skb_shinfo(skb)->dataref) != 1 && i-- > 0)
        msleep(Sms);
    if (i < 0) {
        printk(KERN_ERR
            "aoe: %s holds ref: %s\n",
            skb->dev ? skb->dev->name : "netif",
            "cannot free skb -- memory leaked.");
        return;
    }
    skb->truesize -= skb->data_len;
    skb_shinfo(skb)->nr_frags = skb->data_len = 0;
    skb_trim(skb, 0);
    dev_kfree_skb(skb);
}

/* Free every skb in the device's pool and reinitialize the list head. */
static void
skbpoolfree(struct aoedev *d)
{
    struct sk_buff *skb, *tmp;

    skb_queue_walk_safe(&d->skbpool, skb, tmp)
        skbfree(skb);

    __skb_queue_head_init(&d->skbpool);
}

/* find it or allocate it */
struct aoedev *
aoedev_by_aoeaddr(ulong maj, int min, int do_alloc)
{
    struct aoedev *d;
    int i;
    ulong flags;
    ulong sysminor = 0;

    spin_lock_irqsave(&devlist_lock, flags);

    for (d = devlist; d; d = d->next)
        if (d->aoemajor == maj && d->aoeminor == min) {
            spin_lock(&d->lock);
            if (d->flags & DEVFL_TKILL) {
                spin_unlock(&d->lock);
                d = NULL;
                goto out;
            }
            d->ref++;
            spin_unlock(&d->lock);
            break;
        }
    if (d || !do_alloc || minor_get(&sysminor, maj, min) < 0)
        goto out;
    d = kcalloc(1, sizeof *d, GFP_ATOMIC);
    if (!d)
        goto out;
    d->targets = kcalloc(NTARGETS, sizeof(*d->targets), GFP_ATOMIC);
    if (!d->targets) {
        kfree(d);
        d = NULL;
        goto out;
    }
    d->ntargets = NTARGETS;
    INIT_WORK(&d->work, aoecmd_sleepwork);
    spin_lock_init(&d->lock);
    INIT_LIST_HEAD(&d->rq_list);
    skb_queue_head_init(&d->skbpool);
    timer_setup(&d->timer, dummy_timer, 0);
    d->timer.expires = jiffies + HZ;
    add_timer(&d->timer);
    d->bufpool = NULL;  /* defer to aoeblk_gdalloc */
    d->tgt = d->targets;
    d->ref = 1;
    for (i = 0; i < NFACTIVE; i++)
        INIT_LIST_HEAD(&d->factive[i]);
    INIT_LIST_HEAD(&d->rexmitq);
    d->sysminor = sysminor;
    d->aoemajor = maj;
    d->aoeminor = min;
    d->rttavg = RTTAVG_INIT;
    d->rttdev = RTTDEV_INIT;
    d->next = devlist;
    devlist = d;
 out:
    spin_unlock_irqrestore(&devlist_lock, flags);
    return d;
}
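
/* The allocations above use GFP_ATOMIC because devlist_lock is held with
 * interrupts disabled across the whole lookup-or-allocate sequence, so
 * sleeping is not allowed here.  A new device is born with ref == 1;
 * the caller drops that reference with aoedev_put() when done.
 */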

/* Release a target: drop its network-device references and free the
 * frames on its free list.
 */
static void
freetgt(struct aoedev *d, struct aoetgt *t)
{
    struct frame *f;
    struct list_head *pos, *nx, *head;
    struct aoeif *ifp;

    for (ifp = t->ifs; ifp < &t->ifs[NAOEIFS]; ++ifp) {
        if (!ifp->nd)
            break;
        dev_put(ifp->nd);
    }

    head = &t->ffree;
    list_for_each_safe(pos, nx, head) {
        list_del(pos);
        f = list_entry(pos, struct frame, head);
        skbfree(f->skb);
        kfree(f);
    }
    kfree(t);
}

void
aoedev_exit(void)
{
    flush_workqueue(aoe_wq);
    flush(NULL, 0, EXITING);
}

int __init
aoedev_init(void)
{
    return 0;
}