/*
 * Copyright (C) 2006-2009 Red Hat, Inc.
 *
 * This file is released under the LGPL.
 */

#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/dm-dirty-log.h>
#include <linux/device-mapper.h>
#include <linux/dm-log-userspace.h>
#include <linux/module.h>
#include <linux/workqueue.h>

#include "dm-log-userspace-transfer.h"

#define DM_LOG_USERSPACE_VSN "1.3.0"

#define FLUSH_ENTRY_POOL_SIZE 16

struct dm_dirty_log_flush_entry {
    int type;
    region_t region;
    struct list_head list;
};
/*
 * This limit on the number of mark and clear requests is, to a degree,
 * arbitrary.  However, there is some basis for the choice in the limits
 * imposed on the size of the data payload by dm-log-userspace-transfer.c:
 * dm_consult_userspace().
 */
#define MAX_FLUSH_GROUP_COUNT 32

struct log_c {
    struct dm_target *ti;
    struct dm_dev *log_dev;

    char *usr_argv_str;
    uint32_t usr_argc;

    uint32_t region_size;
    region_t region_count;
    uint64_t luid;
    char uuid[DM_UUID_LEN];

    /*
     * Mark and clear requests are held until a flush is issued
     * so that we can group, and thereby limit, the amount of
     * network traffic between kernel and userspace.  The 'flush_lock'
     * is used to protect these lists.
     */
    spinlock_t flush_lock;
    struct list_head mark_list;
    struct list_head clear_list;

    /*
     * in_sync_hint gets set when doing is_remote_recovering.  It
     * represents the first region that needs recovery.  IOW, the
     * first zero bit of sync_bits.  This can be useful to limit
     * traffic for calls like is_remote_recovering and get_resync_work,
     * but take care in its use for anything else.
     */
    uint64_t in_sync_hint;

    /*
     * Workqueue for flush of clear region requests.
     */
    struct workqueue_struct *dmlog_wq;
    struct delayed_work flush_log_work;
    atomic_t sched_flush;

    /*
     * Combine userspace flush and mark requests for efficiency.
     */
    uint32_t integrated_flush;

    mempool_t flush_entry_pool;
};

static struct kmem_cache *_flush_entry_cache;

static int userspace_do_request(struct log_c *lc, const char *uuid,
                int request_type, char *data, size_t data_size,
                char *rdata, size_t *rdata_size)
{
    int r;

    /*
     * If the server isn't there, -ESRCH is returned,
     * and we must keep trying until the server is
     * restored.
     */
retry:
    r = dm_consult_userspace(uuid, lc->luid, request_type, data,
                 data_size, rdata, rdata_size);

    if (r != -ESRCH)
        return r;

    DMERR("Userspace log server not found.");
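    /*
     * Reconnect handshake: replay the original constructor string
     * (DM_ULOG_CTR) until the server answers, then resume the log,
     * and only then retry the original request.
     */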
    while (1) {
        set_current_state(TASK_INTERRUPTIBLE);
        schedule_timeout(2*HZ);
        DMWARN("Attempting to contact userspace log server...");
        r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_CTR,
                     lc->usr_argv_str,
                     strlen(lc->usr_argv_str) + 1,
                     NULL, NULL);
        if (!r)
            break;
    }
    DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete");
    r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_RESUME, NULL,
                 0, NULL, NULL);
    if (!r)
        goto retry;

    DMERR("Error trying to resume userspace log: %d", r);

    return -ESRCH;
}

static int build_constructor_string(struct dm_target *ti,
                    unsigned argc, char **argv,
                    char **ctr_str)
{
    int i, str_size;
    char *str = NULL;

    *ctr_str = NULL;

    /*
     * Determine overall size of the string.
     */
    for (i = 0, str_size = 0; i < argc; i++)
        str_size += strlen(argv[i]) + 1; /* +1 for space between args */

    str_size += 20; /* Max number of chars in a printed u64 number */

    str = kzalloc(str_size, GFP_KERNEL);
    if (!str) {
        DMWARN("Unable to allocate memory for constructor string");
        return -ENOMEM;
    }

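    /* Resulting string: "<target length> <argv[0]> <argv[1]> ..." */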
    str_size = sprintf(str, "%llu", (unsigned long long)ti->len);
    for (i = 0; i < argc; i++)
        str_size += sprintf(str + str_size, " %s", argv[i]);

    *ctr_str = str;
    return str_size;
}

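/*
 * Delayed-work handler used with 'integrated_flush': pushes a
 * deferred DM_ULOG_FLUSH to the server; a failure is reported via
 * a table event.
 */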
static void do_flush(struct work_struct *work)
{
    int r;
    struct log_c *lc = container_of(work, struct log_c, flush_log_work.work);

    atomic_set(&lc->sched_flush, 0);

    r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH, NULL, 0, NULL, NULL);

    if (r)
        dm_table_event(lc->ti->table);
}

/*
 * userspace_ctr
 *
 * argv contains:
 *  <UUID> [integrated_flush] <other args>
 * Where 'other args' are the userspace implementation-specific log
 * arguments.
 *
 * Example:
 *  <UUID> [integrated_flush] clustered-disk <arg count> <log dev>
 *  <region_size> [[no]sync]
 *
 * This module strips off the <UUID> and uses it for identification
 * purposes when communicating with userspace about a log.
 *
 * If integrated_flush is defined, the kernel combines flush
 * and mark requests.
 *
 * The rest of the line, beginning with 'clustered-disk', is passed
 * to the userspace ctr function.
 */
static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
             unsigned argc, char **argv)
{
    int r = 0;
    int str_size;
    char *ctr_str = NULL;
    struct log_c *lc = NULL;
    uint64_t rdata;
    size_t rdata_size = sizeof(rdata);
    char *devices_rdata = NULL;
    size_t devices_rdata_size = DM_NAME_LEN;

    if (argc < 3) {
        DMWARN("Too few arguments to userspace dirty log");
        return -EINVAL;
    }

    lc = kzalloc(sizeof(*lc), GFP_KERNEL);
    if (!lc) {
        DMWARN("Unable to allocate userspace log context.");
        return -ENOMEM;
    }

    /* The ptr value is sufficient for local unique id */
    lc->luid = (unsigned long)lc;

    lc->ti = ti;

    if (strlen(argv[0]) > (DM_UUID_LEN - 1)) {
        DMWARN("UUID argument too long.");
        kfree(lc);
        return -EINVAL;
    }

    lc->usr_argc = argc;

    strncpy(lc->uuid, argv[0], DM_UUID_LEN);
    argc--;
    argv++;
    spin_lock_init(&lc->flush_lock);
    INIT_LIST_HEAD(&lc->mark_list);
    INIT_LIST_HEAD(&lc->clear_list);

    if (!strcasecmp(argv[0], "integrated_flush")) {
        lc->integrated_flush = 1;
        argc--;
        argv++;
    }

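    /*
     * ctr_str becomes "<target length> <remaining args...>"; it is kept
     * in lc->usr_argv_str and replayed verbatim when reconnecting to a
     * restarted server (see userspace_do_request).
     */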
    str_size = build_constructor_string(ti, argc, argv, &ctr_str);
    if (str_size < 0) {
        kfree(lc);
        return str_size;
    }

    devices_rdata = kzalloc(devices_rdata_size, GFP_KERNEL);
    if (!devices_rdata) {
        DMERR("Failed to allocate memory for device information");
        r = -ENOMEM;
        goto out;
    }

    r = mempool_init_slab_pool(&lc->flush_entry_pool, FLUSH_ENTRY_POOL_SIZE,
                   _flush_entry_cache);
    if (r) {
        DMERR("Failed to create flush_entry_pool");
        goto out;
    }

    /*
     * Send table string and get back any opened device.
     */
    r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_CTR,
                 ctr_str, str_size,
                 devices_rdata, &devices_rdata_size);

    if (r < 0) {
        if (r == -ESRCH)
            DMERR("Userspace log server not found");
        else
            DMERR("Userspace log server failed to create log");
        goto out;
    }

    /* Since the region size does not change, get it now */
    rdata_size = sizeof(rdata);
    r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_GET_REGION_SIZE,
                 NULL, 0, (char *)&rdata, &rdata_size);

    if (r) {
        DMERR("Failed to get region size of dirty log");
        goto out;
    }

    lc->region_size = (uint32_t)rdata;
    lc->region_count = dm_sector_div_up(ti->len, lc->region_size);

    if (devices_rdata_size) {
        if (devices_rdata[devices_rdata_size - 1] != '\0') {
            DMERR("DM_ULOG_CTR device return string not properly terminated");
            r = -EINVAL;
            goto out;
        }
        r = dm_get_device(ti, devices_rdata,
                  dm_table_get_mode(ti->table), &lc->log_dev);
        if (r)
            DMERR("Failed to register %s with device-mapper",
                  devices_rdata);
    }

    if (lc->integrated_flush) {
        lc->dmlog_wq = alloc_workqueue("dmlogd", WQ_MEM_RECLAIM, 0);
        if (!lc->dmlog_wq) {
            DMERR("couldn't start dmlogd");
            r = -ENOMEM;
            goto out;
        }

        INIT_DELAYED_WORK(&lc->flush_log_work, do_flush);
        atomic_set(&lc->sched_flush, 0);
    }

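    /*
     * On failure, everything allocated above is torn down here; on
     * success, ownership of ctr_str transfers to the log context.
     */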
out:
    kfree(devices_rdata);
    if (r) {
        mempool_exit(&lc->flush_entry_pool);
        kfree(lc);
        kfree(ctr_str);
    } else {
        lc->usr_argv_str = ctr_str;
        log->context = lc;
    }

    return r;
}

static void userspace_dtr(struct dm_dirty_log *log)
{
    struct log_c *lc = log->context;

    if (lc->integrated_flush) {
        /* Run any scheduled deferred flush before tearing down. */
        if (atomic_read(&lc->sched_flush))
            flush_delayed_work(&lc->flush_log_work);

        destroy_workqueue(lc->dmlog_wq);
    }

    (void) dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_DTR,
                    NULL, 0, NULL, NULL);

    if (lc->log_dev)
        dm_put_device(lc->ti, lc->log_dev);

    mempool_exit(&lc->flush_entry_pool);

    kfree(lc->usr_argv_str);
    kfree(lc);
}

static int userspace_presuspend(struct dm_dirty_log *log)
{
    int r;
    struct log_c *lc = log->context;

    r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_PRESUSPEND,
                 NULL, 0, NULL, NULL);

    return r;
}

static int userspace_postsuspend(struct dm_dirty_log *log)
{
    int r;
    struct log_c *lc = log->context;

    /*
     * Run any scheduled flush now rather than waiting for the timer.
     */
    if (lc->integrated_flush && atomic_read(&lc->sched_flush))
        flush_delayed_work(&lc->flush_log_work);

    r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_POSTSUSPEND,
                 NULL, 0, NULL, NULL);

    return r;
}

static int userspace_resume(struct dm_dirty_log *log)
{
    int r;
    struct log_c *lc = log->context;

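    /* in_sync_hint must be reset on resume; see userspace_is_remote_recovering(). */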
    lc->in_sync_hint = 0;
    r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_RESUME,
                 NULL, 0, NULL, NULL);

    return r;
}

static uint32_t userspace_get_region_size(struct dm_dirty_log *log)
{
    struct log_c *lc = log->context;

    return lc->region_size;
}

/*
 * userspace_is_clean
 *
 * Check whether a region is clean.  If there is any sort of
 * failure when consulting the server, we return not clean.
 *
 * Returns: 1 if clean, 0 otherwise
 */
static int userspace_is_clean(struct dm_dirty_log *log, region_t region)
{
    int r;
    uint64_t region64 = (uint64_t)region;
    int64_t is_clean;
    size_t rdata_size;
    struct log_c *lc = log->context;

    rdata_size = sizeof(is_clean);
    r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_CLEAN,
                 (char *)&region64, sizeof(region64),
                 (char *)&is_clean, &rdata_size);

    return (r) ? 0 : (int)is_clean;
}

/*
 * userspace_in_sync
 *
 * Check if the region is in-sync.  If there is any sort
 * of failure when consulting the server, we assume that
 * the region is not in sync.
 *
 * If 'can_block' is not set, return -EWOULDBLOCK immediately.
 *
 * Returns: 1 if in-sync, 0 if not-in-sync, -EWOULDBLOCK
 */
static int userspace_in_sync(struct dm_dirty_log *log, region_t region,
                 int can_block)
{
    int r;
    uint64_t region64 = region;
    int64_t in_sync;
    size_t rdata_size;
    struct log_c *lc = log->context;

    /*
     * We can never respond directly - even if in_sync_hint is
     * set.  This is because another machine could see a device
     * failure and mark the region out-of-sync.  If we don't go
     * to userspace to ask, we might think the region is in-sync
     * and allow a read to pick up data that is stale.  (This is
     * very unlikely if a device actually fails; but it is very
     * likely if a connection to one device from one machine fails.)
     *
     * There still might be a problem if the mirror caches the region
     * state as in-sync... but then this call would not be made.  So,
     * that is a mirror problem.
     */
    if (!can_block)
        return -EWOULDBLOCK;

    rdata_size = sizeof(in_sync);
    r = userspace_do_request(lc, lc->uuid, DM_ULOG_IN_SYNC,
                 (char *)&region64, sizeof(region64),
                 (char *)&in_sync, &rdata_size);
    return (r) ? 0 : (int)in_sync;
}

static int flush_one_by_one(struct log_c *lc, struct list_head *flush_list)
{
    int r = 0;
    struct dm_dirty_log_flush_entry *fe;

    list_for_each_entry(fe, flush_list, list) {
        r = userspace_do_request(lc, lc->uuid, fe->type,
                     (char *)&fe->region,
                     sizeof(fe->region),
                     NULL, NULL);
        if (r)
            break;
    }

    return r;
}

static int flush_by_group(struct log_c *lc, struct list_head *flush_list,
              int flush_with_payload)
{
    int r = 0;
    int count;
    uint32_t type = 0;
    struct dm_dirty_log_flush_entry *fe, *tmp_fe;
    LIST_HEAD(tmp_list);
    uint64_t group[MAX_FLUSH_GROUP_COUNT];

    /*
     * Process the requests in batches of up to
     * MAX_FLUSH_GROUP_COUNT regions, one userspace
     * round trip per batch.
     */
    while (!list_empty(flush_list)) {
        count = 0;

        list_for_each_entry_safe(fe, tmp_fe, flush_list, list) {
            group[count] = fe->region;
            count++;

            list_move(&fe->list, &tmp_list);

            type = fe->type;
            if (count >= MAX_FLUSH_GROUP_COUNT)
                break;
        }

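        /*
         * All entries on a given list share one request type:
         * mark_list carries DM_ULOG_MARK_REGION and clear_list
         * carries DM_ULOG_CLEAR_REGION, so 'type' is uniform.
         */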
        if (flush_with_payload) {
            r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH,
                         (char *)(group),
                         count * sizeof(uint64_t),
                         NULL, NULL);
            /*
             * Integrated flush failed.
             */
            if (r)
                break;
        } else {
            r = userspace_do_request(lc, lc->uuid, type,
                         (char *)(group),
                         count * sizeof(uint64_t),
                         NULL, NULL);
            if (r) {
                /*
                 * Group send failed.  Attempt one-by-one.
                 */
                list_splice_init(&tmp_list, flush_list);
                r = flush_one_by_one(lc, flush_list);
                break;
            }
        }
    }

    /*
     * Must collect flush entries that were successfully processed
     * as a group so that they will be freed by the caller.
     */
    list_splice_init(&tmp_list, flush_list);

    return r;
}

/*
 * userspace_flush
 *
 * This function is ok to block.
 * The flush happens in two stages.  First, it sends all
 * clear/mark requests that are on the list.  Then it
 * tells the server to commit them.  This gives the
 * server a chance to optimise the commit, instead of
 * doing it for every request.
 *
 * Additionally, we could implement another thread that
 * sends the requests up to the server - reducing the
 * load on flush.  Then the flush would have less in
 * the list and be responsible for the finishing commit.
 *
 * Returns: 0 on success, < 0 on failure
 */
static int userspace_flush(struct dm_dirty_log *log)
{
    int r = 0;
    unsigned long flags;
    struct log_c *lc = log->context;
    LIST_HEAD(mark_list);
    LIST_HEAD(clear_list);
    int mark_list_is_empty;
    int clear_list_is_empty;
    struct dm_dirty_log_flush_entry *fe, *tmp_fe;
    mempool_t *flush_entry_pool = &lc->flush_entry_pool;

    spin_lock_irqsave(&lc->flush_lock, flags);
    list_splice_init(&lc->mark_list, &mark_list);
    list_splice_init(&lc->clear_list, &clear_list);
    spin_unlock_irqrestore(&lc->flush_lock, flags);

    mark_list_is_empty = list_empty(&mark_list);
    clear_list_is_empty = list_empty(&clear_list);

    if (mark_list_is_empty && clear_list_is_empty)
        return 0;

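    /*
     * Clear requests are always sent first; marks follow, either
     * batched with an integrated flush or committed by an explicit
     * DM_ULOG_FLUSH below.
     */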
    r = flush_by_group(lc, &clear_list, 0);
    if (r)
        goto out;

    if (!lc->integrated_flush) {
        r = flush_by_group(lc, &mark_list, 0);
        if (r)
            goto out;
        r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH,
                     NULL, 0, NULL, NULL);
        goto out;
    }

    /*
     * Send integrated flush request with mark_list as payload.
     */
    r = flush_by_group(lc, &mark_list, 1);
    if (r)
        goto out;

    if (mark_list_is_empty && !atomic_read(&lc->sched_flush)) {
        /*
         * When there are only clear region requests,
         * we schedule a flush in the future.
         */
        queue_delayed_work(lc->dmlog_wq, &lc->flush_log_work, 3 * HZ);
        atomic_set(&lc->sched_flush, 1);
    } else {
        /*
         * Cancel pending flush because we
         * have already flushed in mark_region.
         */
        cancel_delayed_work(&lc->flush_log_work);
        atomic_set(&lc->sched_flush, 0);
    }

out:
    /*
     * We can safely remove these entries, even after failure.
     * Calling code will receive an error and will know that
     * the log facility has failed.
     */
    list_for_each_entry_safe(fe, tmp_fe, &mark_list, list) {
        list_del(&fe->list);
        mempool_free(fe, flush_entry_pool);
    }
    list_for_each_entry_safe(fe, tmp_fe, &clear_list, list) {
        list_del(&fe->list);
        mempool_free(fe, flush_entry_pool);
    }

    if (r)
        dm_table_event(lc->ti->table);

    return r;
}

/*
 * userspace_mark_region
 *
 * This function should avoid blocking unless absolutely required.
 * (Memory allocation is valid for blocking.)
 */
static void userspace_mark_region(struct dm_dirty_log *log, region_t region)
{
    unsigned long flags;
    struct log_c *lc = log->context;
    struct dm_dirty_log_flush_entry *fe;

    /* Wait for an allocation, but _never_ fail */
    fe = mempool_alloc(&lc->flush_entry_pool, GFP_NOIO);
    BUG_ON(!fe);

    spin_lock_irqsave(&lc->flush_lock, flags);
    fe->type = DM_ULOG_MARK_REGION;
    fe->region = region;
    list_add(&fe->list, &lc->mark_list);
    spin_unlock_irqrestore(&lc->flush_lock, flags);
}

/*
 * userspace_clear_region
 *
 * This function must not block, so the allocation must not
 * block either.  In the worst case, it is ok for it to fail;
 * that simply means we can't clear the region.  Failing does
 * nothing to the current sync context, but it does mean the
 * region will be re-synced on the next reload of the mirror
 * even though it is in-sync.
 */
static void userspace_clear_region(struct dm_dirty_log *log, region_t region)
{
    unsigned long flags;
    struct log_c *lc = log->context;
    struct dm_dirty_log_flush_entry *fe;

    /*
     * If we fail to allocate, we skip the clearing of
     * the region.  This doesn't hurt us in any way, except
     * to cause the region to be resync'ed when the
     * device is activated next time.
     */
    fe = mempool_alloc(&lc->flush_entry_pool, GFP_ATOMIC);
    if (!fe) {
        DMERR("Failed to allocate memory to clear region.");
        return;
    }

    spin_lock_irqsave(&lc->flush_lock, flags);
    fe->type = DM_ULOG_CLEAR_REGION;
    fe->region = region;
    list_add(&fe->list, &lc->clear_list);
    spin_unlock_irqrestore(&lc->flush_lock, flags);
}

/*
 * userspace_get_resync_work
 *
 * Get a region that needs recovery.  It is valid to return
 * an error for this function.
 *
 * Returns: 1 if region filled, 0 if no work, <0 on error
 */
static int userspace_get_resync_work(struct dm_dirty_log *log, region_t *region)
{
    int r;
    size_t rdata_size;
    struct log_c *lc = log->context;
    struct {
        int64_t i; /* 64-bit for mixed-architecture compatibility */
        region_t r;
    } pkg;

    if (lc->in_sync_hint >= lc->region_count)
        return 0;

    rdata_size = sizeof(pkg);
    r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_RESYNC_WORK,
                 NULL, 0, (char *)&pkg, &rdata_size);

    *region = pkg.r;
    return (r) ? r : (int)pkg.i;
}

/*
 * userspace_set_region_sync
 *
 * Set the sync status of a given region.  This function
 * must not fail.
 */
static void userspace_set_region_sync(struct dm_dirty_log *log,
                      region_t region, int in_sync)
{
    struct log_c *lc = log->context;
    struct {
        region_t r;
        int64_t i;
    } pkg;

    pkg.r = region;
    pkg.i = (int64_t)in_sync;

    (void) userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC,
                    (char *)&pkg, sizeof(pkg), NULL, NULL);

    /*
     * It would be nice to be able to report failures.
     * However, it is easy enough to detect and resolve.
     */
}

/*
 * userspace_get_sync_count
 *
 * If there is any sort of failure when consulting the server,
 * we assume that the sync count is zero.
 *
 * Returns: sync count on success, 0 on failure
 */
static region_t userspace_get_sync_count(struct dm_dirty_log *log)
{
    int r;
    size_t rdata_size;
    uint64_t sync_count;
    struct log_c *lc = log->context;

    rdata_size = sizeof(sync_count);
    r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_SYNC_COUNT,
                 NULL, 0, (char *)&sync_count, &rdata_size);

    if (r)
        return 0;

    if (sync_count >= lc->region_count)
        lc->in_sync_hint = lc->region_count;

    return (region_t)sync_count;
}

/*
 * userspace_status
 *
 * Returns: amount of space consumed
 */
static int userspace_status(struct dm_dirty_log *log, status_type_t status_type,
                char *result, unsigned maxlen)
{
    int r = 0;
    char *table_args;
    size_t sz = (size_t)maxlen;
    struct log_c *lc = log->context;

    switch (status_type) {
    case STATUSTYPE_INFO:
        r = userspace_do_request(lc, lc->uuid, DM_ULOG_STATUS_INFO,
                     NULL, 0, result, &sz);

        if (r) {
            sz = 0;
            DMEMIT("%s 1 COM_FAILURE", log->type->name);
        }
        break;
    case STATUSTYPE_TABLE:
        sz = 0;
        table_args = strchr(lc->usr_argv_str, ' ');
        BUG_ON(!table_args); /* There will always be a ' ' */
        table_args++;

        DMEMIT("%s %u %s ", log->type->name, lc->usr_argc, lc->uuid);
        if (lc->integrated_flush)
            DMEMIT("integrated_flush ");
        DMEMIT("%s ", table_args);
        break;
    case STATUSTYPE_IMA:
        *result = '\0';
        break;
    }
    return (r) ? 0 : (int)sz;
}

/*
 * userspace_is_remote_recovering
 *
 * Returns: 1 if region recovering, 0 otherwise
 */
static int userspace_is_remote_recovering(struct dm_dirty_log *log,
                      region_t region)
{
    int r;
    uint64_t region64 = region;
    struct log_c *lc = log->context;
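    /* Throttle: consult the server at most once every HZ/4 jiffies. */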
    static unsigned long limit;
    struct {
        int64_t is_recovering;
        uint64_t in_sync_hint;
    } pkg;
    size_t rdata_size = sizeof(pkg);

    /*
     * Once the mirror has been reported to be in-sync,
     * it will never again ask for recovery work.  So,
     * we can safely say there is not a remote machine
     * recovering if the device is in-sync.  (in_sync_hint
     * must be reset at resume time.)
     */
    if (region < lc->in_sync_hint)
        return 0;
    else if (time_after(limit, jiffies))
        return 1;

    limit = jiffies + (HZ / 4);
    r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_REMOTE_RECOVERING,
                 (char *)&region64, sizeof(region64),
                 (char *)&pkg, &rdata_size);
    if (r)
        return 1;

    lc->in_sync_hint = pkg.in_sync_hint;

    return (int)pkg.is_recovering;
}

static struct dm_dirty_log_type _userspace_type = {
    .name = "userspace",
    .module = THIS_MODULE,
    .ctr = userspace_ctr,
    .dtr = userspace_dtr,
    .presuspend = userspace_presuspend,
    .postsuspend = userspace_postsuspend,
    .resume = userspace_resume,
    .get_region_size = userspace_get_region_size,
    .is_clean = userspace_is_clean,
    .in_sync = userspace_in_sync,
    .flush = userspace_flush,
    .mark_region = userspace_mark_region,
    .clear_region = userspace_clear_region,
    .get_resync_work = userspace_get_resync_work,
    .set_region_sync = userspace_set_region_sync,
    .get_sync_count = userspace_get_sync_count,
    .status = userspace_status,
    .is_remote_recovering = userspace_is_remote_recovering,
};

static int __init userspace_dirty_log_init(void)
{
    int r = 0;

    _flush_entry_cache = KMEM_CACHE(dm_dirty_log_flush_entry, 0);
    if (!_flush_entry_cache) {
        DMWARN("Unable to create flush_entry_cache: No memory.");
        return -ENOMEM;
    }

    r = dm_ulog_tfr_init();
    if (r) {
        DMWARN("Unable to initialize userspace log communications");
        kmem_cache_destroy(_flush_entry_cache);
        return r;
    }

    r = dm_dirty_log_type_register(&_userspace_type);
    if (r) {
        DMWARN("Couldn't register userspace dirty log type");
        dm_ulog_tfr_exit();
        kmem_cache_destroy(_flush_entry_cache);
        return r;
    }

    DMINFO("version " DM_LOG_USERSPACE_VSN " loaded");
    return 0;
}

static void __exit userspace_dirty_log_exit(void)
{
    dm_dirty_log_type_unregister(&_userspace_type);
    dm_ulog_tfr_exit();
    kmem_cache_destroy(_flush_entry_cache);

    DMINFO("version " DM_LOG_USERSPACE_VSN " unloaded");
}

module_init(userspace_dirty_log_init);
module_exit(userspace_dirty_log_exit);

MODULE_DESCRIPTION(DM_NAME " userspace dirty log link");
MODULE_AUTHOR("Jonathan Brassow <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");