Back to home page

LXR

 
 

    


0001 /*
0002  * Public API and common code for kernel->userspace relay file support.
0003  *
0004  * See Documentation/filesystems/relay.txt for an overview.
0005  *
0006  * Copyright (C) 2002-2005 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp
0007  * Copyright (C) 1999-2005 - Karim Yaghmour (karim@opersys.com)
0008  *
0009  * Moved to kernel/relay.c by Paul Mundt, 2006.
0010  * November 2006 - CPU hotplug support by Mathieu Desnoyers
0011  *  (mathieu.desnoyers@polymtl.ca)
0012  *
0013  * This file is released under the GPL.
0014  */
0015 #include <linux/errno.h>
0016 #include <linux/stddef.h>
0017 #include <linux/slab.h>
0018 #include <linux/export.h>
0019 #include <linux/string.h>
0020 #include <linux/relay.h>
0021 #include <linux/vmalloc.h>
0022 #include <linux/mm.h>
0023 #include <linux/cpu.h>
0024 #include <linux/splice.h>
0025 
0026 /* list of open channels, for cpu hotplug */
0027 static DEFINE_MUTEX(relay_channels_mutex);
0028 static LIST_HEAD(relay_channels);
0029 
0030 /*
0031  * close() vm_op implementation for relay file mapping.
0032  */
0033 static void relay_file_mmap_close(struct vm_area_struct *vma)
0034 {
0035     struct rchan_buf *buf = vma->vm_private_data;
0036     buf->chan->cb->buf_unmapped(buf, vma->vm_file);
0037 }
0038 
0039 /*
0040  * fault() vm_op implementation for relay file mapping.
0041  */
0042 static int relay_buf_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
0043 {
0044     struct page *page;
0045     struct rchan_buf *buf = vma->vm_private_data;
0046     pgoff_t pgoff = vmf->pgoff;
0047 
0048     if (!buf)
0049         return VM_FAULT_OOM;
0050 
0051     page = vmalloc_to_page(buf->start + (pgoff << PAGE_SHIFT));
0052     if (!page)
0053         return VM_FAULT_SIGBUS;
0054     get_page(page);
0055     vmf->page = page;
0056 
0057     return 0;
0058 }
0059 
0060 /*
0061  * vm_ops for relay file mappings.
0062  */
static const struct vm_operations_struct relay_file_mmap_ops = {
    .fault = relay_buf_fault,		/* demand-populate mapped pages */
    .close = relay_file_mmap_close,	/* notify client on unmap */
};
0067 
0068 /*
0069  * allocate an array of pointers of struct page
0070  */
0071 static struct page **relay_alloc_page_array(unsigned int n_pages)
0072 {
0073     const size_t pa_size = n_pages * sizeof(struct page *);
0074     if (pa_size > PAGE_SIZE)
0075         return vzalloc(pa_size);
0076     return kzalloc(pa_size, GFP_KERNEL);
0077 }
0078 
0079 /*
0080  * free an array of pointers of struct page
0081  */
static void relay_free_page_array(struct page **array)
{
    /* kvfree() copes with both the kzalloc() and vzalloc() cases above. */
    kvfree(array);
}
0086 
0087 /**
0088  *  relay_mmap_buf: - mmap channel buffer to process address space
0089  *  @buf: relay channel buffer
0090  *  @vma: vm_area_struct describing memory to be mapped
0091  *
0092  *  Returns 0 if ok, negative on error
0093  *
0094  *  Caller should already have grabbed mmap_sem.
0095  */
0096 static int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma)
0097 {
0098     unsigned long length = vma->vm_end - vma->vm_start;
0099     struct file *filp = vma->vm_file;
0100 
0101     if (!buf)
0102         return -EBADF;
0103 
0104     if (length != (unsigned long)buf->chan->alloc_size)
0105         return -EINVAL;
0106 
0107     vma->vm_ops = &relay_file_mmap_ops;
0108     vma->vm_flags |= VM_DONTEXPAND;
0109     vma->vm_private_data = buf;
0110     buf->chan->cb->buf_mapped(buf, filp);
0111 
0112     return 0;
0113 }
0114 
0115 /**
0116  *  relay_alloc_buf - allocate a channel buffer
0117  *  @buf: the buffer struct
0118  *  @size: total size of the buffer
0119  *
0120  *  Returns a pointer to the resulting buffer, %NULL if unsuccessful. The
0121  *  passed in size will get page aligned, if it isn't already.
0122  */
static void *relay_alloc_buf(struct rchan_buf *buf, size_t *size)
{
    void *mem;
    unsigned int i, j, n_pages;

    /* Round the requested size up to whole pages. */
    *size = PAGE_ALIGN(*size);
    n_pages = *size >> PAGE_SHIFT;

    buf->page_array = relay_alloc_page_array(n_pages);
    if (!buf->page_array)
        return NULL;

    for (i = 0; i < n_pages; i++) {
        buf->page_array[i] = alloc_page(GFP_KERNEL);
        if (unlikely(!buf->page_array[i]))
            goto depopulate;
        /* Stash the owning buffer in each page's private field. */
        set_page_private(buf->page_array[i], (unsigned long)buf);
    }
    /* Stitch the individual pages into one contiguous kernel mapping. */
    mem = vmap(buf->page_array, n_pages, VM_MAP, PAGE_KERNEL);
    if (!mem)
        goto depopulate;

    memset(mem, 0, *size);
    buf->page_count = n_pages;
    return mem;

depopulate:
    /* Free only the pages that were successfully allocated (0..i-1). */
    for (j = 0; j < i; j++)
        __free_page(buf->page_array[j]);
    relay_free_page_array(buf->page_array);
    return NULL;
}
0155 
0156 /**
0157  *  relay_create_buf - allocate and initialize a channel buffer
0158  *  @chan: the relay channel
0159  *
0160  *  Returns channel buffer if successful, %NULL otherwise.
0161  */
static struct rchan_buf *relay_create_buf(struct rchan *chan)
{
    struct rchan_buf *buf;

    /* Guard the n_subbufs * sizeof(size_t *) multiplication below. */
    if (chan->n_subbufs > UINT_MAX / sizeof(size_t *))
        return NULL;

    buf = kzalloc(sizeof(struct rchan_buf), GFP_KERNEL);
    if (!buf)
        return NULL;
    /* One padding counter per sub-buffer. */
    buf->padding = kmalloc(chan->n_subbufs * sizeof(size_t *), GFP_KERNEL);
    if (!buf->padding)
        goto free_buf;

    /* May page-align chan->alloc_size in place. */
    buf->start = relay_alloc_buf(buf, &chan->alloc_size);
    if (!buf->start)
        goto free_buf;

    buf->chan = chan;
    /* Each buffer holds a reference on its channel. */
    kref_get(&buf->chan->kref);
    return buf;

free_buf:
    kfree(buf->padding);
    kfree(buf);
    return NULL;
}
0189 
0190 /**
0191  *  relay_destroy_channel - free the channel struct
0192  *  @kref: target kernel reference that contains the relay channel
0193  *
0194  *  Should only be called from kref_put().
0195  */
0196 static void relay_destroy_channel(struct kref *kref)
0197 {
0198     struct rchan *chan = container_of(kref, struct rchan, kref);
0199     kfree(chan);
0200 }
0201 
0202 /**
0203  *  relay_destroy_buf - destroy an rchan_buf struct and associated buffer
0204  *  @buf: the buffer struct
0205  */
static void relay_destroy_buf(struct rchan_buf *buf)
{
    struct rchan *chan = buf->chan;
    unsigned int i;

    if (likely(buf->start)) {
        /* Undo relay_alloc_buf(): unmap, then free the backing pages. */
        vunmap(buf->start);
        for (i = 0; i < buf->page_count; i++)
            __free_page(buf->page_array[i]);
        relay_free_page_array(buf->page_array);
    }
    /* Clear this cpu's slot so close/hotplug won't see a stale pointer. */
    *per_cpu_ptr(chan->buf, buf->cpu) = NULL;
    kfree(buf->padding);
    kfree(buf);
    /* Drop the channel reference taken in relay_create_buf(). */
    kref_put(&chan->kref, relay_destroy_channel);
}
0222 
0223 /**
0224  *  relay_remove_buf - remove a channel buffer
0225  *  @kref: target kernel reference that contains the relay buffer
0226  *
0227  *  Removes the file from the filesystem, which also frees the
0228  *  rchan_buf_struct and the channel buffer.  Should only be called from
0229  *  kref_put().
0230  */
0231 static void relay_remove_buf(struct kref *kref)
0232 {
0233     struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref);
0234     relay_destroy_buf(buf);
0235 }
0236 
0237 /**
0238  *  relay_buf_empty - boolean, is the channel buffer empty?
0239  *  @buf: channel buffer
0240  *
0241  *  Returns 1 if the buffer is empty, 0 otherwise.
0242  */
0243 static int relay_buf_empty(struct rchan_buf *buf)
0244 {
0245     return (buf->subbufs_produced - buf->subbufs_consumed) ? 0 : 1;
0246 }
0247 
0248 /**
0249  *  relay_buf_full - boolean, is the channel buffer full?
0250  *  @buf: channel buffer
0251  *
0252  *  Returns 1 if the buffer is full, 0 otherwise.
0253  */
0254 int relay_buf_full(struct rchan_buf *buf)
0255 {
0256     size_t ready = buf->subbufs_produced - buf->subbufs_consumed;
0257     return (ready >= buf->chan->n_subbufs) ? 1 : 0;
0258 }
0259 EXPORT_SYMBOL_GPL(relay_buf_full);
0260 
0261 /*
0262  * High-level relay kernel API and associated functions.
0263  */
0264 
0265 /*
0266  * rchan_callback implementations defining default channel behavior.  Used
0267  * in place of corresponding NULL values in client callback struct.
0268  */
0269 
0270 /*
0271  * subbuf_start() default callback.  Does nothing.
0272  */
0273 static int subbuf_start_default_callback (struct rchan_buf *buf,
0274                       void *subbuf,
0275                       void *prev_subbuf,
0276                       size_t prev_padding)
0277 {
0278     if (relay_buf_full(buf))
0279         return 0;
0280 
0281     return 1;
0282 }
0283 
0284 /*
0285  * buf_mapped() default callback.  Does nothing.
0286  */
static void buf_mapped_default_callback(struct rchan_buf *buf,
                    struct file *filp)
{
    /* Intentionally empty: clients that don't care get a no-op. */
}
0291 
0292 /*
0293  * buf_unmapped() default callback.  Does nothing.
0294  */
static void buf_unmapped_default_callback(struct rchan_buf *buf,
                      struct file *filp)
{
    /* Intentionally empty: clients that don't care get a no-op. */
}
0299 
0300 /*
0301  * create_buf_file_create() default callback.  Does nothing.
0302  */
static struct dentry *create_buf_file_default_callback(const char *filename,
                               struct dentry *parent,
                               umode_t mode,
                               struct rchan_buf *buf,
                               int *is_global)
{
    /* No file is created; the channel stays buffer-only. */
    return NULL;
}
0311 
0312 /*
0313  * remove_buf_file() default callback.  Does nothing.
0314  */
static int remove_buf_file_default_callback(struct dentry *dentry)
{
    /* Nothing was created by the default, so there is nothing to remove. */
    return -EINVAL;
}
0319 
/*
 * Relay channel default callbacks; setup_callbacks() substitutes these
 * for any hook the client left NULL.
 */
static struct rchan_callbacks default_channel_callbacks = {
    .subbuf_start = subbuf_start_default_callback,
    .buf_mapped = buf_mapped_default_callback,
    .buf_unmapped = buf_unmapped_default_callback,
    .create_buf_file = create_buf_file_default_callback,
    .remove_buf_file = remove_buf_file_default_callback,
};
0328 
0329 /**
0330  *  wakeup_readers - wake up readers waiting on a channel
0331  *  @work: contains the channel buffer
0332  *
0333  *  This is the function used to defer reader waking
0334  */
0335 static void wakeup_readers(struct irq_work *work)
0336 {
0337     struct rchan_buf *buf;
0338 
0339     buf = container_of(work, struct rchan_buf, wakeup_work);
0340     wake_up_interruptible(&buf->read_wait);
0341 }
0342 
0343 /**
0344  *  __relay_reset - reset a channel buffer
0345  *  @buf: the channel buffer
0346  *  @init: 1 if this is a first-time initialization
0347  *
0348  *  See relay_reset() for description of effect.
0349  */
static void __relay_reset(struct rchan_buf *buf, unsigned int init)
{
    size_t i;

    if (init) {
        /* First use: set up waitqueue, refcount and deferred wakeup. */
        init_waitqueue_head(&buf->read_wait);
        kref_init(&buf->kref);
        init_irq_work(&buf->wakeup_work, wakeup_readers);
    } else {
        /* Re-reset: make sure no deferred wakeup is still in flight. */
        irq_work_sync(&buf->wakeup_work);
    }

    /* Rewind all produced/consumed accounting to the initial state. */
    buf->subbufs_produced = 0;
    buf->subbufs_consumed = 0;
    buf->bytes_consumed = 0;
    buf->finalized = 0;
    buf->data = buf->start;
    buf->offset = 0;

    for (i = 0; i < buf->chan->n_subbufs; i++)
        buf->padding[i] = 0;

    /* Tell the client logging restarts in the first sub-buffer. */
    buf->chan->cb->subbuf_start(buf, buf->data, NULL, 0);
}
0374 
0375 /**
0376  *  relay_reset - reset the channel
0377  *  @chan: the channel
0378  *
0379  *  This has the effect of erasing all data from all channel buffers
0380  *  and restarting the channel in its initial state.  The buffers
0381  *  are not freed, so any mappings are still in effect.
0382  *
0383  *  NOTE. Care should be taken that the channel isn't actually
0384  *  being used by anything when this call is made.
0385  */
void relay_reset(struct rchan *chan)
{
    struct rchan_buf *buf;
    unsigned int i;

    if (!chan)
        return;

    /* A global channel keeps its single buffer in cpu 0's slot. */
    if (chan->is_global && (buf = *per_cpu_ptr(chan->buf, 0))) {
        __relay_reset(buf, 0);
        return;
    }

    /* Otherwise reset every per-cpu buffer under the channels mutex. */
    mutex_lock(&relay_channels_mutex);
    for_each_possible_cpu(i)
        if ((buf = *per_cpu_ptr(chan->buf, i)))
            __relay_reset(buf, 0);
    mutex_unlock(&relay_channels_mutex);
}
EXPORT_SYMBOL_GPL(relay_reset);
0406 
/*
 * Attach the backing file's dentry to the buffer and publish any bytes
 * logged before the file existed (early_bytes) as its initial i_size.
 */
static inline void relay_set_buf_dentry(struct rchan_buf *buf,
                    struct dentry *dentry)
{
    buf->dentry = dentry;
    d_inode(buf->dentry)->i_size = buf->early_bytes;
}
0413 
0414 static struct dentry *relay_create_buf_file(struct rchan *chan,
0415                         struct rchan_buf *buf,
0416                         unsigned int cpu)
0417 {
0418     struct dentry *dentry;
0419     char *tmpname;
0420 
0421     tmpname = kzalloc(NAME_MAX + 1, GFP_KERNEL);
0422     if (!tmpname)
0423         return NULL;
0424     snprintf(tmpname, NAME_MAX, "%s%d", chan->base_filename, cpu);
0425 
0426     /* Create file in fs */
0427     dentry = chan->cb->create_buf_file(tmpname, chan->parent,
0428                        S_IRUSR, buf,
0429                        &chan->is_global);
0430 
0431     kfree(tmpname);
0432 
0433     return dentry;
0434 }
0435 
0436 /*
0437  *  relay_open_buf - create a new relay channel buffer
0438  *
0439  *  used by relay_open() and CPU hotplug.
0440  */
static struct rchan_buf *relay_open_buf(struct rchan *chan, unsigned int cpu)
{
    struct rchan_buf *buf = NULL;
    struct dentry *dentry;

    /* Global channels share the single buffer kept in cpu 0's slot. */
    if (chan->is_global)
        return *per_cpu_ptr(chan->buf, 0);

    buf = relay_create_buf(chan);
    if (!buf)
        return NULL;

    if (chan->has_base_filename) {
        dentry = relay_create_buf_file(chan, buf, cpu);
        if (!dentry)
            goto free_buf;
        relay_set_buf_dentry(buf, dentry);
    } else {
        /* Only retrieve global info, nothing more, nothing less */
        dentry = chan->cb->create_buf_file(NULL, NULL,
                           S_IRUSR, buf,
                           &chan->is_global);
        /* With a NULL filename the callback must not create a file. */
        if (WARN_ON(dentry))
            goto free_buf;
    }

    buf->cpu = cpu;
    __relay_reset(buf, 1);

    /*
     * The callback may have just flagged the channel global; if so,
     * this (first) buffer becomes the shared one in cpu 0's slot.
     */
    if(chan->is_global) {
        *per_cpu_ptr(chan->buf, 0) = buf;
        buf->cpu = 0;
    }

    return buf;

free_buf:
    relay_destroy_buf(buf);
    return NULL;
}
0481 
0482 /**
0483  *  relay_close_buf - close a channel buffer
0484  *  @buf: channel buffer
0485  *
0486  *  Marks the buffer finalized and restores the default callbacks.
0487  *  The channel buffer and channel buffer data structure are then freed
0488  *  automatically when the last reference is given up.
0489  */
static void relay_close_buf(struct rchan_buf *buf)
{
    buf->finalized = 1;
    /* Flush any pending deferred reader wakeup before removing the file. */
    irq_work_sync(&buf->wakeup_work);
    buf->chan->cb->remove_buf_file(buf->dentry);
    /* Drop the initial reference; open readers may still hold their own. */
    kref_put(&buf->kref, relay_remove_buf);
}
0497 
/*
 * Install the client's callback table on the channel, substituting the
 * default implementation for any individual hook left NULL.
 */
static void setup_callbacks(struct rchan *chan,
               struct rchan_callbacks *cb)
{
    if (!cb) {
        /* No client callbacks at all: use the defaults wholesale. */
        chan->cb = &default_channel_callbacks;
        return;
    }

    if (!cb->subbuf_start)
        cb->subbuf_start = subbuf_start_default_callback;
    if (!cb->buf_mapped)
        cb->buf_mapped = buf_mapped_default_callback;
    if (!cb->buf_unmapped)
        cb->buf_unmapped = buf_unmapped_default_callback;
    if (!cb->create_buf_file)
        cb->create_buf_file = create_buf_file_default_callback;
    if (!cb->remove_buf_file)
        cb->remove_buf_file = remove_buf_file_default_callback;
    chan->cb = cb;
}
0518 
/*
 * CPU hotplug "prepare" callback: give every open channel a buffer for
 * the incoming cpu before it starts running.  Returns 0 or -ENOMEM.
 */
int relay_prepare_cpu(unsigned int cpu)
{
    struct rchan *chan;
    struct rchan_buf *buf;

    mutex_lock(&relay_channels_mutex);
    list_for_each_entry(chan, &relay_channels, list) {
        /* Skip channels that already have a buffer for this cpu. */
        if ((buf = *per_cpu_ptr(chan->buf, cpu)))
            continue;
        buf = relay_open_buf(chan, cpu);
        if (!buf) {
            pr_err("relay: cpu %d buffer creation failed\n", cpu);
            mutex_unlock(&relay_channels_mutex);
            return -ENOMEM;
        }
        *per_cpu_ptr(chan->buf, cpu) = buf;
    }
    mutex_unlock(&relay_channels_mutex);
    return 0;
}
0539 
0540 /**
0541  *  relay_open - create a new relay channel
0542  *  @base_filename: base name of files to create, %NULL for buffering only
0543  *  @parent: dentry of parent directory, %NULL for root directory or buffer
0544  *  @subbuf_size: size of sub-buffers
0545  *  @n_subbufs: number of sub-buffers
0546  *  @cb: client callback functions
0547  *  @private_data: user-defined data
0548  *
0549  *  Returns channel pointer if successful, %NULL otherwise.
0550  *
0551  *  Creates a channel buffer for each cpu using the sizes and
0552  *  attributes specified.  The created channel buffer files
0553  *  will be named base_filename0...base_filenameN-1.  File
0554  *  permissions will be %S_IRUSR.
0555  *
0556  *  If opening a buffer (@parent = NULL) that you later wish to register
0557  *  in a filesystem, call relay_late_setup_files() once the @parent dentry
0558  *  is available.
0559  */
0560 struct rchan *relay_open(const char *base_filename,
0561              struct dentry *parent,
0562              size_t subbuf_size,
0563              size_t n_subbufs,
0564              struct rchan_callbacks *cb,
0565              void *private_data)
0566 {
0567     unsigned int i;
0568     struct rchan *chan;
0569     struct rchan_buf *buf;
0570 
0571     if (!(subbuf_size && n_subbufs))
0572         return NULL;
0573     if (subbuf_size > UINT_MAX / n_subbufs)
0574         return NULL;
0575 
0576     chan = kzalloc(sizeof(struct rchan), GFP_KERNEL);
0577     if (!chan)
0578         return NULL;
0579 
0580     chan->buf = alloc_percpu(struct rchan_buf *);
0581     chan->version = RELAYFS_CHANNEL_VERSION;
0582     chan->n_subbufs = n_subbufs;
0583     chan->subbuf_size = subbuf_size;
0584     chan->alloc_size = PAGE_ALIGN(subbuf_size * n_subbufs);
0585     chan->parent = parent;
0586     chan->private_data = private_data;
0587     if (base_filename) {
0588         chan->has_base_filename = 1;
0589         strlcpy(chan->base_filename, base_filename, NAME_MAX);
0590     }
0591     setup_callbacks(chan, cb);
0592     kref_init(&chan->kref);
0593 
0594     mutex_lock(&relay_channels_mutex);
0595     for_each_online_cpu(i) {
0596         buf = relay_open_buf(chan, i);
0597         if (!buf)
0598             goto free_bufs;
0599         *per_cpu_ptr(chan->buf, i) = buf;
0600     }
0601     list_add(&chan->list, &relay_channels);
0602     mutex_unlock(&relay_channels_mutex);
0603 
0604     return chan;
0605 
0606 free_bufs:
0607     for_each_possible_cpu(i) {
0608         if ((buf = *per_cpu_ptr(chan->buf, i)))
0609             relay_close_buf(buf);
0610     }
0611 
0612     kref_put(&chan->kref, relay_destroy_channel);
0613     mutex_unlock(&relay_channels_mutex);
0614     kfree(chan);
0615     return NULL;
0616 }
0617 EXPORT_SYMBOL_GPL(relay_open);
0618 
/*
 * Carries a (buf, dentry) pair to another cpu via
 * smp_call_function_single() in relay_late_setup_files().
 */
struct rchan_percpu_buf_dispatcher {
    struct rchan_buf *buf;
    struct dentry *dentry;
};
0623 
0624 /* Called in atomic context. */
static void __relay_set_buf_dentry(void *info)
{
    struct rchan_percpu_buf_dispatcher *p = info;

    /* Runs on the target cpu; see relay_late_setup_files(). */
    relay_set_buf_dentry(p->buf, p->dentry);
}
0631 
0632 /**
0633  *  relay_late_setup_files - triggers file creation
0634  *  @chan: channel to operate on
0635  *  @base_filename: base name of files to create
0636  *  @parent: dentry of parent directory, %NULL for root directory
0637  *
0638  *  Returns 0 if successful, non-zero otherwise.
0639  *
0640  *  Use to setup files for a previously buffer-only channel created
0641  *  by relay_open() with a NULL parent dentry.
0642  *
 *  For example, this is useful for performing early tracing in kernel,
0644  *  before VFS is up and then exposing the early results once the dentry
0645  *  is available.
0646  */
int relay_late_setup_files(struct rchan *chan,
               const char *base_filename,
               struct dentry *parent)
{
    int err = 0;
    unsigned int i, curr_cpu;
    unsigned long flags;
    struct dentry *dentry;
    struct rchan_buf *buf;
    struct rchan_percpu_buf_dispatcher disp;

    if (!chan || !base_filename)
        return -EINVAL;

    strlcpy(chan->base_filename, base_filename, NAME_MAX);

    mutex_lock(&relay_channels_mutex);
    /* Is chan already set up? */
    if (unlikely(chan->has_base_filename)) {
        mutex_unlock(&relay_channels_mutex);
        return -EEXIST;
    }
    chan->has_base_filename = 1;
    chan->parent = parent;

    /* Global channel: only one buffer (cpu 0's slot) to wire up. */
    if (chan->is_global) {
        err = -EINVAL;
        buf = *per_cpu_ptr(chan->buf, 0);
        if (!WARN_ON_ONCE(!buf)) {
            dentry = relay_create_buf_file(chan, buf, 0);
            if (dentry && !WARN_ON_ONCE(!chan->is_global)) {
                relay_set_buf_dentry(buf, dentry);
                err = 0;
            }
        }
        mutex_unlock(&relay_channels_mutex);
        return err;
    }

    curr_cpu = get_cpu();
    /*
     * The CPU hotplug notifier ran before us and created buffers with
     * no files associated. So it's safe to call relay_setup_buf_file()
     * on all currently online CPUs.
     */
    for_each_online_cpu(i) {
        buf = *per_cpu_ptr(chan->buf, i);
        if (unlikely(!buf)) {
            WARN_ONCE(1, KERN_ERR "CPU has no buffer!\n");
            err = -EINVAL;
            break;
        }

        dentry = relay_create_buf_file(chan, buf, i);
        if (unlikely(!dentry)) {
            err = -EINVAL;
            break;
        }

        if (curr_cpu == i) {
            /* Local cpu: set the dentry directly with irqs off. */
            local_irq_save(flags);
            relay_set_buf_dentry(buf, dentry);
            local_irq_restore(flags);
        } else {
            /* Remote cpu: set the dentry on that cpu itself. */
            disp.buf = buf;
            disp.dentry = dentry;
            smp_mb();
            /* relay_channels_mutex must be held, so wait. */
            err = smp_call_function_single(i,
                               __relay_set_buf_dentry,
                               &disp, 1);
        }
        if (unlikely(err))
            break;
    }
    put_cpu();
    mutex_unlock(&relay_channels_mutex);

    return err;
}
EXPORT_SYMBOL_GPL(relay_late_setup_files);
0728 
0729 /**
0730  *  relay_switch_subbuf - switch to a new sub-buffer
0731  *  @buf: channel buffer
0732  *  @length: size of current event
0733  *
0734  *  Returns either the length passed in or 0 if full.
0735  *
0736  *  Performs sub-buffer-switch tasks such as invoking callbacks,
0737  *  updating padding counts, waking up readers, etc.
0738  */
size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length)
{
    void *old, *new;
    size_t old_subbuf, new_subbuf;

    if (unlikely(length > buf->chan->subbuf_size))
        goto toobig;

    /*
     * offset == subbuf_size + 1 is the "switch refused" sentinel set
     * below; in that case the previous sub-buffer was already finalized.
     */
    if (buf->offset != buf->chan->subbuf_size + 1) {
        /* Record the unused tail of the old sub-buffer as padding. */
        buf->prev_padding = buf->chan->subbuf_size - buf->offset;
        old_subbuf = buf->subbufs_produced % buf->chan->n_subbufs;
        buf->padding[old_subbuf] = buf->prev_padding;
        buf->subbufs_produced++;
        /* Account the payload in i_size, or in early_bytes pre-file. */
        if (buf->dentry)
            d_inode(buf->dentry)->i_size +=
                buf->chan->subbuf_size -
                buf->padding[old_subbuf];
        else
            buf->early_bytes += buf->chan->subbuf_size -
                        buf->padding[old_subbuf];
        smp_mb();
        if (waitqueue_active(&buf->read_wait)) {
            /*
             * Calling wake_up_interruptible() from here
             * will deadlock if we happen to be logging
             * from the scheduler (trying to re-grab
             * rq->lock), so defer it.
             */
            irq_work_queue(&buf->wakeup_work);
        }
    }

    old = buf->data;
    new_subbuf = buf->subbufs_produced % buf->chan->n_subbufs;
    new = buf->start + new_subbuf * buf->chan->subbuf_size;
    buf->offset = 0;
    /* Let the client veto the switch (e.g. no-overwrite mode when full). */
    if (!buf->chan->cb->subbuf_start(buf, new, old, buf->prev_padding)) {
        buf->offset = buf->chan->subbuf_size + 1;
        return 0;
    }
    buf->data = new;
    buf->padding[new_subbuf] = 0;

    if (unlikely(length + buf->offset > buf->chan->subbuf_size))
        goto toobig;

    return length;

toobig:
    buf->chan->last_toobig = length;
    return 0;
}
EXPORT_SYMBOL_GPL(relay_switch_subbuf);
0792 
0793 /**
0794  *  relay_subbufs_consumed - update the buffer's sub-buffers-consumed count
0795  *  @chan: the channel
0796  *  @cpu: the cpu associated with the channel buffer to update
0797  *  @subbufs_consumed: number of sub-buffers to add to current buf's count
0798  *
0799  *  Adds to the channel buffer's consumed sub-buffer count.
0800  *  subbufs_consumed should be the number of sub-buffers newly consumed,
0801  *  not the total consumed.
0802  *
0803  *  NOTE. Kernel clients don't need to call this function if the channel
0804  *  mode is 'overwrite'.
0805  */
void relay_subbufs_consumed(struct rchan *chan,
                unsigned int cpu,
                size_t subbufs_consumed)
{
    struct rchan_buf *buf;

    if (!chan || cpu >= NR_CPUS)
        return;

    buf = *per_cpu_ptr(chan->buf, cpu);
    /* Reject bogus counts larger than the whole channel. */
    if (!buf || subbufs_consumed > chan->n_subbufs)
        return;

    /* Clamp so consumed never overtakes produced. */
    if (subbufs_consumed > buf->subbufs_produced - buf->subbufs_consumed)
        buf->subbufs_consumed = buf->subbufs_produced;
    else
        buf->subbufs_consumed += subbufs_consumed;
}
EXPORT_SYMBOL_GPL(relay_subbufs_consumed);
0825 
0826 /**
0827  *  relay_close - close the channel
0828  *  @chan: the channel
0829  *
0830  *  Closes all channel buffers and frees the channel.
0831  */
void relay_close(struct rchan *chan)
{
    struct rchan_buf *buf;
    unsigned int i;

    if (!chan)
        return;

    mutex_lock(&relay_channels_mutex);
    /* Global channels keep their single buffer in cpu 0's slot. */
    if (chan->is_global && (buf = *per_cpu_ptr(chan->buf, 0)))
        relay_close_buf(buf);
    else
        for_each_possible_cpu(i)
            if ((buf = *per_cpu_ptr(chan->buf, i)))
                relay_close_buf(buf);

    /* Report events dropped because they exceeded a sub-buffer. */
    if (chan->last_toobig)
        printk(KERN_WARNING "relay: one or more items not logged "
               "[item size (%Zd) > sub-buffer size (%Zd)]\n",
               chan->last_toobig, chan->subbuf_size);

    list_del(&chan->list);
    /* Drop the initial channel reference taken in relay_open(). */
    kref_put(&chan->kref, relay_destroy_channel);
    mutex_unlock(&relay_channels_mutex);
}
EXPORT_SYMBOL_GPL(relay_close);
0858 
0859 /**
0860  *  relay_flush - close the channel
0861  *  @chan: the channel
0862  *
0863  *  Flushes all channel buffers, i.e. forces buffer switch.
0864  */
void relay_flush(struct rchan *chan)
{
    struct rchan_buf *buf;
    unsigned int i;

    if (!chan)
        return;

    /* A global channel has a single buffer in cpu 0's slot. */
    if (chan->is_global && (buf = *per_cpu_ptr(chan->buf, 0))) {
        relay_switch_subbuf(buf, 0);
        return;
    }

    /* Force a sub-buffer switch on every per-cpu buffer. */
    mutex_lock(&relay_channels_mutex);
    for_each_possible_cpu(i)
        if ((buf = *per_cpu_ptr(chan->buf, i)))
            relay_switch_subbuf(buf, 0);
    mutex_unlock(&relay_channels_mutex);
}
EXPORT_SYMBOL_GPL(relay_flush);
0885 
0886 /**
0887  *  relay_file_open - open file op for relay files
0888  *  @inode: the inode
0889  *  @filp: the file
0890  *
0891  *  Increments the channel buffer refcount.
0892  */
0893 static int relay_file_open(struct inode *inode, struct file *filp)
0894 {
0895     struct rchan_buf *buf = inode->i_private;
0896     kref_get(&buf->kref);
0897     filp->private_data = buf;
0898 
0899     return nonseekable_open(inode, filp);
0900 }
0901 
0902 /**
0903  *  relay_file_mmap - mmap file op for relay files
0904  *  @filp: the file
0905  *  @vma: the vma describing what to map
0906  *
0907  *  Calls upon relay_mmap_buf() to map the file into user space.
0908  */
0909 static int relay_file_mmap(struct file *filp, struct vm_area_struct *vma)
0910 {
0911     struct rchan_buf *buf = filp->private_data;
0912     return relay_mmap_buf(buf, vma);
0913 }
0914 
0915 /**
0916  *  relay_file_poll - poll file op for relay files
0917  *  @filp: the file
0918  *  @wait: poll table
0919  *
0920  *  Poll implemention.
0921  */
static unsigned int relay_file_poll(struct file *filp, poll_table *wait)
{
    unsigned int mask = 0;
    struct rchan_buf *buf = filp->private_data;

    /* A finalized (closed) buffer will never produce more data. */
    if (buf->finalized)
        return POLLERR;

    if (filp->f_mode & FMODE_READ) {
        poll_wait(filp, &buf->read_wait, wait);
        if (!relay_buf_empty(buf))
            mask |= POLLIN | POLLRDNORM;
    }

    return mask;
}
0938 
0939 /**
0940  *  relay_file_release - release file op for relay files
0941  *  @inode: the inode
0942  *  @filp: the file
0943  *
0944  *  Decrements the channel refcount, as the filesystem is
0945  *  no longer using it.
0946  */
0947 static int relay_file_release(struct inode *inode, struct file *filp)
0948 {
0949     struct rchan_buf *buf = filp->private_data;
0950     kref_put(&buf->kref, relay_remove_buf);
0951 
0952     return 0;
0953 }
0954 
0955 /*
0956  *  relay_file_read_consume - update the consumed count for the buffer
0957  */
static void relay_file_read_consume(struct rchan_buf *buf,
                    size_t read_pos,
                    size_t bytes_consumed)
{
    size_t subbuf_size = buf->chan->subbuf_size;
    size_t n_subbufs = buf->chan->n_subbufs;
    size_t read_subbuf;

    /* Nothing produced beyond what was already consumed: nothing to do. */
    if (buf->subbufs_produced == buf->subbufs_consumed &&
        buf->offset == buf->bytes_consumed)
        return;

    /* Crossing a sub-buffer boundary: retire the current sub-buffer. */
    if (buf->bytes_consumed + bytes_consumed > subbuf_size) {
        relay_subbufs_consumed(buf->chan, buf->cpu, 1);
        buf->bytes_consumed = 0;
    }

    buf->bytes_consumed += bytes_consumed;
    /* read_pos == 0 means "wherever consumption left off". */
    if (!read_pos)
        read_subbuf = buf->subbufs_consumed % n_subbufs;
    else
        read_subbuf = read_pos / buf->chan->subbuf_size;
    /* Padding counts toward fully consuming a sub-buffer. */
    if (buf->bytes_consumed + buf->padding[read_subbuf] == subbuf_size) {
        /* Don't retire the sub-buffer the writer is still filling. */
        if ((read_subbuf == buf->subbufs_produced % n_subbufs) &&
            (buf->offset == subbuf_size))
            return;
        relay_subbufs_consumed(buf->chan, buf->cpu, 1);
        buf->bytes_consumed = 0;
    }
}
0988 
0989 /*
0990  *  relay_file_read_avail - boolean, are there unconsumed bytes available?
0991  */
static int relay_file_read_avail(struct rchan_buf *buf, size_t read_pos)
{
    size_t subbuf_size = buf->chan->subbuf_size;
    size_t n_subbufs = buf->chan->n_subbufs;
    size_t produced = buf->subbufs_produced;
    size_t consumed = buf->subbufs_consumed;

    relay_file_read_consume(buf, read_pos, 0);

    /* relay_file_read_consume() may have advanced the consumed count. */
    consumed = buf->subbufs_consumed;

    /* offset > subbuf_size is the "switch refused" sentinel. */
    if (unlikely(buf->offset > subbuf_size)) {
        if (produced == consumed)
            return 0;
        return 1;
    }

    /* Writer lapped the reader: skip ahead to the oldest valid data. */
    if (unlikely(produced - consumed >= n_subbufs)) {
        consumed = produced - n_subbufs + 1;
        buf->subbufs_consumed = consumed;
        buf->bytes_consumed = 0;
    }

    /* Convert both positions into byte offsets within the buffer. */
    produced = (produced % n_subbufs) * subbuf_size + buf->offset;
    consumed = (consumed % n_subbufs) * subbuf_size + buf->bytes_consumed;

    if (consumed > produced)
        produced += n_subbufs * subbuf_size;

    if (consumed == produced) {
        /* Equal offsets can still hide one whole unread sub-buffer. */
        if (buf->offset == subbuf_size &&
            buf->subbufs_produced > buf->subbufs_consumed)
            return 1;
        return 0;
    }

    return 1;
}
1030 
1031 /**
1032  *  relay_file_read_subbuf_avail - return bytes available in sub-buffer
1033  *  @read_pos: file read position
1034  *  @buf: relay channel buffer
1035  */
1036 static size_t relay_file_read_subbuf_avail(size_t read_pos,
1037                        struct rchan_buf *buf)
1038 {
1039     size_t padding, avail = 0;
1040     size_t read_subbuf, read_offset, write_subbuf, write_offset;
1041     size_t subbuf_size = buf->chan->subbuf_size;
1042 
1043     write_subbuf = (buf->data - buf->start) / subbuf_size;
1044     write_offset = buf->offset > subbuf_size ? subbuf_size : buf->offset;
1045     read_subbuf = read_pos / subbuf_size;
1046     read_offset = read_pos % subbuf_size;
1047     padding = buf->padding[read_subbuf];
1048 
1049     if (read_subbuf == write_subbuf) {
1050         if (read_offset + padding < write_offset)
1051             avail = write_offset - (read_offset + padding);
1052     } else
1053         avail = (subbuf_size - padding) - read_offset;
1054 
1055     return avail;
1056 }
1057 
1058 /**
1059  *  relay_file_read_start_pos - find the first available byte to read
1060  *  @read_pos: file read position
1061  *  @buf: relay channel buffer
1062  *
1063  *  If the @read_pos is in the middle of padding, return the
1064  *  position of the first actually available byte, otherwise
1065  *  return the original value.
1066  */
1067 static size_t relay_file_read_start_pos(size_t read_pos,
1068                     struct rchan_buf *buf)
1069 {
1070     size_t read_subbuf, padding, padding_start, padding_end;
1071     size_t subbuf_size = buf->chan->subbuf_size;
1072     size_t n_subbufs = buf->chan->n_subbufs;
1073     size_t consumed = buf->subbufs_consumed % n_subbufs;
1074 
1075     if (!read_pos)
1076         read_pos = consumed * subbuf_size + buf->bytes_consumed;
1077     read_subbuf = read_pos / subbuf_size;
1078     padding = buf->padding[read_subbuf];
1079     padding_start = (read_subbuf + 1) * subbuf_size - padding;
1080     padding_end = (read_subbuf + 1) * subbuf_size;
1081     if (read_pos >= padding_start && read_pos < padding_end) {
1082         read_subbuf = (read_subbuf + 1) % n_subbufs;
1083         read_pos = read_subbuf * subbuf_size;
1084     }
1085 
1086     return read_pos;
1087 }
1088 
1089 /**
1090  *  relay_file_read_end_pos - return the new read position
1091  *  @read_pos: file read position
1092  *  @buf: relay channel buffer
1093  *  @count: number of bytes to be read
1094  */
1095 static size_t relay_file_read_end_pos(struct rchan_buf *buf,
1096                       size_t read_pos,
1097                       size_t count)
1098 {
1099     size_t read_subbuf, padding, end_pos;
1100     size_t subbuf_size = buf->chan->subbuf_size;
1101     size_t n_subbufs = buf->chan->n_subbufs;
1102 
1103     read_subbuf = read_pos / subbuf_size;
1104     padding = buf->padding[read_subbuf];
1105     if (read_pos % subbuf_size + count + padding == subbuf_size)
1106         end_pos = (read_subbuf + 1) * subbuf_size;
1107     else
1108         end_pos = read_pos + count;
1109     if (end_pos >= subbuf_size * n_subbufs)
1110         end_pos = 0;
1111 
1112     return end_pos;
1113 }
1114 
1115 static ssize_t relay_file_read(struct file *filp,
1116                    char __user *buffer,
1117                    size_t count,
1118                    loff_t *ppos)
1119 {
1120     struct rchan_buf *buf = filp->private_data;
1121     size_t read_start, avail;
1122     size_t written = 0;
1123     int ret;
1124 
1125     if (!count)
1126         return 0;
1127 
1128     inode_lock(file_inode(filp));
1129     do {
1130         void *from;
1131 
1132         if (!relay_file_read_avail(buf, *ppos))
1133             break;
1134 
1135         read_start = relay_file_read_start_pos(*ppos, buf);
1136         avail = relay_file_read_subbuf_avail(read_start, buf);
1137         if (!avail)
1138             break;
1139 
1140         avail = min(count, avail);
1141         from = buf->start + read_start;
1142         ret = avail;
1143         if (copy_to_user(buffer, from, avail))
1144             break;
1145 
1146         buffer += ret;
1147         written += ret;
1148         count -= ret;
1149 
1150         relay_file_read_consume(buf, read_start, ret);
1151         *ppos = relay_file_read_end_pos(buf, read_start, ret);
1152     } while (count);
1153     inode_unlock(file_inode(filp));
1154 
1155     return written;
1156 }
1157 
1158 static void relay_consume_bytes(struct rchan_buf *rbuf, int bytes_consumed)
1159 {
1160     rbuf->bytes_consumed += bytes_consumed;
1161 
1162     if (rbuf->bytes_consumed >= rbuf->chan->subbuf_size) {
1163         relay_subbufs_consumed(rbuf->chan, rbuf->cpu, 1);
1164         rbuf->bytes_consumed %= rbuf->chan->subbuf_size;
1165     }
1166 }
1167 
1168 static void relay_pipe_buf_release(struct pipe_inode_info *pipe,
1169                    struct pipe_buffer *buf)
1170 {
1171     struct rchan_buf *rbuf;
1172 
1173     rbuf = (struct rchan_buf *)page_private(buf->page);
1174     relay_consume_bytes(rbuf, buf->private);
1175 }
1176 
/*
 * Pipe buffer operations for pages spliced out of a relay buffer;
 * only .release is relay-specific (it credits consumed bytes).
 */
static const struct pipe_buf_operations relay_pipe_buf_ops = {
    .can_merge = 0,
    .confirm = generic_pipe_buf_confirm,
    .release = relay_pipe_buf_release,
    .steal = generic_pipe_buf_steal,
    .get = generic_pipe_buf_get,
};
1184 
/*
 * Intentionally a no-op: the spliced pages come from the relay
 * buffer's own page_array, so there is nothing for splice to release.
 */
static void relay_page_release(struct splice_pipe_desc *spd, unsigned int i)
{
}
1188 
1189 /*
1190  *  subbuf_splice_actor - splice up to one subbuf's worth of data
1191  */
1192 static ssize_t subbuf_splice_actor(struct file *in,
1193                    loff_t *ppos,
1194                    struct pipe_inode_info *pipe,
1195                    size_t len,
1196                    unsigned int flags,
1197                    int *nonpad_ret)
1198 {
1199     unsigned int pidx, poff, total_len, subbuf_pages, nr_pages;
1200     struct rchan_buf *rbuf = in->private_data;
1201     unsigned int subbuf_size = rbuf->chan->subbuf_size;
1202     uint64_t pos = (uint64_t) *ppos;
1203     uint32_t alloc_size = (uint32_t) rbuf->chan->alloc_size;
1204     size_t read_start = (size_t) do_div(pos, alloc_size);
1205     size_t read_subbuf = read_start / subbuf_size;
1206     size_t padding = rbuf->padding[read_subbuf];
1207     size_t nonpad_end = read_subbuf * subbuf_size + subbuf_size - padding;
1208     struct page *pages[PIPE_DEF_BUFFERS];
1209     struct partial_page partial[PIPE_DEF_BUFFERS];
1210     struct splice_pipe_desc spd = {
1211         .pages = pages,
1212         .nr_pages = 0,
1213         .nr_pages_max = PIPE_DEF_BUFFERS,
1214         .partial = partial,
1215         .flags = flags,
1216         .ops = &relay_pipe_buf_ops,
1217         .spd_release = relay_page_release,
1218     };
1219     ssize_t ret;
1220 
1221     if (rbuf->subbufs_produced == rbuf->subbufs_consumed)
1222         return 0;
1223     if (splice_grow_spd(pipe, &spd))
1224         return -ENOMEM;
1225 
1226     /*
1227      * Adjust read len, if longer than what is available
1228      */
1229     if (len > (subbuf_size - read_start % subbuf_size))
1230         len = subbuf_size - read_start % subbuf_size;
1231 
1232     subbuf_pages = rbuf->chan->alloc_size >> PAGE_SHIFT;
1233     pidx = (read_start / PAGE_SIZE) % subbuf_pages;
1234     poff = read_start & ~PAGE_MASK;
1235     nr_pages = min_t(unsigned int, subbuf_pages, spd.nr_pages_max);
1236 
1237     for (total_len = 0; spd.nr_pages < nr_pages; spd.nr_pages++) {
1238         unsigned int this_len, this_end, private;
1239         unsigned int cur_pos = read_start + total_len;
1240 
1241         if (!len)
1242             break;
1243 
1244         this_len = min_t(unsigned long, len, PAGE_SIZE - poff);
1245         private = this_len;
1246 
1247         spd.pages[spd.nr_pages] = rbuf->page_array[pidx];
1248         spd.partial[spd.nr_pages].offset = poff;
1249 
1250         this_end = cur_pos + this_len;
1251         if (this_end >= nonpad_end) {
1252             this_len = nonpad_end - cur_pos;
1253             private = this_len + padding;
1254         }
1255         spd.partial[spd.nr_pages].len = this_len;
1256         spd.partial[spd.nr_pages].private = private;
1257 
1258         len -= this_len;
1259         total_len += this_len;
1260         poff = 0;
1261         pidx = (pidx + 1) % subbuf_pages;
1262 
1263         if (this_end >= nonpad_end) {
1264             spd.nr_pages++;
1265             break;
1266         }
1267     }
1268 
1269     ret = 0;
1270     if (!spd.nr_pages)
1271         goto out;
1272 
1273     ret = *nonpad_ret = splice_to_pipe(pipe, &spd);
1274     if (ret < 0 || ret < total_len)
1275         goto out;
1276 
1277         if (read_start + ret == nonpad_end)
1278                 ret += padding;
1279 
1280 out:
1281     splice_shrink_spd(&spd);
1282     return ret;
1283 }
1284 
/*
 *  relay_file_splice_read - splice_read file op for relay files
 *
 *  Repeatedly invokes subbuf_splice_actor() until at least one
 *  sub-buffer's worth of data has been spliced.  Advances *ppos by the
 *  actor's return (which includes padding) but reports only the
 *  non-padding bytes to the caller.  Returns the bytes spliced, or the
 *  actor's error/-EAGAIN if nothing was spliced.
 */
static ssize_t relay_file_splice_read(struct file *in,
                      loff_t *ppos,
                      struct pipe_inode_info *pipe,
                      size_t len,
                      unsigned int flags)
{
    ssize_t spliced;
    int ret;
    int nonpad_ret = 0;

    ret = 0;
    spliced = 0;

    while (len && !spliced) {
        ret = subbuf_splice_actor(in, ppos, pipe, len, flags, &nonpad_ret);
        if (ret < 0)
            break;
        else if (!ret) {
            /* No data yet: non-blocking callers get -EAGAIN. */
            if (flags & SPLICE_F_NONBLOCK)
                ret = -EAGAIN;
            break;
        }

        *ppos += ret;
        /* ret can exceed len when trailing padding was accounted. */
        if (ret > len)
            len = 0;
        else
            len -= ret;
        /* Only count real data bytes toward the caller's total. */
        spliced += nonpad_ret;
        nonpad_ret = 0;
    }

    if (spliced)
        return spliced;

    return ret;
}
1322 
/*
 * File operations for relay files, exported for clients (e.g. blktrace)
 * that create relay files in their own filesystems.
 */
const struct file_operations relay_file_operations = {
    .open       = relay_file_open,
    .poll       = relay_file_poll,
    .mmap       = relay_file_mmap,
    .read       = relay_file_read,
    .llseek     = no_llseek,
    .release    = relay_file_release,
    .splice_read    = relay_file_splice_read,
};
EXPORT_SYMBOL_GPL(relay_file_operations);