Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Public API and common code for kernel->userspace relay file support.
0003  *
0004  * See Documentation/filesystems/relay.rst for an overview.
0005  *
0006  * Copyright (C) 2002-2005 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp
0007  * Copyright (C) 1999-2005 - Karim Yaghmour (karim@opersys.com)
0008  *
0009  * Moved to kernel/relay.c by Paul Mundt, 2006.
0010  * November 2006 - CPU hotplug support by Mathieu Desnoyers
0011  *  (mathieu.desnoyers@polymtl.ca)
0012  *
0013  * This file is released under the GPL.
0014  */
0015 #include <linux/errno.h>
0016 #include <linux/stddef.h>
0017 #include <linux/slab.h>
0018 #include <linux/export.h>
0019 #include <linux/string.h>
0020 #include <linux/relay.h>
0021 #include <linux/vmalloc.h>
0022 #include <linux/mm.h>
0023 #include <linux/cpu.h>
0024 #include <linux/splice.h>
0025 
/*
 * Registry of open relay channels.  relay_prepare_cpu() walks this list to
 * create the per-cpu buffer of a CPU being brought up; relay_channels_mutex
 * guards the list and is also held while channels are opened and closed.
 */
static LIST_HEAD(relay_channels);
static DEFINE_MUTEX(relay_channels_mutex);
0029 
0030 /*
0031  * fault() vm_op implementation for relay file mapping.
0032  */
0033 static vm_fault_t relay_buf_fault(struct vm_fault *vmf)
0034 {
0035     struct page *page;
0036     struct rchan_buf *buf = vmf->vma->vm_private_data;
0037     pgoff_t pgoff = vmf->pgoff;
0038 
0039     if (!buf)
0040         return VM_FAULT_OOM;
0041 
0042     page = vmalloc_to_page(buf->start + (pgoff << PAGE_SHIFT));
0043     if (!page)
0044         return VM_FAULT_SIGBUS;
0045     get_page(page);
0046     vmf->page = page;
0047 
0048     return 0;
0049 }
0050 
0051 /*
0052  * vm_ops for relay file mappings.
0053  */
0054 static const struct vm_operations_struct relay_file_mmap_ops = {
0055     .fault = relay_buf_fault,
0056 };
0057 
0058 /*
0059  * allocate an array of pointers of struct page
0060  */
0061 static struct page **relay_alloc_page_array(unsigned int n_pages)
0062 {
0063     const size_t pa_size = n_pages * sizeof(struct page *);
0064     if (pa_size > PAGE_SIZE)
0065         return vzalloc(pa_size);
0066     return kzalloc(pa_size, GFP_KERNEL);
0067 }
0068 
/*
 * relay_free_page_array - release an array of struct page pointers
 * @array: array obtained from relay_alloc_page_array()
 *
 * Only the array itself is freed via kvfree(); the pages it points to are
 * not touched here.
 */
static void relay_free_page_array(struct page **array)
{
	kvfree(array);
}
0076 
0077 /**
0078  *  relay_mmap_buf: - mmap channel buffer to process address space
0079  *  @buf: relay channel buffer
0080  *  @vma: vm_area_struct describing memory to be mapped
0081  *
0082  *  Returns 0 if ok, negative on error
0083  *
0084  *  Caller should already have grabbed mmap_lock.
0085  */
0086 static int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma)
0087 {
0088     unsigned long length = vma->vm_end - vma->vm_start;
0089 
0090     if (!buf)
0091         return -EBADF;
0092 
0093     if (length != (unsigned long)buf->chan->alloc_size)
0094         return -EINVAL;
0095 
0096     vma->vm_ops = &relay_file_mmap_ops;
0097     vma->vm_flags |= VM_DONTEXPAND;
0098     vma->vm_private_data = buf;
0099 
0100     return 0;
0101 }
0102 
0103 /**
0104  *  relay_alloc_buf - allocate a channel buffer
0105  *  @buf: the buffer struct
0106  *  @size: total size of the buffer
0107  *
0108  *  Returns a pointer to the resulting buffer, %NULL if unsuccessful. The
0109  *  passed in size will get page aligned, if it isn't already.
0110  */
0111 static void *relay_alloc_buf(struct rchan_buf *buf, size_t *size)
0112 {
0113     void *mem;
0114     unsigned int i, j, n_pages;
0115 
0116     *size = PAGE_ALIGN(*size);
0117     n_pages = *size >> PAGE_SHIFT;
0118 
0119     buf->page_array = relay_alloc_page_array(n_pages);
0120     if (!buf->page_array)
0121         return NULL;
0122 
0123     for (i = 0; i < n_pages; i++) {
0124         buf->page_array[i] = alloc_page(GFP_KERNEL);
0125         if (unlikely(!buf->page_array[i]))
0126             goto depopulate;
0127         set_page_private(buf->page_array[i], (unsigned long)buf);
0128     }
0129     mem = vmap(buf->page_array, n_pages, VM_MAP, PAGE_KERNEL);
0130     if (!mem)
0131         goto depopulate;
0132 
0133     memset(mem, 0, *size);
0134     buf->page_count = n_pages;
0135     return mem;
0136 
0137 depopulate:
0138     for (j = 0; j < i; j++)
0139         __free_page(buf->page_array[j]);
0140     relay_free_page_array(buf->page_array);
0141     return NULL;
0142 }
0143 
0144 /**
0145  *  relay_create_buf - allocate and initialize a channel buffer
0146  *  @chan: the relay channel
0147  *
0148  *  Returns channel buffer if successful, %NULL otherwise.
0149  */
0150 static struct rchan_buf *relay_create_buf(struct rchan *chan)
0151 {
0152     struct rchan_buf *buf;
0153 
0154     if (chan->n_subbufs > KMALLOC_MAX_SIZE / sizeof(size_t *))
0155         return NULL;
0156 
0157     buf = kzalloc(sizeof(struct rchan_buf), GFP_KERNEL);
0158     if (!buf)
0159         return NULL;
0160     buf->padding = kmalloc_array(chan->n_subbufs, sizeof(size_t *),
0161                      GFP_KERNEL);
0162     if (!buf->padding)
0163         goto free_buf;
0164 
0165     buf->start = relay_alloc_buf(buf, &chan->alloc_size);
0166     if (!buf->start)
0167         goto free_buf;
0168 
0169     buf->chan = chan;
0170     kref_get(&buf->chan->kref);
0171     return buf;
0172 
0173 free_buf:
0174     kfree(buf->padding);
0175     kfree(buf);
0176     return NULL;
0177 }
0178 
0179 /**
0180  *  relay_destroy_channel - free the channel struct
0181  *  @kref: target kernel reference that contains the relay channel
0182  *
0183  *  Should only be called from kref_put().
0184  */
0185 static void relay_destroy_channel(struct kref *kref)
0186 {
0187     struct rchan *chan = container_of(kref, struct rchan, kref);
0188     free_percpu(chan->buf);
0189     kfree(chan);
0190 }
0191 
0192 /**
0193  *  relay_destroy_buf - destroy an rchan_buf struct and associated buffer
0194  *  @buf: the buffer struct
0195  */
0196 static void relay_destroy_buf(struct rchan_buf *buf)
0197 {
0198     struct rchan *chan = buf->chan;
0199     unsigned int i;
0200 
0201     if (likely(buf->start)) {
0202         vunmap(buf->start);
0203         for (i = 0; i < buf->page_count; i++)
0204             __free_page(buf->page_array[i]);
0205         relay_free_page_array(buf->page_array);
0206     }
0207     *per_cpu_ptr(chan->buf, buf->cpu) = NULL;
0208     kfree(buf->padding);
0209     kfree(buf);
0210     kref_put(&chan->kref, relay_destroy_channel);
0211 }
0212 
0213 /**
0214  *  relay_remove_buf - remove a channel buffer
0215  *  @kref: target kernel reference that contains the relay buffer
0216  *
0217  *  Removes the file from the filesystem, which also frees the
0218  *  rchan_buf_struct and the channel buffer.  Should only be called from
0219  *  kref_put().
0220  */
0221 static void relay_remove_buf(struct kref *kref)
0222 {
0223     struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref);
0224     relay_destroy_buf(buf);
0225 }
0226 
0227 /**
0228  *  relay_buf_empty - boolean, is the channel buffer empty?
0229  *  @buf: channel buffer
0230  *
0231  *  Returns 1 if the buffer is empty, 0 otherwise.
0232  */
0233 static int relay_buf_empty(struct rchan_buf *buf)
0234 {
0235     return (buf->subbufs_produced - buf->subbufs_consumed) ? 0 : 1;
0236 }
0237 
0238 /**
0239  *  relay_buf_full - boolean, is the channel buffer full?
0240  *  @buf: channel buffer
0241  *
0242  *  Returns 1 if the buffer is full, 0 otherwise.
0243  */
0244 int relay_buf_full(struct rchan_buf *buf)
0245 {
0246     size_t ready = buf->subbufs_produced - buf->subbufs_consumed;
0247     return (ready >= buf->chan->n_subbufs) ? 1 : 0;
0248 }
0249 EXPORT_SYMBOL_GPL(relay_buf_full);
0250 
0251 /*
0252  * High-level relay kernel API and associated functions.
0253  */
0254 
0255 static int relay_subbuf_start(struct rchan_buf *buf, void *subbuf,
0256                   void *prev_subbuf, size_t prev_padding)
0257 {
0258     if (!buf->chan->cb->subbuf_start)
0259         return !relay_buf_full(buf);
0260 
0261     return buf->chan->cb->subbuf_start(buf, subbuf,
0262                        prev_subbuf, prev_padding);
0263 }
0264 
0265 /**
0266  *  wakeup_readers - wake up readers waiting on a channel
0267  *  @work: contains the channel buffer
0268  *
0269  *  This is the function used to defer reader waking
0270  */
0271 static void wakeup_readers(struct irq_work *work)
0272 {
0273     struct rchan_buf *buf;
0274 
0275     buf = container_of(work, struct rchan_buf, wakeup_work);
0276     wake_up_interruptible(&buf->read_wait);
0277 }
0278 
0279 /**
0280  *  __relay_reset - reset a channel buffer
0281  *  @buf: the channel buffer
0282  *  @init: 1 if this is a first-time initialization
0283  *
0284  *  See relay_reset() for description of effect.
0285  */
0286 static void __relay_reset(struct rchan_buf *buf, unsigned int init)
0287 {
0288     size_t i;
0289 
0290     if (init) {
0291         init_waitqueue_head(&buf->read_wait);
0292         kref_init(&buf->kref);
0293         init_irq_work(&buf->wakeup_work, wakeup_readers);
0294     } else {
0295         irq_work_sync(&buf->wakeup_work);
0296     }
0297 
0298     buf->subbufs_produced = 0;
0299     buf->subbufs_consumed = 0;
0300     buf->bytes_consumed = 0;
0301     buf->finalized = 0;
0302     buf->data = buf->start;
0303     buf->offset = 0;
0304 
0305     for (i = 0; i < buf->chan->n_subbufs; i++)
0306         buf->padding[i] = 0;
0307 
0308     relay_subbuf_start(buf, buf->data, NULL, 0);
0309 }
0310 
0311 /**
0312  *  relay_reset - reset the channel
0313  *  @chan: the channel
0314  *
0315  *  This has the effect of erasing all data from all channel buffers
0316  *  and restarting the channel in its initial state.  The buffers
0317  *  are not freed, so any mappings are still in effect.
0318  *
0319  *  NOTE. Care should be taken that the channel isn't actually
0320  *  being used by anything when this call is made.
0321  */
0322 void relay_reset(struct rchan *chan)
0323 {
0324     struct rchan_buf *buf;
0325     unsigned int i;
0326 
0327     if (!chan)
0328         return;
0329 
0330     if (chan->is_global && (buf = *per_cpu_ptr(chan->buf, 0))) {
0331         __relay_reset(buf, 0);
0332         return;
0333     }
0334 
0335     mutex_lock(&relay_channels_mutex);
0336     for_each_possible_cpu(i)
0337         if ((buf = *per_cpu_ptr(chan->buf, i)))
0338             __relay_reset(buf, 0);
0339     mutex_unlock(&relay_channels_mutex);
0340 }
0341 EXPORT_SYMBOL_GPL(relay_reset);
0342 
0343 static inline void relay_set_buf_dentry(struct rchan_buf *buf,
0344                     struct dentry *dentry)
0345 {
0346     buf->dentry = dentry;
0347     d_inode(buf->dentry)->i_size = buf->early_bytes;
0348 }
0349 
0350 static struct dentry *relay_create_buf_file(struct rchan *chan,
0351                         struct rchan_buf *buf,
0352                         unsigned int cpu)
0353 {
0354     struct dentry *dentry;
0355     char *tmpname;
0356 
0357     tmpname = kzalloc(NAME_MAX + 1, GFP_KERNEL);
0358     if (!tmpname)
0359         return NULL;
0360     snprintf(tmpname, NAME_MAX, "%s%d", chan->base_filename, cpu);
0361 
0362     /* Create file in fs */
0363     dentry = chan->cb->create_buf_file(tmpname, chan->parent,
0364                        S_IRUSR, buf,
0365                        &chan->is_global);
0366     if (IS_ERR(dentry))
0367         dentry = NULL;
0368 
0369     kfree(tmpname);
0370 
0371     return dentry;
0372 }
0373 
0374 /*
0375  *  relay_open_buf - create a new relay channel buffer
0376  *
0377  *  used by relay_open() and CPU hotplug.
0378  */
0379 static struct rchan_buf *relay_open_buf(struct rchan *chan, unsigned int cpu)
0380 {
0381     struct rchan_buf *buf = NULL;
0382     struct dentry *dentry;
0383 
0384     if (chan->is_global)
0385         return *per_cpu_ptr(chan->buf, 0);
0386 
0387     buf = relay_create_buf(chan);
0388     if (!buf)
0389         return NULL;
0390 
0391     if (chan->has_base_filename) {
0392         dentry = relay_create_buf_file(chan, buf, cpu);
0393         if (!dentry)
0394             goto free_buf;
0395         relay_set_buf_dentry(buf, dentry);
0396     } else {
0397         /* Only retrieve global info, nothing more, nothing less */
0398         dentry = chan->cb->create_buf_file(NULL, NULL,
0399                            S_IRUSR, buf,
0400                            &chan->is_global);
0401         if (IS_ERR_OR_NULL(dentry))
0402             goto free_buf;
0403     }
0404 
0405     buf->cpu = cpu;
0406     __relay_reset(buf, 1);
0407 
0408     if(chan->is_global) {
0409         *per_cpu_ptr(chan->buf, 0) = buf;
0410         buf->cpu = 0;
0411     }
0412 
0413     return buf;
0414 
0415 free_buf:
0416     relay_destroy_buf(buf);
0417     return NULL;
0418 }
0419 
0420 /**
0421  *  relay_close_buf - close a channel buffer
0422  *  @buf: channel buffer
0423  *
0424  *  Marks the buffer finalized and restores the default callbacks.
0425  *  The channel buffer and channel buffer data structure are then freed
0426  *  automatically when the last reference is given up.
0427  */
0428 static void relay_close_buf(struct rchan_buf *buf)
0429 {
0430     buf->finalized = 1;
0431     irq_work_sync(&buf->wakeup_work);
0432     buf->chan->cb->remove_buf_file(buf->dentry);
0433     kref_put(&buf->kref, relay_remove_buf);
0434 }
0435 
0436 int relay_prepare_cpu(unsigned int cpu)
0437 {
0438     struct rchan *chan;
0439     struct rchan_buf *buf;
0440 
0441     mutex_lock(&relay_channels_mutex);
0442     list_for_each_entry(chan, &relay_channels, list) {
0443         if (*per_cpu_ptr(chan->buf, cpu))
0444             continue;
0445         buf = relay_open_buf(chan, cpu);
0446         if (!buf) {
0447             pr_err("relay: cpu %d buffer creation failed\n", cpu);
0448             mutex_unlock(&relay_channels_mutex);
0449             return -ENOMEM;
0450         }
0451         *per_cpu_ptr(chan->buf, cpu) = buf;
0452     }
0453     mutex_unlock(&relay_channels_mutex);
0454     return 0;
0455 }
0456 
0457 /**
0458  *  relay_open - create a new relay channel
0459  *  @base_filename: base name of files to create, %NULL for buffering only
0460  *  @parent: dentry of parent directory, %NULL for root directory or buffer
0461  *  @subbuf_size: size of sub-buffers
0462  *  @n_subbufs: number of sub-buffers
0463  *  @cb: client callback functions
0464  *  @private_data: user-defined data
0465  *
0466  *  Returns channel pointer if successful, %NULL otherwise.
0467  *
0468  *  Creates a channel buffer for each cpu using the sizes and
0469  *  attributes specified.  The created channel buffer files
0470  *  will be named base_filename0...base_filenameN-1.  File
0471  *  permissions will be %S_IRUSR.
0472  *
0473  *  If opening a buffer (@parent = NULL) that you later wish to register
0474  *  in a filesystem, call relay_late_setup_files() once the @parent dentry
0475  *  is available.
0476  */
0477 struct rchan *relay_open(const char *base_filename,
0478              struct dentry *parent,
0479              size_t subbuf_size,
0480              size_t n_subbufs,
0481              const struct rchan_callbacks *cb,
0482              void *private_data)
0483 {
0484     unsigned int i;
0485     struct rchan *chan;
0486     struct rchan_buf *buf;
0487 
0488     if (!(subbuf_size && n_subbufs))
0489         return NULL;
0490     if (subbuf_size > UINT_MAX / n_subbufs)
0491         return NULL;
0492     if (!cb || !cb->create_buf_file || !cb->remove_buf_file)
0493         return NULL;
0494 
0495     chan = kzalloc(sizeof(struct rchan), GFP_KERNEL);
0496     if (!chan)
0497         return NULL;
0498 
0499     chan->buf = alloc_percpu(struct rchan_buf *);
0500     if (!chan->buf) {
0501         kfree(chan);
0502         return NULL;
0503     }
0504 
0505     chan->version = RELAYFS_CHANNEL_VERSION;
0506     chan->n_subbufs = n_subbufs;
0507     chan->subbuf_size = subbuf_size;
0508     chan->alloc_size = PAGE_ALIGN(subbuf_size * n_subbufs);
0509     chan->parent = parent;
0510     chan->private_data = private_data;
0511     if (base_filename) {
0512         chan->has_base_filename = 1;
0513         strlcpy(chan->base_filename, base_filename, NAME_MAX);
0514     }
0515     chan->cb = cb;
0516     kref_init(&chan->kref);
0517 
0518     mutex_lock(&relay_channels_mutex);
0519     for_each_online_cpu(i) {
0520         buf = relay_open_buf(chan, i);
0521         if (!buf)
0522             goto free_bufs;
0523         *per_cpu_ptr(chan->buf, i) = buf;
0524     }
0525     list_add(&chan->list, &relay_channels);
0526     mutex_unlock(&relay_channels_mutex);
0527 
0528     return chan;
0529 
0530 free_bufs:
0531     for_each_possible_cpu(i) {
0532         if ((buf = *per_cpu_ptr(chan->buf, i)))
0533             relay_close_buf(buf);
0534     }
0535 
0536     kref_put(&chan->kref, relay_destroy_channel);
0537     mutex_unlock(&relay_channels_mutex);
0538     return NULL;
0539 }
0540 EXPORT_SYMBOL_GPL(relay_open);
0541 
/*
 * Argument bundle passed to __relay_set_buf_dentry() when the dentry of a
 * buffer has to be installed from another CPU.
 */
struct rchan_percpu_buf_dispatcher {
	struct rchan_buf *buf;		/* buffer receiving the dentry */
	struct dentry *dentry;		/* dentry to attach to @buf */
};
0546 
0547 /* Called in atomic context. */
0548 static void __relay_set_buf_dentry(void *info)
0549 {
0550     struct rchan_percpu_buf_dispatcher *p = info;
0551 
0552     relay_set_buf_dentry(p->buf, p->dentry);
0553 }
0554 
0555 /**
0556  *  relay_late_setup_files - triggers file creation
0557  *  @chan: channel to operate on
0558  *  @base_filename: base name of files to create
0559  *  @parent: dentry of parent directory, %NULL for root directory
0560  *
0561  *  Returns 0 if successful, non-zero otherwise.
0562  *
0563  *  Use to setup files for a previously buffer-only channel created
0564  *  by relay_open() with a NULL parent dentry.
0565  *
0566  *  For example, this is useful for perfomring early tracing in kernel,
0567  *  before VFS is up and then exposing the early results once the dentry
0568  *  is available.
0569  */
0570 int relay_late_setup_files(struct rchan *chan,
0571                const char *base_filename,
0572                struct dentry *parent)
0573 {
0574     int err = 0;
0575     unsigned int i, curr_cpu;
0576     unsigned long flags;
0577     struct dentry *dentry;
0578     struct rchan_buf *buf;
0579     struct rchan_percpu_buf_dispatcher disp;
0580 
0581     if (!chan || !base_filename)
0582         return -EINVAL;
0583 
0584     strlcpy(chan->base_filename, base_filename, NAME_MAX);
0585 
0586     mutex_lock(&relay_channels_mutex);
0587     /* Is chan already set up? */
0588     if (unlikely(chan->has_base_filename)) {
0589         mutex_unlock(&relay_channels_mutex);
0590         return -EEXIST;
0591     }
0592     chan->has_base_filename = 1;
0593     chan->parent = parent;
0594 
0595     if (chan->is_global) {
0596         err = -EINVAL;
0597         buf = *per_cpu_ptr(chan->buf, 0);
0598         if (!WARN_ON_ONCE(!buf)) {
0599             dentry = relay_create_buf_file(chan, buf, 0);
0600             if (dentry && !WARN_ON_ONCE(!chan->is_global)) {
0601                 relay_set_buf_dentry(buf, dentry);
0602                 err = 0;
0603             }
0604         }
0605         mutex_unlock(&relay_channels_mutex);
0606         return err;
0607     }
0608 
0609     curr_cpu = get_cpu();
0610     /*
0611      * The CPU hotplug notifier ran before us and created buffers with
0612      * no files associated. So it's safe to call relay_setup_buf_file()
0613      * on all currently online CPUs.
0614      */
0615     for_each_online_cpu(i) {
0616         buf = *per_cpu_ptr(chan->buf, i);
0617         if (unlikely(!buf)) {
0618             WARN_ONCE(1, KERN_ERR "CPU has no buffer!\n");
0619             err = -EINVAL;
0620             break;
0621         }
0622 
0623         dentry = relay_create_buf_file(chan, buf, i);
0624         if (unlikely(!dentry)) {
0625             err = -EINVAL;
0626             break;
0627         }
0628 
0629         if (curr_cpu == i) {
0630             local_irq_save(flags);
0631             relay_set_buf_dentry(buf, dentry);
0632             local_irq_restore(flags);
0633         } else {
0634             disp.buf = buf;
0635             disp.dentry = dentry;
0636             smp_mb();
0637             /* relay_channels_mutex must be held, so wait. */
0638             err = smp_call_function_single(i,
0639                                __relay_set_buf_dentry,
0640                                &disp, 1);
0641         }
0642         if (unlikely(err))
0643             break;
0644     }
0645     put_cpu();
0646     mutex_unlock(&relay_channels_mutex);
0647 
0648     return err;
0649 }
0650 EXPORT_SYMBOL_GPL(relay_late_setup_files);
0651 
0652 /**
0653  *  relay_switch_subbuf - switch to a new sub-buffer
0654  *  @buf: channel buffer
0655  *  @length: size of current event
0656  *
0657  *  Returns either the length passed in or 0 if full.
0658  *
0659  *  Performs sub-buffer-switch tasks such as invoking callbacks,
0660  *  updating padding counts, waking up readers, etc.
0661  */
0662 size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length)
0663 {
0664     void *old, *new;
0665     size_t old_subbuf, new_subbuf;
0666 
0667     if (unlikely(length > buf->chan->subbuf_size))
0668         goto toobig;
0669 
0670     if (buf->offset != buf->chan->subbuf_size + 1) {
0671         buf->prev_padding = buf->chan->subbuf_size - buf->offset;
0672         old_subbuf = buf->subbufs_produced % buf->chan->n_subbufs;
0673         buf->padding[old_subbuf] = buf->prev_padding;
0674         buf->subbufs_produced++;
0675         if (buf->dentry)
0676             d_inode(buf->dentry)->i_size +=
0677                 buf->chan->subbuf_size -
0678                 buf->padding[old_subbuf];
0679         else
0680             buf->early_bytes += buf->chan->subbuf_size -
0681                         buf->padding[old_subbuf];
0682         smp_mb();
0683         if (waitqueue_active(&buf->read_wait)) {
0684             /*
0685              * Calling wake_up_interruptible() from here
0686              * will deadlock if we happen to be logging
0687              * from the scheduler (trying to re-grab
0688              * rq->lock), so defer it.
0689              */
0690             irq_work_queue(&buf->wakeup_work);
0691         }
0692     }
0693 
0694     old = buf->data;
0695     new_subbuf = buf->subbufs_produced % buf->chan->n_subbufs;
0696     new = buf->start + new_subbuf * buf->chan->subbuf_size;
0697     buf->offset = 0;
0698     if (!relay_subbuf_start(buf, new, old, buf->prev_padding)) {
0699         buf->offset = buf->chan->subbuf_size + 1;
0700         return 0;
0701     }
0702     buf->data = new;
0703     buf->padding[new_subbuf] = 0;
0704 
0705     if (unlikely(length + buf->offset > buf->chan->subbuf_size))
0706         goto toobig;
0707 
0708     return length;
0709 
0710 toobig:
0711     buf->chan->last_toobig = length;
0712     return 0;
0713 }
0714 EXPORT_SYMBOL_GPL(relay_switch_subbuf);
0715 
0716 /**
0717  *  relay_subbufs_consumed - update the buffer's sub-buffers-consumed count
0718  *  @chan: the channel
0719  *  @cpu: the cpu associated with the channel buffer to update
0720  *  @subbufs_consumed: number of sub-buffers to add to current buf's count
0721  *
0722  *  Adds to the channel buffer's consumed sub-buffer count.
0723  *  subbufs_consumed should be the number of sub-buffers newly consumed,
0724  *  not the total consumed.
0725  *
0726  *  NOTE. Kernel clients don't need to call this function if the channel
0727  *  mode is 'overwrite'.
0728  */
0729 void relay_subbufs_consumed(struct rchan *chan,
0730                 unsigned int cpu,
0731                 size_t subbufs_consumed)
0732 {
0733     struct rchan_buf *buf;
0734 
0735     if (!chan || cpu >= NR_CPUS)
0736         return;
0737 
0738     buf = *per_cpu_ptr(chan->buf, cpu);
0739     if (!buf || subbufs_consumed > chan->n_subbufs)
0740         return;
0741 
0742     if (subbufs_consumed > buf->subbufs_produced - buf->subbufs_consumed)
0743         buf->subbufs_consumed = buf->subbufs_produced;
0744     else
0745         buf->subbufs_consumed += subbufs_consumed;
0746 }
0747 EXPORT_SYMBOL_GPL(relay_subbufs_consumed);
0748 
0749 /**
0750  *  relay_close - close the channel
0751  *  @chan: the channel
0752  *
0753  *  Closes all channel buffers and frees the channel.
0754  */
0755 void relay_close(struct rchan *chan)
0756 {
0757     struct rchan_buf *buf;
0758     unsigned int i;
0759 
0760     if (!chan)
0761         return;
0762 
0763     mutex_lock(&relay_channels_mutex);
0764     if (chan->is_global && (buf = *per_cpu_ptr(chan->buf, 0)))
0765         relay_close_buf(buf);
0766     else
0767         for_each_possible_cpu(i)
0768             if ((buf = *per_cpu_ptr(chan->buf, i)))
0769                 relay_close_buf(buf);
0770 
0771     if (chan->last_toobig)
0772         printk(KERN_WARNING "relay: one or more items not logged "
0773                "[item size (%zd) > sub-buffer size (%zd)]\n",
0774                chan->last_toobig, chan->subbuf_size);
0775 
0776     list_del(&chan->list);
0777     kref_put(&chan->kref, relay_destroy_channel);
0778     mutex_unlock(&relay_channels_mutex);
0779 }
0780 EXPORT_SYMBOL_GPL(relay_close);
0781 
0782 /**
0783  *  relay_flush - close the channel
0784  *  @chan: the channel
0785  *
0786  *  Flushes all channel buffers, i.e. forces buffer switch.
0787  */
0788 void relay_flush(struct rchan *chan)
0789 {
0790     struct rchan_buf *buf;
0791     unsigned int i;
0792 
0793     if (!chan)
0794         return;
0795 
0796     if (chan->is_global && (buf = *per_cpu_ptr(chan->buf, 0))) {
0797         relay_switch_subbuf(buf, 0);
0798         return;
0799     }
0800 
0801     mutex_lock(&relay_channels_mutex);
0802     for_each_possible_cpu(i)
0803         if ((buf = *per_cpu_ptr(chan->buf, i)))
0804             relay_switch_subbuf(buf, 0);
0805     mutex_unlock(&relay_channels_mutex);
0806 }
0807 EXPORT_SYMBOL_GPL(relay_flush);
0808 
0809 /**
0810  *  relay_file_open - open file op for relay files
0811  *  @inode: the inode
0812  *  @filp: the file
0813  *
0814  *  Increments the channel buffer refcount.
0815  */
0816 static int relay_file_open(struct inode *inode, struct file *filp)
0817 {
0818     struct rchan_buf *buf = inode->i_private;
0819     kref_get(&buf->kref);
0820     filp->private_data = buf;
0821 
0822     return nonseekable_open(inode, filp);
0823 }
0824 
0825 /**
0826  *  relay_file_mmap - mmap file op for relay files
0827  *  @filp: the file
0828  *  @vma: the vma describing what to map
0829  *
0830  *  Calls upon relay_mmap_buf() to map the file into user space.
0831  */
0832 static int relay_file_mmap(struct file *filp, struct vm_area_struct *vma)
0833 {
0834     struct rchan_buf *buf = filp->private_data;
0835     return relay_mmap_buf(buf, vma);
0836 }
0837 
0838 /**
0839  *  relay_file_poll - poll file op for relay files
0840  *  @filp: the file
0841  *  @wait: poll table
0842  *
0843  *  Poll implemention.
0844  */
0845 static __poll_t relay_file_poll(struct file *filp, poll_table *wait)
0846 {
0847     __poll_t mask = 0;
0848     struct rchan_buf *buf = filp->private_data;
0849 
0850     if (buf->finalized)
0851         return EPOLLERR;
0852 
0853     if (filp->f_mode & FMODE_READ) {
0854         poll_wait(filp, &buf->read_wait, wait);
0855         if (!relay_buf_empty(buf))
0856             mask |= EPOLLIN | EPOLLRDNORM;
0857     }
0858 
0859     return mask;
0860 }
0861 
0862 /**
0863  *  relay_file_release - release file op for relay files
0864  *  @inode: the inode
0865  *  @filp: the file
0866  *
0867  *  Decrements the channel refcount, as the filesystem is
0868  *  no longer using it.
0869  */
0870 static int relay_file_release(struct inode *inode, struct file *filp)
0871 {
0872     struct rchan_buf *buf = filp->private_data;
0873     kref_put(&buf->kref, relay_remove_buf);
0874 
0875     return 0;
0876 }
0877 
0878 /*
0879  *  relay_file_read_consume - update the consumed count for the buffer
0880  */
0881 static void relay_file_read_consume(struct rchan_buf *buf,
0882                     size_t read_pos,
0883                     size_t bytes_consumed)
0884 {
0885     size_t subbuf_size = buf->chan->subbuf_size;
0886     size_t n_subbufs = buf->chan->n_subbufs;
0887     size_t read_subbuf;
0888 
0889     if (buf->subbufs_produced == buf->subbufs_consumed &&
0890         buf->offset == buf->bytes_consumed)
0891         return;
0892 
0893     if (buf->bytes_consumed + bytes_consumed > subbuf_size) {
0894         relay_subbufs_consumed(buf->chan, buf->cpu, 1);
0895         buf->bytes_consumed = 0;
0896     }
0897 
0898     buf->bytes_consumed += bytes_consumed;
0899     if (!read_pos)
0900         read_subbuf = buf->subbufs_consumed % n_subbufs;
0901     else
0902         read_subbuf = read_pos / buf->chan->subbuf_size;
0903     if (buf->bytes_consumed + buf->padding[read_subbuf] == subbuf_size) {
0904         if ((read_subbuf == buf->subbufs_produced % n_subbufs) &&
0905             (buf->offset == subbuf_size))
0906             return;
0907         relay_subbufs_consumed(buf->chan, buf->cpu, 1);
0908         buf->bytes_consumed = 0;
0909     }
0910 }
0911 
0912 /*
0913  *  relay_file_read_avail - boolean, are there unconsumed bytes available?
0914  */
0915 static int relay_file_read_avail(struct rchan_buf *buf)
0916 {
0917     size_t subbuf_size = buf->chan->subbuf_size;
0918     size_t n_subbufs = buf->chan->n_subbufs;
0919     size_t produced = buf->subbufs_produced;
0920     size_t consumed;
0921 
0922     relay_file_read_consume(buf, 0, 0);
0923 
0924     consumed = buf->subbufs_consumed;
0925 
0926     if (unlikely(buf->offset > subbuf_size)) {
0927         if (produced == consumed)
0928             return 0;
0929         return 1;
0930     }
0931 
0932     if (unlikely(produced - consumed >= n_subbufs)) {
0933         consumed = produced - n_subbufs + 1;
0934         buf->subbufs_consumed = consumed;
0935         buf->bytes_consumed = 0;
0936     }
0937 
0938     produced = (produced % n_subbufs) * subbuf_size + buf->offset;
0939     consumed = (consumed % n_subbufs) * subbuf_size + buf->bytes_consumed;
0940 
0941     if (consumed > produced)
0942         produced += n_subbufs * subbuf_size;
0943 
0944     if (consumed == produced) {
0945         if (buf->offset == subbuf_size &&
0946             buf->subbufs_produced > buf->subbufs_consumed)
0947             return 1;
0948         return 0;
0949     }
0950 
0951     return 1;
0952 }
0953 
0954 /**
0955  *  relay_file_read_subbuf_avail - return bytes available in sub-buffer
0956  *  @read_pos: file read position
0957  *  @buf: relay channel buffer
0958  */
0959 static size_t relay_file_read_subbuf_avail(size_t read_pos,
0960                        struct rchan_buf *buf)
0961 {
0962     size_t padding, avail = 0;
0963     size_t read_subbuf, read_offset, write_subbuf, write_offset;
0964     size_t subbuf_size = buf->chan->subbuf_size;
0965 
0966     write_subbuf = (buf->data - buf->start) / subbuf_size;
0967     write_offset = buf->offset > subbuf_size ? subbuf_size : buf->offset;
0968     read_subbuf = read_pos / subbuf_size;
0969     read_offset = read_pos % subbuf_size;
0970     padding = buf->padding[read_subbuf];
0971 
0972     if (read_subbuf == write_subbuf) {
0973         if (read_offset + padding < write_offset)
0974             avail = write_offset - (read_offset + padding);
0975     } else
0976         avail = (subbuf_size - padding) - read_offset;
0977 
0978     return avail;
0979 }
0980 
0981 /**
0982  *  relay_file_read_start_pos - find the first available byte to read
0983  *  @buf: relay channel buffer
0984  *
0985  *  If the read_pos is in the middle of padding, return the
0986  *  position of the first actually available byte, otherwise
0987  *  return the original value.
0988  */
0989 static size_t relay_file_read_start_pos(struct rchan_buf *buf)
0990 {
0991     size_t read_subbuf, padding, padding_start, padding_end;
0992     size_t subbuf_size = buf->chan->subbuf_size;
0993     size_t n_subbufs = buf->chan->n_subbufs;
0994     size_t consumed = buf->subbufs_consumed % n_subbufs;
0995     size_t read_pos = consumed * subbuf_size + buf->bytes_consumed;
0996 
0997     read_subbuf = read_pos / subbuf_size;
0998     padding = buf->padding[read_subbuf];
0999     padding_start = (read_subbuf + 1) * subbuf_size - padding;
1000     padding_end = (read_subbuf + 1) * subbuf_size;
1001     if (read_pos >= padding_start && read_pos < padding_end) {
1002         read_subbuf = (read_subbuf + 1) % n_subbufs;
1003         read_pos = read_subbuf * subbuf_size;
1004     }
1005 
1006     return read_pos;
1007 }
1008 
1009 /**
1010  *  relay_file_read_end_pos - return the new read position
1011  *  @read_pos: file read position
1012  *  @buf: relay channel buffer
1013  *  @count: number of bytes to be read
1014  */
1015 static size_t relay_file_read_end_pos(struct rchan_buf *buf,
1016                       size_t read_pos,
1017                       size_t count)
1018 {
1019     size_t read_subbuf, padding, end_pos;
1020     size_t subbuf_size = buf->chan->subbuf_size;
1021     size_t n_subbufs = buf->chan->n_subbufs;
1022 
1023     read_subbuf = read_pos / subbuf_size;
1024     padding = buf->padding[read_subbuf];
1025     if (read_pos % subbuf_size + count + padding == subbuf_size)
1026         end_pos = (read_subbuf + 1) * subbuf_size;
1027     else
1028         end_pos = read_pos + count;
1029     if (end_pos >= subbuf_size * n_subbufs)
1030         end_pos = 0;
1031 
1032     return end_pos;
1033 }
1034 
1035 static ssize_t relay_file_read(struct file *filp,
1036                    char __user *buffer,
1037                    size_t count,
1038                    loff_t *ppos)
1039 {
1040     struct rchan_buf *buf = filp->private_data;
1041     size_t read_start, avail;
1042     size_t written = 0;
1043     int ret;
1044 
1045     if (!count)
1046         return 0;
1047 
1048     inode_lock(file_inode(filp));
1049     do {
1050         void *from;
1051 
1052         if (!relay_file_read_avail(buf))
1053             break;
1054 
1055         read_start = relay_file_read_start_pos(buf);
1056         avail = relay_file_read_subbuf_avail(read_start, buf);
1057         if (!avail)
1058             break;
1059 
1060         avail = min(count, avail);
1061         from = buf->start + read_start;
1062         ret = avail;
1063         if (copy_to_user(buffer, from, avail))
1064             break;
1065 
1066         buffer += ret;
1067         written += ret;
1068         count -= ret;
1069 
1070         relay_file_read_consume(buf, read_start, ret);
1071         *ppos = relay_file_read_end_pos(buf, read_start, ret);
1072     } while (count);
1073     inode_unlock(file_inode(filp));
1074 
1075     return written;
1076 }
1077 
1078 static void relay_consume_bytes(struct rchan_buf *rbuf, int bytes_consumed)
1079 {
1080     rbuf->bytes_consumed += bytes_consumed;
1081 
1082     if (rbuf->bytes_consumed >= rbuf->chan->subbuf_size) {
1083         relay_subbufs_consumed(rbuf->chan, rbuf->cpu, 1);
1084         rbuf->bytes_consumed %= rbuf->chan->subbuf_size;
1085     }
1086 }
1087 
1088 static void relay_pipe_buf_release(struct pipe_inode_info *pipe,
1089                    struct pipe_buffer *buf)
1090 {
1091     struct rchan_buf *rbuf;
1092 
1093     rbuf = (struct rchan_buf *)page_private(buf->page);
1094     relay_consume_bytes(rbuf, buf->private);
1095 }
1096 
1097 static const struct pipe_buf_operations relay_pipe_buf_ops = {
1098     .release    = relay_pipe_buf_release,
1099     .try_steal  = generic_pipe_buf_try_steal,
1100     .get        = generic_pipe_buf_get,
1101 };
1102 
/*
 *	relay_page_release - spd_release callback for relay splice descriptors
 *	@spd: splice descriptor (unused)
 *	@i: index of the page within @spd (unused)
 *
 *	Intentionally does nothing.
 */
static void relay_page_release(struct splice_pipe_desc *spd, unsigned int i)
{
	/* No per-page cleanup is performed here. */
}
1106 
1107 /*
1108  *  subbuf_splice_actor - splice up to one subbuf's worth of data
1109  */
1110 static ssize_t subbuf_splice_actor(struct file *in,
1111                    loff_t *ppos,
1112                    struct pipe_inode_info *pipe,
1113                    size_t len,
1114                    unsigned int flags,
1115                    int *nonpad_ret)
1116 {
1117     unsigned int pidx, poff, total_len, subbuf_pages, nr_pages;
1118     struct rchan_buf *rbuf = in->private_data;
1119     unsigned int subbuf_size = rbuf->chan->subbuf_size;
1120     uint64_t pos = (uint64_t) *ppos;
1121     uint32_t alloc_size = (uint32_t) rbuf->chan->alloc_size;
1122     size_t read_start = (size_t) do_div(pos, alloc_size);
1123     size_t read_subbuf = read_start / subbuf_size;
1124     size_t padding = rbuf->padding[read_subbuf];
1125     size_t nonpad_end = read_subbuf * subbuf_size + subbuf_size - padding;
1126     struct page *pages[PIPE_DEF_BUFFERS];
1127     struct partial_page partial[PIPE_DEF_BUFFERS];
1128     struct splice_pipe_desc spd = {
1129         .pages = pages,
1130         .nr_pages = 0,
1131         .nr_pages_max = PIPE_DEF_BUFFERS,
1132         .partial = partial,
1133         .ops = &relay_pipe_buf_ops,
1134         .spd_release = relay_page_release,
1135     };
1136     ssize_t ret;
1137 
1138     if (rbuf->subbufs_produced == rbuf->subbufs_consumed)
1139         return 0;
1140     if (splice_grow_spd(pipe, &spd))
1141         return -ENOMEM;
1142 
1143     /*
1144      * Adjust read len, if longer than what is available
1145      */
1146     if (len > (subbuf_size - read_start % subbuf_size))
1147         len = subbuf_size - read_start % subbuf_size;
1148 
1149     subbuf_pages = rbuf->chan->alloc_size >> PAGE_SHIFT;
1150     pidx = (read_start / PAGE_SIZE) % subbuf_pages;
1151     poff = read_start & ~PAGE_MASK;
1152     nr_pages = min_t(unsigned int, subbuf_pages, spd.nr_pages_max);
1153 
1154     for (total_len = 0; spd.nr_pages < nr_pages; spd.nr_pages++) {
1155         unsigned int this_len, this_end, private;
1156         unsigned int cur_pos = read_start + total_len;
1157 
1158         if (!len)
1159             break;
1160 
1161         this_len = min_t(unsigned long, len, PAGE_SIZE - poff);
1162         private = this_len;
1163 
1164         spd.pages[spd.nr_pages] = rbuf->page_array[pidx];
1165         spd.partial[spd.nr_pages].offset = poff;
1166 
1167         this_end = cur_pos + this_len;
1168         if (this_end >= nonpad_end) {
1169             this_len = nonpad_end - cur_pos;
1170             private = this_len + padding;
1171         }
1172         spd.partial[spd.nr_pages].len = this_len;
1173         spd.partial[spd.nr_pages].private = private;
1174 
1175         len -= this_len;
1176         total_len += this_len;
1177         poff = 0;
1178         pidx = (pidx + 1) % subbuf_pages;
1179 
1180         if (this_end >= nonpad_end) {
1181             spd.nr_pages++;
1182             break;
1183         }
1184     }
1185 
1186     ret = 0;
1187     if (!spd.nr_pages)
1188         goto out;
1189 
1190     ret = *nonpad_ret = splice_to_pipe(pipe, &spd);
1191     if (ret < 0 || ret < total_len)
1192         goto out;
1193 
1194         if (read_start + ret == nonpad_end)
1195                 ret += padding;
1196 
1197 out:
1198     splice_shrink_spd(&spd);
1199     return ret;
1200 }
1201 
1202 static ssize_t relay_file_splice_read(struct file *in,
1203                       loff_t *ppos,
1204                       struct pipe_inode_info *pipe,
1205                       size_t len,
1206                       unsigned int flags)
1207 {
1208     ssize_t spliced;
1209     int ret;
1210     int nonpad_ret = 0;
1211 
1212     ret = 0;
1213     spliced = 0;
1214 
1215     while (len && !spliced) {
1216         ret = subbuf_splice_actor(in, ppos, pipe, len, flags, &nonpad_ret);
1217         if (ret < 0)
1218             break;
1219         else if (!ret) {
1220             if (flags & SPLICE_F_NONBLOCK)
1221                 ret = -EAGAIN;
1222             break;
1223         }
1224 
1225         *ppos += ret;
1226         if (ret > len)
1227             len = 0;
1228         else
1229             len -= ret;
1230         spliced += nonpad_ret;
1231         nonpad_ret = 0;
1232     }
1233 
1234     if (spliced)
1235         return spliced;
1236 
1237     return ret;
1238 }
1239 
1240 const struct file_operations relay_file_operations = {
1241     .open       = relay_file_open,
1242     .poll       = relay_file_poll,
1243     .mmap       = relay_file_mmap,
1244     .read       = relay_file_read,
1245     .llseek     = no_llseek,
1246     .release    = relay_file_release,
1247     .splice_read    = relay_file_splice_read,
1248 };
1249 EXPORT_SYMBOL_GPL(relay_file_operations);