![]() |
|
|||
0001 #ifndef _TOOLS_LINUX_RING_BUFFER_H_ 0002 #define _TOOLS_LINUX_RING_BUFFER_H_ 0003 0004 #include <asm/barrier.h> 0005 #include <linux/perf_event.h> 0006 0007 /* 0008 * Contract with kernel for walking the perf ring buffer from 0009 * user space requires the following barrier pairing (quote 0010 * from kernel/events/ring_buffer.c): 0011 * 0012 * Since the mmap() consumer (userspace) can run on a 0013 * different CPU: 0014 * 0015 * kernel user 0016 * 0017 * if (LOAD ->data_tail) { LOAD ->data_head 0018 * (A) smp_rmb() (C) 0019 * STORE $data LOAD $data 0020 * smp_wmb() (B) smp_mb() (D) 0021 * STORE ->data_head STORE ->data_tail 0022 * } 0023 * 0024 * Where A pairs with D, and B pairs with C. 0025 * 0026 * In our case A is a control dependency that separates the 0027 * load of the ->data_tail and the stores of $data. In case 0028 * ->data_tail indicates there is no room in the buffer to 0029 * store $data we do not. 0030 * 0031 * D needs to be a full barrier since it separates the data 0032 * READ from the tail WRITE. 0033 * 0034 * For B a WMB is sufficient since it separates two WRITEs, 0035 * and for C an RMB is sufficient since it separates two READs. 0036 * 0037 * Note, instead of B, C, D we could also use smp_store_release() 0038 * in B and D as well as smp_load_acquire() in C. 0039 * 0040 * However, this optimization does not make sense for all kernel 0041 * supported architectures since for a fair number it would 0042 * resolve into READ_ONCE() + smp_mb() pair for smp_load_acquire(), 0043 * and smp_mb() + WRITE_ONCE() pair for smp_store_release(). 0044 * 0045 * Thus for those smp_wmb() in B and smp_rmb() in C would still 0046 * be less expensive. For the case of D this has either the same 0047 * cost or is less expensive, for example, due to TSO x86 can 0048 * avoid the CPU barrier entirely. 0049 */ 0050 0051 static inline u64 ring_buffer_read_head(struct perf_event_mmap_page *base) 0052 { 0053 /* 0054 * Architectures where smp_load_acquire() does not fallback to 0055 * READ_ONCE() + smp_mb() pair. 0056 */ 0057 #if defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) || \ 0058 defined(__ia64__) || defined(__sparc__) && defined(__arch64__) 0059 return smp_load_acquire(&base->data_head); 0060 #else 0061 u64 head = READ_ONCE(base->data_head); 0062 0063 smp_rmb(); 0064 return head; 0065 #endif 0066 } 0067 0068 static inline void ring_buffer_write_tail(struct perf_event_mmap_page *base, 0069 u64 tail) 0070 { 0071 smp_store_release(&base->data_tail, tail); 0072 } 0073 0074 #endif /* _TOOLS_LINUX_RING_BUFFER_H_ */
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.1.0 LXR engine. The LXR team |
![]() ![]() |