// SPDX-License-Identifier: GPL-2.0
/*
 * Generic ring buffer
 */
0007 #include <linux/trace_recursion.h>
0008 #include <linux/trace_events.h>
0009 #include <linux/ring_buffer.h>
0010 #include <linux/trace_clock.h>
0011 #include <linux/sched/clock.h>
0012 #include <linux/trace_seq.h>
0013 #include <linux/spinlock.h>
0014 #include <linux/irq_work.h>
0015 #include <linux/security.h>
0016 #include <linux/uaccess.h>
0017 #include <linux/hardirq.h>
0018 #include <linux/kthread.h> /* for self test */
0019 #include <linux/module.h>
0020 #include <linux/percpu.h>
0021 #include <linux/mutex.h>
0022 #include <linux/delay.h>
0023 #include <linux/slab.h>
0024 #include <linux/init.h>
0025 #include <linux/hash.h>
0026 #include <linux/list.h>
0027 #include <linux/cpu.h>
0028 #include <linux/oom.h>
0029
0030 #include <asm/local.h>
0031
/*
 * The "absolute" timestamp in the buffer is only 59 bits. If a clock has
 * the 5 MSBs set, they need to be saved and reinserted when read back.
 */
0037 #define TS_MSB (0xf8ULL << 56)
0038 #define ABS_TS_MASK (~TS_MSB)
0039
0040 static void update_pages_handler(struct work_struct *work);
0041
/*
 * The ring buffer header is special. We must manually keep it up to date.
 */
0045 int ring_buffer_print_entry_header(struct trace_seq *s)
0046 {
0047 trace_seq_puts(s, "# compressed entry header\n");
0048 trace_seq_puts(s, "\ttype_len : 5 bits\n");
0049 trace_seq_puts(s, "\ttime_delta : 27 bits\n");
0050 trace_seq_puts(s, "\tarray : 32 bits\n");
0051 trace_seq_putc(s, '\n');
0052 trace_seq_printf(s, "\tpadding : type == %d\n",
0053 RINGBUF_TYPE_PADDING);
0054 trace_seq_printf(s, "\ttime_extend : type == %d\n",
0055 RINGBUF_TYPE_TIME_EXTEND);
0056 trace_seq_printf(s, "\ttime_stamp : type == %d\n",
0057 RINGBUF_TYPE_TIME_STAMP);
0058 trace_seq_printf(s, "\tdata max type_len == %d\n",
0059 RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
0060
0061 return !trace_seq_has_overflowed(s);
0062 }
0063
/*
 * The ring buffer is made up of a list of pages. A separate list of pages
 * is allocated for each CPU. A writer may only write to the buffer that is
 * associated with the CPU it is executing on, but a reader may read from
 * any per-CPU buffer.
 *
 * The reader is special. For each per-CPU buffer, the reader has its own
 * reader page. When the reader has consumed the entire reader page, that
 * page is swapped with a page inside the ring buffer: the reader takes the
 * current head page out of the ring and hands its old (now empty) reader
 * page to the writer in its place. As long as the writer has moved off the
 * page the reader took, the reader can do whatever it wants with it; the
 * writer will never write to that page again while it is outside the ring.
 *
 * The head page is the page the reader will swap out next. The writer can
 * also move the head page: when the buffer is full and overwrite mode is
 * enabled, the writer pushes the head page forward, discarding the oldest
 * events. All of this is coordinated locklessly with flag bits stored in
 * the least significant bits of the list pointers (see the RB_PAGE_*
 * definitions and the comment above them further down).
 */

/* Used for individual buffers (after the counter) */
0133 #define RB_BUFFER_OFF (1 << 20)
0134
0135 #define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data)
0136
0137 #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
0138 #define RB_ALIGNMENT 4U
0139 #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
0140 #define RB_EVNT_MIN_SIZE 8U
0141
0142 #ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS
0143 # define RB_FORCE_8BYTE_ALIGNMENT 0
0144 # define RB_ARCH_ALIGNMENT RB_ALIGNMENT
0145 #else
0146 # define RB_FORCE_8BYTE_ALIGNMENT 1
0147 # define RB_ARCH_ALIGNMENT 8U
0148 #endif
0149
0150 #define RB_ALIGN_DATA __aligned(RB_ARCH_ALIGNMENT)
0151
0152
0153 #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
0154
0155 enum {
0156 RB_LEN_TIME_EXTEND = 8,
0157 RB_LEN_TIME_STAMP = 8,
0158 };
0159
0160 #define skip_time_extend(event) \
0161 ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))
0162
0163 #define extended_time(event) \
0164 (event->type_len >= RINGBUF_TYPE_TIME_EXTEND)
0165
0166 static inline int rb_null_event(struct ring_buffer_event *event)
0167 {
0168 return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
0169 }
0170
0171 static void rb_event_set_padding(struct ring_buffer_event *event)
0172 {
0173
0174 event->type_len = RINGBUF_TYPE_PADDING;
0175 event->time_delta = 0;
0176 }
0177
0178 static unsigned
0179 rb_event_data_length(struct ring_buffer_event *event)
0180 {
0181 unsigned length;
0182
0183 if (event->type_len)
0184 length = event->type_len * RB_ALIGNMENT;
0185 else
0186 length = event->array[0];
0187 return length + RB_EVNT_HDR_SIZE;
0188 }
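/*
 * Worked example (editor's note, not part of the original source): a data
 * event whose payload fits in the type_len field, e.g. type_len == 3, has
 * a payload of 3 * RB_ALIGNMENT = 12 bytes, so rb_event_data_length()
 * returns 12 + RB_EVNT_HDR_SIZE = 16. A larger event uses type_len == 0
 * and stores its payload length in array[0] instead.
 */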
0189
/*
 * Return the length of the given event. Will return
 * the length of the time extend if the event is a
 * time extend.
 */
0195 static inline unsigned
0196 rb_event_length(struct ring_buffer_event *event)
0197 {
0198 switch (event->type_len) {
0199 case RINGBUF_TYPE_PADDING:
0200 if (rb_null_event(event))
0201
0202 return -1;
0203 return event->array[0] + RB_EVNT_HDR_SIZE;
0204
0205 case RINGBUF_TYPE_TIME_EXTEND:
0206 return RB_LEN_TIME_EXTEND;
0207
0208 case RINGBUF_TYPE_TIME_STAMP:
0209 return RB_LEN_TIME_STAMP;
0210
0211 case RINGBUF_TYPE_DATA:
0212 return rb_event_data_length(event);
0213 default:
0214 WARN_ON_ONCE(1);
0215 }
0216
0217 return 0;
0218 }
0219
/*
 * Return total length of time extend and data,
 *   or just the event length for all other events.
 */
0224 static inline unsigned
0225 rb_event_ts_length(struct ring_buffer_event *event)
0226 {
0227 unsigned len = 0;
0228
0229 if (extended_time(event)) {
0230
0231 len = RB_LEN_TIME_EXTEND;
0232 event = skip_time_extend(event);
0233 }
0234 return len + rb_event_length(event);
0235 }
0236
/**
 * ring_buffer_event_length - return the length of the event
 * @event: the event to get the length of
 *
 * Returns the size of the data load of a data event.
 * If the event is something other than a data event, it
 * returns the size of the event itself. With the exception
 * of a TIME EXTEND, where it still returns the size of the
 * data load of the data event after it.
 */
0247 unsigned ring_buffer_event_length(struct ring_buffer_event *event)
0248 {
0249 unsigned length;
0250
0251 if (extended_time(event))
0252 event = skip_time_extend(event);
0253
0254 length = rb_event_length(event);
0255 if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
0256 return length;
0257 length -= RB_EVNT_HDR_SIZE;
0258 if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0]))
0259 length -= sizeof(event->array[0]);
0260 return length;
0261 }
0262 EXPORT_SYMBOL_GPL(ring_buffer_event_length);
0263
0264
0265 static __always_inline void *
0266 rb_event_data(struct ring_buffer_event *event)
0267 {
0268 if (extended_time(event))
0269 event = skip_time_extend(event);
0270 WARN_ON_ONCE(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
0271
0272 if (event->type_len)
0273 return (void *)&event->array[0];
0274
0275 return (void *)&event->array[1];
0276 }
0277
/**
 * ring_buffer_event_data - return the data of the event
 * @event: the event to get the data from
 */
0282 void *ring_buffer_event_data(struct ring_buffer_event *event)
0283 {
0284 return rb_event_data(event);
0285 }
0286 EXPORT_SYMBOL_GPL(ring_buffer_event_data);
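/*
 * Usage sketch (editor's illustration, not part of the original file):
 * given an event obtained from one of the read APIs, a consumer typically
 * pairs the two accessors above:
 *
 *	void *payload    = ring_buffer_event_data(event);
 *	unsigned int len = ring_buffer_event_length(event);
 *
 * The payload can then be copied or parsed; @len never includes the event
 * header. Both helpers transparently skip a leading time-extend event, so
 * callers never need to handle RINGBUF_TYPE_TIME_EXTEND themselves.
 */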
0287
0288 #define for_each_buffer_cpu(buffer, cpu) \
0289 for_each_cpu(cpu, buffer->cpumask)
0290
0291 #define for_each_online_buffer_cpu(buffer, cpu) \
0292 for_each_cpu_and(cpu, buffer->cpumask, cpu_online_mask)
0293
0294 #define TS_SHIFT 27
0295 #define TS_MASK ((1ULL << TS_SHIFT) - 1)
0296 #define TS_DELTA_TEST (~TS_MASK)
0297
0298 static u64 rb_event_time_stamp(struct ring_buffer_event *event)
0299 {
0300 u64 ts;
0301
0302 ts = event->array[0];
0303 ts <<= TS_SHIFT;
0304 ts += event->time_delta;
0305
0306 return ts;
0307 }
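/*
 * Worked example (editor's note): for a TIME_EXTEND/TIME_STAMP event the
 * delta is split across the header, with the low TS_SHIFT (27) bits in
 * time_delta and the upper bits in array[0]. rb_event_time_stamp()
 * reverses that split:
 *
 *	delta = ((u64)event->array[0] << 27) + event->time_delta;
 *
 * e.g. array[0] == 2 and time_delta == 5 reconstructs 2 * 2^27 + 5.
 */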
0308
/* Flag when events were overwritten */
#define RB_MISSED_EVENTS	(1 << 31)
/* Missed count stored at end */
#define RB_MISSED_STORED	(1 << 30)

0314 struct buffer_data_page {
0315 u64 time_stamp;
0316 local_t commit;
0317 unsigned char data[] RB_ALIGN_DATA;
0318 };
0319
/*
 * Note, the buffer_page list must be first. The buffer pages
 * are allocated in cache lines, which means that each buffer
 * page will be at the beginning of a cache line, and thus
 * the least significant bits will be zero. We use this to
 * add flags in the list struct pointers, to make the ring buffer
 * lockless.
 */
0328 struct buffer_page {
0329 struct list_head list;
0330 local_t write;
0331 unsigned read;
0332 local_t entries;
0333 unsigned long real_end;
0334 struct buffer_data_page *page;
0335 };
0336
/*
 * The buffer page counters, write and entries, must be reset
 * atomically when crossing page boundaries. To synchronize this
 * update, two counters are inserted into the number. One is
 * the actual counter for the write position or count on the page.
 *
 * The other is a counter of updaters. Before an update happens
 * the update partition of the counter is incremented. This will
 * allow the updater to update the counter atomically.
 *
 * The counter is 20 bits, and the state data is 12.
 */
0349 #define RB_WRITE_MASK 0xfffff
0350 #define RB_WRITE_INTCNT (1 << 20)
0351
0352 static void rb_init_page(struct buffer_data_page *bpage)
0353 {
0354 local_set(&bpage->commit, 0);
0355 }
0356
0357
0358
0359
0360
0361 static void free_buffer_page(struct buffer_page *bpage)
0362 {
0363 free_page((unsigned long)bpage->page);
0364 kfree(bpage);
0365 }
0366
0367
0368
0369
0370 static inline int test_time_stamp(u64 delta)
0371 {
0372 if (delta & TS_DELTA_TEST)
0373 return 1;
0374 return 0;
0375 }
0376
0377 #define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE)
0378
0379
0380 #define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))
0381
0382 int ring_buffer_print_page_header(struct trace_seq *s)
0383 {
0384 struct buffer_data_page field;
0385
0386 trace_seq_printf(s, "\tfield: u64 timestamp;\t"
0387 "offset:0;\tsize:%u;\tsigned:%u;\n",
0388 (unsigned int)sizeof(field.time_stamp),
0389 (unsigned int)is_signed_type(u64));
0390
0391 trace_seq_printf(s, "\tfield: local_t commit;\t"
0392 "offset:%u;\tsize:%u;\tsigned:%u;\n",
0393 (unsigned int)offsetof(typeof(field), commit),
0394 (unsigned int)sizeof(field.commit),
0395 (unsigned int)is_signed_type(long));
0396
0397 trace_seq_printf(s, "\tfield: int overwrite;\t"
0398 "offset:%u;\tsize:%u;\tsigned:%u;\n",
0399 (unsigned int)offsetof(typeof(field), commit),
0400 1,
0401 (unsigned int)is_signed_type(long));
0402
0403 trace_seq_printf(s, "\tfield: char data;\t"
0404 "offset:%u;\tsize:%u;\tsigned:%u;\n",
0405 (unsigned int)offsetof(typeof(field), data),
0406 (unsigned int)BUF_PAGE_SIZE,
0407 (unsigned int)is_signed_type(char));
0408
0409 return !trace_seq_has_overflowed(s);
0410 }
0411
0412 struct rb_irq_work {
0413 struct irq_work work;
0414 wait_queue_head_t waiters;
0415 wait_queue_head_t full_waiters;
0416 bool waiters_pending;
0417 bool full_waiters_pending;
0418 bool wakeup_full;
0419 };
0420
0421
0422
0423
0424 struct rb_event_info {
0425 u64 ts;
0426 u64 delta;
0427 u64 before;
0428 u64 after;
0429 unsigned long length;
0430 struct buffer_page *tail_page;
0431 int add_timestamp;
0432 };
0433
0434
0435
0436
0437
0438
0439
0440
0441 enum {
0442 RB_ADD_STAMP_NONE = 0,
0443 RB_ADD_STAMP_EXTEND = BIT(1),
0444 RB_ADD_STAMP_ABSOLUTE = BIT(2),
0445 RB_ADD_STAMP_FORCE = BIT(3)
0446 };
0447
0448
0449
0450
0451
0452
0453
0454
0455
0456
0457 enum {
0458 RB_CTX_TRANSITION,
0459 RB_CTX_NMI,
0460 RB_CTX_IRQ,
0461 RB_CTX_SOFTIRQ,
0462 RB_CTX_NORMAL,
0463 RB_CTX_MAX
0464 };
0465
0466 #if BITS_PER_LONG == 32
0467 #define RB_TIME_32
0468 #endif
0469
0470
0471
0472
0473 #ifdef RB_TIME_32
0474
0475 struct rb_time_struct {
0476 local_t cnt;
0477 local_t top;
0478 local_t bottom;
0479 local_t msb;
0480 };
0481 #else
0482 #include <asm/local64.h>
0483 struct rb_time_struct {
0484 local64_t time;
0485 };
0486 #endif
0487 typedef struct rb_time_struct rb_time_t;
0488
0489 #define MAX_NEST 5
0490
0491
0492
0493
0494 struct ring_buffer_per_cpu {
0495 int cpu;
0496 atomic_t record_disabled;
0497 atomic_t resize_disabled;
0498 struct trace_buffer *buffer;
0499 raw_spinlock_t reader_lock;
0500 arch_spinlock_t lock;
0501 struct lock_class_key lock_key;
0502 struct buffer_data_page *free_page;
0503 unsigned long nr_pages;
0504 unsigned int current_context;
0505 struct list_head *pages;
0506 struct buffer_page *head_page;
0507 struct buffer_page *tail_page;
0508 struct buffer_page *commit_page;
0509 struct buffer_page *reader_page;
0510 unsigned long lost_events;
0511 unsigned long last_overrun;
0512 unsigned long nest;
0513 local_t entries_bytes;
0514 local_t entries;
0515 local_t overrun;
0516 local_t commit_overrun;
0517 local_t dropped_events;
0518 local_t committing;
0519 local_t commits;
0520 local_t pages_touched;
0521 local_t pages_read;
0522 long last_pages_touch;
0523 size_t shortest_full;
0524 unsigned long read;
0525 unsigned long read_bytes;
0526 rb_time_t write_stamp;
0527 rb_time_t before_stamp;
0528 u64 event_stamp[MAX_NEST];
0529 u64 read_stamp;
0530
0531 long nr_pages_to_update;
0532 struct list_head new_pages;
0533 struct work_struct update_pages_work;
0534 struct completion update_done;
0535
0536 struct rb_irq_work irq_work;
0537 };
0538
0539 struct trace_buffer {
0540 unsigned flags;
0541 int cpus;
0542 atomic_t record_disabled;
0543 cpumask_var_t cpumask;
0544
0545 struct lock_class_key *reader_lock_key;
0546
0547 struct mutex mutex;
0548
0549 struct ring_buffer_per_cpu **buffers;
0550
0551 struct hlist_node node;
0552 u64 (*clock)(void);
0553
0554 struct rb_irq_work irq_work;
0555 bool time_stamp_abs;
0556 };
0557
0558 struct ring_buffer_iter {
0559 struct ring_buffer_per_cpu *cpu_buffer;
0560 unsigned long head;
0561 unsigned long next_event;
0562 struct buffer_page *head_page;
0563 struct buffer_page *cache_reader_page;
0564 unsigned long cache_read;
0565 u64 read_stamp;
0566 u64 page_stamp;
0567 struct ring_buffer_event *event;
0568 int missed_events;
0569 };
0570
0571 #ifdef RB_TIME_32

/*
 * On 32 bit machines, local64_t is very expensive. As the ring
 * buffer doesn't need all the features of a true 64 bit atomic,
 * on 32 bit, it uses these functions (64 bit still uses local64_t).
 *
 * For the ring buffer, the 64 bit time operations behave as follows:
 *
 *  - Reads may fail if they interrupted a modification of the time stamp.
 *      They will succeed if they did not interrupt another write, even if
 *      the read itself is interrupted by a write.
 *
 *  - Writes always succeed and will overwrite other writes, including
 *      writes done by events interrupting the current write.
 *
 *  - A cmpxchg will fail if it interrupted another write or cmpxchg.
 *      Other than that, it acts like a normal cmpxchg.
 *
 * The 60 bits of time stamp are broken up into a top and a bottom half
 * of 30 bits each, and a separate msb word holds the bits above bit 59
 * (the TS_MSB bits). The two most significant bits of the top and bottom
 * words hold a 2 bit counter that is incremented on every update; a read
 * is only valid if the counter bits of top and bottom match.
 */
0603 #define RB_TIME_SHIFT 30
0604 #define RB_TIME_VAL_MASK ((1 << RB_TIME_SHIFT) - 1)
0605 #define RB_TIME_MSB_SHIFT 60
0606
0607 static inline int rb_time_cnt(unsigned long val)
0608 {
0609 return (val >> RB_TIME_SHIFT) & 3;
0610 }
0611
0612 static inline u64 rb_time_val(unsigned long top, unsigned long bottom)
0613 {
0614 u64 val;
0615
0616 val = top & RB_TIME_VAL_MASK;
0617 val <<= RB_TIME_SHIFT;
0618 val |= bottom & RB_TIME_VAL_MASK;
0619
0620 return val;
0621 }
0622
0623 static inline bool __rb_time_read(rb_time_t *t, u64 *ret, unsigned long *cnt)
0624 {
0625 unsigned long top, bottom, msb;
0626 unsigned long c;
0627
0628
0629
0630
0631
0632
0633 do {
0634 c = local_read(&t->cnt);
0635 top = local_read(&t->top);
0636 bottom = local_read(&t->bottom);
0637 msb = local_read(&t->msb);
0638 } while (c != local_read(&t->cnt));
0639
0640 *cnt = rb_time_cnt(top);
0641
0642
0643 if (*cnt != rb_time_cnt(bottom))
0644 return false;
0645
0646
0647 *ret = rb_time_val(top, bottom) | ((u64)msb << RB_TIME_MSB_SHIFT);
0648 return true;
0649 }
0650
0651 static bool rb_time_read(rb_time_t *t, u64 *ret)
0652 {
0653 unsigned long cnt;
0654
0655 return __rb_time_read(t, ret, &cnt);
0656 }
0657
0658 static inline unsigned long rb_time_val_cnt(unsigned long val, unsigned long cnt)
0659 {
0660 return (val & RB_TIME_VAL_MASK) | ((cnt & 3) << RB_TIME_SHIFT);
0661 }
0662
0663 static inline void rb_time_split(u64 val, unsigned long *top, unsigned long *bottom,
0664 unsigned long *msb)
0665 {
0666 *top = (unsigned long)((val >> RB_TIME_SHIFT) & RB_TIME_VAL_MASK);
0667 *bottom = (unsigned long)(val & RB_TIME_VAL_MASK);
0668 *msb = (unsigned long)(val >> RB_TIME_MSB_SHIFT);
0669 }
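/*
 * Worked example (editor's note): on 32-bit, a 64-bit timestamp is stored
 * as three local_t words. rb_time_split() and rb_time_val() are inverses
 * for the low 60 bits:
 *
 *	u64 ts = 0x123456789abcdefULL;
 *	unsigned long top, bottom, msb;
 *
 *	rb_time_split(ts, &top, &bottom, &msb);
 *	//   bottom = ts & ((1 << 30) - 1)
 *	//   top    = (ts >> 30) & ((1 << 30) - 1)
 *	//   msb    = ts >> 60
 *	WARN_ON(rb_time_val(top, bottom) != (ts & ((1ULL << 60) - 1)));
 */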
0670
0671 static inline void rb_time_val_set(local_t *t, unsigned long val, unsigned long cnt)
0672 {
0673 val = rb_time_val_cnt(val, cnt);
0674 local_set(t, val);
0675 }
0676
0677 static void rb_time_set(rb_time_t *t, u64 val)
0678 {
0679 unsigned long cnt, top, bottom, msb;
0680
0681 rb_time_split(val, &top, &bottom, &msb);
0682
0683
0684 do {
0685 cnt = local_inc_return(&t->cnt);
0686 rb_time_val_set(&t->top, top, cnt);
0687 rb_time_val_set(&t->bottom, bottom, cnt);
0688 rb_time_val_set(&t->msb, val >> RB_TIME_MSB_SHIFT, cnt);
0689 } while (cnt != local_read(&t->cnt));
0690 }
0691
0692 static inline bool
0693 rb_time_read_cmpxchg(local_t *l, unsigned long expect, unsigned long set)
0694 {
0695 unsigned long ret;
0696
0697 ret = local_cmpxchg(l, expect, set);
0698 return ret == expect;
0699 }
0700
static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set)
0702 {
0703 unsigned long cnt, top, bottom, msb;
0704 unsigned long cnt2, top2, bottom2, msb2;
0705 u64 val;
0706
0707
0708 if (!__rb_time_read(t, &val, &cnt2))
0709 return false;
0710
0711 if (val != expect)
0712 return false;
0713
0714 cnt = local_read(&t->cnt);
0715 if ((cnt & 3) != cnt2)
0716 return false;
0717
0718 cnt2 = cnt + 1;
0719
0720 rb_time_split(val, &top, &bottom, &msb);
0721 top = rb_time_val_cnt(top, cnt);
0722 bottom = rb_time_val_cnt(bottom, cnt);
0723
0724 rb_time_split(set, &top2, &bottom2, &msb2);
0725 top2 = rb_time_val_cnt(top2, cnt2);
0726 bottom2 = rb_time_val_cnt(bottom2, cnt2);
0727
0728 if (!rb_time_read_cmpxchg(&t->cnt, cnt, cnt2))
0729 return false;
0730 if (!rb_time_read_cmpxchg(&t->msb, msb, msb2))
0731 return false;
0732 if (!rb_time_read_cmpxchg(&t->top, top, top2))
0733 return false;
0734 if (!rb_time_read_cmpxchg(&t->bottom, bottom, bottom2))
0735 return false;
0736 return true;
0737 }
0738
0739 #else
0740
0741
0742
0743 static inline bool rb_time_read(rb_time_t *t, u64 *ret)
0744 {
0745 *ret = local64_read(&t->time);
0746 return true;
0747 }
0748 static void rb_time_set(rb_time_t *t, u64 val)
0749 {
0750 local64_set(&t->time, val);
0751 }
0752
0753 static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set)
0754 {
0755 u64 val;
0756 val = local64_cmpxchg(&t->time, expect, set);
0757 return val == expect;
0758 }
0759 #endif
0760
0761
0762
0763
0764
0765
0766
0767 #ifdef RB_VERIFY_EVENT
0768 static struct list_head *rb_list_head(struct list_head *list);
0769 static void verify_event(struct ring_buffer_per_cpu *cpu_buffer,
0770 void *event)
0771 {
0772 struct buffer_page *page = cpu_buffer->commit_page;
0773 struct buffer_page *tail_page = READ_ONCE(cpu_buffer->tail_page);
0774 struct list_head *next;
0775 long commit, write;
0776 unsigned long addr = (unsigned long)event;
0777 bool done = false;
0778 int stop = 0;
0779
0780
0781 do {
0782 if (page == tail_page || WARN_ON_ONCE(stop++ > 100))
0783 done = true;
0784 commit = local_read(&page->page->commit);
0785 write = local_read(&page->write);
0786 if (addr >= (unsigned long)&page->page->data[commit] &&
0787 addr < (unsigned long)&page->page->data[write])
0788 return;
0789
0790 next = rb_list_head(page->list.next);
0791 page = list_entry(next, struct buffer_page, list);
0792 } while (!done);
0793 WARN_ON_ONCE(1);
0794 }
0795 #else
0796 static inline void verify_event(struct ring_buffer_per_cpu *cpu_buffer,
0797 void *event)
0798 {
0799 }
0800 #endif
0801
0802
0803
0804
0805
0806
0807
0808
0809
0810 static inline u64 rb_fix_abs_ts(u64 abs, u64 save_ts)
0811 {
0812 if (save_ts & TS_MSB) {
0813 abs |= save_ts & TS_MSB;
0814
0815 if (unlikely(abs < save_ts))
0816 abs += 1ULL << 59;
0817 }
0818 return abs;
0819 }
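/*
 * Worked example (editor's note): an absolute timestamp event only stores
 * the low 59 bits of the clock. rb_fix_abs_ts() re-inserts the 5 MSBs from
 * a previously saved full timestamp, and bumps bit 59 if the truncated
 * value wrapped:
 *
 *	u64 save_ts = (3ULL << 59) | 100;	// full clock value
 *	u64 abs     = 100;			// what the event stored
 *
 *	abs = rb_fix_abs_ts(abs, save_ts);	// returns (3ULL << 59) | 100
 */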
0820
0821 static inline u64 rb_time_stamp(struct trace_buffer *buffer);
0822
0823
0824
0825
0826
0827
0828
0829
0830
0831
0832
0833
0834
0835
0836
0837
0838
0839
0840 u64 ring_buffer_event_time_stamp(struct trace_buffer *buffer,
0841 struct ring_buffer_event *event)
0842 {
0843 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[smp_processor_id()];
0844 unsigned int nest;
0845 u64 ts;
0846
0847
0848 if (event->type_len == RINGBUF_TYPE_TIME_STAMP) {
0849 ts = rb_event_time_stamp(event);
0850 return rb_fix_abs_ts(ts, cpu_buffer->tail_page->page->time_stamp);
0851 }
0852
0853 nest = local_read(&cpu_buffer->committing);
0854 verify_event(cpu_buffer, event);
0855 if (WARN_ON_ONCE(!nest))
0856 goto fail;
0857
0858
0859 if (likely(--nest < MAX_NEST))
0860 return cpu_buffer->event_stamp[nest];
0861
0862
0863 WARN_ONCE(1, "nest (%d) greater than max", nest);
0864
0865 fail:
0866
0867 if (!rb_time_read(&cpu_buffer->write_stamp, &ts))
0868
0869 ts = rb_time_stamp(cpu_buffer->buffer);
0870
0871 return ts;
0872 }
0873
/**
 * ring_buffer_nr_pages - get the number of buffer pages in the ring buffer
 * @buffer: The ring_buffer to get the number of pages from
 * @cpu: The cpu of the ring_buffer to get the number of pages from
 *
 * Returns the number of pages used by a per_cpu buffer of the ring buffer.
 */
0881 size_t ring_buffer_nr_pages(struct trace_buffer *buffer, int cpu)
0882 {
0883 return buffer->buffers[cpu]->nr_pages;
0884 }
0885
/**
 * ring_buffer_nr_dirty_pages - get the number of used pages in the ring buffer
 * @buffer: The ring_buffer to get the number of pages from
 * @cpu: The cpu of the ring_buffer to get the number of pages from
 *
 * Returns the number of pages that have content in the ring buffer.
 */
0893 size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu)
0894 {
0895 size_t read;
0896 size_t cnt;
0897
0898 read = local_read(&buffer->buffers[cpu]->pages_read);
0899 cnt = local_read(&buffer->buffers[cpu]->pages_touched);
0900
0901 if (cnt < read) {
0902 WARN_ON_ONCE(read > cnt + 1);
0903 return 0;
0904 }
0905
0906 return cnt - read;
0907 }
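/*
 * Usage sketch (editor's illustration): the "percent full" checks in the
 * wait/wakeup paths below combine this helper with the page count, along
 * the lines of:
 *
 *	size_t nr_pages = ring_buffer_nr_pages(buffer, cpu);
 *	size_t dirty    = ring_buffer_nr_dirty_pages(buffer, cpu);
 *	bool   over     = nr_pages && (dirty * 100) > full * nr_pages;
 *
 * i.e. the buffer is considered at least @full percent full when the
 * dirty page ratio exceeds full/100.
 */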
0908
0909
0910
0911
0912
0913
0914
0915 static void rb_wake_up_waiters(struct irq_work *work)
0916 {
0917 struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work);
0918
0919 wake_up_all(&rbwork->waiters);
0920 if (rbwork->wakeup_full) {
0921 rbwork->wakeup_full = false;
0922 wake_up_all(&rbwork->full_waiters);
0923 }
0924 }
0925
/**
 * ring_buffer_wait - wait for input to the ring buffer
 * @buffer: buffer to wait on
 * @cpu: the cpu buffer to wait on
 * @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS
 *
 * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
 * as data is added to any of the @buffer's cpu buffers. Otherwise
 * it will wait for data to be added to a specific cpu buffer.
 */
0936 int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
0937 {
0938 struct ring_buffer_per_cpu *cpu_buffer;
0939 DEFINE_WAIT(wait);
0940 struct rb_irq_work *work;
0941 int ret = 0;
0942
0943
0944
0945
0946
0947
0948 if (cpu == RING_BUFFER_ALL_CPUS) {
0949 work = &buffer->irq_work;
0950
0951 full = 0;
0952 } else {
0953 if (!cpumask_test_cpu(cpu, buffer->cpumask))
0954 return -ENODEV;
0955 cpu_buffer = buffer->buffers[cpu];
0956 work = &cpu_buffer->irq_work;
0957 }
0958
0959
0960 while (true) {
0961 if (full)
0962 prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE);
0963 else
0964 prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
0965
0966
0967
0968
0969
0970
0971
0972
0973
0974
0975
0976
0977
0978
0979
0980
0981
0982
0983
0984
0985
0986 if (full)
0987 work->full_waiters_pending = true;
0988 else
0989 work->waiters_pending = true;
0990
0991 if (signal_pending(current)) {
0992 ret = -EINTR;
0993 break;
0994 }
0995
0996 if (cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer))
0997 break;
0998
0999 if (cpu != RING_BUFFER_ALL_CPUS &&
1000 !ring_buffer_empty_cpu(buffer, cpu)) {
1001 unsigned long flags;
1002 bool pagebusy;
1003 size_t nr_pages;
1004 size_t dirty;
1005
1006 if (!full)
1007 break;
1008
1009 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1010 pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
1011 nr_pages = cpu_buffer->nr_pages;
1012 dirty = ring_buffer_nr_dirty_pages(buffer, cpu);
			if (!cpu_buffer->shortest_full ||
			    cpu_buffer->shortest_full > full)
				cpu_buffer->shortest_full = full;
1016 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1017 if (!pagebusy &&
1018 (!nr_pages || (dirty * 100) > full * nr_pages))
1019 break;
1020 }
1021
1022 schedule();
1023 }
1024
1025 if (full)
1026 finish_wait(&work->full_waiters, &wait);
1027 else
1028 finish_wait(&work->waiters, &wait);
1029
1030 return ret;
1031 }
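/*
 * Usage sketch (editor's illustration, not part of the original file): a
 * reader that blocks until cpu 0 has data, or is at least half full, might
 * do:
 *
 *	int ret = ring_buffer_wait(buffer, 0, 50);
 *
 *	if (ret == -EINTR)
 *		return ret;	// interrupted by a signal
 *	// otherwise data is available on cpu 0
 *
 * Passing RING_BUFFER_ALL_CPUS waits for data on any CPU and ignores the
 * @full argument.
 */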
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047 __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
1048 struct file *filp, poll_table *poll_table)
1049 {
1050 struct ring_buffer_per_cpu *cpu_buffer;
1051 struct rb_irq_work *work;
1052
1053 if (cpu == RING_BUFFER_ALL_CPUS)
1054 work = &buffer->irq_work;
1055 else {
1056 if (!cpumask_test_cpu(cpu, buffer->cpumask))
1057 return -EINVAL;
1058
1059 cpu_buffer = buffer->buffers[cpu];
1060 work = &cpu_buffer->irq_work;
1061 }
1062
1063 poll_wait(filp, &work->waiters, poll_table);
1064 work->waiters_pending = true;
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078 smp_mb();
1079
1080 if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
1081 (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
1082 return EPOLLIN | EPOLLRDNORM;
1083 return 0;
1084 }
1085
1086
1087 #define RB_WARN_ON(b, cond) \
1088 ({ \
1089 int _____ret = unlikely(cond); \
1090 if (_____ret) { \
1091 if (__same_type(*(b), struct ring_buffer_per_cpu)) { \
1092 struct ring_buffer_per_cpu *__b = \
1093 (void *)b; \
1094 atomic_inc(&__b->buffer->record_disabled); \
1095 } else \
1096 atomic_inc(&b->record_disabled); \
1097 WARN_ON(1); \
1098 } \
1099 _____ret; \
1100 })
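/*
 * Usage sketch (editor's note): RB_WARN_ON() accepts either a
 * struct ring_buffer_per_cpu or a struct trace_buffer as @b; on a failed
 * check it disables recording on the owning buffer and warns, returning
 * the truth value of @cond so it can be used inline, e.g.:
 *
 *	if (RB_WARN_ON(cpu_buffer, !cpu_buffer->head_page))
 *		return NULL;
 */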
1101
1102
1103 #define DEBUG_SHIFT 0
1104
1105 static inline u64 rb_time_stamp(struct trace_buffer *buffer)
1106 {
1107 u64 ts;
1108
1109
1110 if (IS_ENABLED(CONFIG_RETPOLINE) && likely(buffer->clock == trace_clock_local))
1111 ts = trace_clock_local();
1112 else
1113 ts = buffer->clock();
1114
1115
1116 return ts << DEBUG_SHIFT;
1117 }
1118
1119 u64 ring_buffer_time_stamp(struct trace_buffer *buffer)
1120 {
1121 u64 time;
1122
1123 preempt_disable_notrace();
1124 time = rb_time_stamp(buffer);
1125 preempt_enable_notrace();
1126
1127 return time;
1128 }
1129 EXPORT_SYMBOL_GPL(ring_buffer_time_stamp);
1130
1131 void ring_buffer_normalize_time_stamp(struct trace_buffer *buffer,
1132 int cpu, u64 *ts)
1133 {
1134
1135 *ts >>= DEBUG_SHIFT;
1136 }
1137 EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
1138
/*
 * Making the ring buffer lockless makes things tricky. Although the
 * writer only writes to its own CPU buffer, an interrupt (or NMI) can
 * preempt it and write to the same buffer, and a reader on another CPU
 * may try to swap out the head page at any time.
 *
 * To cope with this, the pointer that points to the "head page" (the next
 * page the reader will swap out) carries flag bits in the two least
 * significant bits of the list_head next pointer:
 *
 *   HEAD   - the page pointed to is the head page
 *   UPDATE - the page pointed to is being updated by the writer
 *            and was (or is about to be) the head page
 *
 * The reader claims the head page with a cmpxchg that replaces the flagged
 * pointer with a pointer to its own reader page. The writer, when it needs
 * to move the head page forward in overwrite mode, first transitions
 * HEAD -> UPDATE, advances the head, and then clears the UPDATE flag. If
 * either side's cmpxchg fails, the other side won the race and the
 * operation is retried or reported as RB_PAGE_MOVED. The helpers below
 * implement these transitions.
 */
1208 #define RB_PAGE_NORMAL 0UL
1209 #define RB_PAGE_HEAD 1UL
1210 #define RB_PAGE_UPDATE 2UL
1211
1212
1213 #define RB_FLAG_MASK 3UL
1214
1215
1216 #define RB_PAGE_MOVED 4UL
1217
1218
1219
1220
1221 static struct list_head *rb_list_head(struct list_head *list)
1222 {
1223 unsigned long val = (unsigned long)list;
1224
1225 return (struct list_head *)(val & ~RB_FLAG_MASK);
1226 }
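/*
 * Worked example (editor's note): because buffer pages are cache-line
 * aligned, the two low bits of a list pointer are free to carry the
 * RB_PAGE_* flags defined above. rb_list_head() strips them, so for some
 * struct buffer_page *bpage:
 *
 *	struct list_head *p = (struct list_head *)
 *			((unsigned long)&bpage->list | RB_PAGE_HEAD);
 *
 *	// rb_list_head(p) == &bpage->list, flag bits removed
 */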
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236 static inline int
1237 rb_is_head_page(struct buffer_page *page, struct list_head *list)
1238 {
1239 unsigned long val;
1240
1241 val = (unsigned long)list->next;
1242
1243 if ((val & ~RB_FLAG_MASK) != (unsigned long)&page->list)
1244 return RB_PAGE_MOVED;
1245
1246 return val & RB_FLAG_MASK;
1247 }
1248
1249
1250
1251
1252
1253
1254
1255
1256 static bool rb_is_reader_page(struct buffer_page *page)
1257 {
1258 struct list_head *list = page->list.prev;
1259
1260 return rb_list_head(list->next) != &page->list;
1261 }
1262
1263
1264
1265
1266 static void rb_set_list_to_head(struct list_head *list)
1267 {
1268 unsigned long *ptr;
1269
1270 ptr = (unsigned long *)&list->next;
1271 *ptr |= RB_PAGE_HEAD;
1272 *ptr &= ~RB_PAGE_UPDATE;
1273 }
1274
1275
1276
1277
1278 static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer)
1279 {
1280 struct buffer_page *head;
1281
1282 head = cpu_buffer->head_page;
1283 if (!head)
1284 return;
1285
1286
1287
1288
1289 rb_set_list_to_head(head->list.prev);
1290 }
1291
1292 static void rb_list_head_clear(struct list_head *list)
1293 {
1294 unsigned long *ptr = (unsigned long *)&list->next;
1295
1296 *ptr &= ~RB_FLAG_MASK;
1297 }
1298
1299
1300
1301
1302 static void
1303 rb_head_page_deactivate(struct ring_buffer_per_cpu *cpu_buffer)
1304 {
1305 struct list_head *hd;
1306
1307
1308 rb_list_head_clear(cpu_buffer->pages);
1309
1310 list_for_each(hd, cpu_buffer->pages)
1311 rb_list_head_clear(hd);
1312 }
1313
1314 static int rb_head_page_set(struct ring_buffer_per_cpu *cpu_buffer,
1315 struct buffer_page *head,
1316 struct buffer_page *prev,
1317 int old_flag, int new_flag)
1318 {
1319 struct list_head *list;
1320 unsigned long val = (unsigned long)&head->list;
1321 unsigned long ret;
1322
1323 list = &prev->list;
1324
1325 val &= ~RB_FLAG_MASK;
1326
1327 ret = cmpxchg((unsigned long *)&list->next,
1328 val | old_flag, val | new_flag);
1329
1330
1331 if ((ret & ~RB_FLAG_MASK) != val)
1332 return RB_PAGE_MOVED;
1333
1334 return ret & RB_FLAG_MASK;
1335 }
1336
1337 static int rb_head_page_set_update(struct ring_buffer_per_cpu *cpu_buffer,
1338 struct buffer_page *head,
1339 struct buffer_page *prev,
1340 int old_flag)
1341 {
1342 return rb_head_page_set(cpu_buffer, head, prev,
1343 old_flag, RB_PAGE_UPDATE);
1344 }
1345
1346 static int rb_head_page_set_head(struct ring_buffer_per_cpu *cpu_buffer,
1347 struct buffer_page *head,
1348 struct buffer_page *prev,
1349 int old_flag)
1350 {
1351 return rb_head_page_set(cpu_buffer, head, prev,
1352 old_flag, RB_PAGE_HEAD);
1353 }
1354
1355 static int rb_head_page_set_normal(struct ring_buffer_per_cpu *cpu_buffer,
1356 struct buffer_page *head,
1357 struct buffer_page *prev,
1358 int old_flag)
1359 {
1360 return rb_head_page_set(cpu_buffer, head, prev,
1361 old_flag, RB_PAGE_NORMAL);
1362 }
1363
1364 static inline void rb_inc_page(struct buffer_page **bpage)
1365 {
1366 struct list_head *p = rb_list_head((*bpage)->list.next);
1367
1368 *bpage = list_entry(p, struct buffer_page, list);
1369 }
1370
1371 static struct buffer_page *
1372 rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer)
1373 {
1374 struct buffer_page *head;
1375 struct buffer_page *page;
1376 struct list_head *list;
1377 int i;
1378
1379 if (RB_WARN_ON(cpu_buffer, !cpu_buffer->head_page))
1380 return NULL;
1381
1382
1383 list = cpu_buffer->pages;
1384 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev->next) != list))
1385 return NULL;
1386
1387 page = head = cpu_buffer->head_page;
1388
1389
1390
1391
1392
1393
1394 for (i = 0; i < 3; i++) {
1395 do {
1396 if (rb_is_head_page(page, page->list.prev)) {
1397 cpu_buffer->head_page = page;
1398 return page;
1399 }
1400 rb_inc_page(&page);
1401 } while (page != head);
1402 }
1403
1404 RB_WARN_ON(cpu_buffer, 1);
1405
1406 return NULL;
1407 }
1408
1409 static int rb_head_page_replace(struct buffer_page *old,
1410 struct buffer_page *new)
1411 {
1412 unsigned long *ptr = (unsigned long *)&old->list.prev->next;
1413 unsigned long val;
1414 unsigned long ret;
1415
1416 val = *ptr & ~RB_FLAG_MASK;
1417 val |= RB_PAGE_HEAD;
1418
1419 ret = cmpxchg(ptr, val, (unsigned long)&new->list);
1420
1421 return ret == val;
1422 }
1423
1424
1425
1426
1427 static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
1428 struct buffer_page *tail_page,
1429 struct buffer_page *next_page)
1430 {
1431 unsigned long old_entries;
1432 unsigned long old_write;
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443 old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write);
1444 old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries);
1445
1446 local_inc(&cpu_buffer->pages_touched);
1447
1448
1449
1450
1451 barrier();
1452
1453
1454
1455
1456
1457
1458 if (tail_page == READ_ONCE(cpu_buffer->tail_page)) {
1459
1460 unsigned long val = old_write & ~RB_WRITE_MASK;
1461 unsigned long eval = old_entries & ~RB_WRITE_MASK;
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473 (void)local_cmpxchg(&next_page->write, old_write, val);
1474 (void)local_cmpxchg(&next_page->entries, old_entries, eval);
1475
1476
1477
1478
1479
1480
1481 local_set(&next_page->page->commit, 0);
1482
1483
1484 (void)cmpxchg(&cpu_buffer->tail_page, tail_page, next_page);
1485 }
1486 }
1487
1488 static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
1489 struct buffer_page *bpage)
1490 {
1491 unsigned long val = (unsigned long)bpage;
1492
1493 if (RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK))
1494 return 1;
1495
1496 return 0;
1497 }
1498
1499
1500
1501
1502 static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer,
1503 struct list_head *list)
1504 {
1505 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev) != list->prev))
1506 return 1;
1507 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->next) != list->next))
1508 return 1;
1509 return 0;
1510 }
1511
/*
 * rb_check_pages - integrity check of buffer pages
 * @cpu_buffer: CPU buffer with pages to test
 *
 * As a safety measure we check to make sure the data pages have not
 * been corrupted.
 */
1519 static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
1520 {
1521 struct list_head *head = cpu_buffer->pages;
1522 struct buffer_page *bpage, *tmp;
1523
1524
1525 if (cpu_buffer->head_page)
1526 rb_set_head_page(cpu_buffer);
1527
1528 rb_head_page_deactivate(cpu_buffer);
1529
1530 if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
1531 return -1;
1532 if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
1533 return -1;
1534
1535 if (rb_check_list(cpu_buffer, head))
1536 return -1;
1537
1538 list_for_each_entry_safe(bpage, tmp, head, list) {
1539 if (RB_WARN_ON(cpu_buffer,
1540 bpage->list.next->prev != &bpage->list))
1541 return -1;
1542 if (RB_WARN_ON(cpu_buffer,
1543 bpage->list.prev->next != &bpage->list))
1544 return -1;
1545 if (rb_check_list(cpu_buffer, &bpage->list))
1546 return -1;
1547 }
1548
1549 rb_head_page_activate(cpu_buffer);
1550
1551 return 0;
1552 }
1553
1554 static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
1555 long nr_pages, struct list_head *pages)
1556 {
1557 struct buffer_page *bpage, *tmp;
1558 bool user_thread = current->mm != NULL;
1559 gfp_t mflags;
1560 long i;
1561
1562
1563
1564
1565
1566
1567
1568
1569 i = si_mem_available();
1570 if (i < nr_pages)
1571 return -ENOMEM;
1572
1573
1574
1575
1576
1577
1578 mflags = GFP_KERNEL | __GFP_RETRY_MAYFAIL;
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589 if (user_thread)
1590 set_current_oom_origin();
1591 for (i = 0; i < nr_pages; i++) {
1592 struct page *page;
1593
1594 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
1595 mflags, cpu_to_node(cpu_buffer->cpu));
1596 if (!bpage)
1597 goto free_pages;
1598
1599 rb_check_bpage(cpu_buffer, bpage);
1600
1601 list_add(&bpage->list, pages);
1602
1603 page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu), mflags, 0);
1604 if (!page)
1605 goto free_pages;
1606 bpage->page = page_address(page);
1607 rb_init_page(bpage->page);
1608
1609 if (user_thread && fatal_signal_pending(current))
1610 goto free_pages;
1611 }
1612 if (user_thread)
1613 clear_current_oom_origin();
1614
1615 return 0;
1616
1617 free_pages:
1618 list_for_each_entry_safe(bpage, tmp, pages, list) {
1619 list_del_init(&bpage->list);
1620 free_buffer_page(bpage);
1621 }
1622 if (user_thread)
1623 clear_current_oom_origin();
1624
1625 return -ENOMEM;
1626 }
1627
1628 static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
1629 unsigned long nr_pages)
1630 {
1631 LIST_HEAD(pages);
1632
1633 WARN_ON(!nr_pages);
1634
1635 if (__rb_allocate_pages(cpu_buffer, nr_pages, &pages))
1636 return -ENOMEM;
1637
1638
1639
1640
1641
1642
1643 cpu_buffer->pages = pages.next;
1644 list_del(&pages);
1645
1646 cpu_buffer->nr_pages = nr_pages;
1647
1648 rb_check_pages(cpu_buffer);
1649
1650 return 0;
1651 }
1652
1653 static struct ring_buffer_per_cpu *
1654 rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
1655 {
1656 struct ring_buffer_per_cpu *cpu_buffer;
1657 struct buffer_page *bpage;
1658 struct page *page;
1659 int ret;
1660
1661 cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
1662 GFP_KERNEL, cpu_to_node(cpu));
1663 if (!cpu_buffer)
1664 return NULL;
1665
1666 cpu_buffer->cpu = cpu;
1667 cpu_buffer->buffer = buffer;
1668 raw_spin_lock_init(&cpu_buffer->reader_lock);
1669 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
1670 cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
1671 INIT_WORK(&cpu_buffer->update_pages_work, update_pages_handler);
1672 init_completion(&cpu_buffer->update_done);
1673 init_irq_work(&cpu_buffer->irq_work.work, rb_wake_up_waiters);
1674 init_waitqueue_head(&cpu_buffer->irq_work.waiters);
1675 init_waitqueue_head(&cpu_buffer->irq_work.full_waiters);
1676
1677 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
1678 GFP_KERNEL, cpu_to_node(cpu));
1679 if (!bpage)
1680 goto fail_free_buffer;
1681
1682 rb_check_bpage(cpu_buffer, bpage);
1683
1684 cpu_buffer->reader_page = bpage;
1685 page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, 0);
1686 if (!page)
1687 goto fail_free_reader;
1688 bpage->page = page_address(page);
1689 rb_init_page(bpage->page);
1690
1691 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
1692 INIT_LIST_HEAD(&cpu_buffer->new_pages);
1693
1694 ret = rb_allocate_pages(cpu_buffer, nr_pages);
1695 if (ret < 0)
1696 goto fail_free_reader;
1697
1698 cpu_buffer->head_page
1699 = list_entry(cpu_buffer->pages, struct buffer_page, list);
1700 cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page;
1701
1702 rb_head_page_activate(cpu_buffer);
1703
1704 return cpu_buffer;
1705
1706 fail_free_reader:
1707 free_buffer_page(cpu_buffer->reader_page);
1708
1709 fail_free_buffer:
1710 kfree(cpu_buffer);
1711 return NULL;
1712 }
1713
1714 static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
1715 {
1716 struct list_head *head = cpu_buffer->pages;
1717 struct buffer_page *bpage, *tmp;
1718
1719 free_buffer_page(cpu_buffer->reader_page);
1720
1721 rb_head_page_deactivate(cpu_buffer);
1722
1723 if (head) {
1724 list_for_each_entry_safe(bpage, tmp, head, list) {
1725 list_del_init(&bpage->list);
1726 free_buffer_page(bpage);
1727 }
1728 bpage = list_entry(head, struct buffer_page, list);
1729 free_buffer_page(bpage);
1730 }
1731
1732 kfree(cpu_buffer);
1733 }
1734
/**
 * __ring_buffer_alloc - allocate a new ring_buffer
 * @size: the size in bytes per cpu that is needed.
 * @flags: attributes to set for the ring buffer.
 * @key: ring buffer reader_lock_key.
 *
 * Currently the only flag that is available is the RB_FL_OVERWRITE
 * flag. This flag means that the buffer will overwrite old data
 * when the buffer wraps. If this flag is not set, the buffer will
 * drop data when the tail hits the head.
 */
1746 struct trace_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
1747 struct lock_class_key *key)
1748 {
1749 struct trace_buffer *buffer;
1750 long nr_pages;
1751 int bsize;
1752 int cpu;
1753 int ret;
1754
1755
1756 buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
1757 GFP_KERNEL);
1758 if (!buffer)
1759 return NULL;
1760
1761 if (!zalloc_cpumask_var(&buffer->cpumask, GFP_KERNEL))
1762 goto fail_free_buffer;
1763
1764 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
1765 buffer->flags = flags;
1766 buffer->clock = trace_clock_local;
1767 buffer->reader_lock_key = key;
1768
1769 init_irq_work(&buffer->irq_work.work, rb_wake_up_waiters);
1770 init_waitqueue_head(&buffer->irq_work.waiters);
1771
1772
1773 if (nr_pages < 2)
1774 nr_pages = 2;
1775
1776 buffer->cpus = nr_cpu_ids;
1777
1778 bsize = sizeof(void *) * nr_cpu_ids;
1779 buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
1780 GFP_KERNEL);
1781 if (!buffer->buffers)
1782 goto fail_free_cpumask;
1783
1784 cpu = raw_smp_processor_id();
1785 cpumask_set_cpu(cpu, buffer->cpumask);
1786 buffer->buffers[cpu] = rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
1787 if (!buffer->buffers[cpu])
1788 goto fail_free_buffers;
1789
1790 ret = cpuhp_state_add_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node);
1791 if (ret < 0)
1792 goto fail_free_buffers;
1793
1794 mutex_init(&buffer->mutex);
1795
1796 return buffer;
1797
1798 fail_free_buffers:
1799 for_each_buffer_cpu(buffer, cpu) {
1800 if (buffer->buffers[cpu])
1801 rb_free_cpu_buffer(buffer->buffers[cpu]);
1802 }
1803 kfree(buffer->buffers);
1804
1805 fail_free_cpumask:
1806 free_cpumask_var(buffer->cpumask);
1807
1808 fail_free_buffer:
1809 kfree(buffer);
1810 return NULL;
1811 }
1812 EXPORT_SYMBOL_GPL(__ring_buffer_alloc);
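/*
 * Usage sketch (editor's illustration, not part of the original file):
 * callers normally go through the ring_buffer_alloc() wrapper from
 * <linux/ring_buffer.h>, which supplies the lock class key:
 *
 *	struct trace_buffer *buf;
 *
 *	buf = ring_buffer_alloc(1 << 20, RB_FL_OVERWRITE);
 *	if (!buf)
 *		return -ENOMEM;
 *	...
 *	ring_buffer_free(buf);
 *
 * The requested size is per CPU and is rounded up to whole buffer pages,
 * with a minimum of two pages.
 */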
1813
/**
 * ring_buffer_free - free a ring buffer.
 * @buffer: the buffer to free.
 */
1818 void
1819 ring_buffer_free(struct trace_buffer *buffer)
1820 {
1821 int cpu;
1822
1823 cpuhp_state_remove_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node);
1824
1825 for_each_buffer_cpu(buffer, cpu)
1826 rb_free_cpu_buffer(buffer->buffers[cpu]);
1827
1828 kfree(buffer->buffers);
1829 free_cpumask_var(buffer->cpumask);
1830
1831 kfree(buffer);
1832 }
1833 EXPORT_SYMBOL_GPL(ring_buffer_free);
1834
1835 void ring_buffer_set_clock(struct trace_buffer *buffer,
1836 u64 (*clock)(void))
1837 {
1838 buffer->clock = clock;
1839 }
1840
1841 void ring_buffer_set_time_stamp_abs(struct trace_buffer *buffer, bool abs)
1842 {
1843 buffer->time_stamp_abs = abs;
1844 }
1845
1846 bool ring_buffer_time_stamp_abs(struct trace_buffer *buffer)
1847 {
1848 return buffer->time_stamp_abs;
1849 }
1850
1851 static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
1852
1853 static inline unsigned long rb_page_entries(struct buffer_page *bpage)
1854 {
1855 return local_read(&bpage->entries) & RB_WRITE_MASK;
1856 }
1857
1858 static inline unsigned long rb_page_write(struct buffer_page *bpage)
1859 {
1860 return local_read(&bpage->write) & RB_WRITE_MASK;
1861 }
1862
1863 static int
1864 rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
1865 {
1866 struct list_head *tail_page, *to_remove, *next_page;
1867 struct buffer_page *to_remove_page, *tmp_iter_page;
1868 struct buffer_page *last_page, *first_page;
1869 unsigned long nr_removed;
1870 unsigned long head_bit;
1871 int page_entries;
1872
1873 head_bit = 0;
1874
1875 raw_spin_lock_irq(&cpu_buffer->reader_lock);
1876 atomic_inc(&cpu_buffer->record_disabled);
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886 tail_page = &cpu_buffer->tail_page->list;
1887
1888
1889
1890
1891
1892 if (cpu_buffer->tail_page == cpu_buffer->reader_page)
1893 tail_page = rb_list_head(tail_page->next);
1894 to_remove = tail_page;
1895
1896
1897 first_page = list_entry(rb_list_head(to_remove->next),
1898 struct buffer_page, list);
1899
1900 for (nr_removed = 0; nr_removed < nr_pages; nr_removed++) {
1901 to_remove = rb_list_head(to_remove)->next;
1902 head_bit |= (unsigned long)to_remove & RB_PAGE_HEAD;
1903 }
1904
1905 next_page = rb_list_head(to_remove)->next;
1906
1907
1908
1909
1910
1911
1912 tail_page->next = (struct list_head *)((unsigned long)next_page |
1913 head_bit);
1914 next_page = rb_list_head(next_page);
1915 next_page->prev = tail_page;
1916
1917
1918 cpu_buffer->pages = next_page;
1919
1920
1921 if (head_bit)
1922 cpu_buffer->head_page = list_entry(next_page,
1923 struct buffer_page, list);
1924
1925
1926
1927
1928
1929 cpu_buffer->read = 0;
1930
1931
1932 atomic_dec(&cpu_buffer->record_disabled);
1933 raw_spin_unlock_irq(&cpu_buffer->reader_lock);
1934
1935 RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages));
1936
1937
1938 last_page = list_entry(rb_list_head(to_remove), struct buffer_page,
1939 list);
1940 tmp_iter_page = first_page;
1941
1942 do {
1943 cond_resched();
1944
1945 to_remove_page = tmp_iter_page;
1946 rb_inc_page(&tmp_iter_page);
1947
1948
1949 page_entries = rb_page_entries(to_remove_page);
1950 if (page_entries) {
1951
1952
1953
1954
1955
1956
1957 local_add(page_entries, &cpu_buffer->overrun);
1958 local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
1959 }
1960
1961
1962
1963
1964
1965 free_buffer_page(to_remove_page);
1966 nr_removed--;
1967
1968 } while (to_remove_page != last_page);
1969
1970 RB_WARN_ON(cpu_buffer, nr_removed);
1971
1972 return nr_removed == 0;
1973 }
1974
1975 static int
1976 rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
1977 {
1978 struct list_head *pages = &cpu_buffer->new_pages;
1979 int retries, success;
1980
1981 raw_spin_lock_irq(&cpu_buffer->reader_lock);
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996 retries = 10;
1997 success = 0;
1998 while (retries--) {
		struct list_head *head_page, *prev_page, *r;
		struct list_head *last_page, *first_page;
		struct list_head *head_page_with_bit;
		struct buffer_page *hpage = rb_set_head_page(cpu_buffer);

		if (!hpage)
			break;
		head_page = &hpage->list;
		prev_page = head_page->prev;
2007
2008 first_page = pages->next;
2009 last_page = pages->prev;
2010
2011 head_page_with_bit = (struct list_head *)
2012 ((unsigned long)head_page | RB_PAGE_HEAD);
2013
2014 last_page->next = head_page_with_bit;
2015 first_page->prev = prev_page;
2016
2017 r = cmpxchg(&prev_page->next, head_page_with_bit, first_page);
2018
2019 if (r == head_page_with_bit) {
2020
2021
2022
2023
2024
2025 head_page->prev = last_page;
2026 success = 1;
2027 break;
2028 }
2029 }
2030
2031 if (success)
2032 INIT_LIST_HEAD(pages);
2033
2034
2035
2036
2037 RB_WARN_ON(cpu_buffer, !success);
2038 raw_spin_unlock_irq(&cpu_buffer->reader_lock);
2039
2040
2041 if (!success) {
2042 struct buffer_page *bpage, *tmp;
2043 list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
2044 list) {
2045 list_del_init(&bpage->list);
2046 free_buffer_page(bpage);
2047 }
2048 }
2049 return success;
2050 }
2051
2052 static void rb_update_pages(struct ring_buffer_per_cpu *cpu_buffer)
2053 {
2054 int success;
2055
2056 if (cpu_buffer->nr_pages_to_update > 0)
2057 success = rb_insert_pages(cpu_buffer);
2058 else
2059 success = rb_remove_pages(cpu_buffer,
2060 -cpu_buffer->nr_pages_to_update);
2061
2062 if (success)
2063 cpu_buffer->nr_pages += cpu_buffer->nr_pages_to_update;
2064 }
2065
2066 static void update_pages_handler(struct work_struct *work)
2067 {
2068 struct ring_buffer_per_cpu *cpu_buffer = container_of(work,
2069 struct ring_buffer_per_cpu, update_pages_work);
2070 rb_update_pages(cpu_buffer);
2071 complete(&cpu_buffer->update_done);
2072 }
2073
/**
 * ring_buffer_resize - resize the ring buffer
 * @buffer: the buffer to resize.
 * @size: the new size.
 * @cpu_id: the cpu buffer to resize
 *
 * Minimum size is 2 * BUF_PAGE_SIZE.
 *
 * Returns 0 on success and < 0 on failure.
 */
2084 int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
2085 int cpu_id)
2086 {
2087 struct ring_buffer_per_cpu *cpu_buffer;
2088 unsigned long nr_pages;
2089 int cpu, err;
2090
2091
2092
2093
2094 if (!buffer)
2095 return 0;
2096
2097
2098 if (cpu_id != RING_BUFFER_ALL_CPUS &&
2099 !cpumask_test_cpu(cpu_id, buffer->cpumask))
2100 return 0;
2101
2102 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
2103
2104
2105 if (nr_pages < 2)
2106 nr_pages = 2;
2107
2108
2109 mutex_lock(&buffer->mutex);
2110
2111
2112 if (cpu_id == RING_BUFFER_ALL_CPUS) {
2113
2114
2115
2116
2117
2118 for_each_buffer_cpu(buffer, cpu) {
2119 cpu_buffer = buffer->buffers[cpu];
2120 if (atomic_read(&cpu_buffer->resize_disabled)) {
2121 err = -EBUSY;
2122 goto out_err_unlock;
2123 }
2124 }
2125
2126
2127 for_each_buffer_cpu(buffer, cpu) {
2128 cpu_buffer = buffer->buffers[cpu];
2129
2130 cpu_buffer->nr_pages_to_update = nr_pages -
2131 cpu_buffer->nr_pages;
2132
2133
2134
2135 if (cpu_buffer->nr_pages_to_update <= 0)
2136 continue;
2137
2138
2139
2140
2141 INIT_LIST_HEAD(&cpu_buffer->new_pages);
2142 if (__rb_allocate_pages(cpu_buffer, cpu_buffer->nr_pages_to_update,
2143 &cpu_buffer->new_pages)) {
2144
2145 err = -ENOMEM;
2146 goto out_err;
2147 }
2148 }
2149
2150 cpus_read_lock();
2151
2152
2153
2154
2155
2156 for_each_buffer_cpu(buffer, cpu) {
2157 cpu_buffer = buffer->buffers[cpu];
2158 if (!cpu_buffer->nr_pages_to_update)
2159 continue;
2160
2161
2162 if (!cpu_online(cpu)) {
2163 rb_update_pages(cpu_buffer);
2164 cpu_buffer->nr_pages_to_update = 0;
2165 } else {
2166 schedule_work_on(cpu,
2167 &cpu_buffer->update_pages_work);
2168 }
2169 }
2170
2171
2172 for_each_buffer_cpu(buffer, cpu) {
2173 cpu_buffer = buffer->buffers[cpu];
2174 if (!cpu_buffer->nr_pages_to_update)
2175 continue;
2176
2177 if (cpu_online(cpu))
2178 wait_for_completion(&cpu_buffer->update_done);
2179 cpu_buffer->nr_pages_to_update = 0;
2180 }
2181
2182 cpus_read_unlock();
2183 } else {
2184 cpu_buffer = buffer->buffers[cpu_id];
2185
2186 if (nr_pages == cpu_buffer->nr_pages)
2187 goto out;
2188
2189
2190
2191
2192
2193
2194 if (atomic_read(&cpu_buffer->resize_disabled)) {
2195 err = -EBUSY;
2196 goto out_err_unlock;
2197 }
2198
2199 cpu_buffer->nr_pages_to_update = nr_pages -
2200 cpu_buffer->nr_pages;
2201
2202 INIT_LIST_HEAD(&cpu_buffer->new_pages);
2203 if (cpu_buffer->nr_pages_to_update > 0 &&
2204 __rb_allocate_pages(cpu_buffer, cpu_buffer->nr_pages_to_update,
2205 &cpu_buffer->new_pages)) {
2206 err = -ENOMEM;
2207 goto out_err;
2208 }
2209
2210 cpus_read_lock();
2211
2212
2213 if (!cpu_online(cpu_id))
2214 rb_update_pages(cpu_buffer);
2215 else {
2216 schedule_work_on(cpu_id,
2217 &cpu_buffer->update_pages_work);
2218 wait_for_completion(&cpu_buffer->update_done);
2219 }
2220
2221 cpu_buffer->nr_pages_to_update = 0;
2222 cpus_read_unlock();
2223 }
2224
2225 out:
2226
2227
2228
2229
2230
2231
2232
2233 if (atomic_read(&buffer->record_disabled)) {
2234 atomic_inc(&buffer->record_disabled);
2235
2236
2237
2238
2239
2240
2241 synchronize_rcu();
2242 for_each_buffer_cpu(buffer, cpu) {
2243 cpu_buffer = buffer->buffers[cpu];
2244 rb_check_pages(cpu_buffer);
2245 }
2246 atomic_dec(&buffer->record_disabled);
2247 }
2248
2249 mutex_unlock(&buffer->mutex);
2250 return 0;
2251
2252 out_err:
2253 for_each_buffer_cpu(buffer, cpu) {
2254 struct buffer_page *bpage, *tmp;
2255
2256 cpu_buffer = buffer->buffers[cpu];
2257 cpu_buffer->nr_pages_to_update = 0;
2258
2259 if (list_empty(&cpu_buffer->new_pages))
2260 continue;
2261
2262 list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
2263 list) {
2264 list_del_init(&bpage->list);
2265 free_buffer_page(bpage);
2266 }
2267 }
2268 out_err_unlock:
2269 mutex_unlock(&buffer->mutex);
2270 return err;
2271 }
2272 EXPORT_SYMBOL_GPL(ring_buffer_resize);
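/*
 * Usage sketch (editor's illustration): resizing can target one CPU or all
 * of them; the size is again in bytes per CPU:
 *
 *	// grow every per-CPU buffer to ~2MB
 *	err = ring_buffer_resize(buffer, 2 << 20, RING_BUFFER_ALL_CPUS);
 *
 *	// shrink just CPU 1
 *	err = ring_buffer_resize(buffer, 64 << 10, 1);
 *
 * The call returns -EBUSY while a reader has resizing disabled on a target
 * CPU buffer, and -ENOMEM if the new pages cannot be allocated.
 */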
2273
2274 void ring_buffer_change_overwrite(struct trace_buffer *buffer, int val)
2275 {
2276 mutex_lock(&buffer->mutex);
2277 if (val)
2278 buffer->flags |= RB_FL_OVERWRITE;
2279 else
2280 buffer->flags &= ~RB_FL_OVERWRITE;
2281 mutex_unlock(&buffer->mutex);
2282 }
2283 EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite);
2284
2285 static __always_inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
2286 {
2287 return bpage->page->data + index;
2288 }
2289
2290 static __always_inline struct ring_buffer_event *
2291 rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
2292 {
2293 return __rb_page_index(cpu_buffer->reader_page,
2294 cpu_buffer->reader_page->read);
2295 }
2296
2297 static __always_inline unsigned rb_page_commit(struct buffer_page *bpage)
2298 {
2299 return local_read(&bpage->page->commit);
2300 }
2301
2302 static struct ring_buffer_event *
2303 rb_iter_head_event(struct ring_buffer_iter *iter)
2304 {
2305 struct ring_buffer_event *event;
2306 struct buffer_page *iter_head_page = iter->head_page;
2307 unsigned long commit;
2308 unsigned length;
2309
2310 if (iter->head != iter->next_event)
2311 return iter->event;
2312
2313
2314
2315
2316
2317
2318 commit = rb_page_commit(iter_head_page);
2319 smp_rmb();
2320 event = __rb_page_index(iter_head_page, iter->head);
2321 length = rb_event_length(event);
2322
2323
2324
2325
2326
2327 barrier();
2328
2329 if ((iter->head + length) > commit || length > BUF_MAX_DATA_SIZE)
2330
2331 goto reset;
2332
2333 memcpy(iter->event, event, length);
2334
2335
2336
2337
2338 smp_rmb();
2339
2340
2341 if (iter->page_stamp != iter_head_page->page->time_stamp ||
2342 commit > rb_page_commit(iter_head_page))
2343 goto reset;
2344
2345 iter->next_event = iter->head + length;
2346 return iter->event;
2347 reset:
2348
2349 iter->page_stamp = iter->read_stamp = iter->head_page->page->time_stamp;
2350 iter->head = 0;
2351 iter->next_event = 0;
2352 iter->missed_events = 1;
2353 return NULL;
2354 }
2355
2356
2357 static __always_inline unsigned rb_page_size(struct buffer_page *bpage)
2358 {
2359 return rb_page_commit(bpage);
2360 }
2361
2362 static __always_inline unsigned
2363 rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
2364 {
2365 return rb_page_commit(cpu_buffer->commit_page);
2366 }
2367
2368 static __always_inline unsigned
2369 rb_event_index(struct ring_buffer_event *event)
2370 {
2371 unsigned long addr = (unsigned long)event;
2372
2373 return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE;
2374 }
2375
2376 static void rb_inc_iter(struct ring_buffer_iter *iter)
2377 {
2378 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
2379
2380
2381
2382
2383
2384
2385
2386 if (iter->head_page == cpu_buffer->reader_page)
2387 iter->head_page = rb_set_head_page(cpu_buffer);
2388 else
2389 rb_inc_page(&iter->head_page);
2390
2391 iter->page_stamp = iter->read_stamp = iter->head_page->page->time_stamp;
2392 iter->head = 0;
2393 iter->next_event = 0;
2394 }
2395
2396
2397
2398
2399
2400
2401
2402
2403 static int
2404 rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
2405 struct buffer_page *tail_page,
2406 struct buffer_page *next_page)
2407 {
2408 struct buffer_page *new_head;
2409 int entries;
2410 int type;
2411 int ret;
2412
2413 entries = rb_page_entries(next_page);
2414
2415
2416
2417
2418
2419
2420 type = rb_head_page_set_update(cpu_buffer, next_page, tail_page,
2421 RB_PAGE_HEAD);
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434 switch (type) {
2435 case RB_PAGE_HEAD:
2436
2437
2438
2439
2440
2441 local_add(entries, &cpu_buffer->overrun);
2442 local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
2443
2444
2445
2446
2447
2448
2449
2450 break;
2451
2452 case RB_PAGE_UPDATE:
2453
2454
2455
2456
2457 break;
2458 case RB_PAGE_NORMAL:
2459
2460
2461
2462
2463
2464 return 1;
2465 case RB_PAGE_MOVED:
2466
2467
2468
2469
2470
2471 return 1;
2472 default:
2473 RB_WARN_ON(cpu_buffer, 1);
2474 return -1;
2475 }
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491 new_head = next_page;
2492 rb_inc_page(&new_head);
2493
2494 ret = rb_head_page_set_head(cpu_buffer, new_head, next_page,
2495 RB_PAGE_NORMAL);
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505 switch (ret) {
2506 case RB_PAGE_HEAD:
2507 case RB_PAGE_NORMAL:
2508
2509 break;
2510 default:
2511 RB_WARN_ON(cpu_buffer, 1);
2512 return -1;
2513 }
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525 if (ret == RB_PAGE_NORMAL) {
2526 struct buffer_page *buffer_tail_page;
2527
2528 buffer_tail_page = READ_ONCE(cpu_buffer->tail_page);
2529
2530
2531
2532
2533 if (buffer_tail_page != tail_page &&
2534 buffer_tail_page != next_page)
2535 rb_head_page_set_normal(cpu_buffer, new_head,
2536 next_page,
2537 RB_PAGE_HEAD);
2538 }
2539
2540
2541
2542
2543
2544
2545 if (type == RB_PAGE_HEAD) {
2546 ret = rb_head_page_set_normal(cpu_buffer, next_page,
2547 tail_page,
2548 RB_PAGE_UPDATE);
2549 if (RB_WARN_ON(cpu_buffer,
2550 ret != RB_PAGE_UPDATE))
2551 return -1;
2552 }
2553
2554 return 0;
2555 }
2556
2557 static inline void
2558 rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
2559 unsigned long tail, struct rb_event_info *info)
2560 {
2561 struct buffer_page *tail_page = info->tail_page;
2562 struct ring_buffer_event *event;
2563 unsigned long length = info->length;
2564
2565
2566
2567
2568
2569 if (tail >= BUF_PAGE_SIZE) {
2570
2571
2572
2573
2574
2575 if (tail == BUF_PAGE_SIZE)
2576 tail_page->real_end = 0;
2577
2578 local_sub(length, &tail_page->write);
2579 return;
2580 }
2581
2582 event = __rb_page_index(tail_page, tail);
2583
2584
2585 local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes);
2586
2587
2588
2589
2590
2591
2592 tail_page->real_end = tail;
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605 if (tail > (BUF_PAGE_SIZE - RB_EVNT_MIN_SIZE)) {
2606
2607
2608
2609 rb_event_set_padding(event);
2610
2611
2612 local_sub(length, &tail_page->write);
2613 return;
2614 }
2615
2616
2617 event->array[0] = (BUF_PAGE_SIZE - tail) - RB_EVNT_HDR_SIZE;
2618 event->type_len = RINGBUF_TYPE_PADDING;
2619
2620 event->time_delta = 1;
2621
2622
2623 length = (tail + length) - BUF_PAGE_SIZE;
2624 local_sub(length, &tail_page->write);
2625 }
2626
2627 static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer);
2628
2629
2630
2631
2632 static noinline struct ring_buffer_event *
2633 rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
2634 unsigned long tail, struct rb_event_info *info)
2635 {
2636 struct buffer_page *tail_page = info->tail_page;
2637 struct buffer_page *commit_page = cpu_buffer->commit_page;
2638 struct trace_buffer *buffer = cpu_buffer->buffer;
2639 struct buffer_page *next_page;
2640 int ret;
2641
2642 next_page = tail_page;
2643
2644 rb_inc_page(&next_page);
2645
2646
2647
2648
2649
2650
2651 if (unlikely(next_page == commit_page)) {
2652 local_inc(&cpu_buffer->commit_overrun);
2653 goto out_reset;
2654 }
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670 if (rb_is_head_page(next_page, &tail_page->list)) {
2671
2672
2673
2674
2675
2676 if (!rb_is_reader_page(cpu_buffer->commit_page)) {
2677
2678
2679
2680
2681 if (!(buffer->flags & RB_FL_OVERWRITE)) {
2682 local_inc(&cpu_buffer->dropped_events);
2683 goto out_reset;
2684 }
2685
2686 ret = rb_handle_head_page(cpu_buffer,
2687 tail_page,
2688 next_page);
2689 if (ret < 0)
2690 goto out_reset;
2691 if (ret)
2692 goto out_again;
2693 } else {
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704 if (unlikely((cpu_buffer->commit_page !=
2705 cpu_buffer->tail_page) &&
2706 (cpu_buffer->commit_page ==
2707 cpu_buffer->reader_page))) {
2708 local_inc(&cpu_buffer->commit_overrun);
2709 goto out_reset;
2710 }
2711 }
2712 }
2713
2714 rb_tail_page_update(cpu_buffer, tail_page, next_page);
2715
2716 out_again:
2717
2718 rb_reset_tail(cpu_buffer, tail, info);
2719
2720
2721 rb_end_commit(cpu_buffer);
2722
2723 local_inc(&cpu_buffer->committing);
2724
2725
2726 return ERR_PTR(-EAGAIN);
2727
2728 out_reset:
2729
2730 rb_reset_tail(cpu_buffer, tail, info);
2731
2732 return NULL;
2733 }
2734
2735
2736 static struct ring_buffer_event *
2737 rb_add_time_stamp(struct ring_buffer_event *event, u64 delta, bool abs)
2738 {
2739 if (abs)
2740 event->type_len = RINGBUF_TYPE_TIME_STAMP;
2741 else
2742 event->type_len = RINGBUF_TYPE_TIME_EXTEND;
2743
2744
2745 if (abs || rb_event_index(event)) {
2746 event->time_delta = delta & TS_MASK;
2747 event->array[0] = delta >> TS_SHIFT;
2748 } else {
2749
2750 event->time_delta = 0;
2751 event->array[0] = 0;
2752 }
2753
2754 return skip_time_extend(event);
2755 }
2756
2757 #ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
2758 static inline bool sched_clock_stable(void)
2759 {
2760 return true;
2761 }
2762 #endif
2763
2764 static void
2765 rb_check_timestamp(struct ring_buffer_per_cpu *cpu_buffer,
2766 struct rb_event_info *info)
2767 {
2768 u64 write_stamp;
2769
2770 WARN_ONCE(1, "Delta way too big! %llu ts=%llu before=%llu after=%llu write stamp=%llu\n%s",
2771 (unsigned long long)info->delta,
2772 (unsigned long long)info->ts,
2773 (unsigned long long)info->before,
2774 (unsigned long long)info->after,
2775 (unsigned long long)(rb_time_read(&cpu_buffer->write_stamp, &write_stamp) ? write_stamp : 0),
2776 sched_clock_stable() ? "" :
2777 "If you just came from a suspend/resume,\n"
2778 "please switch to the trace global clock:\n"
2779 " echo global > /sys/kernel/debug/tracing/trace_clock\n"
2780 "or add trace_clock=global to the kernel command line\n");
2781 }
2782
2783 static void rb_add_timestamp(struct ring_buffer_per_cpu *cpu_buffer,
2784 struct ring_buffer_event **event,
2785 struct rb_event_info *info,
2786 u64 *delta,
2787 unsigned int *length)
2788 {
2789 bool abs = info->add_timestamp &
2790 (RB_ADD_STAMP_FORCE | RB_ADD_STAMP_ABSOLUTE);
2791
2792 if (unlikely(info->delta > (1ULL << 59))) {
2793
2794
2795
2796
2797 if (abs && (info->ts & TS_MSB)) {
2798 info->delta &= ABS_TS_MASK;
2799
2800
2801 } else if (info->before == info->after && info->before > info->ts) {
2802
2803 static int once;
2804
2805
2806
2807
2808
2809 if (!once) {
2810 once++;
2811 pr_warn("Ring buffer clock went backwards: %llu -> %llu\n",
2812 info->before, info->ts);
2813 }
2814 } else
2815 rb_check_timestamp(cpu_buffer, info);
2816 if (!abs)
2817 info->delta = 0;
2818 }
2819 *event = rb_add_time_stamp(*event, info->delta, abs);
2820 *length -= RB_LEN_TIME_EXTEND;
2821 *delta = 0;
2822 }
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
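/*
 * rb_update_event - update event type and data
 * @cpu_buffer: the per-cpu buffer the event belongs to
 * @event: the event to update
 * @info: the reservation info (length, delta, timestamp flags)
 *
 * Record the per-nesting-level time stamp, inject a time extend or
 * absolute time stamp in front of the data if one was requested, and
 * fill in the event header: small payloads encode their length in
 * type_len, larger (or force-aligned) ones store it in array[0].
 */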
2835 static void
2836 rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
2837 struct ring_buffer_event *event,
2838 struct rb_event_info *info)
2839 {
2840 unsigned length = info->length;
2841 u64 delta = info->delta;
2842 unsigned int nest = local_read(&cpu_buffer->committing) - 1;
2843
2844 if (!WARN_ON_ONCE(nest >= MAX_NEST))
2845 cpu_buffer->event_stamp[nest] = info->ts;
2846
2847
2848
2849
2850
2851 if (unlikely(info->add_timestamp))
2852 rb_add_timestamp(cpu_buffer, &event, info, &delta, &length);
2853
2854 event->time_delta = delta;
2855 length -= RB_EVNT_HDR_SIZE;
2856 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
2857 event->type_len = 0;
2858 event->array[0] = length;
2859 } else
2860 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
2861 }
2862
2863 static unsigned rb_calculate_event_length(unsigned length)
2864 {
2865 struct ring_buffer_event event;
2866
2867
2868 if (!length)
2869 length++;
2870
2871 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
2872 length += sizeof(event.array[0]);
2873
2874 length += RB_EVNT_HDR_SIZE;
2875 length = ALIGN(length, RB_ARCH_ALIGNMENT);
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889 if (length == RB_LEN_TIME_EXTEND + RB_ALIGNMENT)
2890 length += RB_ALIGNMENT;
2891
2892 return length;
2893 }
2894
2895 static u64 rb_time_delta(struct ring_buffer_event *event)
2896 {
2897 switch (event->type_len) {
2898 case RINGBUF_TYPE_PADDING:
2899 return 0;
2900
2901 case RINGBUF_TYPE_TIME_EXTEND:
2902 return rb_event_time_stamp(event);
2903
2904 case RINGBUF_TYPE_TIME_STAMP:
2905 return 0;
2906
2907 case RINGBUF_TYPE_DATA:
2908 return event->time_delta;
2909 default:
2910 return 0;
2911 }
2912 }
2913
2914 static inline int
2915 rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
2916 struct ring_buffer_event *event)
2917 {
2918 unsigned long new_index, old_index;
2919 struct buffer_page *bpage;
2920 unsigned long index;
2921 unsigned long addr;
2922 u64 write_stamp;
2923 u64 delta;
2924
2925 new_index = rb_event_index(event);
2926 old_index = new_index + rb_event_ts_length(event);
2927 addr = (unsigned long)event;
2928 addr &= PAGE_MASK;
2929
2930 bpage = READ_ONCE(cpu_buffer->tail_page);
2931
2932 delta = rb_time_delta(event);
2933
2934 if (!rb_time_read(&cpu_buffer->write_stamp, &write_stamp))
2935 return 0;
2936
2937
2938 barrier();
2939
2940 if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
2941 unsigned long write_mask =
2942 local_read(&bpage->write) & ~RB_WRITE_MASK;
2943 unsigned long event_length = rb_event_length(event);
2944
2945
2946 if (!rb_time_cmpxchg(&cpu_buffer->write_stamp,
2947 write_stamp, write_stamp - delta))
2948 return 0;
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958 if (!delta)
2959 rb_time_set(&cpu_buffer->before_stamp, 0);
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975 old_index += write_mask;
2976 new_index += write_mask;
2977 index = local_cmpxchg(&bpage->write, old_index, new_index);
2978 if (index == old_index) {
2979
2980 local_sub(event_length, &cpu_buffer->entries_bytes);
2981 return 1;
2982 }
2983 }
2984
2985
2986 return 0;
2987 }
2988
2989 static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
2990 {
2991 local_inc(&cpu_buffer->committing);
2992 local_inc(&cpu_buffer->commits);
2993 }
2994
2995 static __always_inline void
2996 rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
2997 {
2998 unsigned long max_count;
2999
3000
3001
3002
3003
3004
3005
3006
3007
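	/*
	 * Only interrupts and NMIs on this CPU can race with us here,
	 * and their commits nest like a stack (they complete before
	 * returning to us).  The outermost committer can therefore
	 * walk commit_page forward to the tail page and publish the
	 * commit index of each page along the way.  max_count guards
	 * against walking a corrupted list forever.
	 */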
3008 again:
3009 max_count = cpu_buffer->nr_pages * 100;
3010
3011 while (cpu_buffer->commit_page != READ_ONCE(cpu_buffer->tail_page)) {
3012 if (RB_WARN_ON(cpu_buffer, !(--max_count)))
3013 return;
3014 if (RB_WARN_ON(cpu_buffer,
3015 rb_is_reader_page(cpu_buffer->tail_page)))
3016 return;
3017 local_set(&cpu_buffer->commit_page->page->commit,
3018 rb_page_write(cpu_buffer->commit_page));
3019 rb_inc_page(&cpu_buffer->commit_page);
3020
3021 barrier();
3022 }
3023 while (rb_commit_index(cpu_buffer) !=
3024 rb_page_write(cpu_buffer->commit_page)) {
3025
3026 local_set(&cpu_buffer->commit_page->page->commit,
3027 rb_page_write(cpu_buffer->commit_page));
3028 RB_WARN_ON(cpu_buffer,
3029 local_read(&cpu_buffer->commit_page->page->commit) &
3030 ~RB_WRITE_MASK);
3031 barrier();
3032 }
3033
3034
3035 barrier();
3036
3037
3038
3039
3040
3041
3042 if (unlikely(cpu_buffer->commit_page != READ_ONCE(cpu_buffer->tail_page)))
3043 goto again;
3044 }
3045
3046 static __always_inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
3047 {
3048 unsigned long commits;
3049
3050 if (RB_WARN_ON(cpu_buffer,
3051 !local_read(&cpu_buffer->committing)))
3052 return;
3053
3054 again:
3055 commits = local_read(&cpu_buffer->commits);
3056
3057 barrier();
3058 if (local_read(&cpu_buffer->committing) == 1)
3059 rb_set_commit_to_write(cpu_buffer);
3060
3061 local_dec(&cpu_buffer->committing);
3062
3063
3064 barrier();
3065
3066
3067
3068
3069
3070
3071 if (unlikely(local_read(&cpu_buffer->commits) != commits) &&
3072 !local_read(&cpu_buffer->committing)) {
3073 local_inc(&cpu_buffer->committing);
3074 goto again;
3075 }
3076 }
3077
3078 static inline void rb_event_discard(struct ring_buffer_event *event)
3079 {
3080 if (extended_time(event))
3081 event = skip_time_extend(event);
3082
3083
3084 event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
3085 event->type_len = RINGBUF_TYPE_PADDING;
3086
3087 if (!event->time_delta)
3088 event->time_delta = 1;
3089 }
3090
3091 static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
3092 struct ring_buffer_event *event)
3093 {
3094 local_inc(&cpu_buffer->entries);
3095 rb_end_commit(cpu_buffer);
3096 }
3097
3098 static __always_inline void
3099 rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
3100 {
3101 size_t nr_pages;
3102 size_t dirty;
3103 size_t full;
3104
3105 if (buffer->irq_work.waiters_pending) {
3106 buffer->irq_work.waiters_pending = false;
3107
3108 irq_work_queue(&buffer->irq_work.work);
3109 }
3110
3111 if (cpu_buffer->irq_work.waiters_pending) {
3112 cpu_buffer->irq_work.waiters_pending = false;
3113
3114 irq_work_queue(&cpu_buffer->irq_work.work);
3115 }
3116
3117 if (cpu_buffer->last_pages_touch == local_read(&cpu_buffer->pages_touched))
3118 return;
3119
3120 if (cpu_buffer->reader_page == cpu_buffer->commit_page)
3121 return;
3122
3123 if (!cpu_buffer->irq_work.full_waiters_pending)
3124 return;
3125
3126 cpu_buffer->last_pages_touch = local_read(&cpu_buffer->pages_touched);
3127
3128 full = cpu_buffer->shortest_full;
3129 nr_pages = cpu_buffer->nr_pages;
3130 dirty = ring_buffer_nr_dirty_pages(buffer, cpu_buffer->cpu);
3131 if (full && nr_pages && (dirty * 100) <= full * nr_pages)
3132 return;
3133
3134 cpu_buffer->irq_work.wakeup_full = true;
3135 cpu_buffer->irq_work.full_waiters_pending = false;
3136
3137 irq_work_queue(&cpu_buffer->irq_work.work);
3138 }
3139
3140 #ifdef CONFIG_RING_BUFFER_RECORD_RECURSION
3141 # define do_ring_buffer_record_recursion() \
3142 do_ftrace_record_recursion(_THIS_IP_, _RET_IP_)
3143 #else
3144 # define do_ring_buffer_record_recursion() do { } while (0)
3145 #endif
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
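/*
 * Recursion protection: a CPU may have at most one write in flight per
 * context (normal, softirq, irq, NMI).  trace_recursive_lock() sets the
 * bit for the current context in cpu_buffer->current_context; if that
 * bit is already set, one extra "transition" bit is allowed to cover an
 * event that started just as an interrupt arrived, before the preempt
 * count was updated.  Anything beyond that is treated as recursion and
 * rejected via do_ring_buffer_record_recursion().
 */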
3209 static __always_inline int
3210 trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
3211 {
3212 unsigned int val = cpu_buffer->current_context;
3213 int bit = interrupt_context_level();
3214
3215 bit = RB_CTX_NORMAL - bit;
3216
3217 if (unlikely(val & (1 << (bit + cpu_buffer->nest)))) {
3218
3219
3220
3221
3222
3223 bit = RB_CTX_TRANSITION;
3224 if (val & (1 << (bit + cpu_buffer->nest))) {
3225 do_ring_buffer_record_recursion();
3226 return 1;
3227 }
3228 }
3229
3230 val |= (1 << (bit + cpu_buffer->nest));
3231 cpu_buffer->current_context = val;
3232
3233 return 0;
3234 }
3235
3236 static __always_inline void
3237 trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
3238 {
3239 cpu_buffer->current_context &=
3240 cpu_buffer->current_context - (1 << cpu_buffer->nest);
3241 }
3242
3243
3244 #define NESTED_BITS 5
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
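/*
 * ring_buffer_nest_start - allow nested writes from the same context
 * @buffer: the buffer that will be written to from a nested context
 *
 * A caller that needs to write to a second ring buffer while it is in
 * the middle of reserving or committing an event (e.g. when one trace
 * event generates another) would normally trip the recursion check
 * above.  Shifting the context bits by NESTED_BITS gives that nested
 * write its own set of context bits.  Must be paired with
 * ring_buffer_nest_end(); preemption is disabled here and re-enabled
 * there, so the caller stays on one CPU in between.
 */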
3259 void ring_buffer_nest_start(struct trace_buffer *buffer)
3260 {
3261 struct ring_buffer_per_cpu *cpu_buffer;
3262 int cpu;
3263
3264
3265 preempt_disable_notrace();
3266 cpu = raw_smp_processor_id();
3267 cpu_buffer = buffer->buffers[cpu];
3268
3269 cpu_buffer->nest += NESTED_BITS;
3270 }
3271
3272
3273
3274
3275
3276
3277
3278
3279 void ring_buffer_nest_end(struct trace_buffer *buffer)
3280 {
3281 struct ring_buffer_per_cpu *cpu_buffer;
3282 int cpu;
3283
3284
3285 cpu = raw_smp_processor_id();
3286 cpu_buffer = buffer->buffers[cpu];
3287
3288 cpu_buffer->nest -= NESTED_BITS;
3289 preempt_enable_notrace();
3290 }
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
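/*
 * ring_buffer_unlock_commit - commit a reserved event
 * @buffer: the buffer the event was reserved from
 * @event: the event to commit
 *
 * Commits the event reserved by ring_buffer_lock_reserve(), wakes any
 * waiting readers if needed, and releases the recursion lock and the
 * preemption disable taken at reservation time.  Always returns 0.
 */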
3301 int ring_buffer_unlock_commit(struct trace_buffer *buffer,
3302 struct ring_buffer_event *event)
3303 {
3304 struct ring_buffer_per_cpu *cpu_buffer;
3305 int cpu = raw_smp_processor_id();
3306
3307 cpu_buffer = buffer->buffers[cpu];
3308
3309 rb_commit(cpu_buffer, event);
3310
3311 rb_wakeups(buffer, cpu_buffer);
3312
3313 trace_recursive_unlock(cpu_buffer);
3314
3315 preempt_enable_notrace();
3316
3317 return 0;
3318 }
3319 EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
3320
3321
3322 #define CHECK_FULL_PAGE 1L
3323
3324 #ifdef CONFIG_RING_BUFFER_VALIDATE_TIME_DELTAS
3325 static void dump_buffer_page(struct buffer_data_page *bpage,
3326 struct rb_event_info *info,
3327 unsigned long tail)
3328 {
3329 struct ring_buffer_event *event;
3330 u64 ts, delta;
3331 int e;
3332
3333 ts = bpage->time_stamp;
3334 pr_warn(" [%lld] PAGE TIME STAMP\n", ts);
3335
3336 for (e = 0; e < tail; e += rb_event_length(event)) {
3337
3338 event = (struct ring_buffer_event *)(bpage->data + e);
3339
3340 switch (event->type_len) {
3341
3342 case RINGBUF_TYPE_TIME_EXTEND:
3343 delta = rb_event_time_stamp(event);
3344 ts += delta;
3345 pr_warn(" [%lld] delta:%lld TIME EXTEND\n", ts, delta);
3346 break;
3347
3348 case RINGBUF_TYPE_TIME_STAMP:
3349 delta = rb_event_time_stamp(event);
3350 ts = rb_fix_abs_ts(delta, ts);
3351 pr_warn(" [%lld] absolute:%lld TIME STAMP\n", ts, delta);
3352 break;
3353
3354 case RINGBUF_TYPE_PADDING:
3355 ts += event->time_delta;
3356 pr_warn(" [%lld] delta:%d PADDING\n", ts, event->time_delta);
3357 break;
3358
3359 case RINGBUF_TYPE_DATA:
3360 ts += event->time_delta;
3361 pr_warn(" [%lld] delta:%d\n", ts, event->time_delta);
3362 break;
3363
3364 default:
3365 break;
3366 }
3367 }
3368 }
3369
3370 static DEFINE_PER_CPU(atomic_t, checking);
3371 static atomic_t ts_dump;
3372
3373
3374
3375
3376
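/*
 * Debug check (CONFIG_RING_BUFFER_VALIDATE_TIME_DELTAS): walk the
 * events already on the tail page, accumulate their deltas from the
 * page time stamp, and verify the result is consistent with the time
 * stamp about to be written.  On a mismatch the page is dumped and
 * recording on this CPU is disabled.
 */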
3377 static void check_buffer(struct ring_buffer_per_cpu *cpu_buffer,
3378 struct rb_event_info *info,
3379 unsigned long tail)
3380 {
3381 struct ring_buffer_event *event;
3382 struct buffer_data_page *bpage;
3383 u64 ts, delta;
3384 bool full = false;
3385 int e;
3386
3387 bpage = info->tail_page->page;
3388
3389 if (tail == CHECK_FULL_PAGE) {
3390 full = true;
3391 tail = local_read(&bpage->commit);
3392 } else if (info->add_timestamp &
3393 (RB_ADD_STAMP_FORCE | RB_ADD_STAMP_ABSOLUTE)) {
3394
3395 return;
3396 }
3397
3398
3399
3400
3401
3402 if (tail <= 8 || tail > local_read(&bpage->commit))
3403 return;
3404
3405
3406
3407
3408 if (atomic_inc_return(this_cpu_ptr(&checking)) != 1)
3409 goto out;
3410
3411 ts = bpage->time_stamp;
3412
3413 for (e = 0; e < tail; e += rb_event_length(event)) {
3414
3415 event = (struct ring_buffer_event *)(bpage->data + e);
3416
3417 switch (event->type_len) {
3418
3419 case RINGBUF_TYPE_TIME_EXTEND:
3420 delta = rb_event_time_stamp(event);
3421 ts += delta;
3422 break;
3423
3424 case RINGBUF_TYPE_TIME_STAMP:
3425 delta = rb_event_time_stamp(event);
3426 ts = rb_fix_abs_ts(delta, ts);
3427 break;
3428
3429 case RINGBUF_TYPE_PADDING:
3430 if (event->time_delta == 1)
3431 break;
3432 fallthrough;
3433 case RINGBUF_TYPE_DATA:
3434 ts += event->time_delta;
3435 break;
3436
3437 default:
3438 RB_WARN_ON(cpu_buffer, 1);
3439 }
3440 }
3441 if ((full && ts > info->ts) ||
3442 (!full && ts + info->delta != info->ts)) {
3443
3444 if (atomic_inc_return(&ts_dump) != 1) {
3445 atomic_dec(&ts_dump);
3446 goto out;
3447 }
3448 atomic_inc(&cpu_buffer->record_disabled);
3449
3450 WARN_ON_ONCE(system_state != SYSTEM_BOOTING);
3451 pr_warn("[CPU: %d]TIME DOES NOT MATCH expected:%lld actual:%lld delta:%lld before:%lld after:%lld%s\n",
3452 cpu_buffer->cpu,
3453 ts + info->delta, info->ts, info->delta,
3454 info->before, info->after,
3455 full ? " (full)" : "");
3456 dump_buffer_page(bpage, info, tail);
3457 atomic_dec(&ts_dump);
3458
3459 return;
3460 }
3461 out:
3462 atomic_dec(this_cpu_ptr(&checking));
3463 }
3464 #else
3465 static inline void check_buffer(struct ring_buffer_per_cpu *cpu_buffer,
3466 struct rb_event_info *info,
3467 unsigned long tail)
3468 {
3469 }
3470 #endif
3471
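/*
 * Reserve space for an event on the tail page.  The length is added to
 * the page's write index with local_add_return(); if that pushes the
 * index past the page, rb_move_tail() switches to the next page.  The
 * before_stamp/write_stamp pair is used to detect whether an
 * interrupting writer touched the page between sampling the clock and
 * claiming the space, which decides whether the computed delta can be
 * trusted or a time extend / absolute time stamp must be injected.
 */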
3472 static struct ring_buffer_event *
3473 __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
3474 struct rb_event_info *info)
3475 {
3476 struct ring_buffer_event *event;
3477 struct buffer_page *tail_page;
3478 unsigned long tail, write, w;
3479 bool a_ok;
3480 bool b_ok;
3481
3482
3483 tail_page = info->tail_page = READ_ONCE(cpu_buffer->tail_page);
3484
3485 w = local_read(&tail_page->write) & RB_WRITE_MASK;
3486 barrier();
3487 b_ok = rb_time_read(&cpu_buffer->before_stamp, &info->before);
3488 a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after);
3489 barrier();
3490 info->ts = rb_time_stamp(cpu_buffer->buffer);
3491
3492 if ((info->add_timestamp & RB_ADD_STAMP_ABSOLUTE)) {
3493 info->delta = info->ts;
3494 } else {
3495
3496
3497
3498
3499
3500 if (unlikely(!a_ok || !b_ok || (info->before != info->after && w))) {
3501 info->add_timestamp |= RB_ADD_STAMP_FORCE | RB_ADD_STAMP_EXTEND;
3502 info->length += RB_LEN_TIME_EXTEND;
3503 } else {
3504 info->delta = info->ts - info->after;
3505 if (unlikely(test_time_stamp(info->delta))) {
3506 info->add_timestamp |= RB_ADD_STAMP_EXTEND;
3507 info->length += RB_LEN_TIME_EXTEND;
3508 }
3509 }
3510 }
3511
3512 rb_time_set(&cpu_buffer->before_stamp, info->ts);
3513
3514 write = local_add_return(info->length, &tail_page->write);
3515
3516
3517 write &= RB_WRITE_MASK;
3518
3519 tail = write - info->length;
3520
3521
3522 if (unlikely(write > BUF_PAGE_SIZE)) {
3523
3524 b_ok = rb_time_read(&cpu_buffer->before_stamp, &info->before);
3525 a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after);
3526 if (a_ok && b_ok && info->before != info->after)
3527 (void)rb_time_cmpxchg(&cpu_buffer->before_stamp,
3528 info->before, info->after);
3529 if (a_ok && b_ok)
3530 check_buffer(cpu_buffer, info, CHECK_FULL_PAGE);
3531 return rb_move_tail(cpu_buffer, tail, info);
3532 }
3533
3534 if (likely(tail == w)) {
3535 u64 save_before;
3536 bool s_ok;
3537
3538
3539 rb_time_set(&cpu_buffer->write_stamp, info->ts);
3540 barrier();
3541 s_ok = rb_time_read(&cpu_buffer->before_stamp, &save_before);
3542 RB_WARN_ON(cpu_buffer, !s_ok);
3543 if (likely(!(info->add_timestamp &
3544 (RB_ADD_STAMP_FORCE | RB_ADD_STAMP_ABSOLUTE))))
3545
3546 info->delta = info->ts - info->after;
3547 else
3548
3549 info->delta = info->ts;
3550 barrier();
3551 check_buffer(cpu_buffer, info, tail);
3552 if (unlikely(info->ts != save_before)) {
3553
3554
3555 a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after);
3556 RB_WARN_ON(cpu_buffer, !a_ok);
3557
3558
3559 if (save_before > info->after) {
3560
3561
3562
3563
3564 (void)rb_time_cmpxchg(&cpu_buffer->write_stamp,
3565 info->after, save_before);
3566 }
3567 }
3568 } else {
3569 u64 ts;
3570
3571 a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after);
3572
3573 RB_WARN_ON(cpu_buffer, !a_ok);
3574 ts = rb_time_stamp(cpu_buffer->buffer);
3575 barrier();
3576 if (write == (local_read(&tail_page->write) & RB_WRITE_MASK) &&
3577 info->after < ts &&
3578 rb_time_cmpxchg(&cpu_buffer->write_stamp,
3579 info->after, ts)) {
3580
3581 info->delta = ts - info->after;
3582 } else {
3583
3584
3585
3586
3587
3588
3589
3590
3591 info->delta = 0;
3592 }
3593 info->ts = ts;
3594 info->add_timestamp &= ~RB_ADD_STAMP_FORCE;
3595 }
3596
3597
3598
3599
3600
3601 if (unlikely(!tail && !(info->add_timestamp &
3602 (RB_ADD_STAMP_FORCE | RB_ADD_STAMP_ABSOLUTE))))
3603 info->delta = 0;
3604
3605
3606
3607 event = __rb_page_index(tail_page, tail);
3608 rb_update_event(cpu_buffer, event, info);
3609
3610 local_inc(&tail_page->entries);
3611
3612
3613
3614
3615
3616 if (unlikely(!tail))
3617 tail_page->page->time_stamp = info->ts;
3618
3619
3620 local_add(info->length, &cpu_buffer->entries_bytes);
3621
3622 return event;
3623 }
3624
3625 static __always_inline struct ring_buffer_event *
3626 rb_reserve_next_event(struct trace_buffer *buffer,
3627 struct ring_buffer_per_cpu *cpu_buffer,
3628 unsigned long length)
3629 {
3630 struct ring_buffer_event *event;
3631 struct rb_event_info info;
3632 int nr_loops = 0;
3633 int add_ts_default;
3634
3635 rb_start_commit(cpu_buffer);
3636
3637
3638 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3639
3640
3641
3642
3643
3644
3645 barrier();
3646 if (unlikely(READ_ONCE(cpu_buffer->buffer) != buffer)) {
3647 local_dec(&cpu_buffer->committing);
3648 local_dec(&cpu_buffer->commits);
3649 return NULL;
3650 }
3651 #endif
3652
3653 info.length = rb_calculate_event_length(length);
3654
3655 if (ring_buffer_time_stamp_abs(cpu_buffer->buffer)) {
3656 add_ts_default = RB_ADD_STAMP_ABSOLUTE;
3657 info.length += RB_LEN_TIME_EXTEND;
3658 } else {
3659 add_ts_default = RB_ADD_STAMP_NONE;
3660 }
3661
3662 again:
3663 info.add_timestamp = add_ts_default;
3664 info.delta = 0;
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
3676 goto out_fail;
3677
3678 event = __rb_reserve_next(cpu_buffer, &info);
3679
3680 if (unlikely(PTR_ERR(event) == -EAGAIN)) {
3681 if (info.add_timestamp & (RB_ADD_STAMP_FORCE | RB_ADD_STAMP_EXTEND))
3682 info.length -= RB_LEN_TIME_EXTEND;
3683 goto again;
3684 }
3685
3686 if (likely(event))
3687 return event;
3688 out_fail:
3689 rb_end_commit(cpu_buffer);
3690 return NULL;
3691 }
3692
3693
3694
3695
3696
3697
3698
3699
3700
3701
3702
3703
3704
3705
3706
3707
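/*
 * ring_buffer_lock_reserve - reserve space for an event
 * @buffer: the ring buffer to reserve from
 * @length: the length of the event data to reserve
 *
 * Disables preemption and reserves @length bytes of event data on the
 * current CPU's buffer.  On success the caller fills in the returned
 * event's data and must finish with ring_buffer_unlock_commit() or
 * ring_buffer_discard_commit().  Returns NULL if recording is disabled,
 * the length is too large, recursion was detected, or the event could
 * not be reserved; in that case nothing is held.
 */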
3708 struct ring_buffer_event *
3709 ring_buffer_lock_reserve(struct trace_buffer *buffer, unsigned long length)
3710 {
3711 struct ring_buffer_per_cpu *cpu_buffer;
3712 struct ring_buffer_event *event;
3713 int cpu;
3714
3715
3716 preempt_disable_notrace();
3717
3718 if (unlikely(atomic_read(&buffer->record_disabled)))
3719 goto out;
3720
3721 cpu = raw_smp_processor_id();
3722
3723 if (unlikely(!cpumask_test_cpu(cpu, buffer->cpumask)))
3724 goto out;
3725
3726 cpu_buffer = buffer->buffers[cpu];
3727
3728 if (unlikely(atomic_read(&cpu_buffer->record_disabled)))
3729 goto out;
3730
3731 if (unlikely(length > BUF_MAX_DATA_SIZE))
3732 goto out;
3733
3734 if (unlikely(trace_recursive_lock(cpu_buffer)))
3735 goto out;
3736
3737 event = rb_reserve_next_event(buffer, cpu_buffer, length);
3738 if (!event)
3739 goto out_unlock;
3740
3741 return event;
3742
3743 out_unlock:
3744 trace_recursive_unlock(cpu_buffer);
3745 out:
3746 preempt_enable_notrace();
3747 return NULL;
3748 }
3749 EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
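/*
 * Illustrative writer-side sketch (not part of this file): reserve an
 * event, fill in its data, then commit it.  It assumes the
 * ring_buffer_event_data() accessor from <linux/ring_buffer.h> and a
 * caller-defined struct my_entry:
 *
 *	struct ring_buffer_event *event;
 *	struct my_entry *entry;
 *
 *	event = ring_buffer_lock_reserve(buffer, sizeof(*entry));
 *	if (!event)
 *		return;		(recording off, too big, or recursion)
 *	entry = ring_buffer_event_data(event);
 *	entry->value = 42;
 *	ring_buffer_unlock_commit(buffer, event);
 */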
3750
3751
3752
3753
3754
3755
3756
3757 static inline void
3758 rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer,
3759 struct ring_buffer_event *event)
3760 {
3761 unsigned long addr = (unsigned long)event;
3762 struct buffer_page *bpage = cpu_buffer->commit_page;
3763 struct buffer_page *start;
3764
3765 addr &= PAGE_MASK;
3766
3767
3768 if (likely(bpage->page == (void *)addr)) {
3769 local_dec(&bpage->entries);
3770 return;
3771 }
3772
3773
3774
3775
3776
3777 rb_inc_page(&bpage);
3778 start = bpage;
3779 do {
3780 if (bpage->page == (void *)addr) {
3781 local_dec(&bpage->entries);
3782 return;
3783 }
3784 rb_inc_page(&bpage);
3785 } while (bpage != start);
3786
3787
3788 RB_WARN_ON(cpu_buffer, 1);
3789 }
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
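/*
 * ring_buffer_discard_commit - discard an event that has not been committed
 * @buffer: the ring buffer the event was reserved from
 * @event: the event to discard
 *
 * Marks a reserved-but-unwanted event as padding.  If the event is
 * still the last one on the tail page, rb_try_to_discard() reclaims the
 * space outright.  Like ring_buffer_unlock_commit(), this ends the
 * commit and releases the recursion lock and preemption taken at
 * reservation time, so it must only be used on an event that has been
 * reserved but not yet committed.
 */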
3810 void ring_buffer_discard_commit(struct trace_buffer *buffer,
3811 struct ring_buffer_event *event)
3812 {
3813 struct ring_buffer_per_cpu *cpu_buffer;
3814 int cpu;
3815
3816
3817 rb_event_discard(event);
3818
3819 cpu = smp_processor_id();
3820 cpu_buffer = buffer->buffers[cpu];
3821
3822
3823
3824
3825
3826
3827 RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing));
3828
3829 rb_decrement_entry(cpu_buffer, event);
3830 if (rb_try_to_discard(cpu_buffer, event))
3831 goto out;
3832
3833 out:
3834 rb_end_commit(cpu_buffer);
3835
3836 trace_recursive_unlock(cpu_buffer);
3837
3838 preempt_enable_notrace();
3839
3840 }
3841 EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
3842
3843
3844
3845
3846
3847
3848
3849
3850
3851
3852
3853
3854
3855
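/*
 * ring_buffer_write - write data into the buffer without reserving
 * @buffer: the ring buffer to write to
 * @length: the length of the data being written
 * @data: the data to write
 *
 * One-shot convenience path: reserve an event, copy @length bytes of
 * @data into it, and commit it immediately.  Returns 0 on success or
 * -EBUSY if the event could not be written (recording disabled, length
 * too large, recursion, or no space).
 */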
3856 int ring_buffer_write(struct trace_buffer *buffer,
3857 unsigned long length,
3858 void *data)
3859 {
3860 struct ring_buffer_per_cpu *cpu_buffer;
3861 struct ring_buffer_event *event;
3862 void *body;
3863 int ret = -EBUSY;
3864 int cpu;
3865
3866 preempt_disable_notrace();
3867
3868 if (atomic_read(&buffer->record_disabled))
3869 goto out;
3870
3871 cpu = raw_smp_processor_id();
3872
3873 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3874 goto out;
3875
3876 cpu_buffer = buffer->buffers[cpu];
3877
3878 if (atomic_read(&cpu_buffer->record_disabled))
3879 goto out;
3880
3881 if (length > BUF_MAX_DATA_SIZE)
3882 goto out;
3883
3884 if (unlikely(trace_recursive_lock(cpu_buffer)))
3885 goto out;
3886
3887 event = rb_reserve_next_event(buffer, cpu_buffer, length);
3888 if (!event)
3889 goto out_unlock;
3890
3891 body = rb_event_data(event);
3892
3893 memcpy(body, data, length);
3894
3895 rb_commit(cpu_buffer, event);
3896
3897 rb_wakeups(buffer, cpu_buffer);
3898
3899 ret = 0;
3900
3901 out_unlock:
3902 trace_recursive_unlock(cpu_buffer);
3903
3904 out:
3905 preempt_enable_notrace();
3906
3907 return ret;
3908 }
3909 EXPORT_SYMBOL_GPL(ring_buffer_write);
3910
3911 static bool rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
3912 {
3913 struct buffer_page *reader = cpu_buffer->reader_page;
3914 struct buffer_page *head = rb_set_head_page(cpu_buffer);
3915 struct buffer_page *commit = cpu_buffer->commit_page;
3916
3917
3918 if (unlikely(!head))
3919 return true;
3920
3921
3922 if (reader->read != rb_page_commit(reader))
3923 return false;
3924
3925
3926
3927
3928
3929 if (commit == reader)
3930 return true;
3931
3932
3933
3934
3935
3936 if (commit != head)
3937 return false;
3938
3939
3940
3941
3942
3943
3944 return rb_page_commit(commit) == 0;
3945 }
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956 void ring_buffer_record_disable(struct trace_buffer *buffer)
3957 {
3958 atomic_inc(&buffer->record_disabled);
3959 }
3960 EXPORT_SYMBOL_GPL(ring_buffer_record_disable);
3961
3962
3963
3964
3965
3966
3967
3968
3969 void ring_buffer_record_enable(struct trace_buffer *buffer)
3970 {
3971 atomic_dec(&buffer->record_disabled);
3972 }
3973 EXPORT_SYMBOL_GPL(ring_buffer_record_enable);
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986 void ring_buffer_record_off(struct trace_buffer *buffer)
3987 {
3988 unsigned int rd;
3989 unsigned int new_rd;
3990
3991 do {
3992 rd = atomic_read(&buffer->record_disabled);
3993 new_rd = rd | RB_BUFFER_OFF;
3994 } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
3995 }
3996 EXPORT_SYMBOL_GPL(ring_buffer_record_off);
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009 void ring_buffer_record_on(struct trace_buffer *buffer)
4010 {
4011 unsigned int rd;
4012 unsigned int new_rd;
4013
4014 do {
4015 rd = atomic_read(&buffer->record_disabled);
4016 new_rd = rd & ~RB_BUFFER_OFF;
4017 } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
4018 }
4019 EXPORT_SYMBOL_GPL(ring_buffer_record_on);
4020
4021
4022
4023
4024
4025
4026
4027 bool ring_buffer_record_is_on(struct trace_buffer *buffer)
4028 {
4029 return !atomic_read(&buffer->record_disabled);
4030 }
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043 bool ring_buffer_record_is_set_on(struct trace_buffer *buffer)
4044 {
4045 return !(atomic_read(&buffer->record_disabled) & RB_BUFFER_OFF);
4046 }
4047
4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
4058 void ring_buffer_record_disable_cpu(struct trace_buffer *buffer, int cpu)
4059 {
4060 struct ring_buffer_per_cpu *cpu_buffer;
4061
4062 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4063 return;
4064
4065 cpu_buffer = buffer->buffers[cpu];
4066 atomic_inc(&cpu_buffer->record_disabled);
4067 }
4068 EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu);
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078 void ring_buffer_record_enable_cpu(struct trace_buffer *buffer, int cpu)
4079 {
4080 struct ring_buffer_per_cpu *cpu_buffer;
4081
4082 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4083 return;
4084
4085 cpu_buffer = buffer->buffers[cpu];
4086 atomic_dec(&cpu_buffer->record_disabled);
4087 }
4088 EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
4089
4090
4091
4092
4093
4094
4095
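/* Entries written minus those overwritten or already read. */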
4096 static inline unsigned long
4097 rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
4098 {
4099 return local_read(&cpu_buffer->entries) -
4100 (local_read(&cpu_buffer->overrun) + cpu_buffer->read);
4101 }
4102
4103
4104
4105
4106
4107
4108 u64 ring_buffer_oldest_event_ts(struct trace_buffer *buffer, int cpu)
4109 {
4110 unsigned long flags;
4111 struct ring_buffer_per_cpu *cpu_buffer;
4112 struct buffer_page *bpage;
4113 u64 ret = 0;
4114
4115 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4116 return 0;
4117
4118 cpu_buffer = buffer->buffers[cpu];
4119 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4120
4121
4122
4123
4124 if (cpu_buffer->tail_page == cpu_buffer->reader_page)
4125 bpage = cpu_buffer->reader_page;
4126 else
4127 bpage = rb_set_head_page(cpu_buffer);
4128 if (bpage)
4129 ret = bpage->page->time_stamp;
4130 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4131
4132 return ret;
4133 }
4134 EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts);
4135
4136
4137
4138
4139
4140
4141 unsigned long ring_buffer_bytes_cpu(struct trace_buffer *buffer, int cpu)
4142 {
4143 struct ring_buffer_per_cpu *cpu_buffer;
4144 unsigned long ret;
4145
4146 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4147 return 0;
4148
4149 cpu_buffer = buffer->buffers[cpu];
4150 ret = local_read(&cpu_buffer->entries_bytes) - cpu_buffer->read_bytes;
4151
4152 return ret;
4153 }
4154 EXPORT_SYMBOL_GPL(ring_buffer_bytes_cpu);
4155
4156
4157
4158
4159
4160
4161 unsigned long ring_buffer_entries_cpu(struct trace_buffer *buffer, int cpu)
4162 {
4163 struct ring_buffer_per_cpu *cpu_buffer;
4164
4165 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4166 return 0;
4167
4168 cpu_buffer = buffer->buffers[cpu];
4169
4170 return rb_num_of_entries(cpu_buffer);
4171 }
4172 EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
4173
4174
4175
4176
4177
4178
4179
4180 unsigned long ring_buffer_overrun_cpu(struct trace_buffer *buffer, int cpu)
4181 {
4182 struct ring_buffer_per_cpu *cpu_buffer;
4183 unsigned long ret;
4184
4185 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4186 return 0;
4187
4188 cpu_buffer = buffer->buffers[cpu];
4189 ret = local_read(&cpu_buffer->overrun);
4190
4191 return ret;
4192 }
4193 EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
4194
4195
4196
4197
4198
4199
4200
4201
4202 unsigned long
4203 ring_buffer_commit_overrun_cpu(struct trace_buffer *buffer, int cpu)
4204 {
4205 struct ring_buffer_per_cpu *cpu_buffer;
4206 unsigned long ret;
4207
4208 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4209 return 0;
4210
4211 cpu_buffer = buffer->buffers[cpu];
4212 ret = local_read(&cpu_buffer->commit_overrun);
4213
4214 return ret;
4215 }
4216 EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu);
4217
4218
4219
4220
4221
4222
4223
4224 unsigned long
4225 ring_buffer_dropped_events_cpu(struct trace_buffer *buffer, int cpu)
4226 {
4227 struct ring_buffer_per_cpu *cpu_buffer;
4228 unsigned long ret;
4229
4230 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4231 return 0;
4232
4233 cpu_buffer = buffer->buffers[cpu];
4234 ret = local_read(&cpu_buffer->dropped_events);
4235
4236 return ret;
4237 }
4238 EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu);
4239
4240
4241
4242
4243
4244
4245 unsigned long
4246 ring_buffer_read_events_cpu(struct trace_buffer *buffer, int cpu)
4247 {
4248 struct ring_buffer_per_cpu *cpu_buffer;
4249
4250 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4251 return 0;
4252
4253 cpu_buffer = buffer->buffers[cpu];
4254 return cpu_buffer->read;
4255 }
4256 EXPORT_SYMBOL_GPL(ring_buffer_read_events_cpu);
4257
4258
4259
4260
4261
4262
4263
4264
4265 unsigned long ring_buffer_entries(struct trace_buffer *buffer)
4266 {
4267 struct ring_buffer_per_cpu *cpu_buffer;
4268 unsigned long entries = 0;
4269 int cpu;
4270
4271
4272 for_each_buffer_cpu(buffer, cpu) {
4273 cpu_buffer = buffer->buffers[cpu];
4274 entries += rb_num_of_entries(cpu_buffer);
4275 }
4276
4277 return entries;
4278 }
4279 EXPORT_SYMBOL_GPL(ring_buffer_entries);
4280
4281
4282
4283
4284
4285
4286
4287
4288 unsigned long ring_buffer_overruns(struct trace_buffer *buffer)
4289 {
4290 struct ring_buffer_per_cpu *cpu_buffer;
4291 unsigned long overruns = 0;
4292 int cpu;
4293
4294
4295 for_each_buffer_cpu(buffer, cpu) {
4296 cpu_buffer = buffer->buffers[cpu];
4297 overruns += local_read(&cpu_buffer->overrun);
4298 }
4299
4300 return overruns;
4301 }
4302 EXPORT_SYMBOL_GPL(ring_buffer_overruns);
4303
4304 static void rb_iter_reset(struct ring_buffer_iter *iter)
4305 {
4306 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
4307
4308
4309 iter->head_page = cpu_buffer->reader_page;
4310 iter->head = cpu_buffer->reader_page->read;
4311 iter->next_event = iter->head;
4312
4313 iter->cache_reader_page = iter->head_page;
4314 iter->cache_read = cpu_buffer->read;
4315
4316 if (iter->head) {
4317 iter->read_stamp = cpu_buffer->read_stamp;
4318 iter->page_stamp = cpu_buffer->reader_page->page->time_stamp;
4319 } else {
4320 iter->read_stamp = iter->head_page->page->time_stamp;
4321 iter->page_stamp = iter->read_stamp;
4322 }
4323 }
4324
4325
4326
4327
4328
4329
4330
4331
4332 void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
4333 {
4334 struct ring_buffer_per_cpu *cpu_buffer;
4335 unsigned long flags;
4336
4337 if (!iter)
4338 return;
4339
4340 cpu_buffer = iter->cpu_buffer;
4341
4342 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4343 rb_iter_reset(iter);
4344 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4345 }
4346 EXPORT_SYMBOL_GPL(ring_buffer_iter_reset);
4347
4348
4349
4350
4351
4352 int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
4353 {
4354 struct ring_buffer_per_cpu *cpu_buffer;
4355 struct buffer_page *reader;
4356 struct buffer_page *head_page;
4357 struct buffer_page *commit_page;
4358 struct buffer_page *curr_commit_page;
4359 unsigned commit;
4360 u64 curr_commit_ts;
4361 u64 commit_ts;
4362
4363 cpu_buffer = iter->cpu_buffer;
4364 reader = cpu_buffer->reader_page;
4365 head_page = cpu_buffer->head_page;
4366 commit_page = cpu_buffer->commit_page;
4367 commit_ts = commit_page->page->time_stamp;
4368
4369
4370
4371
4372
4373
4374 smp_rmb();
4375 commit = rb_page_commit(commit_page);
4376
4377 smp_rmb();
4378
4379
4380 curr_commit_page = READ_ONCE(cpu_buffer->commit_page);
4381 curr_commit_ts = READ_ONCE(curr_commit_page->page->time_stamp);
4382
4383
4384 if (curr_commit_page != commit_page ||
4385 curr_commit_ts != commit_ts)
4386 return 0;
4387
4388
4389 return ((iter->head_page == commit_page && iter->head >= commit) ||
4390 (iter->head_page == reader && commit_page == head_page &&
4391 head_page->read == commit &&
4392 iter->head == rb_page_commit(cpu_buffer->reader_page)));
4393 }
4394 EXPORT_SYMBOL_GPL(ring_buffer_iter_empty);
4395
4396 static void
4397 rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
4398 struct ring_buffer_event *event)
4399 {
4400 u64 delta;
4401
4402 switch (event->type_len) {
4403 case RINGBUF_TYPE_PADDING:
4404 return;
4405
4406 case RINGBUF_TYPE_TIME_EXTEND:
4407 delta = rb_event_time_stamp(event);
4408 cpu_buffer->read_stamp += delta;
4409 return;
4410
4411 case RINGBUF_TYPE_TIME_STAMP:
4412 delta = rb_event_time_stamp(event);
4413 delta = rb_fix_abs_ts(delta, cpu_buffer->read_stamp);
4414 cpu_buffer->read_stamp = delta;
4415 return;
4416
4417 case RINGBUF_TYPE_DATA:
4418 cpu_buffer->read_stamp += event->time_delta;
4419 return;
4420
4421 default:
4422 RB_WARN_ON(cpu_buffer, 1);
4423 }
4424 return;
4425 }
4426
4427 static void
4428 rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
4429 struct ring_buffer_event *event)
4430 {
4431 u64 delta;
4432
4433 switch (event->type_len) {
4434 case RINGBUF_TYPE_PADDING:
4435 return;
4436
4437 case RINGBUF_TYPE_TIME_EXTEND:
4438 delta = rb_event_time_stamp(event);
4439 iter->read_stamp += delta;
4440 return;
4441
4442 case RINGBUF_TYPE_TIME_STAMP:
4443 delta = rb_event_time_stamp(event);
4444 delta = rb_fix_abs_ts(delta, iter->read_stamp);
4445 iter->read_stamp = delta;
4446 return;
4447
4448 case RINGBUF_TYPE_DATA:
4449 iter->read_stamp += event->time_delta;
4450 return;
4451
4452 default:
4453 RB_WARN_ON(iter->cpu_buffer, 1);
4454 }
4455 return;
4456 }
4457
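/*
 * Swap a fresh page in for the reader: the (now empty) reader page is
 * spliced into the list in place of the head page, and the old head
 * page is handed back as the new reader page.  Any overruns that
 * happened since the last swap are reported through
 * cpu_buffer->lost_events.  Runs under cpu_buffer->lock with IRQs off;
 * rb_head_page_replace() arbitrates against a writer moving the head
 * page at the same time (hence the "spin" retry when it fails).
 */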
4458 static struct buffer_page *
4459 rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
4460 {
4461 struct buffer_page *reader = NULL;
4462 unsigned long overwrite;
4463 unsigned long flags;
4464 int nr_loops = 0;
4465 int ret;
4466
4467 local_irq_save(flags);
4468 arch_spin_lock(&cpu_buffer->lock);
4469
4470 again:
4471
4472
4473
4474
4475
4476
4477 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
4478 reader = NULL;
4479 goto out;
4480 }
4481
4482 reader = cpu_buffer->reader_page;
4483
4484
4485 if (cpu_buffer->reader_page->read < rb_page_size(reader))
4486 goto out;
4487
4488
4489 if (RB_WARN_ON(cpu_buffer,
4490 cpu_buffer->reader_page->read > rb_page_size(reader)))
4491 goto out;
4492
4493
4494 reader = NULL;
4495 if (cpu_buffer->commit_page == cpu_buffer->reader_page)
4496 goto out;
4497
4498
4499 if (rb_num_of_entries(cpu_buffer) == 0)
4500 goto out;
4501
4502
4503
4504
4505 local_set(&cpu_buffer->reader_page->write, 0);
4506 local_set(&cpu_buffer->reader_page->entries, 0);
4507 local_set(&cpu_buffer->reader_page->page->commit, 0);
4508 cpu_buffer->reader_page->real_end = 0;
4509
4510 spin:
4511
4512
4513
4514 reader = rb_set_head_page(cpu_buffer);
4515 if (!reader)
4516 goto out;
4517 cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next);
4518 cpu_buffer->reader_page->list.prev = reader->list.prev;
4519
4520
4521
4522
4523
4524
4525 cpu_buffer->pages = reader->list.prev;
4526
4527
4528 rb_set_list_to_head(&cpu_buffer->reader_page->list);
4529
4530
4531
4532
4533
4534
4535
4536
4537
4538
4539 smp_mb();
4540 overwrite = local_read(&(cpu_buffer->overrun));
4541
4542
4543
4544
4545
4546
4547
4548
4549
4550
4551
4552
4553 ret = rb_head_page_replace(reader, cpu_buffer->reader_page);
4554
4555
4556
4557
4558 if (!ret)
4559 goto spin;
4560
4561
4562
4563
4564
4565
4566 rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
4567 rb_inc_page(&cpu_buffer->head_page);
4568
4569 local_inc(&cpu_buffer->pages_read);
4570
4571
4572 cpu_buffer->reader_page = reader;
4573 cpu_buffer->reader_page->read = 0;
4574
4575 if (overwrite != cpu_buffer->last_overrun) {
4576 cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun;
4577 cpu_buffer->last_overrun = overwrite;
4578 }
4579
4580 goto again;
4581
4582 out:
4583
4584 if (reader && reader->read == 0)
4585 cpu_buffer->read_stamp = reader->page->time_stamp;
4586
4587 arch_spin_unlock(&cpu_buffer->lock);
4588 local_irq_restore(flags);
4589
4590 return reader;
4591 }
4592
4593 static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
4594 {
4595 struct ring_buffer_event *event;
4596 struct buffer_page *reader;
4597 unsigned length;
4598
4599 reader = rb_get_reader_page(cpu_buffer);
4600
4601
4602 if (RB_WARN_ON(cpu_buffer, !reader))
4603 return;
4604
4605 event = rb_reader_event(cpu_buffer);
4606
4607 if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
4608 cpu_buffer->read++;
4609
4610 rb_update_read_stamp(cpu_buffer, event);
4611
4612 length = rb_event_length(event);
4613 cpu_buffer->reader_page->read += length;
4614 }
4615
4616 static void rb_advance_iter(struct ring_buffer_iter *iter)
4617 {
4618 struct ring_buffer_per_cpu *cpu_buffer;
4619
4620 cpu_buffer = iter->cpu_buffer;
4621
4622
4623 if (iter->head == iter->next_event) {
4624
4625 if (rb_iter_head_event(iter) == NULL)
4626 return;
4627 }
4628
4629 iter->head = iter->next_event;
4630
4631
4632
4633
4634 if (iter->next_event >= rb_page_size(iter->head_page)) {
4635
4636 if (iter->head_page == cpu_buffer->commit_page)
4637 return;
4638 rb_inc_iter(iter);
4639 return;
4640 }
4641
4642 rb_update_iter_read_stamp(iter, iter->event);
4643 }
4644
4645 static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
4646 {
4647 return cpu_buffer->lost_events;
4648 }
4649
4650 static struct ring_buffer_event *
4651 rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
4652 unsigned long *lost_events)
4653 {
4654 struct ring_buffer_event *event;
4655 struct buffer_page *reader;
4656 int nr_loops = 0;
4657
4658 if (ts)
4659 *ts = 0;
4660 again:
4661
4662
4663
4664
4665
4666
4667 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
4668 return NULL;
4669
4670 reader = rb_get_reader_page(cpu_buffer);
4671 if (!reader)
4672 return NULL;
4673
4674 event = rb_reader_event(cpu_buffer);
4675
4676 switch (event->type_len) {
4677 case RINGBUF_TYPE_PADDING:
4678 if (rb_null_event(event))
4679 RB_WARN_ON(cpu_buffer, 1);
4680
4681
4682
4683
4684
4685
4686
4687
4688 return event;
4689
4690 case RINGBUF_TYPE_TIME_EXTEND:
4691
4692 rb_advance_reader(cpu_buffer);
4693 goto again;
4694
4695 case RINGBUF_TYPE_TIME_STAMP:
4696 if (ts) {
4697 *ts = rb_event_time_stamp(event);
4698 *ts = rb_fix_abs_ts(*ts, reader->page->time_stamp);
4699 ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
4700 cpu_buffer->cpu, ts);
4701 }
4702
4703 rb_advance_reader(cpu_buffer);
4704 goto again;
4705
4706 case RINGBUF_TYPE_DATA:
4707 if (ts && !(*ts)) {
4708 *ts = cpu_buffer->read_stamp + event->time_delta;
4709 ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
4710 cpu_buffer->cpu, ts);
4711 }
4712 if (lost_events)
4713 *lost_events = rb_lost_events(cpu_buffer);
4714 return event;
4715
4716 default:
4717 RB_WARN_ON(cpu_buffer, 1);
4718 }
4719
4720 return NULL;
4721 }
4722 EXPORT_SYMBOL_GPL(ring_buffer_peek);
4723
4724 static struct ring_buffer_event *
4725 rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
4726 {
4727 struct trace_buffer *buffer;
4728 struct ring_buffer_per_cpu *cpu_buffer;
4729 struct ring_buffer_event *event;
4730 int nr_loops = 0;
4731
4732 if (ts)
4733 *ts = 0;
4734
4735 cpu_buffer = iter->cpu_buffer;
4736 buffer = cpu_buffer->buffer;
4737
4738
4739
4740
4741
4742
4743 if (unlikely(iter->cache_read != cpu_buffer->read ||
4744 iter->cache_reader_page != cpu_buffer->reader_page))
4745 rb_iter_reset(iter);
4746
4747 again:
4748 if (ring_buffer_iter_empty(iter))
4749 return NULL;
4750
4751
4752
4753
4754
4755
4756
4757
4758 if (++nr_loops > 3)
4759 return NULL;
4760
4761 if (rb_per_cpu_empty(cpu_buffer))
4762 return NULL;
4763
4764 if (iter->head >= rb_page_size(iter->head_page)) {
4765 rb_inc_iter(iter);
4766 goto again;
4767 }
4768
4769 event = rb_iter_head_event(iter);
4770 if (!event)
4771 goto again;
4772
4773 switch (event->type_len) {
4774 case RINGBUF_TYPE_PADDING:
4775 if (rb_null_event(event)) {
4776 rb_inc_iter(iter);
4777 goto again;
4778 }
4779 rb_advance_iter(iter);
4780 return event;
4781
4782 case RINGBUF_TYPE_TIME_EXTEND:
4783
4784 rb_advance_iter(iter);
4785 goto again;
4786
4787 case RINGBUF_TYPE_TIME_STAMP:
4788 if (ts) {
4789 *ts = rb_event_time_stamp(event);
4790 *ts = rb_fix_abs_ts(*ts, iter->head_page->page->time_stamp);
4791 ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
4792 cpu_buffer->cpu, ts);
4793 }
4794
4795 rb_advance_iter(iter);
4796 goto again;
4797
4798 case RINGBUF_TYPE_DATA:
4799 if (ts && !(*ts)) {
4800 *ts = iter->read_stamp + event->time_delta;
4801 ring_buffer_normalize_time_stamp(buffer,
4802 cpu_buffer->cpu, ts);
4803 }
4804 return event;
4805
4806 default:
4807 RB_WARN_ON(cpu_buffer, 1);
4808 }
4809
4810 return NULL;
4811 }
4812 EXPORT_SYMBOL_GPL(ring_buffer_iter_peek);
4813
4814 static inline bool rb_reader_lock(struct ring_buffer_per_cpu *cpu_buffer)
4815 {
4816 if (likely(!in_nmi())) {
4817 raw_spin_lock(&cpu_buffer->reader_lock);
4818 return true;
4819 }
4820
4821
4822
4823
4824
4825
4826
4827
4828
4829
4830 if (raw_spin_trylock(&cpu_buffer->reader_lock))
4831 return true;
4832
4833
4834 atomic_inc(&cpu_buffer->record_disabled);
4835 return false;
4836 }
4837
4838 static inline void
4839 rb_reader_unlock(struct ring_buffer_per_cpu *cpu_buffer, bool locked)
4840 {
4841 if (likely(locked))
4842 raw_spin_unlock(&cpu_buffer->reader_lock);
4843 return;
4844 }
4845
4846
4847
4848
4849
4850
4851
4852
4853
4854
4855
4856 struct ring_buffer_event *
4857 ring_buffer_peek(struct trace_buffer *buffer, int cpu, u64 *ts,
4858 unsigned long *lost_events)
4859 {
4860 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
4861 struct ring_buffer_event *event;
4862 unsigned long flags;
4863 bool dolock;
4864
4865 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4866 return NULL;
4867
4868 again:
4869 local_irq_save(flags);
4870 dolock = rb_reader_lock(cpu_buffer);
4871 event = rb_buffer_peek(cpu_buffer, ts, lost_events);
4872 if (event && event->type_len == RINGBUF_TYPE_PADDING)
4873 rb_advance_reader(cpu_buffer);
4874 rb_reader_unlock(cpu_buffer, dolock);
4875 local_irq_restore(flags);
4876
4877 if (event && event->type_len == RINGBUF_TYPE_PADDING)
4878 goto again;
4879
4880 return event;
4881 }
4882
4883
4884
4885
4886
4887
4888 bool ring_buffer_iter_dropped(struct ring_buffer_iter *iter)
4889 {
4890 bool ret = iter->missed_events != 0;
4891
4892 iter->missed_events = 0;
4893 return ret;
4894 }
4895 EXPORT_SYMBOL_GPL(ring_buffer_iter_dropped);
4896
4897
4898
4899
4900
4901
4902
4903
4904
4905 struct ring_buffer_event *
4906 ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
4907 {
4908 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
4909 struct ring_buffer_event *event;
4910 unsigned long flags;
4911
4912 again:
4913 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4914 event = rb_iter_peek(iter, ts);
4915 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4916
4917 if (event && event->type_len == RINGBUF_TYPE_PADDING)
4918 goto again;
4919
4920 return event;
4921 }
4922
4923
4924
4925
4926
4927
4928
4929
4930
4931
4932
4933
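/*
 * ring_buffer_consume - return the next event and consume it
 * @buffer: the ring buffer to read from
 * @cpu: the CPU to read from
 * @ts: if non-NULL, set to the normalized time stamp of the event
 * @lost_events: if non-NULL, set to the number of events overwritten
 *	since the previous read
 *
 * Unlike ring_buffer_peek(), this advances past the returned event so
 * the next call returns the following one.  Returns NULL when the
 * buffer is empty.
 */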
4934 struct ring_buffer_event *
4935 ring_buffer_consume(struct trace_buffer *buffer, int cpu, u64 *ts,
4936 unsigned long *lost_events)
4937 {
4938 struct ring_buffer_per_cpu *cpu_buffer;
4939 struct ring_buffer_event *event = NULL;
4940 unsigned long flags;
4941 bool dolock;
4942
4943 again:
4944
4945 preempt_disable();
4946
4947 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4948 goto out;
4949
4950 cpu_buffer = buffer->buffers[cpu];
4951 local_irq_save(flags);
4952 dolock = rb_reader_lock(cpu_buffer);
4953
4954 event = rb_buffer_peek(cpu_buffer, ts, lost_events);
4955 if (event) {
4956 cpu_buffer->lost_events = 0;
4957 rb_advance_reader(cpu_buffer);
4958 }
4959
4960 rb_reader_unlock(cpu_buffer, dolock);
4961 local_irq_restore(flags);
4962
4963 out:
4964 preempt_enable();
4965
4966 if (event && event->type_len == RINGBUF_TYPE_PADDING)
4967 goto again;
4968
4969 return event;
4970 }
4971 EXPORT_SYMBOL_GPL(ring_buffer_consume);
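/*
 * Illustrative consumer-side sketch (not part of this file), draining
 * one CPU of a previously created struct trace_buffer; handle_entry()
 * is a caller-supplied callback and ring_buffer_event_data() comes
 * from <linux/ring_buffer.h>:
 *
 *	struct ring_buffer_event *event;
 *	unsigned long lost;
 *	u64 ts;
 *
 *	while ((event = ring_buffer_consume(buffer, cpu, &ts, &lost))) {
 *		if (lost)
 *			pr_info("lost %lu events\n", lost);
 *		handle_entry(ring_buffer_event_data(event), ts);
 *	}
 */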
4972
4973
4974
4975
4976
4977
4978
4979
4980
4981
4982
4983
4984
4985
4986
4987
4988
4989
4990
4991
4992
4993
4994 struct ring_buffer_iter *
4995 ring_buffer_read_prepare(struct trace_buffer *buffer, int cpu, gfp_t flags)
4996 {
4997 struct ring_buffer_per_cpu *cpu_buffer;
4998 struct ring_buffer_iter *iter;
4999
5000 if (!cpumask_test_cpu(cpu, buffer->cpumask))
5001 return NULL;
5002
5003 iter = kzalloc(sizeof(*iter), flags);
5004 if (!iter)
5005 return NULL;
5006
5007 iter->event = kmalloc(BUF_MAX_DATA_SIZE, flags);
5008 if (!iter->event) {
5009 kfree(iter);
5010 return NULL;
5011 }
5012
5013 cpu_buffer = buffer->buffers[cpu];
5014
5015 iter->cpu_buffer = cpu_buffer;
5016
5017 atomic_inc(&cpu_buffer->resize_disabled);
5018
5019 return iter;
5020 }
5021 EXPORT_SYMBOL_GPL(ring_buffer_read_prepare);
5022
5023
5024
5025
5026
5027
5028
5029
5030 void
5031 ring_buffer_read_prepare_sync(void)
5032 {
5033 synchronize_rcu();
5034 }
5035 EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync);
5036
5037
5038
5039
5040
5041
5042
5043
5044
5045
5046
5047
5048 void
5049 ring_buffer_read_start(struct ring_buffer_iter *iter)
5050 {
5051 struct ring_buffer_per_cpu *cpu_buffer;
5052 unsigned long flags;
5053
5054 if (!iter)
5055 return;
5056
5057 cpu_buffer = iter->cpu_buffer;
5058
5059 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
5060 arch_spin_lock(&cpu_buffer->lock);
5061 rb_iter_reset(iter);
5062 arch_spin_unlock(&cpu_buffer->lock);
5063 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
5064 }
5065 EXPORT_SYMBOL_GPL(ring_buffer_read_start);
5066
5067
5068
5069
5070
5071
5072
5073
5074 void
5075 ring_buffer_read_finish(struct ring_buffer_iter *iter)
5076 {
5077 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
5078 unsigned long flags;
5079
5080
5081
5082
5083
5084
5085
5086 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
5087 rb_check_pages(cpu_buffer);
5088 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
5089
5090 atomic_dec(&cpu_buffer->resize_disabled);
5091 kfree(iter->event);
5092 kfree(iter);
5093 }
5094 EXPORT_SYMBOL_GPL(ring_buffer_read_finish);
5095
5096
5097
5098
5099
5100
5101
5102
5103 void ring_buffer_iter_advance(struct ring_buffer_iter *iter)
5104 {
5105 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
5106 unsigned long flags;
5107
5108 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
5109
5110 rb_advance_iter(iter);
5111
5112 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
5113 }
5114 EXPORT_SYMBOL_GPL(ring_buffer_iter_advance);
5115
5116
5117
5118
5119
5120
5121 unsigned long ring_buffer_size(struct trace_buffer *buffer, int cpu)
5122 {
5123
5124
5125
5126
5127
5128
5129 if (!cpumask_test_cpu(cpu, buffer->cpumask))
5130 return 0;
5131
5132 return BUF_PAGE_SIZE * buffer->buffers[cpu]->nr_pages;
5133 }
5134 EXPORT_SYMBOL_GPL(ring_buffer_size);
5135
5136 static void
5137 rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
5138 {
5139 rb_head_page_deactivate(cpu_buffer);
5140
5141 cpu_buffer->head_page
5142 = list_entry(cpu_buffer->pages, struct buffer_page, list);
5143 local_set(&cpu_buffer->head_page->write, 0);
5144 local_set(&cpu_buffer->head_page->entries, 0);
5145 local_set(&cpu_buffer->head_page->page->commit, 0);
5146
5147 cpu_buffer->head_page->read = 0;
5148
5149 cpu_buffer->tail_page = cpu_buffer->head_page;
5150 cpu_buffer->commit_page = cpu_buffer->head_page;
5151
5152 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
5153 INIT_LIST_HEAD(&cpu_buffer->new_pages);
5154 local_set(&cpu_buffer->reader_page->write, 0);
5155 local_set(&cpu_buffer->reader_page->entries, 0);
5156 local_set(&cpu_buffer->reader_page->page->commit, 0);
5157 cpu_buffer->reader_page->read = 0;
5158
5159 local_set(&cpu_buffer->entries_bytes, 0);
5160 local_set(&cpu_buffer->overrun, 0);
5161 local_set(&cpu_buffer->commit_overrun, 0);
5162 local_set(&cpu_buffer->dropped_events, 0);
5163 local_set(&cpu_buffer->entries, 0);
5164 local_set(&cpu_buffer->committing, 0);
5165 local_set(&cpu_buffer->commits, 0);
5166 local_set(&cpu_buffer->pages_touched, 0);
5167 local_set(&cpu_buffer->pages_read, 0);
5168 cpu_buffer->last_pages_touch = 0;
5169 cpu_buffer->shortest_full = 0;
5170 cpu_buffer->read = 0;
5171 cpu_buffer->read_bytes = 0;
5172
5173 rb_time_set(&cpu_buffer->write_stamp, 0);
5174 rb_time_set(&cpu_buffer->before_stamp, 0);
5175
5176 memset(cpu_buffer->event_stamp, 0, sizeof(cpu_buffer->event_stamp));
5177
5178 cpu_buffer->lost_events = 0;
5179 cpu_buffer->last_overrun = 0;
5180
5181 rb_head_page_activate(cpu_buffer);
5182 }
5183
5184
5185 static void reset_disabled_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
5186 {
5187 unsigned long flags;
5188
5189 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
5190
5191 if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
5192 goto out;
5193
5194 arch_spin_lock(&cpu_buffer->lock);
5195
5196 rb_reset_cpu(cpu_buffer);
5197
5198 arch_spin_unlock(&cpu_buffer->lock);
5199
5200 out:
5201 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
5202 }
5203
5204
5205
5206
5207
5208
5209 void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu)
5210 {
5211 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
5212
5213 if (!cpumask_test_cpu(cpu, buffer->cpumask))
5214 return;
5215
5216
5217 mutex_lock(&buffer->mutex);
5218
5219 atomic_inc(&cpu_buffer->resize_disabled);
5220 atomic_inc(&cpu_buffer->record_disabled);
5221
5222
5223 synchronize_rcu();
5224
5225 reset_disabled_cpu_buffer(cpu_buffer);
5226
5227 atomic_dec(&cpu_buffer->record_disabled);
5228 atomic_dec(&cpu_buffer->resize_disabled);
5229
5230 mutex_unlock(&buffer->mutex);
5231 }
5232 EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
5233
5234
5235
5236
5237
5238
5239 void ring_buffer_reset_online_cpus(struct trace_buffer *buffer)
5240 {
5241 struct ring_buffer_per_cpu *cpu_buffer;
5242 int cpu;
5243
5244
5245 mutex_lock(&buffer->mutex);
5246
5247 for_each_online_buffer_cpu(buffer, cpu) {
5248 cpu_buffer = buffer->buffers[cpu];
5249
5250 atomic_inc(&cpu_buffer->resize_disabled);
5251 atomic_inc(&cpu_buffer->record_disabled);
5252 }
5253
5254
5255 synchronize_rcu();
5256
5257 for_each_online_buffer_cpu(buffer, cpu) {
5258 cpu_buffer = buffer->buffers[cpu];
5259
5260 reset_disabled_cpu_buffer(cpu_buffer);
5261
5262 atomic_dec(&cpu_buffer->record_disabled);
5263 atomic_dec(&cpu_buffer->resize_disabled);
5264 }
5265
5266 mutex_unlock(&buffer->mutex);
5267 }
5268
5269
5270
5271
5272
5273 void ring_buffer_reset(struct trace_buffer *buffer)
5274 {
5275 struct ring_buffer_per_cpu *cpu_buffer;
5276 int cpu;
5277
5278
5279 mutex_lock(&buffer->mutex);
5280
5281 for_each_buffer_cpu(buffer, cpu) {
5282 cpu_buffer = buffer->buffers[cpu];
5283
5284 atomic_inc(&cpu_buffer->resize_disabled);
5285 atomic_inc(&cpu_buffer->record_disabled);
5286 }
5287
5288
5289 synchronize_rcu();
5290
5291 for_each_buffer_cpu(buffer, cpu) {
5292 cpu_buffer = buffer->buffers[cpu];
5293
5294 reset_disabled_cpu_buffer(cpu_buffer);
5295
5296 atomic_dec(&cpu_buffer->record_disabled);
5297 atomic_dec(&cpu_buffer->resize_disabled);
5298 }
5299
5300 mutex_unlock(&buffer->mutex);
5301 }
5302 EXPORT_SYMBOL_GPL(ring_buffer_reset);
5303
5304
5305
5306
5307
5308 bool ring_buffer_empty(struct trace_buffer *buffer)
5309 {
5310 struct ring_buffer_per_cpu *cpu_buffer;
5311 unsigned long flags;
5312 bool dolock;
5313 int cpu;
5314 int ret;
5315
5316
5317 for_each_buffer_cpu(buffer, cpu) {
5318 cpu_buffer = buffer->buffers[cpu];
5319 local_irq_save(flags);
5320 dolock = rb_reader_lock(cpu_buffer);
5321 ret = rb_per_cpu_empty(cpu_buffer);
5322 rb_reader_unlock(cpu_buffer, dolock);
5323 local_irq_restore(flags);
5324
5325 if (!ret)
5326 return false;
5327 }
5328
5329 return true;
5330 }
5331 EXPORT_SYMBOL_GPL(ring_buffer_empty);
5332
5333
5334
5335
5336
5337
5338 bool ring_buffer_empty_cpu(struct trace_buffer *buffer, int cpu)
5339 {
5340 struct ring_buffer_per_cpu *cpu_buffer;
5341 unsigned long flags;
5342 bool dolock;
5343 int ret;
5344
5345 if (!cpumask_test_cpu(cpu, buffer->cpumask))
5346 return true;
5347
5348 cpu_buffer = buffer->buffers[cpu];
5349 local_irq_save(flags);
5350 dolock = rb_reader_lock(cpu_buffer);
5351 ret = rb_per_cpu_empty(cpu_buffer);
5352 rb_reader_unlock(cpu_buffer, dolock);
5353 local_irq_restore(flags);
5354
5355 return ret;
5356 }
5357 EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu);
5358
5359 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
5360
5361
5362
5363
5364
5365
5366
5367
5368
5369
5370
5371 int ring_buffer_swap_cpu(struct trace_buffer *buffer_a,
5372 struct trace_buffer *buffer_b, int cpu)
5373 {
5374 struct ring_buffer_per_cpu *cpu_buffer_a;
5375 struct ring_buffer_per_cpu *cpu_buffer_b;
5376 int ret = -EINVAL;
5377
5378 if (!cpumask_test_cpu(cpu, buffer_a->cpumask) ||
5379 !cpumask_test_cpu(cpu, buffer_b->cpumask))
5380 goto out;
5381
5382 cpu_buffer_a = buffer_a->buffers[cpu];
5383 cpu_buffer_b = buffer_b->buffers[cpu];
5384
5385
5386 if (cpu_buffer_a->nr_pages != cpu_buffer_b->nr_pages)
5387 goto out;
5388
5389 ret = -EAGAIN;
5390
5391 if (atomic_read(&buffer_a->record_disabled))
5392 goto out;
5393
5394 if (atomic_read(&buffer_b->record_disabled))
5395 goto out;
5396
5397 if (atomic_read(&cpu_buffer_a->record_disabled))
5398 goto out;
5399
5400 if (atomic_read(&cpu_buffer_b->record_disabled))
5401 goto out;
5402
5403
5404
5405
5406
5407
5408
5409 atomic_inc(&cpu_buffer_a->record_disabled);
5410 atomic_inc(&cpu_buffer_b->record_disabled);
5411
5412 ret = -EBUSY;
5413 if (local_read(&cpu_buffer_a->committing))
5414 goto out_dec;
5415 if (local_read(&cpu_buffer_b->committing))
5416 goto out_dec;
5417
5418 buffer_a->buffers[cpu] = cpu_buffer_b;
5419 buffer_b->buffers[cpu] = cpu_buffer_a;
5420
5421 cpu_buffer_b->buffer = buffer_a;
5422 cpu_buffer_a->buffer = buffer_b;
5423
5424 ret = 0;
5425
5426 out_dec:
5427 atomic_dec(&cpu_buffer_a->record_disabled);
5428 atomic_dec(&cpu_buffer_b->record_disabled);
5429 out:
5430 return ret;
5431 }
5432 EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
5433 #endif
5434
5435
5436
5437
5438
5439
5440
5441
5442
5443
5444
5445
5446
5447
5448
5449
5450
5451 void *ring_buffer_alloc_read_page(struct trace_buffer *buffer, int cpu)
5452 {
5453 struct ring_buffer_per_cpu *cpu_buffer;
5454 struct buffer_data_page *bpage = NULL;
5455 unsigned long flags;
5456 struct page *page;
5457
5458 if (!cpumask_test_cpu(cpu, buffer->cpumask))
5459 return ERR_PTR(-ENODEV);
5460
5461 cpu_buffer = buffer->buffers[cpu];
5462 local_irq_save(flags);
5463 arch_spin_lock(&cpu_buffer->lock);
5464
5465 if (cpu_buffer->free_page) {
5466 bpage = cpu_buffer->free_page;
5467 cpu_buffer->free_page = NULL;
5468 }
5469
5470 arch_spin_unlock(&cpu_buffer->lock);
5471 local_irq_restore(flags);
5472
5473 if (bpage)
5474 goto out;
5475
5476 page = alloc_pages_node(cpu_to_node(cpu),
5477 GFP_KERNEL | __GFP_NORETRY, 0);
5478 if (!page)
5479 return ERR_PTR(-ENOMEM);
5480
5481 bpage = page_address(page);
5482
5483 out:
5484 rb_init_page(bpage);
5485
5486 return bpage;
5487 }
5488 EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page);
5489
5490
5491
5492
5493
5494
5495
5496
5497
5498 void ring_buffer_free_read_page(struct trace_buffer *buffer, int cpu, void *data)
5499 {
5500 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
5501 struct buffer_data_page *bpage = data;
5502 struct page *page = virt_to_page(bpage);
5503 unsigned long flags;
5504
5505
5506 if (page_ref_count(page) > 1)
5507 goto out;
5508
5509 local_irq_save(flags);
5510 arch_spin_lock(&cpu_buffer->lock);
5511
5512 if (!cpu_buffer->free_page) {
5513 cpu_buffer->free_page = bpage;
5514 bpage = NULL;
5515 }
5516
5517 arch_spin_unlock(&cpu_buffer->lock);
5518 local_irq_restore(flags);
5519
5520 out:
5521 free_page((unsigned long)bpage);
5522 }
5523 EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
5524
5525
5526
5527
5528
5529
5530
5531
5532
5533
5534
5535
5536
5537
5538
5539
5540
5541
5542
5543
5544
5545
5546
5547
5548
5549
5550
5551
5552
5553
5554
5555
5556
5557
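/*
 * ring_buffer_read_page - extract a full page of data from the buffer
 * @buffer: the ring buffer to read from
 * @data_page: pointer to the page allocated with
 *	ring_buffer_alloc_read_page()
 * @len: amount of data to read into the page
 * @cpu: the CPU to read from
 * @full: if non-zero, only succeed when a whole page can be handed over
 *
 * If the reader page can be handed over wholesale it is swapped with
 * the caller's page (zero copy); otherwise events are copied out one
 * by one.  Returns the offset of the first read event (>= 0) on
 * success, or a negative value if nothing could be read.  The number
 * of missed events, if any, is stored after the committed data and
 * flagged in the commit field.
 */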
5558 int ring_buffer_read_page(struct trace_buffer *buffer,
5559 void **data_page, size_t len, int cpu, int full)
5560 {
5561 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
5562 struct ring_buffer_event *event;
5563 struct buffer_data_page *bpage;
5564 struct buffer_page *reader;
5565 unsigned long missed_events;
5566 unsigned long flags;
5567 unsigned int commit;
5568 unsigned int read;
5569 u64 save_timestamp;
5570 int ret = -1;
5571
5572 if (!cpumask_test_cpu(cpu, buffer->cpumask))
5573 goto out;
5574
5575
5576
5577
5578
5579 if (len <= BUF_PAGE_HDR_SIZE)
5580 goto out;
5581
5582 len -= BUF_PAGE_HDR_SIZE;
5583
5584 if (!data_page)
5585 goto out;
5586
5587 bpage = *data_page;
5588 if (!bpage)
5589 goto out;
5590
5591 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
5592
5593 reader = rb_get_reader_page(cpu_buffer);
5594 if (!reader)
5595 goto out_unlock;
5596
5597 event = rb_reader_event(cpu_buffer);
5598
5599 read = reader->read;
5600 commit = rb_page_commit(reader);
5601
	/* Check if any events were dropped */
5603 missed_events = cpu_buffer->lost_events;
5604
	/*
	 * If this page has been partially read, or if len is not
	 * big enough to hold what is left on the page, or if a
	 * writer is still on the page, then we must copy the data
	 * event by event into the caller's page.  Otherwise we can
	 * simply swap the whole page with the one passed in.
	 */
5612 if (read || (len < (commit - read)) ||
5613 cpu_buffer->reader_page == cpu_buffer->commit_page) {
5614 struct buffer_data_page *rpage = cpu_buffer->reader_page->page;
5615 unsigned int rpos = read;
5616 unsigned int pos = 0;
5617 unsigned int size;
5618
5619 if (full)
5620 goto out_unlock;
5621
5622 if (len > (commit - read))
5623 len = (commit - read);
5624
		/* Always keep a time extend and its data together */
5626 size = rb_event_ts_length(event);
5627
5628 if (len < size)
5629 goto out_unlock;
5630
		/* save the current timestamp, since the user will need it */
5632 save_timestamp = cpu_buffer->read_stamp;
5633
		/* Need to copy one event at a time */
5635 do {
			/*
			 * rb_advance_reader() only moves forward by one
			 * event, while rb_event_ts_length() may include
			 * the size of one or two events (a time extend
			 * plus its data).  Copy exactly one event here.
			 */
5642 size = rb_event_length(event);
5643 memcpy(bpage->data + pos, rpage->data + rpos, size);
5644
5645 len -= size;
5646
5647 rb_advance_reader(cpu_buffer);
5648 rpos = reader->read;
5649 pos += size;
5650
5651 if (rpos >= commit)
5652 break;
5653
5654 event = rb_reader_event(cpu_buffer);
5655
5656 size = rb_event_ts_length(event);
5657 } while (len >= size);
5658
5659
5660 local_set(&bpage->commit, pos);
5661 bpage->time_stamp = save_timestamp;
5662
		/* we copied everything to the beginning of the passed-in page */
5664 read = 0;
5665 } else {
		/* update the read entry and byte counters */
5667 cpu_buffer->read += rb_page_entries(reader);
5668 cpu_buffer->read_bytes += BUF_PAGE_SIZE;
5669
		/* swap the reader page with the page passed in */
5671 rb_init_page(bpage);
5672 bpage = reader->page;
5673 reader->page = *data_page;
5674 local_set(&reader->write, 0);
5675 local_set(&reader->entries, 0);
5676 reader->read = 0;
5677 *data_page = bpage;
5678
		/*
		 * Use the real_end for the data size.  This gives us a
		 * chance to store the lost events count on the page as
		 * well.
		 */
5684 if (reader->real_end)
5685 local_set(&bpage->commit, reader->real_end);
5686 }
5687 ret = read;
5688
5689 cpu_buffer->lost_events = 0;
5690
5691 commit = local_read(&bpage->commit);
5692
	/*
	 * Set a flag in the commit field if we lost events
	 */
5695 if (missed_events) {
		/*
		 * If there is room at the end of the page to save the
		 * missed events count, then record it there.
		 */
5699 if (BUF_PAGE_SIZE - commit >= sizeof(missed_events)) {
5700 memcpy(&bpage->data[commit], &missed_events,
5701 sizeof(missed_events));
5702 local_add(RB_MISSED_STORED, &bpage->commit);
5703 commit += sizeof(missed_events);
5704 }
5705 local_add(RB_MISSED_EVENTS, &bpage->commit);
5706 }
5707
	/*
	 * This page may be handed off to user space.  Zero out the
	 * unused area beyond the commit so no stale data leaks.
	 */
5711 if (commit < BUF_PAGE_SIZE)
5712 memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit);
5713
5714 out_unlock:
5715 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
5716
5717 out:
5718 return ret;
5719 }
5720 EXPORT_SYMBOL_GPL(ring_buffer_read_page);
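
/*
 * Illustrative sketch (not part of this file): draining one CPU with the
 * page-based interface above.  consume_cpu() and handle_data() are made-up
 * names; a real user would also decide when to sleep and when to retry:
 *
 *	static int consume_cpu(struct trace_buffer *buffer, int cpu)
 *	{
 *		void *page;
 *		int ret;
 *
 *		page = ring_buffer_alloc_read_page(buffer, cpu);
 *		if (IS_ERR(page))
 *			return PTR_ERR(page);
 *
 *		// Keep reading until the reader catches up with the writer.
 *		while ((ret = ring_buffer_read_page(buffer, &page,
 *						    PAGE_SIZE, cpu, 0)) >= 0) {
 *			// @page may now be a different page than the one
 *			// passed in: full sub-buffers are swapped out of the
 *			// ring buffer rather than copied.  @ret is the offset
 *			// of the first event within the page.
 *			handle_data(page, ret);
 *		}
 *
 *		ring_buffer_free_read_page(buffer, cpu, page);
 *		return 0;
 *	}
 */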
5721
/*
 * We only allocate new buffers; we never free them when a CPU goes down.
 * If we were to free the per-CPU buffer, the user would lose any trace
 * that was in it.
 */
5727 int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node)
5728 {
5729 struct trace_buffer *buffer;
5730 long nr_pages_same;
5731 int cpu_i;
5732 unsigned long nr_pages;
5733
5734 buffer = container_of(node, struct trace_buffer, node);
5735 if (cpumask_test_cpu(cpu, buffer->cpumask))
5736 return 0;
5737
5738 nr_pages = 0;
5739 nr_pages_same = 1;

	/* check if all cpu sizes are the same */
5741 for_each_buffer_cpu(buffer, cpu_i) {
		/* fill in the size from the first enabled cpu */
5743 if (nr_pages == 0)
5744 nr_pages = buffer->buffers[cpu_i]->nr_pages;
5745 if (nr_pages != buffer->buffers[cpu_i]->nr_pages) {
5746 nr_pages_same = 0;
5747 break;
5748 }
5749 }

	/* allocate minimum pages, user can later expand it */
5751 if (!nr_pages_same)
5752 nr_pages = 2;
5753 buffer->buffers[cpu] =
5754 rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
5755 if (!buffer->buffers[cpu]) {
5756 WARN(1, "failed to allocate ring buffer on CPU %u\n",
5757 cpu);
5758 return -ENOMEM;
5759 }
5760 smp_wmb();
5761 cpumask_set_cpu(cpu, buffer->cpumask);
5762 return 0;
5763 }
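
/*
 * trace_rb_cpu_prepare() is written as a CPU hotplug "prepare" callback:
 * it gives a buffer a per-CPU ring buffer before the incoming CPU can
 * write to it.  A sketch of the expected wiring, assuming the
 * multi-instance hotplug API and the CPUHP_TRACE_RB_PREPARE state (the
 * actual registration lives in the tracing core, not in this file, and
 * the state name string below is only illustrative):
 *
 *	// Once at init time: attach the callback to the hotplug state.
 *	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
 *				      "trace/RB:prepare",
 *				      trace_rb_cpu_prepare, NULL);
 *
 *	// For each buffer that should follow hotplug, after allocation:
 *	ret = cpuhp_state_add_instance(CPUHP_TRACE_RB_PREPARE,
 *				       &buffer->node);
 */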
5764
5765 #ifdef CONFIG_RING_BUFFER_STARTUP_TEST
/*
 * This is a basic integrity check of the ring buffer.
 * Late in the boot cycle this test will run when configured in.
 * It will kick off a thread per CPU that will go into a loop
 * writing to the per cpu ring buffer various sizes of data.
 * Some of the data will be large items, some small.
 *
 * Another thread is created that goes into a spin, sending out
 * IPIs to the other CPUs to also write into the ring buffer.
 * This is to test the nesting ability of the buffer.
 *
 * Basic stats are recorded and reported.  If something in the
 * ring buffer should happen that's not expected, a big warning
 * is displayed and all ring buffers are disabled.
 */
5781 static struct task_struct *rb_threads[NR_CPUS] __initdata;
5782
5783 struct rb_test_data {
5784 struct trace_buffer *buffer;
5785 unsigned long events;
5786 unsigned long bytes_written;
5787 unsigned long bytes_alloc;
5788 unsigned long bytes_dropped;
5789 unsigned long events_nested;
5790 unsigned long bytes_written_nested;
5791 unsigned long bytes_alloc_nested;
5792 unsigned long bytes_dropped_nested;
5793 int min_size_nested;
5794 int max_size_nested;
5795 int max_size;
5796 int min_size;
5797 int cpu;
5798 int cnt;
5799 };
5800
5801 static struct rb_test_data rb_data[NR_CPUS] __initdata;
5802
/* 1 meg per cpu */
5804 #define RB_TEST_BUFFER_SIZE 1048576
5805
5806 static char rb_string[] __initdata =
5807 "abcdefghijklmnopqrstuvwxyz1234567890!@#$%^&*()?+\\"
5808 "?+|:';\",.<>/?abcdefghijklmnopqrstuvwxyz1234567890"
5809 "!@#$%^&*()?+\\?+|:';\",.<>/?abcdefghijklmnopqrstuv";
5810
5811 static bool rb_test_started __initdata;
5812
5813 struct rb_item {
5814 int size;
5815 char str[];
5816 };
5817
5818 static __init int rb_write_something(struct rb_test_data *data, bool nested)
5819 {
5820 struct ring_buffer_event *event;
5821 struct rb_item *item;
5822 bool started;
5823 int event_len;
5824 int size;
5825 int len;
5826 int cnt;
5827
	/* Have nested writes pick different sizes than normal writes */
5829 cnt = data->cnt + (nested ? 27 : 0);
5830
	/* Multiply cnt by ~e (68/25) to get a pseudo-unique size per write */
5832 size = (cnt * 68 / 25) % (sizeof(rb_string) - 1);
5833
5834 len = size + sizeof(struct rb_item);
5835
5836 started = rb_test_started;
5837
5838 smp_rmb();
5839
5840 event = ring_buffer_lock_reserve(data->buffer, len);
5841 if (!event) {
		/* Ignore dropped events before the test starts. */
		if (started) {
			if (nested)
				data->bytes_dropped_nested += len;
			else
				data->bytes_dropped += len;
		}
5849 return len;
5850 }
5851
5852 event_len = ring_buffer_event_length(event);
5853
5854 if (RB_WARN_ON(data->buffer, event_len < len))
5855 goto out;
5856
5857 item = ring_buffer_event_data(event);
5858 item->size = size;
5859 memcpy(item->str, rb_string, size);
5860
5861 if (nested) {
5862 data->bytes_alloc_nested += event_len;
5863 data->bytes_written_nested += len;
5864 data->events_nested++;
5865 if (!data->min_size_nested || len < data->min_size_nested)
5866 data->min_size_nested = len;
5867 if (len > data->max_size_nested)
5868 data->max_size_nested = len;
5869 } else {
5870 data->bytes_alloc += event_len;
5871 data->bytes_written += len;
5872 data->events++;
		if (!data->min_size || len < data->min_size)
			data->min_size = len;
5875 if (len > data->max_size)
5876 data->max_size = len;
5877 }
5878
5879 out:
5880 ring_buffer_unlock_commit(data->buffer, event);
5881
5882 return 0;
5883 }
5884
5885 static __init int rb_test(void *arg)
5886 {
5887 struct rb_test_data *data = arg;
5888
5889 while (!kthread_should_stop()) {
5890 rb_write_something(data, false);
5891 data->cnt++;
5892
5893 set_current_state(TASK_INTERRUPTIBLE);
		/* Sleep between 100-300us (min) and 1ms (max) */
5895 usleep_range(((data->cnt % 3) + 1) * 100, 1000);
5896 }
5897
5898 return 0;
5899 }
5900
5901 static __init void rb_ipi(void *ignore)
5902 {
5903 struct rb_test_data *data;
5904 int cpu = smp_processor_id();
5905
5906 data = &rb_data[cpu];
5907 rb_write_something(data, true);
5908 }
5909
5910 static __init int rb_hammer_test(void *arg)
5911 {
5912 while (!kthread_should_stop()) {
		/* Send an IPI to all cpus to write data! */
5915 smp_call_function(rb_ipi, NULL, 1);
5916
5917 schedule();
5918 }
5919
5920 return 0;
5921 }
5922
5923 static __init int test_ringbuffer(void)
5924 {
5925 struct task_struct *rb_hammer;
5926 struct trace_buffer *buffer;
5927 int cpu;
5928 int ret = 0;
5929
5930 if (security_locked_down(LOCKDOWN_TRACEFS)) {
5931 pr_warn("Lockdown is enabled, skipping ring buffer tests\n");
5932 return 0;
5933 }
5934
5935 pr_info("Running ring buffer tests...\n");
5936
5937 buffer = ring_buffer_alloc(RB_TEST_BUFFER_SIZE, RB_FL_OVERWRITE);
5938 if (WARN_ON(!buffer))
5939 return 0;
5940
	/* Keep the buffer disabled until all writer threads are created */
5942 ring_buffer_record_off(buffer);
5943
5944 for_each_online_cpu(cpu) {
5945 rb_data[cpu].buffer = buffer;
5946 rb_data[cpu].cpu = cpu;
5947 rb_data[cpu].cnt = cpu;
5948 rb_threads[cpu] = kthread_run_on_cpu(rb_test, &rb_data[cpu],
5949 cpu, "rbtester/%u");
5950 if (WARN_ON(IS_ERR(rb_threads[cpu]))) {
5951 pr_cont("FAILED\n");
5952 ret = PTR_ERR(rb_threads[cpu]);
5953 goto out_free;
5954 }
5955 }
5956
	/* Now create the hammer thread that IPIs the other CPUs */
5958 rb_hammer = kthread_run(rb_hammer_test, NULL, "rbhammer");
5959 if (WARN_ON(IS_ERR(rb_hammer))) {
5960 pr_cont("FAILED\n");
5961 ret = PTR_ERR(rb_hammer);
5962 goto out_free;
5963 }
5964
5965 ring_buffer_record_on(buffer);
	/*
	 * Show the buffer is enabled before setting rb_test_started.
	 * Yes, there's a small race window where events could be
	 * dropped and the thread won't catch it.  But when a ring
	 * buffer gets enabled, there will always be some kind of
	 * delay before other CPUs see it.  Thus, we don't care about
	 * those dropped events.  We care about events dropped after
	 * the threads see that the buffer is enabled.
	 */
5975 smp_wmb();
5976 rb_test_started = true;
5977
5978 set_current_state(TASK_INTERRUPTIBLE);
	/* Just run the test for 10 seconds */
5980 schedule_timeout(10 * HZ);
5981
5982 kthread_stop(rb_hammer);
5983
5984 out_free:
5985 for_each_online_cpu(cpu) {
5986 if (!rb_threads[cpu])
5987 break;
5988 kthread_stop(rb_threads[cpu]);
5989 }
5990 if (ret) {
5991 ring_buffer_free(buffer);
5992 return ret;
5993 }
5994
5995
5996 pr_info("finished\n");
5997 for_each_online_cpu(cpu) {
5998 struct ring_buffer_event *event;
5999 struct rb_test_data *data = &rb_data[cpu];
6000 struct rb_item *item;
6001 unsigned long total_events;
6002 unsigned long total_dropped;
6003 unsigned long total_written;
6004 unsigned long total_alloc;
6005 unsigned long total_read = 0;
6006 unsigned long total_size = 0;
6007 unsigned long total_len = 0;
6008 unsigned long total_lost = 0;
6009 unsigned long lost;
6010 int big_event_size;
6011 int small_event_size;
6012
6013 ret = -1;
6014
6015 total_events = data->events + data->events_nested;
6016 total_written = data->bytes_written + data->bytes_written_nested;
6017 total_alloc = data->bytes_alloc + data->bytes_alloc_nested;
6018 total_dropped = data->bytes_dropped + data->bytes_dropped_nested;
6019
6020 big_event_size = data->max_size + data->max_size_nested;
6021 small_event_size = data->min_size + data->min_size_nested;
6022
6023 pr_info("CPU %d:\n", cpu);
6024 pr_info(" events: %ld\n", total_events);
6025 pr_info(" dropped bytes: %ld\n", total_dropped);
6026 pr_info(" alloced bytes: %ld\n", total_alloc);
6027 pr_info(" written bytes: %ld\n", total_written);
6028 pr_info(" biggest event: %d\n", big_event_size);
6029 pr_info(" smallest event: %d\n", small_event_size);
6030
6031 if (RB_WARN_ON(buffer, total_dropped))
6032 break;
6033
6034 ret = 0;
6035
6036 while ((event = ring_buffer_consume(buffer, cpu, NULL, &lost))) {
6037 total_lost += lost;
6038 item = ring_buffer_event_data(event);
6039 total_len += ring_buffer_event_length(event);
6040 total_size += item->size + sizeof(struct rb_item);
6041 if (memcmp(&item->str[0], rb_string, item->size) != 0) {
6042 pr_info("FAILED!\n");
6043 pr_info("buffer had: %.*s\n", item->size, item->str);
6044 pr_info("expected: %.*s\n", item->size, rb_string);
6045 RB_WARN_ON(buffer, 1);
6046 ret = -1;
6047 break;
6048 }
6049 total_read++;
6050 }
6051 if (ret)
6052 break;
6053
6054 ret = -1;
6055
6056 pr_info(" read events: %ld\n", total_read);
6057 pr_info(" lost events: %ld\n", total_lost);
6058 pr_info(" total events: %ld\n", total_lost + total_read);
6059 pr_info(" recorded len bytes: %ld\n", total_len);
6060 pr_info(" recorded size bytes: %ld\n", total_size);
6061 if (total_lost) {
6062 pr_info(" With dropped events, record len and size may not match\n"
6063 " alloced and written from above\n");
6064 } else {
6065 if (RB_WARN_ON(buffer, total_len != total_alloc ||
6066 total_size != total_written))
6067 break;
6068 }
6069 if (RB_WARN_ON(buffer, total_lost + total_read != total_events))
6070 break;
6071
6072 ret = 0;
6073 }
6074 if (!ret)
6075 pr_info("Ring buffer PASSED!\n");
6076
6077 ring_buffer_free(buffer);
6078 return 0;
6079 }
6080
6081 late_initcall(test_ringbuffer);
6082 #endif