// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2021 VMware Inc, Steven Rostedt <rostedt@goodmis.org>
 */
#include <linux/spinlock.h>
#include <linux/irq_work.h>
#include <linux/slab.h>
#include "trace.h"

/* See pid_list.h for details */

static inline union lower_chunk *get_lower_chunk(struct trace_pid_list *pid_list)
{
    union lower_chunk *chunk;

    lockdep_assert_held(&pid_list->lock);

    if (!pid_list->lower_list)
        return NULL;

    chunk = pid_list->lower_list;
    pid_list->lower_list = chunk->next;
    pid_list->free_lower_chunks--;
    WARN_ON_ONCE(pid_list->free_lower_chunks < 0);
    chunk->next = NULL;
    /*
     * If a refill needs to happen, it can not happen here
     * as the scheduler run queue locks are held.
     */
    if (pid_list->free_lower_chunks <= CHUNK_REALLOC)
        irq_work_queue(&pid_list->refill_irqwork);

    return chunk;
}

static inline union upper_chunk *get_upper_chunk(struct trace_pid_list *pid_list)
{
    union upper_chunk *chunk;

    lockdep_assert_held(&pid_list->lock);

    if (!pid_list->upper_list)
        return NULL;

    chunk = pid_list->upper_list;
    pid_list->upper_list = chunk->next;
    pid_list->free_upper_chunks--;
    WARN_ON_ONCE(pid_list->free_upper_chunks < 0);
    chunk->next = NULL;
    /*
     * If a refill needs to happen, it can not happen here
     * as the scheduler run queue locks are held.
     */
    if (pid_list->free_upper_chunks <= CHUNK_REALLOC)
        irq_work_queue(&pid_list->refill_irqwork);

    return chunk;
}

static inline void put_lower_chunk(struct trace_pid_list *pid_list,
                   union lower_chunk *chunk)
{
    lockdep_assert_held(&pid_list->lock);

    chunk->next = pid_list->lower_list;
    pid_list->lower_list = chunk;
    pid_list->free_lower_chunks++;
}

static inline void put_upper_chunk(struct trace_pid_list *pid_list,
                   union upper_chunk *chunk)
{
    lockdep_assert_held(&pid_list->lock);

    chunk->next = pid_list->upper_list;
    pid_list->upper_list = chunk;
    pid_list->free_upper_chunks++;
}

static inline bool upper_empty(union upper_chunk *chunk)
{
    /*
     * If chunk->data has no lower chunks, it will be the same
     * as a zeroed bitmask. Use find_first_bit() to test it
     * and if it doesn't find any bits set, then the array
     * is empty.
     */
    int bit = find_first_bit((unsigned long *)chunk->data,
                 sizeof(chunk->data) * 8);
    return bit >= sizeof(chunk->data) * 8;
}

static inline int pid_split(unsigned int pid, unsigned int *upper1,
                 unsigned int *upper2, unsigned int *lower)
{
    /* MAX_PID should cover all pids */
    BUILD_BUG_ON(MAX_PID < PID_MAX_LIMIT);

    /* In case a bad pid is passed in, then fail */
    if (unlikely(pid >= MAX_PID))
        return -1;

    *upper1 = (pid >> UPPER1_SHIFT) & UPPER_MASK;
    *upper2 = (pid >> UPPER2_SHIFT) & UPPER_MASK;
    *lower = pid & LOWER_MASK;

    return 0;
}

static inline unsigned int pid_join(unsigned int upper1,
                    unsigned int upper2, unsigned int lower)
{
    return ((upper1 & UPPER_MASK) << UPPER1_SHIFT) |
        ((upper2 & UPPER_MASK) << UPPER2_SHIFT) |
        (lower & LOWER_MASK);
}

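/*
 * Illustrative sketch (not part of the original file): pid_split() and
 * pid_join() are inverses for any pid below MAX_PID, and the three indexes
 * they produce select pid_list->upper[upper1]->data[upper2], bit "lower"
 * (see trace_pid_list_is_set() below). The helper name is hypothetical.
 */
static inline bool example_split_roundtrip(unsigned int pid)
{
    unsigned int upper1, upper2, lower;

    if (pid_split(pid, &upper1, &upper2, &lower) < 0)
        return false;

    return pid_join(upper1, upper2, lower) == pid;
}
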
/**
 * trace_pid_list_is_set - test if the pid is set in the list
 * @pid_list: The pid list to test
 * @pid: The pid to see if set in the list.
 *
 * Tests if @pid is set in @pid_list. This is usually called
 * from the scheduler when a task is scheduled, to check whether
 * the task's pid should be traced.
 *
 * Return true if the pid is in the list, false otherwise.
 */
bool trace_pid_list_is_set(struct trace_pid_list *pid_list, unsigned int pid)
{
    union upper_chunk *upper_chunk;
    union lower_chunk *lower_chunk;
    unsigned long flags;
    unsigned int upper1;
    unsigned int upper2;
    unsigned int lower;
    bool ret = false;

    if (!pid_list)
        return false;

    if (pid_split(pid, &upper1, &upper2, &lower) < 0)
        return false;

    raw_spin_lock_irqsave(&pid_list->lock, flags);
    upper_chunk = pid_list->upper[upper1];
    if (upper_chunk) {
        lower_chunk = upper_chunk->data[upper2];
        if (lower_chunk)
            ret = test_bit(lower, lower_chunk->data);
    }
    raw_spin_unlock_irqrestore(&pid_list->lock, flags);

    return ret;
}

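/*
 * Example (illustrative sketch, not part of the original file): checking
 * whether the current task should be traced, e.g. from a scheduling hook.
 * "filtered_pids" and "example_should_trace" are hypothetical names; only
 * trace_pid_list_is_set() comes from this file.
 */
static inline bool example_should_trace(struct trace_pid_list *filtered_pids,
                    struct task_struct *task)
{
    /* A NULL list means no filtering; trace everything */
    if (!filtered_pids)
        return true;

    return trace_pid_list_is_set(filtered_pids, task->pid);
}
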
/**
 * trace_pid_list_set - add a pid to the list
 * @pid_list: The pid list to add the @pid to.
 * @pid: The pid to add.
 *
 * Adds @pid to @pid_list. This is usually done explicitly by a user
 * adding a task to be traced, or indirectly by the fork function
 * when children should be traced and a task's pid is in the list.
 *
 * Return 0 on success, negative otherwise.
 */
int trace_pid_list_set(struct trace_pid_list *pid_list, unsigned int pid)
{
    union upper_chunk *upper_chunk;
    union lower_chunk *lower_chunk;
    unsigned long flags;
    unsigned int upper1;
    unsigned int upper2;
    unsigned int lower;
    int ret;

    if (!pid_list)
        return -ENODEV;

    if (pid_split(pid, &upper1, &upper2, &lower) < 0)
        return -EINVAL;

    raw_spin_lock_irqsave(&pid_list->lock, flags);
    upper_chunk = pid_list->upper[upper1];
    if (!upper_chunk) {
        upper_chunk = get_upper_chunk(pid_list);
        if (!upper_chunk) {
            ret = -ENOMEM;
            goto out;
        }
        pid_list->upper[upper1] = upper_chunk;
    }
    lower_chunk = upper_chunk->data[upper2];
    if (!lower_chunk) {
        lower_chunk = get_lower_chunk(pid_list);
        if (!lower_chunk) {
            ret = -ENOMEM;
            goto out;
        }
        upper_chunk->data[upper2] = lower_chunk;
    }
    set_bit(lower, lower_chunk->data);
    ret = 0;
 out:
    raw_spin_unlock_irqrestore(&pid_list->lock, flags);
    return ret;
}

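/*
 * Example (illustrative sketch, not part of the original file): propagating
 * the filter to a child at fork time, as the comment above describes. If the
 * parent's pid is in the list, the child's pid is added as well. Function and
 * parameter names are hypothetical.
 */
static inline void example_fork_hook(struct trace_pid_list *filtered_pids,
                     struct task_struct *parent,
                     struct task_struct *child)
{
    if (!filtered_pids)
        return;

    /* Only add the child if the parent is being traced */
    if (trace_pid_list_is_set(filtered_pids, parent->pid))
        trace_pid_list_set(filtered_pids, child->pid);
}
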
/**
 * trace_pid_list_clear - remove a pid from the list
 * @pid_list: The pid list to remove the @pid from.
 * @pid: The pid to remove.
 *
 * Removes @pid from @pid_list. This is usually done explicitly by a user
 * removing tasks from tracing, or indirectly by the exit function
 * when a task that is set to be traced exits.
 *
 * Return 0 on success, negative otherwise.
 */
int trace_pid_list_clear(struct trace_pid_list *pid_list, unsigned int pid)
{
    union upper_chunk *upper_chunk;
    union lower_chunk *lower_chunk;
    unsigned long flags;
    unsigned int upper1;
    unsigned int upper2;
    unsigned int lower;

    if (!pid_list)
        return -ENODEV;

    if (pid_split(pid, &upper1, &upper2, &lower) < 0)
        return -EINVAL;

    raw_spin_lock_irqsave(&pid_list->lock, flags);
    upper_chunk = pid_list->upper[upper1];
    if (!upper_chunk)
        goto out;

    lower_chunk = upper_chunk->data[upper2];
    if (!lower_chunk)
        goto out;

    clear_bit(lower, lower_chunk->data);

    /* if there's no more bits set, add it to the free list */
    if (find_first_bit(lower_chunk->data, LOWER_MAX) >= LOWER_MAX) {
        put_lower_chunk(pid_list, lower_chunk);
        upper_chunk->data[upper2] = NULL;
        if (upper_empty(upper_chunk)) {
            put_upper_chunk(pid_list, upper_chunk);
            pid_list->upper[upper1] = NULL;
        }
    }
 out:
    raw_spin_unlock_irqrestore(&pid_list->lock, flags);
    return 0;
}

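/*
 * Example (illustrative sketch, not part of the original file): dropping a
 * task from the filter when it exits, as the comment above describes. Names
 * are hypothetical; trace_pid_list_clear() simply ignores pids that are not
 * set, so no lookup is needed first.
 */
static inline void example_exit_hook(struct trace_pid_list *filtered_pids,
                     struct task_struct *task)
{
    if (!filtered_pids)
        return;

    trace_pid_list_clear(filtered_pids, task->pid);
}
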
/**
 * trace_pid_list_next - return the next pid in the list
 * @pid_list: The pid list to examine.
 * @pid: The pid to start from
 * @next: The pointer to place the next set pid found at or after @pid.
 *
 * Looks for the next pid that is set in @pid_list, starting at the pid
 * specified by @pid (@pid itself is included in the search). If one is
 * found, it is placed into @next.
 *
 * Return 0 when a pid is found, -1 if there are no more pids set.
 */
int trace_pid_list_next(struct trace_pid_list *pid_list, unsigned int pid,
            unsigned int *next)
{
    union upper_chunk *upper_chunk;
    union lower_chunk *lower_chunk;
    unsigned long flags;
    unsigned int upper1;
    unsigned int upper2;
    unsigned int lower;

    if (!pid_list)
        return -ENODEV;

    if (pid_split(pid, &upper1, &upper2, &lower) < 0)
        return -EINVAL;

    raw_spin_lock_irqsave(&pid_list->lock, flags);
    for (; upper1 <= UPPER_MASK; upper1++, upper2 = 0) {
        upper_chunk = pid_list->upper[upper1];

        if (!upper_chunk)
            continue;

        for (; upper2 <= UPPER_MASK; upper2++, lower = 0) {
            lower_chunk = upper_chunk->data[upper2];
            if (!lower_chunk)
                continue;

            lower = find_next_bit(lower_chunk->data, LOWER_MAX,
                        lower);
            if (lower < LOWER_MAX)
                goto found;
        }
    }

 found:
    raw_spin_unlock_irqrestore(&pid_list->lock, flags);
    if (upper1 > UPPER_MASK)
        return -1;

    *next = pid_join(upper1, upper2, lower);
    return 0;
}

/**
 * trace_pid_list_first - return the first pid in the list
 * @pid_list: The pid list to examine.
 * @pid: The pointer to place the first pid that is set.
 *
 * Looks for the first pid that is set in @pid_list, and places it
 * into @pid if found.
 *
 * Return 0 when a pid is found, -1 if there are no pids set.
 */
int trace_pid_list_first(struct trace_pid_list *pid_list, unsigned int *pid)
{
    return trace_pid_list_next(pid_list, 0, pid);
}

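/*
 * Example (illustrative sketch, not part of the original file): walking every
 * pid that is set in a list. Note that trace_pid_list_next() starts its
 * search at the given pid inclusively, so the loop advances with pid + 1.
 * The helper and its callback are hypothetical.
 */
static inline void example_for_each_pid(struct trace_pid_list *pid_list,
                    void (*cb)(unsigned int pid))
{
    unsigned int pid;
    int ret;

    for (ret = trace_pid_list_first(pid_list, &pid); !ret;
         ret = trace_pid_list_next(pid_list, pid + 1, &pid))
        cb(pid);
}
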
static void pid_list_refill_irq(struct irq_work *iwork)
{
    struct trace_pid_list *pid_list = container_of(iwork, struct trace_pid_list,
                               refill_irqwork);
    union upper_chunk *upper = NULL;
    union lower_chunk *lower = NULL;
    union upper_chunk **upper_next = &upper;
    union lower_chunk **lower_next = &lower;
    int upper_count;
    int lower_count;
    int ucnt = 0;
    int lcnt = 0;

 again:
    raw_spin_lock(&pid_list->lock);
    upper_count = CHUNK_ALLOC - pid_list->free_upper_chunks;
    lower_count = CHUNK_ALLOC - pid_list->free_lower_chunks;
    raw_spin_unlock(&pid_list->lock);

    if (upper_count <= 0 && lower_count <= 0)
        return;

    while (upper_count-- > 0) {
        union upper_chunk *chunk;

        chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
        if (!chunk)
            break;
        *upper_next = chunk;
        upper_next = &chunk->next;
        ucnt++;
    }

    while (lower_count-- > 0) {
        union lower_chunk *chunk;

        chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
        if (!chunk)
            break;
        *lower_next = chunk;
        lower_next = &chunk->next;
        lcnt++;
    }

    raw_spin_lock(&pid_list->lock);
    if (upper) {
        *upper_next = pid_list->upper_list;
        pid_list->upper_list = upper;
        pid_list->free_upper_chunks += ucnt;
    }
    if (lower) {
        *lower_next = pid_list->lower_list;
        pid_list->lower_list = lower;
        pid_list->free_lower_chunks += lcnt;
    }
    raw_spin_unlock(&pid_list->lock);

    /*
     * On success of allocating all the chunks, both counters
     * will be less than zero. If they are not, then an allocation
     * failed, and we should not try again.
     */
    if (upper_count >= 0 || lower_count >= 0)
        return;
    /*
     * When the locks were released, free chunks could have
     * been used and allocation needs to be done again. Might as
     * well allocate it now.
     */
    goto again;
}

/**
 * trace_pid_list_alloc - create a new pid_list
 *
 * Allocates a new pid_list to store pids into.
 *
 * Returns the pid_list on success, NULL otherwise.
 */
struct trace_pid_list *trace_pid_list_alloc(void)
{
    struct trace_pid_list *pid_list;
    int i;

    /* According to linux/threads.h, pids can be no bigger than 30 bits */
    WARN_ON_ONCE(pid_max > (1 << 30));

    pid_list = kzalloc(sizeof(*pid_list), GFP_KERNEL);
    if (!pid_list)
        return NULL;

    init_irq_work(&pid_list->refill_irqwork, pid_list_refill_irq);

    raw_spin_lock_init(&pid_list->lock);

    for (i = 0; i < CHUNK_ALLOC; i++) {
        union upper_chunk *chunk;

        chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
        if (!chunk)
            break;
        chunk->next = pid_list->upper_list;
        pid_list->upper_list = chunk;
        pid_list->free_upper_chunks++;
    }

    for (i = 0; i < CHUNK_ALLOC; i++) {
        union lower_chunk *chunk;

        chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
        if (!chunk)
            break;
        chunk->next = pid_list->lower_list;
        pid_list->lower_list = chunk;
        pid_list->free_lower_chunks++;
    }

    return pid_list;
}

/**
 * trace_pid_list_free - Frees an allocated pid_list.
 * @pid_list: The pid list to free.
 *
 * Frees the memory for a pid_list that was allocated.
 */
void trace_pid_list_free(struct trace_pid_list *pid_list)
{
    union upper_chunk *upper;
    union lower_chunk *lower;
    int i, j;

    if (!pid_list)
        return;

    irq_work_sync(&pid_list->refill_irqwork);

    while (pid_list->lower_list) {
        union lower_chunk *chunk;

        chunk = pid_list->lower_list;
        pid_list->lower_list = pid_list->lower_list->next;
        kfree(chunk);
    }

    while (pid_list->upper_list) {
        union upper_chunk *chunk;

        chunk = pid_list->upper_list;
        pid_list->upper_list = pid_list->upper_list->next;
        kfree(chunk);
    }

    for (i = 0; i < UPPER1_SIZE; i++) {
        upper = pid_list->upper[i];
        if (upper) {
            for (j = 0; j < UPPER2_SIZE; j++) {
                lower = upper->data[j];
                kfree(lower);
            }
            kfree(upper);
        }
    }
    kfree(pid_list);
}
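
/*
 * Example (illustrative sketch, not part of the original file): typical
 * lifetime of a pid list. The array of pids passed in is made up; only the
 * trace_pid_list_*() calls come from this file.
 */
static inline struct trace_pid_list *example_build_list(const unsigned int *pids,
                            int count)
{
    struct trace_pid_list *pid_list;
    int i;

    pid_list = trace_pid_list_alloc();
    if (!pid_list)
        return NULL;

    for (i = 0; i < count; i++) {
        if (trace_pid_list_set(pid_list, pids[i]) < 0) {
            trace_pid_list_free(pid_list);
            return NULL;
        }
    }

    return pid_list;
}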