// SPDX-License-Identifier: GPL-2.0-only
/*
 * kernel/workqueue.c - generic async execution with shared worker pool
 *
 * Work items are queued onto workqueues and executed by kworker threads
 * drawn from shared per-CPU or unbound worker pools.  See
 * Documentation/core-api/workqueue.rst for the user-facing overview;
 * the comments below cover implementation details only.
 */

0028 #include <linux/export.h>
0029 #include <linux/kernel.h>
0030 #include <linux/sched.h>
0031 #include <linux/init.h>
0032 #include <linux/signal.h>
0033 #include <linux/completion.h>
0034 #include <linux/workqueue.h>
0035 #include <linux/slab.h>
0036 #include <linux/cpu.h>
0037 #include <linux/notifier.h>
0038 #include <linux/kthread.h>
0039 #include <linux/hardirq.h>
0040 #include <linux/mempolicy.h>
0041 #include <linux/freezer.h>
0042 #include <linux/debug_locks.h>
0043 #include <linux/lockdep.h>
0044 #include <linux/idr.h>
0045 #include <linux/jhash.h>
0046 #include <linux/hashtable.h>
0047 #include <linux/rculist.h>
0048 #include <linux/nodemask.h>
0049 #include <linux/moduleparam.h>
0050 #include <linux/uaccess.h>
0051 #include <linux/sched/isolation.h>
0052 #include <linux/nmi.h>
0053 #include <linux/kvm_para.h>
0054
0055 #include "workqueue_internal.h"
0056
0057 enum {
	/*
	 * worker_pool flags
	 *
	 * A bound pool is either associated or disassociated with its CPU.
	 * While associated, all workers are bound to the CPU and concurrency
	 * management is in effect.  While disassociated (POOL_DISASSOCIATED,
	 * e.g. the CPU is going down), workers may run on any CPU and the
	 * pool behaves like an unbound one.
	 */
0074 POOL_MANAGER_ACTIVE = 1 << 0,
0075 POOL_DISASSOCIATED = 1 << 2,

	/* worker flags */
0078 WORKER_DIE = 1 << 1,
0079 WORKER_IDLE = 1 << 2,
0080 WORKER_PREP = 1 << 3,
0081 WORKER_CPU_INTENSIVE = 1 << 6,
0082 WORKER_UNBOUND = 1 << 7,
0083 WORKER_REBOUND = 1 << 8,
0084
0085 WORKER_NOT_RUNNING = WORKER_PREP | WORKER_CPU_INTENSIVE |
0086 WORKER_UNBOUND | WORKER_REBOUND,
0087
0088 NR_STD_WORKER_POOLS = 2,
0089
0090 UNBOUND_POOL_HASH_ORDER = 6,
0091 BUSY_WORKER_HASH_ORDER = 6,
0092
0093 MAX_IDLE_WORKERS_RATIO = 4,
0094 IDLE_WORKER_TIMEOUT = 300 * HZ,
0095
0096 MAYDAY_INITIAL_TIMEOUT = HZ / 100 >= 2 ? HZ / 100 : 2,
0097
0098
0099 MAYDAY_INTERVAL = HZ / 10,
0100 CREATE_COOLDOWN = HZ,
0101

	/*
	 * Rescue workers are used only on emergencies and shared by
	 * all cpus.  Give them MIN_NICE.
	 */
0106 RESCUER_NICE_LEVEL = MIN_NICE,
0107 HIGHPRI_NICE_LEVEL = MIN_NICE,
0108
0109 WQ_NAME_LEN = 24,
0110 };

/*
 * Locking overview:
 *
 * - pool->lock (a raw spinlock) protects a worker_pool's worklist, idle
 *   list, busy hash, counters and timers.
 * - wq_pool_mutex protects the global worker_pool_idr, unbound_pool_hash
 *   and the workqueues list; walking them requires either the mutex or
 *   an RCU read-side critical section.
 * - wq_pool_attach_mutex protects attaching/detaching workers to pools.
 * - wq->mutex protects per-workqueue state such as the pwq list and the
 *   flush machinery.
 * - wq_mayday_lock protects wq->maydays and pwq->mayday_node.
 */

0148 struct worker_pool {
0149 raw_spinlock_t lock;
0150 int cpu;
0151 int node;
0152 int id;
0153 unsigned int flags;
0154
0155 unsigned long watchdog_ts;
0156
0157
0158
0159
0160
0161
0162
0163 int nr_running;
0164
0165 struct list_head worklist;
0166
0167 int nr_workers;
0168 int nr_idle;
0169
0170 struct list_head idle_list;
0171 struct timer_list idle_timer;
0172 struct timer_list mayday_timer;
0173
0174
0175 DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER);
0176
0177
0178 struct worker *manager;
0179 struct list_head workers;
0180 struct completion *detach_completion;
0181
0182 struct ida worker_ida;
0183
0184 struct workqueue_attrs *attrs;
0185 struct hlist_node hash_node;
0186 int refcnt;
0187
0188
0189
0190
0191
0192 struct rcu_head rcu;
0193 };
0194
/*
 * The per-pool workqueue.  While queued, the lower WORK_STRUCT_FLAG_BITS
 * of work->data contain the pointer to the queued pwq, which is why
 * pool_workqueues are aligned at (1 << WORK_STRUCT_FLAG_BITS) bytes.
 */
0201 struct pool_workqueue {
0202 struct worker_pool *pool;
0203 struct workqueue_struct *wq;
0204 int work_color;
0205 int flush_color;
0206 int refcnt;
0207 int nr_in_flight[WORK_NR_COLORS];
0208
0209
0210
0211
0212
0213
0214
0215
0216
0217
0218
0219
0220
0221
0222
0223
0224
0225
0226 int nr_active;
0227 int max_active;
0228 struct list_head inactive_works;
0229 struct list_head pwqs_node;
0230 struct list_head mayday_node;
0231
0232
0233
0234
0235
0236
0237
0238 struct work_struct unbound_release_work;
0239 struct rcu_head rcu;
0240 } __aligned(1 << WORK_STRUCT_FLAG_BITS);
0241
/*
 * Structure used to wait for workqueue flush.
 */
0245 struct wq_flusher {
0246 struct list_head list;
0247 int flush_color;
0248 struct completion done;
0249 };
0250
0251 struct wq_device;
0252
/*
 * The externally visible workqueue.  It relays the issued work items to
 * the appropriate worker_pools through its pool_workqueues.
 */
0257 struct workqueue_struct {
0258 struct list_head pwqs;
0259 struct list_head list;
0260
0261 struct mutex mutex;
0262 int work_color;
0263 int flush_color;
0264 atomic_t nr_pwqs_to_flush;
0265 struct wq_flusher *first_flusher;
0266 struct list_head flusher_queue;
0267 struct list_head flusher_overflow;
0268
0269 struct list_head maydays;
0270 struct worker *rescuer;
0271
0272 int nr_drainers;
0273 int saved_max_active;
0274
0275 struct workqueue_attrs *unbound_attrs;
0276 struct pool_workqueue *dfl_pwq;
0277
0278 #ifdef CONFIG_SYSFS
0279 struct wq_device *wq_dev;
0280 #endif
0281 #ifdef CONFIG_LOCKDEP
0282 char *lock_name;
0283 struct lock_class_key key;
0284 struct lockdep_map lockdep_map;
0285 #endif
0286 char name[WQ_NAME_LEN];
0287
0288
0289
0290
0291
0292
0293 struct rcu_head rcu;
0294
0295
0296 unsigned int flags ____cacheline_aligned;
0297 struct pool_workqueue __percpu *cpu_pwqs;
0298 struct pool_workqueue __rcu *numa_pwq_tbl[];
0299 };
0300
0301 static struct kmem_cache *pwq_cache;
0302
0303 static cpumask_var_t *wq_numa_possible_cpumask;
0304
0305
0306 static bool wq_disable_numa;
0307 module_param_named(disable_numa, wq_disable_numa, bool, 0444);
0308
0309
0310 static bool wq_power_efficient = IS_ENABLED(CONFIG_WQ_POWER_EFFICIENT_DEFAULT);
0311 module_param_named(power_efficient, wq_power_efficient, bool, 0444);
0312
0313 static bool wq_online;
0314
0315 static bool wq_numa_enabled;
0316
0317
0318 static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;
0319
0320 static DEFINE_MUTEX(wq_pool_mutex);
0321 static DEFINE_MUTEX(wq_pool_attach_mutex);
0322 static DEFINE_RAW_SPINLOCK(wq_mayday_lock);
0323
0324 static struct rcuwait manager_wait = __RCUWAIT_INITIALIZER(manager_wait);
0325
0326 static LIST_HEAD(workqueues);
0327 static bool workqueue_freezing;
0328
0329
0330 static cpumask_var_t wq_unbound_cpumask;
0331
0332
0333 static DEFINE_PER_CPU(int, wq_rr_cpu_last);
0334
0335
0336
0337
0338
0339
0340 #ifdef CONFIG_DEBUG_WQ_FORCE_RR_CPU
0341 static bool wq_debug_force_rr_cpu = true;
0342 #else
0343 static bool wq_debug_force_rr_cpu = false;
0344 #endif
0345 module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);
0346
0347
0348 static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], cpu_worker_pools);
0349
0350 static DEFINE_IDR(worker_pool_idr);
0351
0352
0353 static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER);
0354
0355
0356 static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS];
0357
0358
0359 static struct workqueue_attrs *ordered_wq_attrs[NR_STD_WORKER_POOLS];
0360
0361 struct workqueue_struct *system_wq __read_mostly;
0362 EXPORT_SYMBOL(system_wq);
0363 struct workqueue_struct *system_highpri_wq __read_mostly;
0364 EXPORT_SYMBOL_GPL(system_highpri_wq);
0365 struct workqueue_struct *system_long_wq __read_mostly;
0366 EXPORT_SYMBOL_GPL(system_long_wq);
0367 struct workqueue_struct *system_unbound_wq __read_mostly;
0368 EXPORT_SYMBOL_GPL(system_unbound_wq);
0369 struct workqueue_struct *system_freezable_wq __read_mostly;
0370 EXPORT_SYMBOL_GPL(system_freezable_wq);
0371 struct workqueue_struct *system_power_efficient_wq __read_mostly;
0372 EXPORT_SYMBOL_GPL(system_power_efficient_wq);
0373 struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
0374 EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
0375
0376 static int worker_thread(void *__worker);
0377 static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
0378 static void show_pwq(struct pool_workqueue *pwq);
0379 static void show_one_worker_pool(struct worker_pool *pool);
0380
0381 #define CREATE_TRACE_POINTS
0382 #include <trace/events/workqueue.h>
0383
0384 #define assert_rcu_or_pool_mutex() \
0385 RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
0386 !lockdep_is_held(&wq_pool_mutex), \
0387 "RCU or wq_pool_mutex should be held")
0388
0389 #define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \
0390 RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
0391 !lockdep_is_held(&wq->mutex) && \
0392 !lockdep_is_held(&wq_pool_mutex), \
0393 "RCU, wq->mutex or wq_pool_mutex should be held")
0394
0395 #define for_each_cpu_worker_pool(pool, cpu) \
0396 for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \
0397 (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
0398 (pool)++)
0399
0400
0401
0402
0403
0404
0405
0406
0407
0408
0409
0410
0411
0412 #define for_each_pool(pool, pi) \
0413 idr_for_each_entry(&worker_pool_idr, pool, pi) \
0414 if (({ assert_rcu_or_pool_mutex(); false; })) { } \
0415 else
0416
0417
0418
0419
0420
0421
0422
0423
0424
0425
0426
0427 #define for_each_pool_worker(worker, pool) \
0428 list_for_each_entry((worker), &(pool)->workers, node) \
0429 if (({ lockdep_assert_held(&wq_pool_attach_mutex); false; })) { } \
0430 else
0431
0432
0433
0434
0435
0436
0437
0438
0439
0440
0441
0442
0443
0444 #define for_each_pwq(pwq, wq) \
0445 list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node, \
0446 lockdep_is_held(&(wq->mutex)))
0447
0448 #ifdef CONFIG_DEBUG_OBJECTS_WORK
0449
0450 static const struct debug_obj_descr work_debug_descr;
0451
0452 static void *work_debug_hint(void *addr)
0453 {
0454 return ((struct work_struct *) addr)->func;
0455 }
0456
0457 static bool work_is_static_object(void *addr)
0458 {
0459 struct work_struct *work = addr;
0460
0461 return test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work));
0462 }
0463
0464
0465
0466
0467
0468 static bool work_fixup_init(void *addr, enum debug_obj_state state)
0469 {
0470 struct work_struct *work = addr;
0471
0472 switch (state) {
0473 case ODEBUG_STATE_ACTIVE:
0474 cancel_work_sync(work);
0475 debug_object_init(work, &work_debug_descr);
0476 return true;
0477 default:
0478 return false;
0479 }
0480 }
0481
0482
0483
0484
0485
0486 static bool work_fixup_free(void *addr, enum debug_obj_state state)
0487 {
0488 struct work_struct *work = addr;
0489
0490 switch (state) {
0491 case ODEBUG_STATE_ACTIVE:
0492 cancel_work_sync(work);
0493 debug_object_free(work, &work_debug_descr);
0494 return true;
0495 default:
0496 return false;
0497 }
0498 }
0499
0500 static const struct debug_obj_descr work_debug_descr = {
0501 .name = "work_struct",
0502 .debug_hint = work_debug_hint,
0503 .is_static_object = work_is_static_object,
0504 .fixup_init = work_fixup_init,
0505 .fixup_free = work_fixup_free,
0506 };
0507
0508 static inline void debug_work_activate(struct work_struct *work)
0509 {
0510 debug_object_activate(work, &work_debug_descr);
0511 }
0512
0513 static inline void debug_work_deactivate(struct work_struct *work)
0514 {
0515 debug_object_deactivate(work, &work_debug_descr);
0516 }
0517
0518 void __init_work(struct work_struct *work, int onstack)
0519 {
0520 if (onstack)
0521 debug_object_init_on_stack(work, &work_debug_descr);
0522 else
0523 debug_object_init(work, &work_debug_descr);
0524 }
0525 EXPORT_SYMBOL_GPL(__init_work);
0526
0527 void destroy_work_on_stack(struct work_struct *work)
0528 {
0529 debug_object_free(work, &work_debug_descr);
0530 }
0531 EXPORT_SYMBOL_GPL(destroy_work_on_stack);
0532
0533 void destroy_delayed_work_on_stack(struct delayed_work *work)
0534 {
0535 destroy_timer_on_stack(&work->timer);
0536 debug_object_free(&work->work, &work_debug_descr);
0537 }
0538 EXPORT_SYMBOL_GPL(destroy_delayed_work_on_stack);
0539
0540 #else
0541 static inline void debug_work_activate(struct work_struct *work) { }
0542 static inline void debug_work_deactivate(struct work_struct *work) { }
0543 #endif
0544
0545
0546
0547
0548
0549
0550
0551
0552 static int worker_pool_assign_id(struct worker_pool *pool)
0553 {
0554 int ret;
0555
0556 lockdep_assert_held(&wq_pool_mutex);
0557
0558 ret = idr_alloc(&worker_pool_idr, pool, 0, WORK_OFFQ_POOL_NONE,
0559 GFP_KERNEL);
0560 if (ret >= 0) {
0561 pool->id = ret;
0562 return 0;
0563 }
0564 return ret;
0565 }
0566
0567
0568
0569
0570
0571
0572
0573
0574
0575
0576
0577
0578
0579 static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
0580 int node)
0581 {
0582 assert_rcu_or_wq_mutex_or_pool_mutex(wq);
0583
0584
0585
0586
0587
0588
0589
0590 if (unlikely(node == NUMA_NO_NODE))
0591 return wq->dfl_pwq;
0592
0593 return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
0594 }
0595
0596 static unsigned int work_color_to_flags(int color)
0597 {
0598 return color << WORK_STRUCT_COLOR_SHIFT;
0599 }
0600
0601 static int get_work_color(unsigned long work_data)
0602 {
0603 return (work_data >> WORK_STRUCT_COLOR_SHIFT) &
0604 ((1 << WORK_STRUCT_COLOR_BITS) - 1);
0605 }
0606
0607 static int work_next_color(int color)
0608 {
0609 return (color + 1) % WORK_NR_COLORS;
0610 }
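
/*
 * Illustrative note: colors simply advance modulo WORK_NR_COLORS, so
 * work_next_color(0) == 1 and the last color wraps back around to 0.
 * __flush_workqueue() below uses the color to tag which flush
 * generation each in-flight work item belongs to.
 */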

/*
 * While queued, %WORK_STRUCT_PWQ is set and work->data carries the
 * pool_workqueue pointer plus flag bits.  Once execution starts, the pwq
 * pointer is replaced by the off-queue encoding: the last pool ID shifted
 * by WORK_OFFQ_POOL_SHIFT, possibly OR'd with flags such as
 * %WORK_OFFQ_CANCELING.  set_work_pwq(), set_work_pool_and_*() and
 * clear_work_data() update the field; get_work_pwq() and get_work_pool()
 * decode it.  The recorded pool ID can be mapped back to a pool through
 * worker_pool_idr as long as the caller holds wq_pool_mutex or an RCU
 * read lock.
 */
0632 static inline void set_work_data(struct work_struct *work, unsigned long data,
0633 unsigned long flags)
0634 {
0635 WARN_ON_ONCE(!work_pending(work));
0636 atomic_long_set(&work->data, data | flags | work_static(work));
0637 }
0638
0639 static void set_work_pwq(struct work_struct *work, struct pool_workqueue *pwq,
0640 unsigned long extra_flags)
0641 {
0642 set_work_data(work, (unsigned long)pwq,
0643 WORK_STRUCT_PENDING | WORK_STRUCT_PWQ | extra_flags);
0644 }
0645
0646 static void set_work_pool_and_keep_pending(struct work_struct *work,
0647 int pool_id)
0648 {
0649 set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT,
0650 WORK_STRUCT_PENDING);
0651 }
0652
0653 static void set_work_pool_and_clear_pending(struct work_struct *work,
0654 int pool_id)
0655 {
0656
0657
0658
0659
0660
0661
0662 smp_wmb();
0663 set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0);
0664
0665
0666
0667
0668
0669
0670
0671
0672
0673
0674
0675
0676
0677
0678
0679
0680
0681
0682
0683
0684
0685
0686
0687
0688
0689
0690
0691
0692 smp_mb();
0693 }
0694
0695 static void clear_work_data(struct work_struct *work)
0696 {
0697 smp_wmb();
0698 set_work_data(work, WORK_STRUCT_NO_POOL, 0);
0699 }
0700
0701 static struct pool_workqueue *get_work_pwq(struct work_struct *work)
0702 {
0703 unsigned long data = atomic_long_read(&work->data);
0704
0705 if (data & WORK_STRUCT_PWQ)
0706 return (void *)(data & WORK_STRUCT_WQ_DATA_MASK);
0707 else
0708 return NULL;
0709 }

/**
 * get_work_pool - return the worker_pool a given work was associated with
 * @work: the work item of interest
 *
 * Pools are created and destroyed under wq_pool_mutex, and allow read
 * access under RCU read lock.  As such, this function should be
 * called under wq_pool_mutex or inside of a rcu_read_lock() region.
 *
 * All fields of the returned pool are accessible as long as the above
 * mentioned locking is in effect.  If the returned pool needs to be used
 * beyond the critical section, the caller is responsible for ensuring the
 * returned pool is and stays online.
 *
 * Return: The worker_pool @work was last associated with.  %NULL if none.
 */
0726 static struct worker_pool *get_work_pool(struct work_struct *work)
0727 {
0728 unsigned long data = atomic_long_read(&work->data);
0729 int pool_id;
0730
0731 assert_rcu_or_pool_mutex();
0732
0733 if (data & WORK_STRUCT_PWQ)
0734 return ((struct pool_workqueue *)
0735 (data & WORK_STRUCT_WQ_DATA_MASK))->pool;
0736
0737 pool_id = data >> WORK_OFFQ_POOL_SHIFT;
0738 if (pool_id == WORK_OFFQ_POOL_NONE)
0739 return NULL;
0740
0741 return idr_find(&worker_pool_idr, pool_id);
0742 }
0743
0744
0745
0746
0747
0748
0749
0750
0751 static int get_work_pool_id(struct work_struct *work)
0752 {
0753 unsigned long data = atomic_long_read(&work->data);
0754
0755 if (data & WORK_STRUCT_PWQ)
0756 return ((struct pool_workqueue *)
0757 (data & WORK_STRUCT_WQ_DATA_MASK))->pool->id;
0758
0759 return data >> WORK_OFFQ_POOL_SHIFT;
0760 }
0761
0762 static void mark_work_canceling(struct work_struct *work)
0763 {
0764 unsigned long pool_id = get_work_pool_id(work);
0765
0766 pool_id <<= WORK_OFFQ_POOL_SHIFT;
0767 set_work_data(work, pool_id | WORK_OFFQ_CANCELING, WORK_STRUCT_PENDING);
0768 }
0769
0770 static bool work_is_canceling(struct work_struct *work)
0771 {
0772 unsigned long data = atomic_long_read(&work->data);
0773
0774 return !(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_CANCELING);
0775 }

/*
 * Policy functions.  These define the policies on how the global worker
 * pools are managed.  Unless noted otherwise, these functions assume that
 * they're being called with pool->lock held.
 */
0783 static bool __need_more_worker(struct worker_pool *pool)
0784 {
0785 return !pool->nr_running;
0786 }

/*
 * Need to wake up a worker?  Called from anything but currently
 * running workers.
 *
 * Note that, because unbound workers never contribute to nr_running, this
 * function will always return %true for unbound pools as long as the
 * worklist isn't empty.
 */
0796 static bool need_more_worker(struct worker_pool *pool)
0797 {
0798 return !list_empty(&pool->worklist) && __need_more_worker(pool);
0799 }

/* Can I start working?  Called from busy but !running workers. */
0802 static bool may_start_working(struct worker_pool *pool)
0803 {
0804 return pool->nr_idle;
0805 }

/* Do I need to keep working?  Called from currently running workers. */
0808 static bool keep_working(struct worker_pool *pool)
0809 {
0810 return !list_empty(&pool->worklist) && (pool->nr_running <= 1);
0811 }

/* Do we need a new worker?  Called from manager. */
0814 static bool need_to_create_worker(struct worker_pool *pool)
0815 {
0816 return need_more_worker(pool) && !may_start_working(pool);
0817 }

/* Do we have too many workers and should some go away? */
0820 static bool too_many_workers(struct worker_pool *pool)
0821 {
0822 bool managing = pool->flags & POOL_MANAGER_ACTIVE;
0823 int nr_idle = pool->nr_idle + managing;
0824 int nr_busy = pool->nr_workers - nr_idle;
0825
0826 return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy;
0827 }
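
/*
 * Worked example for the check above (illustrative): with
 * MAX_IDLE_WORKERS_RATIO == 4 and 16 busy workers, up to 5 idle workers
 * are tolerated; at 6 idle workers (6 - 2) * 4 == 16 >= nr_busy, so
 * too_many_workers() returns %true and the idle timer starts culling.
 */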

/*
 * Return the first idle worker or %NULL if the idle list is empty.
 * Called with pool->lock held.
 */
0834 static struct worker *first_idle_worker(struct worker_pool *pool)
0835 {
0836 if (unlikely(list_empty(&pool->idle_list)))
0837 return NULL;
0838
0839 return list_first_entry(&pool->idle_list, struct worker, entry);
0840 }
0841
0842
0843
0844
0845
0846
0847
0848
0849
0850
0851 static void wake_up_worker(struct worker_pool *pool)
0852 {
0853 struct worker *worker = first_idle_worker(pool);
0854
0855 if (likely(worker))
0856 wake_up_process(worker->task);
0857 }
0858
0859
0860
0861
0862
0863
0864
0865 void wq_worker_running(struct task_struct *task)
0866 {
0867 struct worker *worker = kthread_data(task);
0868
0869 if (!worker->sleeping)
0870 return;
0871
0872
0873
0874
0875
0876
0877
0878 preempt_disable();
0879 if (!(worker->flags & WORKER_NOT_RUNNING))
0880 worker->pool->nr_running++;
0881 preempt_enable();
0882 worker->sleeping = 0;
0883 }
0884
0885
0886
0887
0888
0889
0890
0891
0892 void wq_worker_sleeping(struct task_struct *task)
0893 {
0894 struct worker *worker = kthread_data(task);
0895 struct worker_pool *pool;
0896
0897
0898
0899
0900
0901
0902 if (worker->flags & WORKER_NOT_RUNNING)
0903 return;
0904
0905 pool = worker->pool;
0906
0907
0908 if (worker->sleeping)
0909 return;
0910
0911 worker->sleeping = 1;
0912 raw_spin_lock_irq(&pool->lock);
0913
0914
0915
0916
0917
0918
0919 if (worker->flags & WORKER_NOT_RUNNING) {
0920 raw_spin_unlock_irq(&pool->lock);
0921 return;
0922 }
0923
0924 pool->nr_running--;
0925 if (need_more_worker(pool))
0926 wake_up_worker(pool);
0927 raw_spin_unlock_irq(&pool->lock);
0928 }
0929
0930
0931
0932
0933
0934
0935
0936
0937
0938
0939
0940
0941
0942
0943
0944
0945
0946
0947
0948
0949
0950
0951
0952
0953
0954 work_func_t wq_worker_last_func(struct task_struct *task)
0955 {
0956 struct worker *worker = kthread_data(task);
0957
0958 return worker->last_func;
0959 }
0960
0961
0962
0963
0964
0965
0966
0967
0968
0969
0970
0971 static inline void worker_set_flags(struct worker *worker, unsigned int flags)
0972 {
0973 struct worker_pool *pool = worker->pool;
0974
0975 WARN_ON_ONCE(worker->task != current);
0976
0977
0978 if ((flags & WORKER_NOT_RUNNING) &&
0979 !(worker->flags & WORKER_NOT_RUNNING)) {
0980 pool->nr_running--;
0981 }
0982
0983 worker->flags |= flags;
0984 }
0985
0986
0987
0988
0989
0990
0991
0992
0993
0994
0995
0996 static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
0997 {
0998 struct worker_pool *pool = worker->pool;
0999 unsigned int oflags = worker->flags;
1000
1001 WARN_ON_ONCE(worker->task != current);
1002
1003 worker->flags &= ~flags;
1004
1005
1006
1007
1008
1009
1010 if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING))
1011 if (!(worker->flags & WORKER_NOT_RUNNING))
1012 pool->nr_running++;
1013 }
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048 static struct worker *find_worker_executing_work(struct worker_pool *pool,
1049 struct work_struct *work)
1050 {
1051 struct worker *worker;
1052
1053 hash_for_each_possible(pool->busy_hash, worker, hentry,
1054 (unsigned long)work)
1055 if (worker->current_work == work &&
1056 worker->current_func == work->func)
1057 return worker;
1058
1059 return NULL;
1060 }
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079 static void move_linked_works(struct work_struct *work, struct list_head *head,
1080 struct work_struct **nextp)
1081 {
1082 struct work_struct *n;
1083
1084
1085
1086
1087
1088 list_for_each_entry_safe_from(work, n, NULL, entry) {
1089 list_move_tail(&work->entry, head);
1090 if (!(*work_data_bits(work) & WORK_STRUCT_LINKED))
1091 break;
1092 }
1093
1094
1095
1096
1097
1098
1099 if (nextp)
1100 *nextp = n;
1101 }
1102
1103
1104
1105
1106
1107
1108
1109
1110 static void get_pwq(struct pool_workqueue *pwq)
1111 {
1112 lockdep_assert_held(&pwq->pool->lock);
1113 WARN_ON_ONCE(pwq->refcnt <= 0);
1114 pwq->refcnt++;
1115 }
1116
1117
1118
1119
1120
1121
1122
1123
1124 static void put_pwq(struct pool_workqueue *pwq)
1125 {
1126 lockdep_assert_held(&pwq->pool->lock);
1127 if (likely(--pwq->refcnt))
1128 return;
1129 if (WARN_ON_ONCE(!(pwq->wq->flags & WQ_UNBOUND)))
1130 return;
1131
1132
1133
1134
1135
1136
1137
1138
1139 schedule_work(&pwq->unbound_release_work);
1140 }
1141
1142
1143
1144
1145
1146
1147
1148 static void put_pwq_unlocked(struct pool_workqueue *pwq)
1149 {
1150 if (pwq) {
1151
1152
1153
1154
1155 raw_spin_lock_irq(&pwq->pool->lock);
1156 put_pwq(pwq);
1157 raw_spin_unlock_irq(&pwq->pool->lock);
1158 }
1159 }
1160
1161 static void pwq_activate_inactive_work(struct work_struct *work)
1162 {
1163 struct pool_workqueue *pwq = get_work_pwq(work);
1164
1165 trace_workqueue_activate_work(work);
1166 if (list_empty(&pwq->pool->worklist))
1167 pwq->pool->watchdog_ts = jiffies;
1168 move_linked_works(work, &pwq->pool->worklist, NULL);
1169 __clear_bit(WORK_STRUCT_INACTIVE_BIT, work_data_bits(work));
1170 pwq->nr_active++;
1171 }
1172
1173 static void pwq_activate_first_inactive(struct pool_workqueue *pwq)
1174 {
1175 struct work_struct *work = list_first_entry(&pwq->inactive_works,
1176 struct work_struct, entry);
1177
1178 pwq_activate_inactive_work(work);
1179 }
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192 static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, unsigned long work_data)
1193 {
1194 int color = get_work_color(work_data);
1195
1196 if (!(work_data & WORK_STRUCT_INACTIVE)) {
1197 pwq->nr_active--;
1198 if (!list_empty(&pwq->inactive_works)) {
1199
1200 if (pwq->nr_active < pwq->max_active)
1201 pwq_activate_first_inactive(pwq);
1202 }
1203 }
1204
1205 pwq->nr_in_flight[color]--;
1206
1207
1208 if (likely(pwq->flush_color != color))
1209 goto out_put;
1210
1211
1212 if (pwq->nr_in_flight[color])
1213 goto out_put;
1214
1215
1216 pwq->flush_color = -1;
1217
1218
1219
1220
1221
1222 if (atomic_dec_and_test(&pwq->wq->nr_pwqs_to_flush))
1223 complete(&pwq->wq->first_flusher->done);
1224 out_put:
1225 put_pwq(pwq);
1226 }

/**
 * try_to_grab_pending - steal work item from worklist and disable irq
 * @work: work item to steal
 * @is_dwork: @work is a delayed_work
 * @flags: place to store irq state
 *
 * Try to grab PENDING bit of @work.  This function can handle @work in any
 * stable state - idle, on timer or on worklist.
 *
 * Return:
 *  1		if @work was pending and we successfully stole PENDING
 *  0		if @work was idle and we claimed PENDING
 *  -EAGAIN	if PENDING couldn't be grabbed at the moment, safe to busy-retry
 *  -ENOENT	if someone else is canceling @work, this state may persist
 *		for arbitrarily long
 *
 * Note:
 * On >= 0 return, the caller owns @work's PENDING bit.  To avoid getting
 * interrupted while holding PENDING and @work off queue, irq must be
 * disabled on entry.  This, combined with delayed_work->timer being
 * irqsafe, ensures that we return -EAGAIN for finite short periods only.
 *
 * On successful return, >= 0, irq is disabled and the caller is
 * responsible for releasing it using local_irq_restore(*@flags).
 *
 * This function is safe to call from any context including IRQ handler.
 */
1258 static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
1259 unsigned long *flags)
1260 {
1261 struct worker_pool *pool;
1262 struct pool_workqueue *pwq;
1263
1264 local_irq_save(*flags);
1265
1266
1267 if (is_dwork) {
1268 struct delayed_work *dwork = to_delayed_work(work);
1269
1270
1271
1272
1273
1274
1275 if (likely(del_timer(&dwork->timer)))
1276 return 1;
1277 }
1278
1279
1280 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
1281 return 0;
1282
1283 rcu_read_lock();
1284
1285
1286
1287
1288 pool = get_work_pool(work);
1289 if (!pool)
1290 goto fail;
1291
1292 raw_spin_lock(&pool->lock);
1293
1294
1295
1296
1297
1298
1299
1300
1301 pwq = get_work_pwq(work);
1302 if (pwq && pwq->pool == pool) {
1303 debug_work_deactivate(work);
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316 if (*work_data_bits(work) & WORK_STRUCT_INACTIVE)
1317 pwq_activate_inactive_work(work);
1318
1319 list_del_init(&work->entry);
1320 pwq_dec_nr_in_flight(pwq, *work_data_bits(work));
1321
1322
1323 set_work_pool_and_keep_pending(work, pool->id);
1324
1325 raw_spin_unlock(&pool->lock);
1326 rcu_read_unlock();
1327 return 1;
1328 }
1329 raw_spin_unlock(&pool->lock);
1330 fail:
1331 rcu_read_unlock();
1332 local_irq_restore(*flags);
1333 if (work_is_canceling(work))
1334 return -ENOENT;
1335 cpu_relax();
1336 return -EAGAIN;
1337 }
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352 static void insert_work(struct pool_workqueue *pwq, struct work_struct *work,
1353 struct list_head *head, unsigned int extra_flags)
1354 {
1355 struct worker_pool *pool = pwq->pool;
1356
1357
1358 kasan_record_aux_stack_noalloc(work);
1359
1360
1361 set_work_pwq(work, pwq, extra_flags);
1362 list_add_tail(&work->entry, head);
1363 get_pwq(pwq);
1364
1365 if (__need_more_worker(pool))
1366 wake_up_worker(pool);
1367 }
1368
1369
1370
1371
1372
1373 static bool is_chained_work(struct workqueue_struct *wq)
1374 {
1375 struct worker *worker;
1376
1377 worker = current_wq_worker();
1378
1379
1380
1381
1382 return worker && worker->current_pwq->wq == wq;
1383 }
1384
1385
1386
1387
1388
1389
1390 static int wq_select_unbound_cpu(int cpu)
1391 {
1392 static bool printed_dbg_warning;
1393 int new_cpu;
1394
1395 if (likely(!wq_debug_force_rr_cpu)) {
1396 if (cpumask_test_cpu(cpu, wq_unbound_cpumask))
1397 return cpu;
1398 } else if (!printed_dbg_warning) {
1399 pr_warn("workqueue: round-robin CPU selection forced, expect performance impact\n");
1400 printed_dbg_warning = true;
1401 }
1402
1403 if (cpumask_empty(wq_unbound_cpumask))
1404 return cpu;
1405
1406 new_cpu = __this_cpu_read(wq_rr_cpu_last);
1407 new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask);
1408 if (unlikely(new_cpu >= nr_cpu_ids)) {
1409 new_cpu = cpumask_first_and(wq_unbound_cpumask, cpu_online_mask);
1410 if (unlikely(new_cpu >= nr_cpu_ids))
1411 return cpu;
1412 }
1413 __this_cpu_write(wq_rr_cpu_last, new_cpu);
1414
1415 return new_cpu;
1416 }
1417
1418 static void __queue_work(int cpu, struct workqueue_struct *wq,
1419 struct work_struct *work)
1420 {
1421 struct pool_workqueue *pwq;
1422 struct worker_pool *last_pool;
1423 struct list_head *worklist;
1424 unsigned int work_flags;
1425 unsigned int req_cpu = cpu;
1426
1427
1428
1429
1430
1431
1432
1433 lockdep_assert_irqs_disabled();
1434
1435
1436
1437 if (unlikely(wq->flags & __WQ_DRAINING) &&
1438 WARN_ON_ONCE(!is_chained_work(wq)))
1439 return;
1440 rcu_read_lock();
1441 retry:
1442
1443 if (wq->flags & WQ_UNBOUND) {
1444 if (req_cpu == WORK_CPU_UNBOUND)
1445 cpu = wq_select_unbound_cpu(raw_smp_processor_id());
1446 pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
1447 } else {
1448 if (req_cpu == WORK_CPU_UNBOUND)
1449 cpu = raw_smp_processor_id();
1450 pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
1451 }
1452
1453
1454
1455
1456
1457
1458 last_pool = get_work_pool(work);
1459 if (last_pool && last_pool != pwq->pool) {
1460 struct worker *worker;
1461
1462 raw_spin_lock(&last_pool->lock);
1463
1464 worker = find_worker_executing_work(last_pool, work);
1465
1466 if (worker && worker->current_pwq->wq == wq) {
1467 pwq = worker->current_pwq;
1468 } else {
1469
1470 raw_spin_unlock(&last_pool->lock);
1471 raw_spin_lock(&pwq->pool->lock);
1472 }
1473 } else {
1474 raw_spin_lock(&pwq->pool->lock);
1475 }
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485 if (unlikely(!pwq->refcnt)) {
1486 if (wq->flags & WQ_UNBOUND) {
1487 raw_spin_unlock(&pwq->pool->lock);
1488 cpu_relax();
1489 goto retry;
1490 }
1491
1492 WARN_ONCE(true, "workqueue: per-cpu pwq for %s on cpu%d has 0 refcnt",
1493 wq->name, cpu);
1494 }
1495
1496
1497 trace_workqueue_queue_work(req_cpu, pwq, work);
1498
1499 if (WARN_ON(!list_empty(&work->entry)))
1500 goto out;
1501
1502 pwq->nr_in_flight[pwq->work_color]++;
1503 work_flags = work_color_to_flags(pwq->work_color);
1504
1505 if (likely(pwq->nr_active < pwq->max_active)) {
1506 trace_workqueue_activate_work(work);
1507 pwq->nr_active++;
1508 worklist = &pwq->pool->worklist;
1509 if (list_empty(worklist))
1510 pwq->pool->watchdog_ts = jiffies;
1511 } else {
1512 work_flags |= WORK_STRUCT_INACTIVE;
1513 worklist = &pwq->inactive_works;
1514 }
1515
1516 debug_work_activate(work);
1517 insert_work(pwq, work, worklist, work_flags);
1518
1519 out:
1520 raw_spin_unlock(&pwq->pool->lock);
1521 rcu_read_unlock();
1522 }

/**
 * queue_work_on - queue work on specific cpu
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @work: work to queue
 *
 * We queue the work to a specific CPU, the caller must ensure it
 * can't go away.  Callers that fail to ensure that the specified
 * CPU cannot go away will execute on a randomly chosen CPU.
 *
 * Return: %false if @work was already on a queue, %true otherwise.
 */
1536 bool queue_work_on(int cpu, struct workqueue_struct *wq,
1537 struct work_struct *work)
1538 {
1539 bool ret = false;
1540 unsigned long flags;
1541
1542 local_irq_save(flags);
1543
1544 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1545 __queue_work(cpu, wq, work);
1546 ret = true;
1547 }
1548
1549 local_irq_restore(flags);
1550 return ret;
1551 }
1552 EXPORT_SYMBOL(queue_work_on);
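
/*
 * Usage sketch (illustrative only; my_dev, my_dev_workfn and
 * do_deferred_stuff() are hypothetical): a caller embeds a work_struct
 * in its own object, initializes it once, and queues it whenever there
 * is deferred work to do.
 *
 *	struct my_dev {
 *		struct work_struct work;
 *	};
 *
 *	static void my_dev_workfn(struct work_struct *work)
 *	{
 *		struct my_dev *dev = container_of(work, struct my_dev, work);
 *
 *		do_deferred_stuff(dev);
 *	}
 *
 *	INIT_WORK(&dev->work, my_dev_workfn);
 *	queue_work_on(WORK_CPU_UNBOUND, system_wq, &dev->work);
 */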

/**
 * workqueue_select_cpu_near - Select a CPU based on NUMA node
 * @node: NUMA node ID that we want to select a CPU from
 *
 * This function will attempt to find a "random" cpu available on a given
 * node.  If there are no CPUs available on the given node it will return
 * WORK_CPU_UNBOUND indicating that we should just schedule to any
 * available CPU if we need to schedule this work.
 */
1563 static int workqueue_select_cpu_near(int node)
1564 {
1565 int cpu;
1566
1567
1568 if (!wq_numa_enabled)
1569 return WORK_CPU_UNBOUND;
1570
1571
1572 if (node < 0 || node >= MAX_NUMNODES || !node_online(node))
1573 return WORK_CPU_UNBOUND;
1574
1575
1576 cpu = raw_smp_processor_id();
1577 if (node == cpu_to_node(cpu))
1578 return cpu;
1579
1580
1581 cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask);
1582
1583
1584 return cpu < nr_cpu_ids ? cpu : WORK_CPU_UNBOUND;
1585 }

/**
 * queue_work_node - queue work on a "random" cpu for a given NUMA node
 * @node: NUMA node that we are targeting the work for
 * @wq: workqueue to use
 * @work: work to queue
 *
 * We queue the work to a "random" CPU within a given NUMA node.  The basic
 * idea here is to provide a way to somehow associate work with a given
 * NUMA node.
 *
 * This function will only make a best effort attempt at getting this onto
 * the right NUMA node.  If no node is requested or the requested node is
 * offline then we just fall back to standard queue_work behavior.
 *
 * Currently the "random" CPU ends up being the first available CPU in the
 * intersection of cpu_online_mask and the cpumask of the node, unless we
 * are running on the node.  In that case we just use the current CPU.
 *
 * Return: %false if @work was already on a queue, %true otherwise.
 */
1607 bool queue_work_node(int node, struct workqueue_struct *wq,
1608 struct work_struct *work)
1609 {
1610 unsigned long flags;
1611 bool ret = false;
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622 WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND));
1623
1624 local_irq_save(flags);
1625
1626 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1627 int cpu = workqueue_select_cpu_near(node);
1628
1629 __queue_work(cpu, wq, work);
1630 ret = true;
1631 }
1632
1633 local_irq_restore(flags);
1634 return ret;
1635 }
1636 EXPORT_SYMBOL_GPL(queue_work_node);
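
/*
 * Illustrative use (hypothetical @ctx and @page): keep the deferred
 * processing on the node whose memory it will touch.  The target
 * workqueue must be unbound, see the WARN_ON_ONCE() above.
 *
 *	queue_work_node(page_to_nid(page), system_unbound_wq, &ctx->work);
 */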
1637
1638 void delayed_work_timer_fn(struct timer_list *t)
1639 {
1640 struct delayed_work *dwork = from_timer(dwork, t, timer);
1641
1642
1643 __queue_work(dwork->cpu, dwork->wq, &dwork->work);
1644 }
1645 EXPORT_SYMBOL(delayed_work_timer_fn);
1646
1647 static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
1648 struct delayed_work *dwork, unsigned long delay)
1649 {
1650 struct timer_list *timer = &dwork->timer;
1651 struct work_struct *work = &dwork->work;
1652
1653 WARN_ON_ONCE(!wq);
1654 WARN_ON_FUNCTION_MISMATCH(timer->function, delayed_work_timer_fn);
1655 WARN_ON_ONCE(timer_pending(timer));
1656 WARN_ON_ONCE(!list_empty(&work->entry));
1657
1658
1659
1660
1661
1662
1663
1664 if (!delay) {
1665 __queue_work(cpu, wq, &dwork->work);
1666 return;
1667 }
1668
1669 dwork->wq = wq;
1670 dwork->cpu = cpu;
1671 timer->expires = jiffies + delay;
1672
1673 if (unlikely(cpu != WORK_CPU_UNBOUND))
1674 add_timer_on(timer, cpu);
1675 else
1676 add_timer(timer);
1677 }

/**
 * queue_delayed_work_on - queue work on specific CPU after delay
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @dwork: work to queue
 * @delay: number of jiffies to wait before queueing
 *
 * Return: %false if @work was already on a queue, %true otherwise.  If
 * @delay is zero and @dwork is idle, it will be scheduled for immediate
 * execution.
 */
1690 bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
1691 struct delayed_work *dwork, unsigned long delay)
1692 {
1693 struct work_struct *work = &dwork->work;
1694 bool ret = false;
1695 unsigned long flags;
1696
1697
1698 local_irq_save(flags);
1699
1700 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1701 __queue_delayed_work(cpu, wq, dwork, delay);
1702 ret = true;
1703 }
1704
1705 local_irq_restore(flags);
1706 return ret;
1707 }
1708 EXPORT_SYMBOL(queue_delayed_work_on);
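
/*
 * Usage sketch (illustrative; my_dev, my_poll_workfn and
 * sample_hardware() are hypothetical): a self-rearming poller that runs
 * roughly once a second.
 *
 *	static void my_poll_workfn(struct work_struct *work)
 *	{
 *		struct my_dev *dev = container_of(to_delayed_work(work),
 *						  struct my_dev, poll_work);
 *
 *		sample_hardware(dev);
 *		queue_delayed_work(system_wq, &dev->poll_work, HZ);
 *	}
 *
 *	INIT_DELAYED_WORK(&dev->poll_work, my_poll_workfn);
 *	queue_delayed_work(system_wq, &dev->poll_work, HZ);
 */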

/**
 * mod_delayed_work_on - modify delay of or queue a delayed work on specific CPU
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @dwork: work to queue
 * @delay: number of jiffies to wait before queueing
 *
 * If @dwork is idle, equivalent to queue_delayed_work_on(); otherwise,
 * modify @dwork's timer so that it expires after @delay.  If @delay is
 * zero, @work is guaranteed to be scheduled immediately regardless of its
 * current state.
 *
 * Return: %false if @dwork was idle and queued, %true if @dwork was
 * pending and its timer was modified.
 *
 * This function is safe to call from any context including IRQ handler.
 * See try_to_grab_pending() for details.
 */
1728 bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
1729 struct delayed_work *dwork, unsigned long delay)
1730 {
1731 unsigned long flags;
1732 int ret;
1733
1734 do {
1735 ret = try_to_grab_pending(&dwork->work, true, &flags);
1736 } while (unlikely(ret == -EAGAIN));
1737
1738 if (likely(ret >= 0)) {
1739 __queue_delayed_work(cpu, wq, dwork, delay);
1740 local_irq_restore(flags);
1741 }
1742
1743
1744 return ret;
1745 }
1746 EXPORT_SYMBOL_GPL(mod_delayed_work_on);
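
/*
 * mod_delayed_work_on() lends itself to debounce/watchdog style
 * timeouts (illustrative, hypothetical names): every event pushes the
 * deadline back, so the handler only fires once activity stops.
 *
 *	mod_delayed_work(system_wq, &dev->idle_work, 5 * HZ);
 */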
1747
1748 static void rcu_work_rcufn(struct rcu_head *rcu)
1749 {
1750 struct rcu_work *rwork = container_of(rcu, struct rcu_work, rcu);
1751
1752
1753 local_irq_disable();
1754 __queue_work(WORK_CPU_UNBOUND, rwork->wq, &rwork->work);
1755 local_irq_enable();
1756 }

/**
 * queue_rcu_work - queue work after a RCU grace period
 * @wq: workqueue to use
 * @rwork: work to queue
 *
 * Return: %false if @rwork was already pending, %true otherwise.  Note
 * that a full RCU grace period is guaranteed only after a %true return.
 * While @rwork is guaranteed to be executed after a %false return, the
 * execution may happen before a full RCU grace period has passed.
 */
1768 bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork)
1769 {
1770 struct work_struct *work = &rwork->work;
1771
1772 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1773 rwork->wq = wq;
1774 call_rcu(&rwork->rcu, rcu_work_rcufn);
1775 return true;
1776 }
1777
1778 return false;
1779 }
1780 EXPORT_SYMBOL(queue_rcu_work);
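
/*
 * Typical queue_rcu_work() pattern (illustrative; my_obj and
 * my_obj_free_workfn are hypothetical): free an object in process
 * context only after an RCU grace period has elapsed.
 *
 *	static void my_obj_free_workfn(struct work_struct *work)
 *	{
 *		struct my_obj *obj = container_of(to_rcu_work(work),
 *						  struct my_obj, free_rwork);
 *
 *		kfree(obj);
 *	}
 *
 *	INIT_RCU_WORK(&obj->free_rwork, my_obj_free_workfn);
 *	queue_rcu_work(system_wq, &obj->free_rwork);
 */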

/**
 * worker_enter_idle - enter idle state
 * @worker: worker which is entering idle state
 *
 * @worker is entering idle state.  Update stats and idle timer if
 * necessary.
 *
 * LOCKING:
 * raw_spin_lock_irq(pool->lock).
 */
1792 static void worker_enter_idle(struct worker *worker)
1793 {
1794 struct worker_pool *pool = worker->pool;
1795
1796 if (WARN_ON_ONCE(worker->flags & WORKER_IDLE) ||
1797 WARN_ON_ONCE(!list_empty(&worker->entry) &&
1798 (worker->hentry.next || worker->hentry.pprev)))
1799 return;
1800
1801
1802 worker->flags |= WORKER_IDLE;
1803 pool->nr_idle++;
1804 worker->last_active = jiffies;
1805
1806
1807 list_add(&worker->entry, &pool->idle_list);
1808
1809 if (too_many_workers(pool) && !timer_pending(&pool->idle_timer))
1810 mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);
1811
1812
1813 WARN_ON_ONCE(pool->nr_workers == pool->nr_idle && pool->nr_running);
1814 }
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825 static void worker_leave_idle(struct worker *worker)
1826 {
1827 struct worker_pool *pool = worker->pool;
1828
1829 if (WARN_ON_ONCE(!(worker->flags & WORKER_IDLE)))
1830 return;
1831 worker_clr_flags(worker, WORKER_IDLE);
1832 pool->nr_idle--;
1833 list_del_init(&worker->entry);
1834 }
1835
1836 static struct worker *alloc_worker(int node)
1837 {
1838 struct worker *worker;
1839
1840 worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, node);
1841 if (worker) {
1842 INIT_LIST_HEAD(&worker->entry);
1843 INIT_LIST_HEAD(&worker->scheduled);
1844 INIT_LIST_HEAD(&worker->node);
1845
1846 worker->flags = WORKER_PREP;
1847 }
1848 return worker;
1849 }
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860 static void worker_attach_to_pool(struct worker *worker,
1861 struct worker_pool *pool)
1862 {
1863 mutex_lock(&wq_pool_attach_mutex);
1864
1865
1866
1867
1868
1869
1870 if (pool->flags & POOL_DISASSOCIATED)
1871 worker->flags |= WORKER_UNBOUND;
1872 else
1873 kthread_set_per_cpu(worker->task, pool->cpu);
1874
1875 if (worker->rescue_wq)
1876 set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
1877
1878 list_add_tail(&worker->node, &pool->workers);
1879 worker->pool = pool;
1880
1881 mutex_unlock(&wq_pool_attach_mutex);
1882 }
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892 static void worker_detach_from_pool(struct worker *worker)
1893 {
1894 struct worker_pool *pool = worker->pool;
1895 struct completion *detach_completion = NULL;
1896
1897 mutex_lock(&wq_pool_attach_mutex);
1898
1899 kthread_set_per_cpu(worker->task, -1);
1900 list_del(&worker->node);
1901 worker->pool = NULL;
1902
1903 if (list_empty(&pool->workers))
1904 detach_completion = pool->detach_completion;
1905 mutex_unlock(&wq_pool_attach_mutex);
1906
1907
1908 worker->flags &= ~(WORKER_UNBOUND | WORKER_REBOUND);
1909
1910 if (detach_completion)
1911 complete(detach_completion);
1912 }
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926 static struct worker *create_worker(struct worker_pool *pool)
1927 {
1928 struct worker *worker;
1929 int id;
1930 char id_buf[16];
1931
1932
1933 id = ida_alloc(&pool->worker_ida, GFP_KERNEL);
1934 if (id < 0)
1935 return NULL;
1936
1937 worker = alloc_worker(pool->node);
1938 if (!worker)
1939 goto fail;
1940
1941 worker->id = id;
1942
1943 if (pool->cpu >= 0)
1944 snprintf(id_buf, sizeof(id_buf), "%d:%d%s", pool->cpu, id,
1945 pool->attrs->nice < 0 ? "H" : "");
1946 else
1947 snprintf(id_buf, sizeof(id_buf), "u%d:%d", pool->id, id);
1948
1949 worker->task = kthread_create_on_node(worker_thread, worker, pool->node,
1950 "kworker/%s", id_buf);
1951 if (IS_ERR(worker->task))
1952 goto fail;
1953
1954 set_user_nice(worker->task, pool->attrs->nice);
1955 kthread_bind_mask(worker->task, pool->attrs->cpumask);
1956
1957
1958 worker_attach_to_pool(worker, pool);
1959
1960
1961 raw_spin_lock_irq(&pool->lock);
1962 worker->pool->nr_workers++;
1963 worker_enter_idle(worker);
1964 wake_up_process(worker->task);
1965 raw_spin_unlock_irq(&pool->lock);
1966
1967 return worker;
1968
1969 fail:
1970 ida_free(&pool->worker_ida, id);
1971 kfree(worker);
1972 return NULL;
1973 }
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985 static void destroy_worker(struct worker *worker)
1986 {
1987 struct worker_pool *pool = worker->pool;
1988
1989 lockdep_assert_held(&pool->lock);
1990
1991
1992 if (WARN_ON(worker->current_work) ||
1993 WARN_ON(!list_empty(&worker->scheduled)) ||
1994 WARN_ON(!(worker->flags & WORKER_IDLE)))
1995 return;
1996
1997 pool->nr_workers--;
1998 pool->nr_idle--;
1999
2000 list_del_init(&worker->entry);
2001 worker->flags |= WORKER_DIE;
2002 wake_up_process(worker->task);
2003 }
2004
2005 static void idle_worker_timeout(struct timer_list *t)
2006 {
2007 struct worker_pool *pool = from_timer(pool, t, idle_timer);
2008
2009 raw_spin_lock_irq(&pool->lock);
2010
2011 while (too_many_workers(pool)) {
2012 struct worker *worker;
2013 unsigned long expires;
2014
2015
2016 worker = list_entry(pool->idle_list.prev, struct worker, entry);
2017 expires = worker->last_active + IDLE_WORKER_TIMEOUT;
2018
2019 if (time_before(jiffies, expires)) {
2020 mod_timer(&pool->idle_timer, expires);
2021 break;
2022 }
2023
2024 destroy_worker(worker);
2025 }
2026
2027 raw_spin_unlock_irq(&pool->lock);
2028 }
2029
2030 static void send_mayday(struct work_struct *work)
2031 {
2032 struct pool_workqueue *pwq = get_work_pwq(work);
2033 struct workqueue_struct *wq = pwq->wq;
2034
2035 lockdep_assert_held(&wq_mayday_lock);
2036
2037 if (!wq->rescuer)
2038 return;
2039
2040
2041 if (list_empty(&pwq->mayday_node)) {
2042
2043
2044
2045
2046
2047 get_pwq(pwq);
2048 list_add_tail(&pwq->mayday_node, &wq->maydays);
2049 wake_up_process(wq->rescuer->task);
2050 }
2051 }
2052
2053 static void pool_mayday_timeout(struct timer_list *t)
2054 {
2055 struct worker_pool *pool = from_timer(pool, t, mayday_timer);
2056 struct work_struct *work;
2057
2058 raw_spin_lock_irq(&pool->lock);
2059 raw_spin_lock(&wq_mayday_lock);
2060
2061 if (need_to_create_worker(pool)) {
2062
2063
2064
2065
2066
2067
2068 list_for_each_entry(work, &pool->worklist, entry)
2069 send_mayday(work);
2070 }
2071
2072 raw_spin_unlock(&wq_mayday_lock);
2073 raw_spin_unlock_irq(&pool->lock);
2074
2075 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL);
2076 }
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096 static void maybe_create_worker(struct worker_pool *pool)
2097 __releases(&pool->lock)
2098 __acquires(&pool->lock)
2099 {
2100 restart:
2101 raw_spin_unlock_irq(&pool->lock);
2102
2103
2104 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);
2105
2106 while (true) {
2107 if (create_worker(pool) || !need_to_create_worker(pool))
2108 break;
2109
2110 schedule_timeout_interruptible(CREATE_COOLDOWN);
2111
2112 if (!need_to_create_worker(pool))
2113 break;
2114 }
2115
2116 del_timer_sync(&pool->mayday_timer);
2117 raw_spin_lock_irq(&pool->lock);
2118
2119
2120
2121
2122
2123 if (need_to_create_worker(pool))
2124 goto restart;
2125 }
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149 static bool manage_workers(struct worker *worker)
2150 {
2151 struct worker_pool *pool = worker->pool;
2152
2153 if (pool->flags & POOL_MANAGER_ACTIVE)
2154 return false;
2155
2156 pool->flags |= POOL_MANAGER_ACTIVE;
2157 pool->manager = worker;
2158
2159 maybe_create_worker(pool);
2160
2161 pool->manager = NULL;
2162 pool->flags &= ~POOL_MANAGER_ACTIVE;
2163 rcuwait_wake_up(&manager_wait);
2164 return true;
2165 }

/**
 * process_one_work - process single work
 * @worker: self
 * @work: work to process
 *
 * Process @work.  This function contains all the logics necessary to
 * process a single work including synchronization against and
 * interaction with other workers on the same cpu, queueing and
 * flushing.  As long as context requirement is met, any worker can
 * call this function to process a work.
 *
 * CONTEXT:
 * raw_spin_lock_irq(pool->lock) which is released and regrabbed.
 */
2181 static void process_one_work(struct worker *worker, struct work_struct *work)
2182 __releases(&pool->lock)
2183 __acquires(&pool->lock)
2184 {
2185 struct pool_workqueue *pwq = get_work_pwq(work);
2186 struct worker_pool *pool = worker->pool;
2187 bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE;
2188 unsigned long work_data;
2189 struct worker *collision;
2190 #ifdef CONFIG_LOCKDEP
2191
2192
2193
2194
2195
2196
2197
2198 struct lockdep_map lockdep_map;
2199
2200 lockdep_copy_map(&lockdep_map, &work->lockdep_map);
2201 #endif
2202
2203 WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
2204 raw_smp_processor_id() != pool->cpu);
2205
2206
2207
2208
2209
2210
2211
2212 collision = find_worker_executing_work(pool, work);
2213 if (unlikely(collision)) {
2214 move_linked_works(work, &collision->scheduled, NULL);
2215 return;
2216 }
2217
2218
2219 debug_work_deactivate(work);
2220 hash_add(pool->busy_hash, &worker->hentry, (unsigned long)work);
2221 worker->current_work = work;
2222 worker->current_func = work->func;
2223 worker->current_pwq = pwq;
2224 work_data = *work_data_bits(work);
2225 worker->current_color = get_work_color(work_data);
2226
2227
2228
2229
2230
2231 strscpy(worker->desc, pwq->wq->name, WORKER_DESC_LEN);
2232
2233 list_del_init(&work->entry);
2234
2235
2236
2237
2238
2239
2240
2241 if (unlikely(cpu_intensive))
2242 worker_set_flags(worker, WORKER_CPU_INTENSIVE);
2243
2244
2245
2246
2247
2248
2249
2250
2251 if (need_more_worker(pool))
2252 wake_up_worker(pool);
2253
2254
2255
2256
2257
2258
2259
2260 set_work_pool_and_clear_pending(work, pool->id);
2261
2262 raw_spin_unlock_irq(&pool->lock);
2263
2264 lock_map_acquire(&pwq->wq->lockdep_map);
2265 lock_map_acquire(&lockdep_map);
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287 lockdep_invariant_state(true);
2288 trace_workqueue_execute_start(work);
2289 worker->current_func(work);
2290
2291
2292
2293
2294 trace_workqueue_execute_end(work, worker->current_func);
2295 lock_map_release(&lockdep_map);
2296 lock_map_release(&pwq->wq->lockdep_map);
2297
2298 if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
2299 pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
2300 " last function: %ps\n",
2301 current->comm, preempt_count(), task_pid_nr(current),
2302 worker->current_func);
2303 debug_show_held_locks(current);
2304 dump_stack();
2305 }
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315 cond_resched();
2316
2317 raw_spin_lock_irq(&pool->lock);
2318
2319
2320 if (unlikely(cpu_intensive))
2321 worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
2322
2323
2324 worker->last_func = worker->current_func;
2325
2326
2327 hash_del(&worker->hentry);
2328 worker->current_work = NULL;
2329 worker->current_func = NULL;
2330 worker->current_pwq = NULL;
2331 worker->current_color = INT_MAX;
2332 pwq_dec_nr_in_flight(pwq, work_data);
2333 }
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347 static void process_scheduled_works(struct worker *worker)
2348 {
2349 while (!list_empty(&worker->scheduled)) {
2350 struct work_struct *work = list_first_entry(&worker->scheduled,
2351 struct work_struct, entry);
2352 process_one_work(worker, work);
2353 }
2354 }
2355
2356 static void set_pf_worker(bool val)
2357 {
2358 mutex_lock(&wq_pool_attach_mutex);
2359 if (val)
2360 current->flags |= PF_WQ_WORKER;
2361 else
2362 current->flags &= ~PF_WQ_WORKER;
2363 mutex_unlock(&wq_pool_attach_mutex);
2364 }

/**
 * worker_thread - the worker thread function
 * @__worker: self
 *
 * The worker thread function.  All workers belong to a worker_pool -
 * either a per-cpu one or dynamic unbound one.  These workers process all
 * work items regardless of their specific target workqueue.  The only
 * exception is work items which belong to workqueues with a rescuer which
 * will be explained in rescuer_thread().
 *
 * Return: 0
 */
2378 static int worker_thread(void *__worker)
2379 {
2380 struct worker *worker = __worker;
2381 struct worker_pool *pool = worker->pool;
2382
2383
2384 set_pf_worker(true);
2385 woke_up:
2386 raw_spin_lock_irq(&pool->lock);
2387
2388
2389 if (unlikely(worker->flags & WORKER_DIE)) {
2390 raw_spin_unlock_irq(&pool->lock);
2391 WARN_ON_ONCE(!list_empty(&worker->entry));
2392 set_pf_worker(false);
2393
2394 set_task_comm(worker->task, "kworker/dying");
2395 ida_free(&pool->worker_ida, worker->id);
2396 worker_detach_from_pool(worker);
2397 kfree(worker);
2398 return 0;
2399 }
2400
2401 worker_leave_idle(worker);
2402 recheck:
2403
2404 if (!need_more_worker(pool))
2405 goto sleep;
2406
2407
2408 if (unlikely(!may_start_working(pool)) && manage_workers(worker))
2409 goto recheck;
2410
2411
2412
2413
2414
2415
2416 WARN_ON_ONCE(!list_empty(&worker->scheduled));
2417
2418
2419
2420
2421
2422
2423
2424
2425 worker_clr_flags(worker, WORKER_PREP | WORKER_REBOUND);
2426
2427 do {
2428 struct work_struct *work =
2429 list_first_entry(&pool->worklist,
2430 struct work_struct, entry);
2431
2432 pool->watchdog_ts = jiffies;
2433
2434 if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
2435
2436 process_one_work(worker, work);
2437 if (unlikely(!list_empty(&worker->scheduled)))
2438 process_scheduled_works(worker);
2439 } else {
2440 move_linked_works(work, &worker->scheduled, NULL);
2441 process_scheduled_works(worker);
2442 }
2443 } while (keep_working(pool));
2444
2445 worker_set_flags(worker, WORKER_PREP);
2446 sleep:
2447
2448
2449
2450
2451
2452
2453
2454 worker_enter_idle(worker);
2455 __set_current_state(TASK_IDLE);
2456 raw_spin_unlock_irq(&pool->lock);
2457 schedule();
2458 goto woke_up;
2459 }

/**
 * rescuer_thread - the rescuer thread function
 * @__rescuer: self
 *
 * Workqueue rescuer thread function.  There's one rescuer for each
 * workqueue which has WQ_MEM_RECLAIM set.
 *
 * Regular work processing on a pool may block trying to create a new
 * worker which uses GFP_KERNEL allocation which has slight chance of
 * developing into deadlock if some works currently on the same queue
 * need to be processed to satisfy the GFP_KERNEL allocation.  This is
 * the problem rescuer solves.
 *
 * When such condition is possible, the pool summons rescuers of all
 * workqueues which have works queued on the pool and let them process
 * those works so that forward progress can be guaranteed.
 *
 * This should happen rarely.
 *
 * Return: 0
 */
2482 static int rescuer_thread(void *__rescuer)
2483 {
2484 struct worker *rescuer = __rescuer;
2485 struct workqueue_struct *wq = rescuer->rescue_wq;
2486 struct list_head *scheduled = &rescuer->scheduled;
2487 bool should_stop;
2488
2489 set_user_nice(current, RESCUER_NICE_LEVEL);
2490
2491
2492
2493
2494
2495 set_pf_worker(true);
2496 repeat:
2497 set_current_state(TASK_IDLE);
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507 should_stop = kthread_should_stop();
2508
2509
2510 raw_spin_lock_irq(&wq_mayday_lock);
2511
2512 while (!list_empty(&wq->maydays)) {
2513 struct pool_workqueue *pwq = list_first_entry(&wq->maydays,
2514 struct pool_workqueue, mayday_node);
2515 struct worker_pool *pool = pwq->pool;
2516 struct work_struct *work, *n;
2517 bool first = true;
2518
2519 __set_current_state(TASK_RUNNING);
2520 list_del_init(&pwq->mayday_node);
2521
2522 raw_spin_unlock_irq(&wq_mayday_lock);
2523
2524 worker_attach_to_pool(rescuer, pool);
2525
2526 raw_spin_lock_irq(&pool->lock);
2527
2528
2529
2530
2531
2532 WARN_ON_ONCE(!list_empty(scheduled));
2533 list_for_each_entry_safe(work, n, &pool->worklist, entry) {
2534 if (get_work_pwq(work) == pwq) {
2535 if (first)
2536 pool->watchdog_ts = jiffies;
2537 move_linked_works(work, scheduled, &n);
2538 }
2539 first = false;
2540 }
2541
2542 if (!list_empty(scheduled)) {
2543 process_scheduled_works(rescuer);
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554 if (pwq->nr_active && need_to_create_worker(pool)) {
2555 raw_spin_lock(&wq_mayday_lock);
2556
2557
2558
2559
2560 if (wq->rescuer && list_empty(&pwq->mayday_node)) {
2561 get_pwq(pwq);
2562 list_add_tail(&pwq->mayday_node, &wq->maydays);
2563 }
2564 raw_spin_unlock(&wq_mayday_lock);
2565 }
2566 }
2567
2568
2569
2570
2571
2572 put_pwq(pwq);
2573
2574
2575
2576
2577
2578
2579 if (need_more_worker(pool))
2580 wake_up_worker(pool);
2581
2582 raw_spin_unlock_irq(&pool->lock);
2583
2584 worker_detach_from_pool(rescuer);
2585
2586 raw_spin_lock_irq(&wq_mayday_lock);
2587 }
2588
2589 raw_spin_unlock_irq(&wq_mayday_lock);
2590
2591 if (should_stop) {
2592 __set_current_state(TASK_RUNNING);
2593 set_pf_worker(false);
2594 return 0;
2595 }
2596
2597
2598 WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING));
2599 schedule();
2600 goto repeat;
2601 }

/**
 * check_flush_dependency - check for flush dependency sanity
 * @target_wq: workqueue being flushed
 * @target_work: work item being flushed (NULL for workqueue flushes)
 *
 * %current is trying to flush the whole @target_wq or @target_work on it.
 * If @target_wq doesn't have %WQ_MEM_RECLAIM, verify that %current isn't
 * reclaiming memory or running on a workqueue which doesn't have
 * %WQ_MEM_RECLAIM as that can break forward-progress guarantee leading to
 * a deadlock.
 */
2614 static void check_flush_dependency(struct workqueue_struct *target_wq,
2615 struct work_struct *target_work)
2616 {
2617 work_func_t target_func = target_work ? target_work->func : NULL;
2618 struct worker *worker;
2619
2620 if (target_wq->flags & WQ_MEM_RECLAIM)
2621 return;
2622
2623 worker = current_wq_worker();
2624
2625 WARN_ONCE(current->flags & PF_MEMALLOC,
2626 "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%ps",
2627 current->pid, current->comm, target_wq->name, target_func);
2628 WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
2629 (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
2630 "workqueue: WQ_MEM_RECLAIM %s:%ps is flushing !WQ_MEM_RECLAIM %s:%ps",
2631 worker->current_pwq->wq->name, worker->current_func,
2632 target_wq->name, target_func);
2633 }
2634
2635 struct wq_barrier {
2636 struct work_struct work;
2637 struct completion done;
2638 struct task_struct *task;
2639 };
2640
2641 static void wq_barrier_func(struct work_struct *work)
2642 {
2643 struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
2644 complete(&barr->done);
2645 }
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671 static void insert_wq_barrier(struct pool_workqueue *pwq,
2672 struct wq_barrier *barr,
2673 struct work_struct *target, struct worker *worker)
2674 {
2675 unsigned int work_flags = 0;
2676 unsigned int work_color;
2677 struct list_head *head;
2678
2679
2680
2681
2682
2683
2684
2685 INIT_WORK_ONSTACK(&barr->work, wq_barrier_func);
2686 __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
2687
2688 init_completion_map(&barr->done, &target->lockdep_map);
2689
2690 barr->task = current;
2691
2692
2693 work_flags |= WORK_STRUCT_INACTIVE;
2694
2695
2696
2697
2698
2699 if (worker) {
2700 head = worker->scheduled.next;
2701 work_color = worker->current_color;
2702 } else {
2703 unsigned long *bits = work_data_bits(target);
2704
2705 head = target->entry.next;
2706
2707 work_flags |= *bits & WORK_STRUCT_LINKED;
2708 work_color = get_work_color(*bits);
2709 __set_bit(WORK_STRUCT_LINKED_BIT, bits);
2710 }
2711
2712 pwq->nr_in_flight[work_color]++;
2713 work_flags |= work_color_to_flags(work_color);
2714
2715 debug_work_activate(&barr->work);
2716 insert_work(pwq, &barr->work, head, work_flags);
2717 }
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750 static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
2751 int flush_color, int work_color)
2752 {
2753 bool wait = false;
2754 struct pool_workqueue *pwq;
2755
2756 if (flush_color >= 0) {
2757 WARN_ON_ONCE(atomic_read(&wq->nr_pwqs_to_flush));
2758 atomic_set(&wq->nr_pwqs_to_flush, 1);
2759 }
2760
2761 for_each_pwq(pwq, wq) {
2762 struct worker_pool *pool = pwq->pool;
2763
2764 raw_spin_lock_irq(&pool->lock);
2765
2766 if (flush_color >= 0) {
2767 WARN_ON_ONCE(pwq->flush_color != -1);
2768
2769 if (pwq->nr_in_flight[flush_color]) {
2770 pwq->flush_color = flush_color;
2771 atomic_inc(&wq->nr_pwqs_to_flush);
2772 wait = true;
2773 }
2774 }
2775
2776 if (work_color >= 0) {
2777 WARN_ON_ONCE(work_color != work_next_color(pwq->work_color));
2778 pwq->work_color = work_color;
2779 }
2780
2781 raw_spin_unlock_irq(&pool->lock);
2782 }
2783
2784 if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush))
2785 complete(&wq->first_flusher->done);
2786
2787 return wait;
2788 }

/**
 * __flush_workqueue - ensure that any scheduled work has run to completion.
 * @wq: workqueue to flush
 *
 * This function sleeps until all work items which were queued on entry
 * have finished execution, but it is not livelocked by new incoming ones.
 */
2797 void __flush_workqueue(struct workqueue_struct *wq)
2798 {
2799 struct wq_flusher this_flusher = {
2800 .list = LIST_HEAD_INIT(this_flusher.list),
2801 .flush_color = -1,
2802 .done = COMPLETION_INITIALIZER_ONSTACK_MAP(this_flusher.done, wq->lockdep_map),
2803 };
2804 int next_color;
2805
2806 if (WARN_ON(!wq_online))
2807 return;
2808
2809 lock_map_acquire(&wq->lockdep_map);
2810 lock_map_release(&wq->lockdep_map);
2811
2812 mutex_lock(&wq->mutex);
2813
2814
2815
2816
2817 next_color = work_next_color(wq->work_color);
2818
2819 if (next_color != wq->flush_color) {
2820
2821
2822
2823
2824
2825 WARN_ON_ONCE(!list_empty(&wq->flusher_overflow));
2826 this_flusher.flush_color = wq->work_color;
2827 wq->work_color = next_color;
2828
2829 if (!wq->first_flusher) {
2830
2831 WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);
2832
2833 wq->first_flusher = &this_flusher;
2834
2835 if (!flush_workqueue_prep_pwqs(wq, wq->flush_color,
2836 wq->work_color)) {
2837
2838 wq->flush_color = next_color;
2839 wq->first_flusher = NULL;
2840 goto out_unlock;
2841 }
2842 } else {
2843
2844 WARN_ON_ONCE(wq->flush_color == this_flusher.flush_color);
2845 list_add_tail(&this_flusher.list, &wq->flusher_queue);
2846 flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
2847 }
2848 } else {
2849
2850
2851
2852
2853
2854 list_add_tail(&this_flusher.list, &wq->flusher_overflow);
2855 }
2856
2857 check_flush_dependency(wq, NULL);
2858
2859 mutex_unlock(&wq->mutex);
2860
2861 wait_for_completion(&this_flusher.done);
2862
2863
2864
2865
2866
2867
2868
2869 if (READ_ONCE(wq->first_flusher) != &this_flusher)
2870 return;
2871
2872 mutex_lock(&wq->mutex);
2873
2874
2875 if (wq->first_flusher != &this_flusher)
2876 goto out_unlock;
2877
2878 WRITE_ONCE(wq->first_flusher, NULL);
2879
2880 WARN_ON_ONCE(!list_empty(&this_flusher.list));
2881 WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);
2882
2883 while (true) {
2884 struct wq_flusher *next, *tmp;
2885
2886
2887 list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) {
2888 if (next->flush_color != wq->flush_color)
2889 break;
2890 list_del_init(&next->list);
2891 complete(&next->done);
2892 }
2893
2894 WARN_ON_ONCE(!list_empty(&wq->flusher_overflow) &&
2895 wq->flush_color != work_next_color(wq->work_color));
2896
2897
2898 wq->flush_color = work_next_color(wq->flush_color);
2899
2900
2901 if (!list_empty(&wq->flusher_overflow)) {
2902
2903
2904
2905
2906
2907
2908 list_for_each_entry(tmp, &wq->flusher_overflow, list)
2909 tmp->flush_color = wq->work_color;
2910
2911 wq->work_color = work_next_color(wq->work_color);
2912
2913 list_splice_tail_init(&wq->flusher_overflow,
2914 &wq->flusher_queue);
2915 flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
2916 }
2917
2918 if (list_empty(&wq->flusher_queue)) {
2919 WARN_ON_ONCE(wq->flush_color != wq->work_color);
2920 break;
2921 }
2922
2923
2924
2925
2926
2927 WARN_ON_ONCE(wq->flush_color == wq->work_color);
2928 WARN_ON_ONCE(wq->flush_color != next->flush_color);
2929
2930 list_del_init(&next->list);
2931 wq->first_flusher = next;
2932
2933 if (flush_workqueue_prep_pwqs(wq, wq->flush_color, -1))
2934 break;
2935
2936
2937
2938
2939
2940 wq->first_flusher = NULL;
2941 }
2942
2943 out_unlock:
2944 mutex_unlock(&wq->mutex);
2945 }
2946 EXPORT_SYMBOL(__flush_workqueue);
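
/*
 * Callers normally go through the flush_workqueue() wrapper
 * (illustrative; "my_wq" is hypothetical), typically to make sure all
 * previously queued work has finished before tearing shared state down:
 *
 *	flush_workqueue(my_wq);
 *
 * Only work items queued before the call are waited for; items queued
 * while the flush is in progress do not livelock it.
 */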

/**
 * drain_workqueue - drain a workqueue
 * @wq: workqueue to drain
 *
 * Wait until the workqueue becomes empty.  While draining is in progress,
 * only chain queueing is allowed.  IOW, only currently pending or running
 * work items on @wq can queue further work items on it.  @wq is flushed
 * repeatedly until it becomes empty.  The number of flushing is determined
 * by the depth of chaining and should be relatively short.  Whine if it
 * takes too long.
 */
2959 void drain_workqueue(struct workqueue_struct *wq)
2960 {
2961 unsigned int flush_cnt = 0;
2962 struct pool_workqueue *pwq;
2963
2964
2965
2966
2967
2968
2969 mutex_lock(&wq->mutex);
2970 if (!wq->nr_drainers++)
2971 wq->flags |= __WQ_DRAINING;
2972 mutex_unlock(&wq->mutex);
2973 reflush:
2974 __flush_workqueue(wq);
2975
2976 mutex_lock(&wq->mutex);
2977
2978 for_each_pwq(pwq, wq) {
2979 bool drained;
2980
2981 raw_spin_lock_irq(&pwq->pool->lock);
2982 drained = !pwq->nr_active && list_empty(&pwq->inactive_works);
2983 raw_spin_unlock_irq(&pwq->pool->lock);
2984
2985 if (drained)
2986 continue;
2987
2988 if (++flush_cnt == 10 ||
2989 (flush_cnt % 100 == 0 && flush_cnt <= 1000))
2990 pr_warn("workqueue %s: %s() isn't complete after %u tries\n",
2991 wq->name, __func__, flush_cnt);
2992
2993 mutex_unlock(&wq->mutex);
2994 goto reflush;
2995 }
2996
2997 if (!--wq->nr_drainers)
2998 wq->flags &= ~__WQ_DRAINING;
2999 mutex_unlock(&wq->mutex);
3000 }
3001 EXPORT_SYMBOL_GPL(drain_workqueue);
3002
3003 static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
3004 bool from_cancel)
3005 {
3006 struct worker *worker = NULL;
3007 struct worker_pool *pool;
3008 struct pool_workqueue *pwq;
3009
3010 might_sleep();
3011
3012 rcu_read_lock();
3013 pool = get_work_pool(work);
3014 if (!pool) {
3015 rcu_read_unlock();
3016 return false;
3017 }
3018
3019 raw_spin_lock_irq(&pool->lock);
3020
3021 pwq = get_work_pwq(work);
3022 if (pwq) {
3023 if (unlikely(pwq->pool != pool))
3024 goto already_gone;
3025 } else {
3026 worker = find_worker_executing_work(pool, work);
3027 if (!worker)
3028 goto already_gone;
3029 pwq = worker->current_pwq;
3030 }
3031
3032 check_flush_dependency(pwq->wq, work);
3033
3034 insert_wq_barrier(pwq, barr, work, worker);
3035 raw_spin_unlock_irq(&pool->lock);
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046 if (!from_cancel &&
3047 (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer)) {
3048 lock_map_acquire(&pwq->wq->lockdep_map);
3049 lock_map_release(&pwq->wq->lockdep_map);
3050 }
3051 rcu_read_unlock();
3052 return true;
3053 already_gone:
3054 raw_spin_unlock_irq(&pool->lock);
3055 rcu_read_unlock();
3056 return false;
3057 }
3058
3059 static bool __flush_work(struct work_struct *work, bool from_cancel)
3060 {
3061 struct wq_barrier barr;
3062
3063 if (WARN_ON(!wq_online))
3064 return false;
3065
3066 if (WARN_ON(!work->func))
3067 return false;
3068
3069 lock_map_acquire(&work->lockdep_map);
3070 lock_map_release(&work->lockdep_map);
3071
3072 if (start_flush_work(work, &barr, from_cancel)) {
3073 wait_for_completion(&barr.done);
3074 destroy_work_on_stack(&barr.work);
3075 return true;
3076 } else {
3077 return false;
3078 }
3079 }
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
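/**
 * flush_work - wait for a work to finish executing the last queueing instance
 * @work: the work to flush
 *
 * Wait until @work has finished execution.  @work is guaranteed to be idle
 * on return if it hasn't been requeued since flush started.
 *
 * Return:
 * %true if flush_work() waited for the work to finish execution,
 * %false if it was already idle.
 */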
3092 bool flush_work(struct work_struct *work)
3093 {
3094 return __flush_work(work, false);
3095 }
3096 EXPORT_SYMBOL_GPL(flush_work);
3097
3098 struct cwt_wait {
3099 wait_queue_entry_t wait;
3100 struct work_struct *work;
3101 };
3102
3103 static int cwt_wakefn(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
3104 {
3105 struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait);
3106
3107 if (cwait->work != key)
3108 return 0;
3109 return autoremove_wake_function(wait, mode, sync, key);
3110 }
3111
3112 static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
3113 {
3114 static DECLARE_WAIT_QUEUE_HEAD(cancel_waitq);
3115 unsigned long flags;
3116 int ret;
3117
3118 do {
3119 ret = try_to_grab_pending(work, is_dwork, &flags);
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136 if (unlikely(ret == -ENOENT)) {
3137 struct cwt_wait cwait;
3138
3139 init_wait(&cwait.wait);
3140 cwait.wait.func = cwt_wakefn;
3141 cwait.work = work;
3142
3143 prepare_to_wait_exclusive(&cancel_waitq, &cwait.wait,
3144 TASK_UNINTERRUPTIBLE);
3145 if (work_is_canceling(work))
3146 schedule();
3147 finish_wait(&cancel_waitq, &cwait.wait);
3148 }
3149 } while (unlikely(ret < 0));
3150
3151
3152 mark_work_canceling(work);
3153 local_irq_restore(flags);
3154
3155
3156
3157
3158
3159 if (wq_online)
3160 __flush_work(work, true);
3161
3162 clear_work_data(work);
3163
3164
3165
3166
3167
3168
3169 smp_mb();
3170 if (waitqueue_active(&cancel_waitq))
3171 __wake_up(&cancel_waitq, TASK_NORMAL, 1, work);
3172
3173 return ret;
3174 }
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
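/**
 * cancel_work_sync - cancel a work and wait for it to finish
 * @work: the work to cancel
 *
 * Cancel @work and wait for its execution to finish.  This function can be
 * used even if the work re-queues itself or migrates to another workqueue.
 * On return from this function, @work is guaranteed to be not pending or
 * executing on any CPU.
 *
 * Don't use this for delayed_work items; use cancel_delayed_work_sync()
 * instead.  The caller must ensure the workqueue on which @work was last
 * queued can't be destroyed before this function returns.
 *
 * Return: %true if @work was pending, %false otherwise.
 */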
3194 bool cancel_work_sync(struct work_struct *work)
3195 {
3196 return __cancel_work_timer(work, false);
3197 }
3198 EXPORT_SYMBOL_GPL(cancel_work_sync);
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
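/**
 * flush_delayed_work - wait for a dwork to finish executing the last queueing
 * @dwork: the delayed work to flush
 *
 * The delayed timer is cancelled and the pending work is queued for
 * immediate execution.  Like flush_work(), this function only considers
 * the last queueing instance of @dwork.
 *
 * Return: %true if flush_work() waited for the work to finish execution,
 * %false if it was already idle.
 */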
3212 bool flush_delayed_work(struct delayed_work *dwork)
3213 {
3214 local_irq_disable();
3215 if (del_timer_sync(&dwork->timer))
3216 __queue_work(dwork->cpu, dwork->wq, &dwork->work);
3217 local_irq_enable();
3218 return flush_work(&dwork->work);
3219 }
3220 EXPORT_SYMBOL(flush_delayed_work);
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230 bool flush_rcu_work(struct rcu_work *rwork)
3231 {
3232 if (test_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&rwork->work))) {
3233 rcu_barrier();
3234 flush_work(&rwork->work);
3235 return true;
3236 } else {
3237 return flush_work(&rwork->work);
3238 }
3239 }
3240 EXPORT_SYMBOL(flush_rcu_work);
3241
3242 static bool __cancel_work(struct work_struct *work, bool is_dwork)
3243 {
3244 unsigned long flags;
3245 int ret;
3246
3247 do {
3248 ret = try_to_grab_pending(work, is_dwork, &flags);
3249 } while (unlikely(ret == -EAGAIN));
3250
3251 if (unlikely(ret < 0))
3252 return false;
3253
3254 set_work_pool_and_clear_pending(work, get_work_pool_id(work));
3255 local_irq_restore(flags);
3256 return ret;
3257 }
3258
3259
3260
3261
3262 bool cancel_work(struct work_struct *work)
3263 {
3264 return __cancel_work(work, false);
3265 }
3266 EXPORT_SYMBOL(cancel_work);
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284 bool cancel_delayed_work(struct delayed_work *dwork)
3285 {
3286 return __cancel_work(&dwork->work, true);
3287 }
3288 EXPORT_SYMBOL(cancel_delayed_work);
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299 bool cancel_delayed_work_sync(struct delayed_work *dwork)
3300 {
3301 return __cancel_work_timer(&dwork->work, true);
3302 }
3303 EXPORT_SYMBOL(cancel_delayed_work_sync);
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
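/**
 * schedule_on_each_cpu - execute a function synchronously on each online CPU
 * @func: the function to call
 *
 * schedule_on_each_cpu() executes @func on each online CPU using the
 * system workqueue and blocks until all such work items have completed.
 * schedule_on_each_cpu() is very slow.
 *
 * Return: 0 on success, -ENOMEM if the per-cpu work allocation fails.
 */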
3316 int schedule_on_each_cpu(work_func_t func)
3317 {
3318 int cpu;
3319 struct work_struct __percpu *works;
3320
3321 works = alloc_percpu(struct work_struct);
3322 if (!works)
3323 return -ENOMEM;
3324
3325 cpus_read_lock();
3326
3327 for_each_online_cpu(cpu) {
3328 struct work_struct *work = per_cpu_ptr(works, cpu);
3329
3330 INIT_WORK(work, func);
3331 schedule_work_on(cpu, work);
3332 }
3333
3334 for_each_online_cpu(cpu)
3335 flush_work(per_cpu_ptr(works, cpu));
3336
3337 cpus_read_unlock();
3338 free_percpu(works);
3339 return 0;
3340 }
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354 int execute_in_process_context(work_func_t fn, struct execute_work *ew)
3355 {
3356 if (!in_interrupt()) {
3357 fn(&ew->work);
3358 return 0;
3359 }
3360
3361 INIT_WORK(&ew->work, fn);
3362 schedule_work(&ew->work);
3363
3364 return 1;
3365 }
3366 EXPORT_SYMBOL_GPL(execute_in_process_context);
3367
3368
3369
3370
3371
3372
3373
3374 void free_workqueue_attrs(struct workqueue_attrs *attrs)
3375 {
3376 if (attrs) {
3377 free_cpumask_var(attrs->cpumask);
3378 kfree(attrs);
3379 }
3380 }
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390 struct workqueue_attrs *alloc_workqueue_attrs(void)
3391 {
3392 struct workqueue_attrs *attrs;
3393
3394 attrs = kzalloc(sizeof(*attrs), GFP_KERNEL);
3395 if (!attrs)
3396 goto fail;
3397 if (!alloc_cpumask_var(&attrs->cpumask, GFP_KERNEL))
3398 goto fail;
3399
3400 cpumask_copy(attrs->cpumask, cpu_possible_mask);
3401 return attrs;
3402 fail:
3403 free_workqueue_attrs(attrs);
3404 return NULL;
3405 }
3406
3407 static void copy_workqueue_attrs(struct workqueue_attrs *to,
3408 const struct workqueue_attrs *from)
3409 {
3410 to->nice = from->nice;
3411 cpumask_copy(to->cpumask, from->cpumask);
3412
3413
3414
3415
3416
3417 to->no_numa = from->no_numa;
3418 }
3419
3420
3421 static u32 wqattrs_hash(const struct workqueue_attrs *attrs)
3422 {
3423 u32 hash = 0;
3424
3425 hash = jhash_1word(attrs->nice, hash);
3426 hash = jhash(cpumask_bits(attrs->cpumask),
3427 BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), hash);
3428 return hash;
3429 }
3430
3431
3432 static bool wqattrs_equal(const struct workqueue_attrs *a,
3433 const struct workqueue_attrs *b)
3434 {
3435 if (a->nice != b->nice)
3436 return false;
3437 if (!cpumask_equal(a->cpumask, b->cpumask))
3438 return false;
3439 return true;
3440 }
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
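/*
 * init_worker_pool - initialize a newly zalloc'd worker_pool
 * @pool: worker_pool to initialize
 *
 * Initialize the lock, lists, timers and ida of @pool and allocate its
 * attrs.  Returns 0 on success, -ENOMEM on attrs allocation failure.  Even
 * on failure, all fields of @pool are initialized so put_unbound_pool()
 * can be used to release it.
 */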
3452 static int init_worker_pool(struct worker_pool *pool)
3453 {
3454 raw_spin_lock_init(&pool->lock);
3455 pool->id = -1;
3456 pool->cpu = -1;
3457 pool->node = NUMA_NO_NODE;
3458 pool->flags |= POOL_DISASSOCIATED;
3459 pool->watchdog_ts = jiffies;
3460 INIT_LIST_HEAD(&pool->worklist);
3461 INIT_LIST_HEAD(&pool->idle_list);
3462 hash_init(pool->busy_hash);
3463
3464 timer_setup(&pool->idle_timer, idle_worker_timeout, TIMER_DEFERRABLE);
3465
3466 timer_setup(&pool->mayday_timer, pool_mayday_timeout, 0);
3467
3468 INIT_LIST_HEAD(&pool->workers);
3469
3470 ida_init(&pool->worker_ida);
3471 INIT_HLIST_NODE(&pool->hash_node);
3472 pool->refcnt = 1;
3473
3474
3475 pool->attrs = alloc_workqueue_attrs();
3476 if (!pool->attrs)
3477 return -ENOMEM;
3478 return 0;
3479 }
3480
3481 #ifdef CONFIG_LOCKDEP
3482 static void wq_init_lockdep(struct workqueue_struct *wq)
3483 {
3484 char *lock_name;
3485
3486 lockdep_register_key(&wq->key);
3487 lock_name = kasprintf(GFP_KERNEL, "%s%s", "(wq_completion)", wq->name);
3488 if (!lock_name)
3489 lock_name = wq->name;
3490
3491 wq->lock_name = lock_name;
3492 lockdep_init_map(&wq->lockdep_map, lock_name, &wq->key, 0);
3493 }
3494
3495 static void wq_unregister_lockdep(struct workqueue_struct *wq)
3496 {
3497 lockdep_unregister_key(&wq->key);
3498 }
3499
3500 static void wq_free_lockdep(struct workqueue_struct *wq)
3501 {
3502 if (wq->lock_name != wq->name)
3503 kfree(wq->lock_name);
3504 }
3505 #else
3506 static void wq_init_lockdep(struct workqueue_struct *wq)
3507 {
3508 }
3509
3510 static void wq_unregister_lockdep(struct workqueue_struct *wq)
3511 {
3512 }
3513
3514 static void wq_free_lockdep(struct workqueue_struct *wq)
3515 {
3516 }
3517 #endif
3518
3519 static void rcu_free_wq(struct rcu_head *rcu)
3520 {
3521 struct workqueue_struct *wq =
3522 container_of(rcu, struct workqueue_struct, rcu);
3523
3524 wq_free_lockdep(wq);
3525
3526 if (!(wq->flags & WQ_UNBOUND))
3527 free_percpu(wq->cpu_pwqs);
3528 else
3529 free_workqueue_attrs(wq->unbound_attrs);
3530
3531 kfree(wq);
3532 }
3533
3534 static void rcu_free_pool(struct rcu_head *rcu)
3535 {
3536 struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu);
3537
3538 ida_destroy(&pool->worker_ida);
3539 free_workqueue_attrs(pool->attrs);
3540 kfree(pool);
3541 }
3542
3543
3544 static bool wq_manager_inactive(struct worker_pool *pool)
3545 {
3546 raw_spin_lock_irq(&pool->lock);
3547
3548 if (pool->flags & POOL_MANAGER_ACTIVE) {
3549 raw_spin_unlock_irq(&pool->lock);
3550 return false;
3551 }
3552 return true;
3553 }
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
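/*
 * put_unbound_pool - put a worker_pool
 * @pool: worker_pool to put
 *
 * Put @pool.  If its refcnt reaches zero, it gets destroyed in an RCU safe
 * way: the manager role is claimed so no new workers are created, existing
 * idle workers are destroyed, and the pool is freed once the last worker
 * detaches.
 *
 * Should be called with wq_pool_mutex held.
 */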
3566 static void put_unbound_pool(struct worker_pool *pool)
3567 {
3568 DECLARE_COMPLETION_ONSTACK(detach_completion);
3569 struct worker *worker;
3570
3571 lockdep_assert_held(&wq_pool_mutex);
3572
3573 if (--pool->refcnt)
3574 return;
3575
3576
3577 if (WARN_ON(!(pool->cpu < 0)) ||
3578 WARN_ON(!list_empty(&pool->worklist)))
3579 return;
3580
3581
3582 if (pool->id >= 0)
3583 idr_remove(&worker_pool_idr, pool->id);
3584 hash_del(&pool->hash_node);
3585
3586
3587
3588
3589
3590
3591
3592
3593 rcuwait_wait_event(&manager_wait, wq_manager_inactive(pool),
3594 TASK_UNINTERRUPTIBLE);
3595 pool->flags |= POOL_MANAGER_ACTIVE;
3596
3597 while ((worker = first_idle_worker(pool)))
3598 destroy_worker(worker);
3599 WARN_ON(pool->nr_workers || pool->nr_idle);
3600 raw_spin_unlock_irq(&pool->lock);
3601
3602 mutex_lock(&wq_pool_attach_mutex);
3603 if (!list_empty(&pool->workers))
3604 pool->detach_completion = &detach_completion;
3605 mutex_unlock(&wq_pool_attach_mutex);
3606
3607 if (pool->detach_completion)
3608 wait_for_completion(pool->detach_completion);
3609
3610
3611 del_timer_sync(&pool->idle_timer);
3612 del_timer_sync(&pool->mayday_timer);
3613
3614
3615 call_rcu(&pool->rcu, rcu_free_pool);
3616 }
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
3629
3630
3631
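/*
 * get_unbound_pool - get a worker_pool with the specified attributes
 * @attrs: the attributes of the worker_pool to get
 *
 * Obtain a worker_pool which has the same attributes as @attrs, bump the
 * reference count and return it.  If there already is a matching
 * worker_pool in unbound_pool_hash, it is reused; otherwise a new one is
 * created on the NUMA node covering @attrs->cpumask, if any.
 *
 * Should be called with wq_pool_mutex held.
 *
 * Return: On success, a worker_pool with the same attributes as @attrs.
 * On failure, %NULL.
 */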
3632 static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
3633 {
3634 u32 hash = wqattrs_hash(attrs);
3635 struct worker_pool *pool;
3636 int node;
3637 int target_node = NUMA_NO_NODE;
3638
3639 lockdep_assert_held(&wq_pool_mutex);
3640
3641
3642 hash_for_each_possible(unbound_pool_hash, pool, hash_node, hash) {
3643 if (wqattrs_equal(pool->attrs, attrs)) {
3644 pool->refcnt++;
3645 return pool;
3646 }
3647 }
3648
3649
3650 if (wq_numa_enabled) {
3651 for_each_node(node) {
3652 if (cpumask_subset(attrs->cpumask,
3653 wq_numa_possible_cpumask[node])) {
3654 target_node = node;
3655 break;
3656 }
3657 }
3658 }
3659
3660
3661 pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, target_node);
3662 if (!pool || init_worker_pool(pool) < 0)
3663 goto fail;
3664
3665 lockdep_set_subclass(&pool->lock, 1);
3666 copy_workqueue_attrs(pool->attrs, attrs);
3667 pool->node = target_node;
3668
3669
3670
3671
3672
3673 pool->attrs->no_numa = false;
3674
3675 if (worker_pool_assign_id(pool) < 0)
3676 goto fail;
3677
3678
3679 if (wq_online && !create_worker(pool))
3680 goto fail;
3681
3682
3683 hash_add(unbound_pool_hash, &pool->hash_node, hash);
3684
3685 return pool;
3686 fail:
3687 if (pool)
3688 put_unbound_pool(pool);
3689 return NULL;
3690 }
3691
3692 static void rcu_free_pwq(struct rcu_head *rcu)
3693 {
3694 kmem_cache_free(pwq_cache,
3695 container_of(rcu, struct pool_workqueue, rcu));
3696 }
3697
3698
3699
3700
3701
3702 static void pwq_unbound_release_workfn(struct work_struct *work)
3703 {
3704 struct pool_workqueue *pwq = container_of(work, struct pool_workqueue,
3705 unbound_release_work);
3706 struct workqueue_struct *wq = pwq->wq;
3707 struct worker_pool *pool = pwq->pool;
3708 bool is_last = false;
3709
3710
3711
3712
3713
3714 if (!list_empty(&pwq->pwqs_node)) {
3715 if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
3716 return;
3717
3718 mutex_lock(&wq->mutex);
3719 list_del_rcu(&pwq->pwqs_node);
3720 is_last = list_empty(&wq->pwqs);
3721 mutex_unlock(&wq->mutex);
3722 }
3723
3724 mutex_lock(&wq_pool_mutex);
3725 put_unbound_pool(pool);
3726 mutex_unlock(&wq_pool_mutex);
3727
3728 call_rcu(&pwq->rcu, rcu_free_pwq);
3729
3730
3731
3732
3733
3734 if (is_last) {
3735 wq_unregister_lockdep(wq);
3736 call_rcu(&wq->rcu, rcu_free_wq);
3737 }
3738 }
3739
3740
3741
3742
3743
3744
3745
3746
3747
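/*
 * pwq_adjust_max_active - update a pwq's max_active to the current setting
 * @pwq: target pool_workqueue
 *
 * If @pwq isn't freezing, set @pwq->max_active to the associated
 * workqueue's saved_max_active and activate inactive work items
 * accordingly, kicking a worker if anything was activated.  If @pwq is
 * freezing, clear @pwq->max_active to zero.
 */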
3748 static void pwq_adjust_max_active(struct pool_workqueue *pwq)
3749 {
3750 struct workqueue_struct *wq = pwq->wq;
3751 bool freezable = wq->flags & WQ_FREEZABLE;
3752 unsigned long flags;
3753
3754
3755 lockdep_assert_held(&wq->mutex);
3756
3757
3758 if (!freezable && pwq->max_active == wq->saved_max_active)
3759 return;
3760
3761
3762 raw_spin_lock_irqsave(&pwq->pool->lock, flags);
3763
3764
3765
3766
3767
3768
3769 if (!freezable || !workqueue_freezing) {
3770 bool kick = false;
3771
3772 pwq->max_active = wq->saved_max_active;
3773
3774 while (!list_empty(&pwq->inactive_works) &&
3775 pwq->nr_active < pwq->max_active) {
3776 pwq_activate_first_inactive(pwq);
3777 kick = true;
3778 }
3779
3780
3781
3782
3783
3784
3785
3786 if (kick)
3787 wake_up_worker(pwq->pool);
3788 } else {
3789 pwq->max_active = 0;
3790 }
3791
3792 raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
3793 }
3794
3795
3796 static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
3797 struct worker_pool *pool)
3798 {
3799 BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK);
3800
3801 memset(pwq, 0, sizeof(*pwq));
3802
3803 pwq->pool = pool;
3804 pwq->wq = wq;
3805 pwq->flush_color = -1;
3806 pwq->refcnt = 1;
3807 INIT_LIST_HEAD(&pwq->inactive_works);
3808 INIT_LIST_HEAD(&pwq->pwqs_node);
3809 INIT_LIST_HEAD(&pwq->mayday_node);
3810 INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn);
3811 }
3812
3813
3814 static void link_pwq(struct pool_workqueue *pwq)
3815 {
3816 struct workqueue_struct *wq = pwq->wq;
3817
3818 lockdep_assert_held(&wq->mutex);
3819
3820
3821 if (!list_empty(&pwq->pwqs_node))
3822 return;
3823
3824
3825 pwq->work_color = wq->work_color;
3826
3827
3828 pwq_adjust_max_active(pwq);
3829
3830
3831 list_add_rcu(&pwq->pwqs_node, &wq->pwqs);
3832 }
3833
3834
3835 static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq,
3836 const struct workqueue_attrs *attrs)
3837 {
3838 struct worker_pool *pool;
3839 struct pool_workqueue *pwq;
3840
3841 lockdep_assert_held(&wq_pool_mutex);
3842
3843 pool = get_unbound_pool(attrs);
3844 if (!pool)
3845 return NULL;
3846
3847 pwq = kmem_cache_alloc_node(pwq_cache, GFP_KERNEL, pool->node);
3848 if (!pwq) {
3849 put_unbound_pool(pool);
3850 return NULL;
3851 }
3852
3853 init_pwq(pwq, wq, pool);
3854 return pwq;
3855 }
3856
3857
3858
3859
3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
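/*
 * wq_calc_node_cpumask - calculate a wq_attrs' cpumask for the specified node
 * @attrs: the wq_attrs of the default pwq of the target workqueue
 * @node: the target NUMA node
 * @cpu_going_down: if >= 0, the CPU to consider as offline
 * @cpumask: outarg, the resulting cpumask
 *
 * Calculate the cpumask a workqueue with @attrs should use on @node.  If
 * NUMA affinity is disabled, @attrs->cpumask is always used.  Otherwise,
 * if @node still has online CPUs requested by @attrs (ignoring
 * @cpu_going_down), the result is the intersection of @node's possible
 * CPUs and @attrs->cpumask.
 *
 * Return: %true if the resulting @cpumask differs from @attrs->cpumask,
 * %false if they are equal (i.e. the default pwq can be used).
 */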
3879 static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node,
3880 int cpu_going_down, cpumask_t *cpumask)
3881 {
3882 if (!wq_numa_enabled || attrs->no_numa)
3883 goto use_dfl;
3884
3885
3886 cpumask_and(cpumask, cpumask_of_node(node), attrs->cpumask);
3887 if (cpu_going_down >= 0)
3888 cpumask_clear_cpu(cpu_going_down, cpumask);
3889
3890 if (cpumask_empty(cpumask))
3891 goto use_dfl;
3892
3893
3894 cpumask_and(cpumask, attrs->cpumask, wq_numa_possible_cpumask[node]);
3895
3896 if (cpumask_empty(cpumask)) {
3897 pr_warn_once("WARNING: workqueue cpumask: online intersect > "
3898 "possible intersect\n");
3899 return false;
3900 }
3901
3902 return !cpumask_equal(cpumask, attrs->cpumask);
3903
3904 use_dfl:
3905 cpumask_copy(cpumask, attrs->cpumask);
3906 return false;
3907 }
3908
3909
3910 static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq,
3911 int node,
3912 struct pool_workqueue *pwq)
3913 {
3914 struct pool_workqueue *old_pwq;
3915
3916 lockdep_assert_held(&wq_pool_mutex);
3917 lockdep_assert_held(&wq->mutex);
3918
3919
3920 link_pwq(pwq);
3921
3922 old_pwq = rcu_access_pointer(wq->numa_pwq_tbl[node]);
3923 rcu_assign_pointer(wq->numa_pwq_tbl[node], pwq);
3924 return old_pwq;
3925 }
3926
3927
3928 struct apply_wqattrs_ctx {
3929 struct workqueue_struct *wq;
3930 struct workqueue_attrs *attrs;
3931 struct list_head list;
3932 struct pool_workqueue *dfl_pwq;
3933 struct pool_workqueue *pwq_tbl[];
3934 };
3935
3936
3937 static void apply_wqattrs_cleanup(struct apply_wqattrs_ctx *ctx)
3938 {
3939 if (ctx) {
3940 int node;
3941
3942 for_each_node(node)
3943 put_pwq_unlocked(ctx->pwq_tbl[node]);
3944 put_pwq_unlocked(ctx->dfl_pwq);
3945
3946 free_workqueue_attrs(ctx->attrs);
3947
3948 kfree(ctx);
3949 }
3950 }
3951
3952
3953 static struct apply_wqattrs_ctx *
3954 apply_wqattrs_prepare(struct workqueue_struct *wq,
3955 const struct workqueue_attrs *attrs)
3956 {
3957 struct apply_wqattrs_ctx *ctx;
3958 struct workqueue_attrs *new_attrs, *tmp_attrs;
3959 int node;
3960
3961 lockdep_assert_held(&wq_pool_mutex);
3962
3963 ctx = kzalloc(struct_size(ctx, pwq_tbl, nr_node_ids), GFP_KERNEL);
3964
3965 new_attrs = alloc_workqueue_attrs();
3966 tmp_attrs = alloc_workqueue_attrs();
3967 if (!ctx || !new_attrs || !tmp_attrs)
3968 goto out_free;
3969
3970
3971
3972
3973
3974
3975 copy_workqueue_attrs(new_attrs, attrs);
3976 cpumask_and(new_attrs->cpumask, new_attrs->cpumask, wq_unbound_cpumask);
3977 if (unlikely(cpumask_empty(new_attrs->cpumask)))
3978 cpumask_copy(new_attrs->cpumask, wq_unbound_cpumask);
3979
3980
3981
3982
3983
3984
3985 copy_workqueue_attrs(tmp_attrs, new_attrs);
3986
3987
3988
3989
3990
3991
3992 ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs);
3993 if (!ctx->dfl_pwq)
3994 goto out_free;
3995
3996 for_each_node(node) {
3997 if (wq_calc_node_cpumask(new_attrs, node, -1, tmp_attrs->cpumask)) {
3998 ctx->pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs);
3999 if (!ctx->pwq_tbl[node])
4000 goto out_free;
4001 } else {
4002 ctx->dfl_pwq->refcnt++;
4003 ctx->pwq_tbl[node] = ctx->dfl_pwq;
4004 }
4005 }
4006
4007
4008 copy_workqueue_attrs(new_attrs, attrs);
4009 cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask);
4010 ctx->attrs = new_attrs;
4011
4012 ctx->wq = wq;
4013 free_workqueue_attrs(tmp_attrs);
4014 return ctx;
4015
4016 out_free:
4017 free_workqueue_attrs(tmp_attrs);
4018 free_workqueue_attrs(new_attrs);
4019 apply_wqattrs_cleanup(ctx);
4020 return NULL;
4021 }
4022
4023
4024 static void apply_wqattrs_commit(struct apply_wqattrs_ctx *ctx)
4025 {
4026 int node;
4027
4028
4029 mutex_lock(&ctx->wq->mutex);
4030
4031 copy_workqueue_attrs(ctx->wq->unbound_attrs, ctx->attrs);
4032
4033
4034 for_each_node(node)
4035 ctx->pwq_tbl[node] = numa_pwq_tbl_install(ctx->wq, node,
4036 ctx->pwq_tbl[node]);
4037
4038
4039 link_pwq(ctx->dfl_pwq);
4040 swap(ctx->wq->dfl_pwq, ctx->dfl_pwq);
4041
4042 mutex_unlock(&ctx->wq->mutex);
4043 }
4044
4045 static void apply_wqattrs_lock(void)
4046 {
4047
4048 cpus_read_lock();
4049 mutex_lock(&wq_pool_mutex);
4050 }
4051
4052 static void apply_wqattrs_unlock(void)
4053 {
4054 mutex_unlock(&wq_pool_mutex);
4055 cpus_read_unlock();
4056 }
4057
4058 static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
4059 const struct workqueue_attrs *attrs)
4060 {
4061 struct apply_wqattrs_ctx *ctx;
4062
4063
4064 if (WARN_ON(!(wq->flags & WQ_UNBOUND)))
4065 return -EINVAL;
4066
4067
4068 if (!list_empty(&wq->pwqs)) {
4069 if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
4070 return -EINVAL;
4071
4072 wq->flags &= ~__WQ_ORDERED;
4073 }
4074
4075 ctx = apply_wqattrs_prepare(wq, attrs);
4076 if (!ctx)
4077 return -ENOMEM;
4078
4079
4080 apply_wqattrs_commit(ctx);
4081 apply_wqattrs_cleanup(ctx);
4082
4083 return 0;
4084 }
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
4101
4102
4103
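/**
 * apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue
 * @wq: the target workqueue
 * @attrs: the workqueue_attrs to apply, allocated with alloc_workqueue_attrs()
 *
 * Apply @attrs to an unbound workqueue @wq.  Unless disabled, on NUMA
 * machines this maps a separate pwq to each NUMA node with possible CPUs in
 * @attrs->cpumask so that work items stay affine to the node they were
 * issued on.  Older pwqs are released as in-flight work items finish.
 *
 * Performs GFP_KERNEL allocations.  The caller must hold the CPU hotplug
 * read lock.
 *
 * Return: 0 on success and -errno on failure.
 */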
4104 int apply_workqueue_attrs(struct workqueue_struct *wq,
4105 const struct workqueue_attrs *attrs)
4106 {
4107 int ret;
4108
4109 lockdep_assert_cpus_held();
4110
4111 mutex_lock(&wq_pool_mutex);
4112 ret = apply_workqueue_attrs_locked(wq, attrs);
4113 mutex_unlock(&wq_pool_mutex);
4114
4115 return ret;
4116 }
4117
4118
4119
4120
4121
4122
4123
4124
4125
4126
4127
4128
4129
4130
4131
4132
4133
4134
4135
4136
4137
4138
4139
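/*
 * wq_update_unbound_numa - update NUMA affinity of a wq for CPU hot[un]plug
 * @wq: the target workqueue
 * @cpu: the CPU that is coming up or going down
 * @online: whether @cpu is coming up or going down
 *
 * Called from the CPU hotplug callbacks with wq_pool_mutex held.  If the
 * node's effective cpumask changes, a new pwq is allocated and installed
 * for @cpu's node; if allocation fails or the node loses all CPUs usable
 * by @wq, the node falls back to the workqueue's default pwq.
 */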
4140 static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
4141 bool online)
4142 {
4143 int node = cpu_to_node(cpu);
4144 int cpu_off = online ? -1 : cpu;
4145 struct pool_workqueue *old_pwq = NULL, *pwq;
4146 struct workqueue_attrs *target_attrs;
4147 cpumask_t *cpumask;
4148
4149 lockdep_assert_held(&wq_pool_mutex);
4150
4151 if (!wq_numa_enabled || !(wq->flags & WQ_UNBOUND) ||
4152 wq->unbound_attrs->no_numa)
4153 return;
4154
4155
4156
4157
4158
4159
4160 target_attrs = wq_update_unbound_numa_attrs_buf;
4161 cpumask = target_attrs->cpumask;
4162
4163 copy_workqueue_attrs(target_attrs, wq->unbound_attrs);
4164 pwq = unbound_pwq_by_node(wq, node);
4165
4166
4167
4168
4169
4170
4171
4172 if (wq_calc_node_cpumask(wq->dfl_pwq->pool->attrs, node, cpu_off, cpumask)) {
4173 if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
4174 return;
4175 } else {
4176 goto use_dfl_pwq;
4177 }
4178
4179
4180 pwq = alloc_unbound_pwq(wq, target_attrs);
4181 if (!pwq) {
4182 pr_warn("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
4183 wq->name);
4184 goto use_dfl_pwq;
4185 }
4186
4187
4188 mutex_lock(&wq->mutex);
4189 old_pwq = numa_pwq_tbl_install(wq, node, pwq);
4190 goto out_unlock;
4191
4192 use_dfl_pwq:
4193 mutex_lock(&wq->mutex);
4194 raw_spin_lock_irq(&wq->dfl_pwq->pool->lock);
4195 get_pwq(wq->dfl_pwq);
4196 raw_spin_unlock_irq(&wq->dfl_pwq->pool->lock);
4197 old_pwq = numa_pwq_tbl_install(wq, node, wq->dfl_pwq);
4198 out_unlock:
4199 mutex_unlock(&wq->mutex);
4200 put_pwq_unlocked(old_pwq);
4201 }
4202
4203 static int alloc_and_link_pwqs(struct workqueue_struct *wq)
4204 {
4205 bool highpri = wq->flags & WQ_HIGHPRI;
4206 int cpu, ret;
4207
4208 if (!(wq->flags & WQ_UNBOUND)) {
4209 wq->cpu_pwqs = alloc_percpu(struct pool_workqueue);
4210 if (!wq->cpu_pwqs)
4211 return -ENOMEM;
4212
4213 for_each_possible_cpu(cpu) {
4214 struct pool_workqueue *pwq =
4215 per_cpu_ptr(wq->cpu_pwqs, cpu);
4216 struct worker_pool *cpu_pools =
4217 per_cpu(cpu_worker_pools, cpu);
4218
4219 init_pwq(pwq, wq, &cpu_pools[highpri]);
4220
4221 mutex_lock(&wq->mutex);
4222 link_pwq(pwq);
4223 mutex_unlock(&wq->mutex);
4224 }
4225 return 0;
4226 }
4227
4228 cpus_read_lock();
4229 if (wq->flags & __WQ_ORDERED) {
4230 ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
4231
4232 WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node ||
4233 wq->pwqs.prev != &wq->dfl_pwq->pwqs_node),
4234 "ordering guarantee broken for workqueue %s\n", wq->name);
4235 } else {
4236 ret = apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
4237 }
4238 cpus_read_unlock();
4239
4240 return ret;
4241 }
4242
4243 static int wq_clamp_max_active(int max_active, unsigned int flags,
4244 const char *name)
4245 {
4246 int lim = flags & WQ_UNBOUND ? WQ_UNBOUND_MAX_ACTIVE : WQ_MAX_ACTIVE;
4247
4248 if (max_active < 1 || max_active > lim)
4249 pr_warn("workqueue: max_active %d requested for %s is out of range, clamping between %d and %d\n",
4250 max_active, name, 1, lim);
4251
4252 return clamp_val(max_active, 1, lim);
4253 }
4254
4255
4256
4257
4258
4259 static int init_rescuer(struct workqueue_struct *wq)
4260 {
4261 struct worker *rescuer;
4262 int ret;
4263
4264 if (!(wq->flags & WQ_MEM_RECLAIM))
4265 return 0;
4266
4267 rescuer = alloc_worker(NUMA_NO_NODE);
4268 if (!rescuer)
4269 return -ENOMEM;
4270
4271 rescuer->rescue_wq = wq;
4272 rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", wq->name);
4273 if (IS_ERR(rescuer->task)) {
4274 ret = PTR_ERR(rescuer->task);
4275 kfree(rescuer);
4276 return ret;
4277 }
4278
4279 wq->rescuer = rescuer;
4280 kthread_bind_mask(rescuer->task, cpu_possible_mask);
4281 wake_up_process(rescuer->task);
4282
4283 return 0;
4284 }
4285
4286 __printf(1, 4)
4287 struct workqueue_struct *alloc_workqueue(const char *fmt,
4288 unsigned int flags,
4289 int max_active, ...)
4290 {
4291 size_t tbl_size = 0;
4292 va_list args;
4293 struct workqueue_struct *wq;
4294 struct pool_workqueue *pwq;
4295
4296
4297
4298
4299
4300
4301
4302
4303 if ((flags & WQ_UNBOUND) && max_active == 1)
4304 flags |= __WQ_ORDERED;
4305
4306
4307 if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient)
4308 flags |= WQ_UNBOUND;
4309
4310
4311 if (flags & WQ_UNBOUND)
4312 tbl_size = nr_node_ids * sizeof(wq->numa_pwq_tbl[0]);
4313
4314 wq = kzalloc(sizeof(*wq) + tbl_size, GFP_KERNEL);
4315 if (!wq)
4316 return NULL;
4317
4318 if (flags & WQ_UNBOUND) {
4319 wq->unbound_attrs = alloc_workqueue_attrs();
4320 if (!wq->unbound_attrs)
4321 goto err_free_wq;
4322 }
4323
4324 va_start(args, max_active);
4325 vsnprintf(wq->name, sizeof(wq->name), fmt, args);
4326 va_end(args);
4327
4328 max_active = max_active ?: WQ_DFL_ACTIVE;
4329 max_active = wq_clamp_max_active(max_active, flags, wq->name);
4330
4331
4332 wq->flags = flags;
4333 wq->saved_max_active = max_active;
4334 mutex_init(&wq->mutex);
4335 atomic_set(&wq->nr_pwqs_to_flush, 0);
4336 INIT_LIST_HEAD(&wq->pwqs);
4337 INIT_LIST_HEAD(&wq->flusher_queue);
4338 INIT_LIST_HEAD(&wq->flusher_overflow);
4339 INIT_LIST_HEAD(&wq->maydays);
4340
4341 wq_init_lockdep(wq);
4342 INIT_LIST_HEAD(&wq->list);
4343
4344 if (alloc_and_link_pwqs(wq) < 0)
4345 goto err_unreg_lockdep;
4346
4347 if (wq_online && init_rescuer(wq) < 0)
4348 goto err_destroy;
4349
4350 if ((wq->flags & WQ_SYSFS) && workqueue_sysfs_register(wq))
4351 goto err_destroy;
4352
4353
4354
4355
4356
4357
4358 mutex_lock(&wq_pool_mutex);
4359
4360 mutex_lock(&wq->mutex);
4361 for_each_pwq(pwq, wq)
4362 pwq_adjust_max_active(pwq);
4363 mutex_unlock(&wq->mutex);
4364
4365 list_add_tail_rcu(&wq->list, &workqueues);
4366
4367 mutex_unlock(&wq_pool_mutex);
4368
4369 return wq;
4370
4371 err_unreg_lockdep:
4372 wq_unregister_lockdep(wq);
4373 wq_free_lockdep(wq);
4374 err_free_wq:
4375 free_workqueue_attrs(wq->unbound_attrs);
4376 kfree(wq);
4377 return NULL;
4378 err_destroy:
4379 destroy_workqueue(wq);
4380 return NULL;
4381 }
4382 EXPORT_SYMBOL_GPL(alloc_workqueue);
4383
4384 static bool pwq_busy(struct pool_workqueue *pwq)
4385 {
4386 int i;
4387
4388 for (i = 0; i < WORK_NR_COLORS; i++)
4389 if (pwq->nr_in_flight[i])
4390 return true;
4391
4392 if ((pwq != pwq->wq->dfl_pwq) && (pwq->refcnt > 1))
4393 return true;
4394 if (pwq->nr_active || !list_empty(&pwq->inactive_works))
4395 return true;
4396
4397 return false;
4398 }
4399
4400
4401
4402
4403
4404
4405
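/**
 * destroy_workqueue - safely terminate a workqueue
 * @wq: target workqueue
 *
 * Safely destroy a workqueue.  All work currently pending will be done
 * first.  The rescuer, if any, is stopped, the workqueue is removed from
 * the global list, and the pwqs drop their references; the workqueue
 * itself is freed via RCU once the last pwq is released.
 */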
4406 void destroy_workqueue(struct workqueue_struct *wq)
4407 {
4408 struct pool_workqueue *pwq;
4409 int node;
4410
4411
4412
4413
4414
4415 workqueue_sysfs_unregister(wq);
4416
4417
4418 drain_workqueue(wq);
4419
4420
4421 if (wq->rescuer) {
4422 struct worker *rescuer = wq->rescuer;
4423
4424
4425 raw_spin_lock_irq(&wq_mayday_lock);
4426 wq->rescuer = NULL;
4427 raw_spin_unlock_irq(&wq_mayday_lock);
4428
4429
4430 kthread_stop(rescuer->task);
4431 kfree(rescuer);
4432 }
4433
4434
4435
4436
4437
4438 mutex_lock(&wq_pool_mutex);
4439 mutex_lock(&wq->mutex);
4440 for_each_pwq(pwq, wq) {
4441 raw_spin_lock_irq(&pwq->pool->lock);
4442 if (WARN_ON(pwq_busy(pwq))) {
4443 pr_warn("%s: %s has the following busy pwq\n",
4444 __func__, wq->name);
4445 show_pwq(pwq);
4446 raw_spin_unlock_irq(&pwq->pool->lock);
4447 mutex_unlock(&wq->mutex);
4448 mutex_unlock(&wq_pool_mutex);
4449 show_one_workqueue(wq);
4450 return;
4451 }
4452 raw_spin_unlock_irq(&pwq->pool->lock);
4453 }
4454 mutex_unlock(&wq->mutex);
4455
4456
4457
4458
4459
4460 list_del_rcu(&wq->list);
4461 mutex_unlock(&wq_pool_mutex);
4462
4463 if (!(wq->flags & WQ_UNBOUND)) {
4464 wq_unregister_lockdep(wq);
4465
4466
4467
4468
4469 call_rcu(&wq->rcu, rcu_free_wq);
4470 } else {
4471
4472
4473
4474
4475
4476 for_each_node(node) {
4477 pwq = rcu_access_pointer(wq->numa_pwq_tbl[node]);
4478 RCU_INIT_POINTER(wq->numa_pwq_tbl[node], NULL);
4479 put_pwq_unlocked(pwq);
4480 }
4481
4482
4483
4484
4485
4486 pwq = wq->dfl_pwq;
4487 wq->dfl_pwq = NULL;
4488 put_pwq_unlocked(pwq);
4489 }
4490 }
4491 EXPORT_SYMBOL_GPL(destroy_workqueue);
4492
4493
4494
4495
4496
4497
4498
4499
4500
4501
4502
4503 void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
4504 {
4505 struct pool_workqueue *pwq;
4506
4507
4508 if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
4509 return;
4510
4511 max_active = wq_clamp_max_active(max_active, wq->flags, wq->name);
4512
4513 mutex_lock(&wq->mutex);
4514
4515 wq->flags &= ~__WQ_ORDERED;
4516 wq->saved_max_active = max_active;
4517
4518 for_each_pwq(pwq, wq)
4519 pwq_adjust_max_active(pwq);
4520
4521 mutex_unlock(&wq->mutex);
4522 }
4523 EXPORT_SYMBOL_GPL(workqueue_set_max_active);
4524
4525
4526
4527
4528
4529
4530
4531
4532
4533 struct work_struct *current_work(void)
4534 {
4535 struct worker *worker = current_wq_worker();
4536
4537 return worker ? worker->current_work : NULL;
4538 }
4539 EXPORT_SYMBOL(current_work);
4540
4541
4542
4543
4544
4545
4546
4547
4548
4549 bool current_is_workqueue_rescuer(void)
4550 {
4551 struct worker *worker = current_wq_worker();
4552
4553 return worker && worker->rescue_wq;
4554 }
4555
4556
4557
4558
4559
4560
4561
4562
4563
4564
4565
4566
4567
4568
4569
4570
4571
4572
4573
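/**
 * workqueue_congested - test whether a workqueue is congested
 * @cpu: CPU in question
 * @wq: target workqueue
 *
 * Test whether @wq's pwq for @cpu is congested, i.e. has inactive work
 * items waiting for max_active slots.  There is no synchronization around
 * this function and the result is only useful as an advisory hint.
 *
 * If @cpu is WORK_CPU_UNBOUND, the test is performed on the local CPU.
 *
 * Return: %true if congested, %false otherwise.
 */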
4574 bool workqueue_congested(int cpu, struct workqueue_struct *wq)
4575 {
4576 struct pool_workqueue *pwq;
4577 bool ret;
4578
4579 rcu_read_lock();
4580 preempt_disable();
4581
4582 if (cpu == WORK_CPU_UNBOUND)
4583 cpu = smp_processor_id();
4584
4585 if (!(wq->flags & WQ_UNBOUND))
4586 pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
4587 else
4588 pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
4589
4590 ret = !list_empty(&pwq->inactive_works);
4591 preempt_enable();
4592 rcu_read_unlock();
4593
4594 return ret;
4595 }
4596 EXPORT_SYMBOL_GPL(workqueue_congested);
4597
4598
4599
4600
4601
4602
4603
4604
4605
4606
4607
4608
4609 unsigned int work_busy(struct work_struct *work)
4610 {
4611 struct worker_pool *pool;
4612 unsigned long flags;
4613 unsigned int ret = 0;
4614
4615 if (work_pending(work))
4616 ret |= WORK_BUSY_PENDING;
4617
4618 rcu_read_lock();
4619 pool = get_work_pool(work);
4620 if (pool) {
4621 raw_spin_lock_irqsave(&pool->lock, flags);
4622 if (find_worker_executing_work(pool, work))
4623 ret |= WORK_BUSY_RUNNING;
4624 raw_spin_unlock_irqrestore(&pool->lock, flags);
4625 }
4626 rcu_read_unlock();
4627
4628 return ret;
4629 }
4630 EXPORT_SYMBOL_GPL(work_busy);
4631
4632
4633
4634
4635
4636
4637
4638
4639
4640
4641
4642 void set_worker_desc(const char *fmt, ...)
4643 {
4644 struct worker *worker = current_wq_worker();
4645 va_list args;
4646
4647 if (worker) {
4648 va_start(args, fmt);
4649 vsnprintf(worker->desc, sizeof(worker->desc), fmt, args);
4650 va_end(args);
4651 }
4652 }
4653 EXPORT_SYMBOL_GPL(set_worker_desc);
4654
4655
4656
4657
4658
4659
4660
4661
4662
4663
4664
4665
4666
4667
4668 void print_worker_info(const char *log_lvl, struct task_struct *task)
4669 {
4670 work_func_t *fn = NULL;
4671 char name[WQ_NAME_LEN] = { };
4672 char desc[WORKER_DESC_LEN] = { };
4673 struct pool_workqueue *pwq = NULL;
4674 struct workqueue_struct *wq = NULL;
4675 struct worker *worker;
4676
4677 if (!(task->flags & PF_WQ_WORKER))
4678 return;
4679
4680
4681
4682
4683
4684 worker = kthread_probe_data(task);
4685
4686
4687
4688
4689
4690 copy_from_kernel_nofault(&fn, &worker->current_func, sizeof(fn));
4691 copy_from_kernel_nofault(&pwq, &worker->current_pwq, sizeof(pwq));
4692 copy_from_kernel_nofault(&wq, &pwq->wq, sizeof(wq));
4693 copy_from_kernel_nofault(name, wq->name, sizeof(name) - 1);
4694 copy_from_kernel_nofault(desc, worker->desc, sizeof(desc) - 1);
4695
4696 if (fn || name[0] || desc[0]) {
4697 printk("%sWorkqueue: %s %ps", log_lvl, name, fn);
4698 if (strcmp(name, desc))
4699 pr_cont(" (%s)", desc);
4700 pr_cont("\n");
4701 }
4702 }
4703
4704 static void pr_cont_pool_info(struct worker_pool *pool)
4705 {
4706 pr_cont(" cpus=%*pbl", nr_cpumask_bits, pool->attrs->cpumask);
4707 if (pool->node != NUMA_NO_NODE)
4708 pr_cont(" node=%d", pool->node);
4709 pr_cont(" flags=0x%x nice=%d", pool->flags, pool->attrs->nice);
4710 }
4711
4712 static void pr_cont_work(bool comma, struct work_struct *work)
4713 {
4714 if (work->func == wq_barrier_func) {
4715 struct wq_barrier *barr;
4716
4717 barr = container_of(work, struct wq_barrier, work);
4718
4719 pr_cont("%s BAR(%d)", comma ? "," : "",
4720 task_pid_nr(barr->task));
4721 } else {
4722 pr_cont("%s %ps", comma ? "," : "", work->func);
4723 }
4724 }
4725
4726 static void show_pwq(struct pool_workqueue *pwq)
4727 {
4728 struct worker_pool *pool = pwq->pool;
4729 struct work_struct *work;
4730 struct worker *worker;
4731 bool has_in_flight = false, has_pending = false;
4732 int bkt;
4733
4734 pr_info(" pwq %d:", pool->id);
4735 pr_cont_pool_info(pool);
4736
4737 pr_cont(" active=%d/%d refcnt=%d%s\n",
4738 pwq->nr_active, pwq->max_active, pwq->refcnt,
4739 !list_empty(&pwq->mayday_node) ? " MAYDAY" : "");
4740
4741 hash_for_each(pool->busy_hash, bkt, worker, hentry) {
4742 if (worker->current_pwq == pwq) {
4743 has_in_flight = true;
4744 break;
4745 }
4746 }
4747 if (has_in_flight) {
4748 bool comma = false;
4749
4750 pr_info(" in-flight:");
4751 hash_for_each(pool->busy_hash, bkt, worker, hentry) {
4752 if (worker->current_pwq != pwq)
4753 continue;
4754
4755 pr_cont("%s %d%s:%ps", comma ? "," : "",
4756 task_pid_nr(worker->task),
4757 worker->rescue_wq ? "(RESCUER)" : "",
4758 worker->current_func);
4759 list_for_each_entry(work, &worker->scheduled, entry)
4760 pr_cont_work(false, work);
4761 comma = true;
4762 }
4763 pr_cont("\n");
4764 }
4765
4766 list_for_each_entry(work, &pool->worklist, entry) {
4767 if (get_work_pwq(work) == pwq) {
4768 has_pending = true;
4769 break;
4770 }
4771 }
4772 if (has_pending) {
4773 bool comma = false;
4774
4775 pr_info(" pending:");
4776 list_for_each_entry(work, &pool->worklist, entry) {
4777 if (get_work_pwq(work) != pwq)
4778 continue;
4779
4780 pr_cont_work(comma, work);
4781 comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
4782 }
4783 pr_cont("\n");
4784 }
4785
4786 if (!list_empty(&pwq->inactive_works)) {
4787 bool comma = false;
4788
4789 pr_info(" inactive:");
4790 list_for_each_entry(work, &pwq->inactive_works, entry) {
4791 pr_cont_work(comma, work);
4792 comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
4793 }
4794 pr_cont("\n");
4795 }
4796 }
4797
4798
4799
4800
4801
4802 void show_one_workqueue(struct workqueue_struct *wq)
4803 {
4804 struct pool_workqueue *pwq;
4805 bool idle = true;
4806 unsigned long flags;
4807
4808 for_each_pwq(pwq, wq) {
4809 if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
4810 idle = false;
4811 break;
4812 }
4813 }
4814 if (idle)
4815 return;
4816
4817 pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);
4818
4819 for_each_pwq(pwq, wq) {
4820 raw_spin_lock_irqsave(&pwq->pool->lock, flags);
4821 if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
4822
4823
4824
4825
4826
4827 printk_deferred_enter();
4828 show_pwq(pwq);
4829 printk_deferred_exit();
4830 }
4831 raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
4832
4833
4834
4835
4836
4837 touch_nmi_watchdog();
4838 }
4839
4840 }
4841
4842
4843
4844
4845
4846 static void show_one_worker_pool(struct worker_pool *pool)
4847 {
4848 struct worker *worker;
4849 bool first = true;
4850 unsigned long flags;
4851
4852 raw_spin_lock_irqsave(&pool->lock, flags);
4853 if (pool->nr_workers == pool->nr_idle)
4854 goto next_pool;
4855
4856
4857
4858
4859
4860 printk_deferred_enter();
4861 pr_info("pool %d:", pool->id);
4862 pr_cont_pool_info(pool);
4863 pr_cont(" hung=%us workers=%d",
4864 jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000,
4865 pool->nr_workers);
4866 if (pool->manager)
4867 pr_cont(" manager: %d",
4868 task_pid_nr(pool->manager->task));
4869 list_for_each_entry(worker, &pool->idle_list, entry) {
4870 pr_cont(" %s%d", first ? "idle: " : "",
4871 task_pid_nr(worker->task));
4872 first = false;
4873 }
4874 pr_cont("\n");
4875 printk_deferred_exit();
4876 next_pool:
4877 raw_spin_unlock_irqrestore(&pool->lock, flags);
4878
4879
4880
4881
4882
4883 touch_nmi_watchdog();
4884
4885 }
4886
4887
4888
4889
4890
4891
4892
4893 void show_all_workqueues(void)
4894 {
4895 struct workqueue_struct *wq;
4896 struct worker_pool *pool;
4897 int pi;
4898
4899 rcu_read_lock();
4900
4901 pr_info("Showing busy workqueues and worker pools:\n");
4902
4903 list_for_each_entry_rcu(wq, &workqueues, list)
4904 show_one_workqueue(wq);
4905
4906 for_each_pool(pool, pi)
4907 show_one_worker_pool(pool);
4908
4909 rcu_read_unlock();
4910 }
4911
4912
4913 void wq_worker_comm(char *buf, size_t size, struct task_struct *task)
4914 {
4915 int off;
4916
4917
4918 off = strscpy(buf, task->comm, size);
4919 if (off < 0)
4920 return;
4921
4922
4923 mutex_lock(&wq_pool_attach_mutex);
4924
4925 if (task->flags & PF_WQ_WORKER) {
4926 struct worker *worker = kthread_data(task);
4927 struct worker_pool *pool = worker->pool;
4928
4929 if (pool) {
4930 raw_spin_lock_irq(&pool->lock);
4931
4932
4933
4934
4935
4936 if (worker->desc[0] != '\0') {
4937 if (worker->current_work)
4938 scnprintf(buf + off, size - off, "+%s",
4939 worker->desc);
4940 else
4941 scnprintf(buf + off, size - off, "-%s",
4942 worker->desc);
4943 }
4944 raw_spin_unlock_irq(&pool->lock);
4945 }
4946 }
4947
4948 mutex_unlock(&wq_pool_attach_mutex);
4949 }
4950
4951 #ifdef CONFIG_SMP
4952
4953
4954
4955
4956
4957
4958
4959
4960
4961
4962
4963
4964
4965
4966
4967
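/*
 * unbind_workers - unbind workers from their associated CPU
 * @cpu: CPU going down
 *
 * Mark every worker of @cpu's per-cpu pools WORKER_UNBOUND and the pools
 * POOL_DISASSOCIATED so that concurrency management is bypassed, reset
 * nr_running, then migrate the workers off the dying CPU.  Called from
 * workqueue_offline_cpu().
 */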
4968 static void unbind_workers(int cpu)
4969 {
4970 struct worker_pool *pool;
4971 struct worker *worker;
4972
4973 for_each_cpu_worker_pool(pool, cpu) {
4974 mutex_lock(&wq_pool_attach_mutex);
4975 raw_spin_lock_irq(&pool->lock);
4976
4977
4978
4979
4980
4981
4982
4983
4984
4985 for_each_pool_worker(worker, pool)
4986 worker->flags |= WORKER_UNBOUND;
4987
4988 pool->flags |= POOL_DISASSOCIATED;
4989
4990
4991
4992
4993
4994
4995
4996
4997
4998 pool->nr_running = 0;
4999
5000
5001
5002
5003
5004
5005 wake_up_worker(pool);
5006
5007 raw_spin_unlock_irq(&pool->lock);
5008
5009 for_each_pool_worker(worker, pool) {
5010 kthread_set_per_cpu(worker->task, -1);
5011 if (cpumask_intersects(wq_unbound_cpumask, cpu_active_mask))
5012 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, wq_unbound_cpumask) < 0);
5013 else
5014 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, cpu_possible_mask) < 0);
5015 }
5016
5017 mutex_unlock(&wq_pool_attach_mutex);
5018 }
5019 }
5020
5021
5022
5023
5024
5025
5026
5027 static void rebind_workers(struct worker_pool *pool)
5028 {
5029 struct worker *worker;
5030
5031 lockdep_assert_held(&wq_pool_attach_mutex);
5032
5033
5034
5035
5036
5037
5038
5039
5040 for_each_pool_worker(worker, pool) {
5041 kthread_set_per_cpu(worker->task, pool->cpu);
5042 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
5043 pool->attrs->cpumask) < 0);
5044 }
5045
5046 raw_spin_lock_irq(&pool->lock);
5047
5048 pool->flags &= ~POOL_DISASSOCIATED;
5049
5050 for_each_pool_worker(worker, pool) {
5051 unsigned int worker_flags = worker->flags;
5052
5053
5054
5055
5056
5057
5058
5059
5060
5061
5062
5063
5064
5065
5066
5067
5068 WARN_ON_ONCE(!(worker_flags & WORKER_UNBOUND));
5069 worker_flags |= WORKER_REBOUND;
5070 worker_flags &= ~WORKER_UNBOUND;
5071 WRITE_ONCE(worker->flags, worker_flags);
5072 }
5073
5074 raw_spin_unlock_irq(&pool->lock);
5075 }
5076
5077
5078
5079
5080
5081
5082
5083
5084
5085
5086
5087 static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu)
5088 {
5089 static cpumask_t cpumask;
5090 struct worker *worker;
5091
5092 lockdep_assert_held(&wq_pool_attach_mutex);
5093
5094
5095 if (!cpumask_test_cpu(cpu, pool->attrs->cpumask))
5096 return;
5097
5098 cpumask_and(&cpumask, pool->attrs->cpumask, cpu_online_mask);
5099
5100
5101 for_each_pool_worker(worker, pool)
5102 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, &cpumask) < 0);
5103 }
5104
5105 int workqueue_prepare_cpu(unsigned int cpu)
5106 {
5107 struct worker_pool *pool;
5108
5109 for_each_cpu_worker_pool(pool, cpu) {
5110 if (pool->nr_workers)
5111 continue;
5112 if (!create_worker(pool))
5113 return -ENOMEM;
5114 }
5115 return 0;
5116 }
5117
5118 int workqueue_online_cpu(unsigned int cpu)
5119 {
5120 struct worker_pool *pool;
5121 struct workqueue_struct *wq;
5122 int pi;
5123
5124 mutex_lock(&wq_pool_mutex);
5125
5126 for_each_pool(pool, pi) {
5127 mutex_lock(&wq_pool_attach_mutex);
5128
5129 if (pool->cpu == cpu)
5130 rebind_workers(pool);
5131 else if (pool->cpu < 0)
5132 restore_unbound_workers_cpumask(pool, cpu);
5133
5134 mutex_unlock(&wq_pool_attach_mutex);
5135 }
5136
5137
5138 list_for_each_entry(wq, &workqueues, list)
5139 wq_update_unbound_numa(wq, cpu, true);
5140
5141 mutex_unlock(&wq_pool_mutex);
5142 return 0;
5143 }
5144
5145 int workqueue_offline_cpu(unsigned int cpu)
5146 {
5147 struct workqueue_struct *wq;
5148
5149
5150 if (WARN_ON(cpu != smp_processor_id()))
5151 return -1;
5152
5153 unbind_workers(cpu);
5154
5155
5156 mutex_lock(&wq_pool_mutex);
5157 list_for_each_entry(wq, &workqueues, list)
5158 wq_update_unbound_numa(wq, cpu, false);
5159 mutex_unlock(&wq_pool_mutex);
5160
5161 return 0;
5162 }
5163
5164 struct work_for_cpu {
5165 struct work_struct work;
5166 long (*fn)(void *);
5167 void *arg;
5168 long ret;
5169 };
5170
5171 static void work_for_cpu_fn(struct work_struct *work)
5172 {
5173 struct work_for_cpu *wfc = container_of(work, struct work_for_cpu, work);
5174
5175 wfc->ret = wfc->fn(wfc->arg);
5176 }
5177
5178
5179
5180
5181
5182
5183
5184
5185
5186
5187
5188
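/**
 * work_on_cpu - run a function in thread context on a particular cpu
 * @cpu: the cpu to run on
 * @fn: the function to run
 * @arg: the function arg
 *
 * It is up to the caller to ensure that the cpu doesn't go offline.
 * The caller must not hold any locks which would prevent @fn from
 * completing.
 *
 * A minimal usage sketch (the callback name and target cpu below are
 * purely hypothetical):
 *
 *	static long dump_local_state(void *arg)
 *	{
 *		pr_info("running on CPU %d\n", raw_smp_processor_id());
 *		return 0;
 *	}
 *	...
 *	long err = work_on_cpu(target_cpu, dump_local_state, NULL);
 *
 * Return: The value @fn returns.
 */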
5189 long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
5190 {
5191 struct work_for_cpu wfc = { .fn = fn, .arg = arg };
5192
5193 INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn);
5194 schedule_work_on(cpu, &wfc.work);
5195 flush_work(&wfc.work);
5196 destroy_work_on_stack(&wfc.work);
5197 return wfc.ret;
5198 }
5199 EXPORT_SYMBOL_GPL(work_on_cpu);
5200
5201
5202
5203
5204
5205
5206
5207
5208
5209
5210
5211
5212 long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg)
5213 {
5214 long ret = -ENODEV;
5215
5216 cpus_read_lock();
5217 if (cpu_online(cpu))
5218 ret = work_on_cpu(cpu, fn, arg);
5219 cpus_read_unlock();
5220 return ret;
5221 }
5222 EXPORT_SYMBOL_GPL(work_on_cpu_safe);
5223 #endif
5224
5225 #ifdef CONFIG_FREEZER
5226
5227
5228
5229
5230
5231
5232
5233
5234
5235
5236
5237 void freeze_workqueues_begin(void)
5238 {
5239 struct workqueue_struct *wq;
5240 struct pool_workqueue *pwq;
5241
5242 mutex_lock(&wq_pool_mutex);
5243
5244 WARN_ON_ONCE(workqueue_freezing);
5245 workqueue_freezing = true;
5246
5247 list_for_each_entry(wq, &workqueues, list) {
5248 mutex_lock(&wq->mutex);
5249 for_each_pwq(pwq, wq)
5250 pwq_adjust_max_active(pwq);
5251 mutex_unlock(&wq->mutex);
5252 }
5253
5254 mutex_unlock(&wq_pool_mutex);
5255 }
5256
5257
5258
5259
5260
5261
5262
5263
5264
5265
5266
5267
5268
5269
5270 bool freeze_workqueues_busy(void)
5271 {
5272 bool busy = false;
5273 struct workqueue_struct *wq;
5274 struct pool_workqueue *pwq;
5275
5276 mutex_lock(&wq_pool_mutex);
5277
5278 WARN_ON_ONCE(!workqueue_freezing);
5279
5280 list_for_each_entry(wq, &workqueues, list) {
5281 if (!(wq->flags & WQ_FREEZABLE))
5282 continue;
5283
5284
5285
5286
5287 rcu_read_lock();
5288 for_each_pwq(pwq, wq) {
5289 WARN_ON_ONCE(pwq->nr_active < 0);
5290 if (pwq->nr_active) {
5291 busy = true;
5292 rcu_read_unlock();
5293 goto out_unlock;
5294 }
5295 }
5296 rcu_read_unlock();
5297 }
5298 out_unlock:
5299 mutex_unlock(&wq_pool_mutex);
5300 return busy;
5301 }
5302
5303
5304
5305
5306
5307
5308
5309
5310
5311
5312 void thaw_workqueues(void)
5313 {
5314 struct workqueue_struct *wq;
5315 struct pool_workqueue *pwq;
5316
5317 mutex_lock(&wq_pool_mutex);
5318
5319 if (!workqueue_freezing)
5320 goto out_unlock;
5321
5322 workqueue_freezing = false;
5323
5324
5325 list_for_each_entry(wq, &workqueues, list) {
5326 mutex_lock(&wq->mutex);
5327 for_each_pwq(pwq, wq)
5328 pwq_adjust_max_active(pwq);
5329 mutex_unlock(&wq->mutex);
5330 }
5331
5332 out_unlock:
5333 mutex_unlock(&wq_pool_mutex);
5334 }
5335 #endif
5336
5337 static int workqueue_apply_unbound_cpumask(void)
5338 {
5339 LIST_HEAD(ctxs);
5340 int ret = 0;
5341 struct workqueue_struct *wq;
5342 struct apply_wqattrs_ctx *ctx, *n;
5343
5344 lockdep_assert_held(&wq_pool_mutex);
5345
5346 list_for_each_entry(wq, &workqueues, list) {
5347 if (!(wq->flags & WQ_UNBOUND))
5348 continue;
5349
5350 if (wq->flags & __WQ_ORDERED)
5351 continue;
5352
5353 ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs);
5354 if (!ctx) {
5355 ret = -ENOMEM;
5356 break;
5357 }
5358
5359 list_add_tail(&ctx->list, &ctxs);
5360 }
5361
5362 list_for_each_entry_safe(ctx, n, &ctxs, list) {
5363 if (!ret)
5364 apply_wqattrs_commit(ctx);
5365 apply_wqattrs_cleanup(ctx);
5366 }
5367
5368 return ret;
5369 }
5370
5371
5372
5373
5374
5375
5376
5377
5378
5379
5380
5381
5382
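/**
 * workqueue_set_unbound_cpumask - set the low-level unbound cpumask
 * @cpumask: the cpumask to set
 *
 * The low-level workqueue cpumask is a global mask limiting the affinity of
 * all unbound workqueues.  This function restricts @cpumask to
 * cpu_possible_mask and, if the result is non-empty and different from the
 * current mask, re-applies the attrs of every unbound workqueue, rolling
 * back to the previous mask on failure.
 *
 * Return: 0 on success, -EINVAL if the resulting mask is empty, or -ENOMEM
 * on allocation failure.
 */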
5383 int workqueue_set_unbound_cpumask(cpumask_var_t cpumask)
5384 {
5385 int ret = -EINVAL;
5386 cpumask_var_t saved_cpumask;
5387
5388
5389
5390
5391
5392 cpumask_and(cpumask, cpumask, cpu_possible_mask);
5393 if (!cpumask_empty(cpumask)) {
5394 apply_wqattrs_lock();
5395 if (cpumask_equal(cpumask, wq_unbound_cpumask)) {
5396 ret = 0;
5397 goto out_unlock;
5398 }
5399
5400 if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL)) {
5401 ret = -ENOMEM;
5402 goto out_unlock;
5403 }
5404
5405
5406 cpumask_copy(saved_cpumask, wq_unbound_cpumask);
5407
5408
5409 cpumask_copy(wq_unbound_cpumask, cpumask);
5410 ret = workqueue_apply_unbound_cpumask();
5411
5412
5413 if (ret < 0)
5414 cpumask_copy(wq_unbound_cpumask, saved_cpumask);
5415
5416 free_cpumask_var(saved_cpumask);
5417 out_unlock:
5418 apply_wqattrs_unlock();
5419 }
5420
5421 return ret;
5422 }
5423
5424 #ifdef CONFIG_SYSFS
5425
5426
5427
5428
5429
5430
5431
5432
5433
5434
5435
5436
5437
5438
5439
5440 struct wq_device {
5441 struct workqueue_struct *wq;
5442 struct device dev;
5443 };
5444
5445 static struct workqueue_struct *dev_to_wq(struct device *dev)
5446 {
5447 struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
5448
5449 return wq_dev->wq;
5450 }
5451
5452 static ssize_t per_cpu_show(struct device *dev, struct device_attribute *attr,
5453 char *buf)
5454 {
5455 struct workqueue_struct *wq = dev_to_wq(dev);
5456
5457 return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND));
5458 }
5459 static DEVICE_ATTR_RO(per_cpu);
5460
5461 static ssize_t max_active_show(struct device *dev,
5462 struct device_attribute *attr, char *buf)
5463 {
5464 struct workqueue_struct *wq = dev_to_wq(dev);
5465
5466 return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active);
5467 }
5468
5469 static ssize_t max_active_store(struct device *dev,
5470 struct device_attribute *attr, const char *buf,
5471 size_t count)
5472 {
5473 struct workqueue_struct *wq = dev_to_wq(dev);
5474 int val;
5475
5476 if (sscanf(buf, "%d", &val) != 1 || val <= 0)
5477 return -EINVAL;
5478
5479 workqueue_set_max_active(wq, val);
5480 return count;
5481 }
5482 static DEVICE_ATTR_RW(max_active);
5483
5484 static struct attribute *wq_sysfs_attrs[] = {
5485 &dev_attr_per_cpu.attr,
5486 &dev_attr_max_active.attr,
5487 NULL,
5488 };
5489 ATTRIBUTE_GROUPS(wq_sysfs);
5490
5491 static ssize_t wq_pool_ids_show(struct device *dev,
5492 struct device_attribute *attr, char *buf)
5493 {
5494 struct workqueue_struct *wq = dev_to_wq(dev);
5495 const char *delim = "";
5496 int node, written = 0;
5497
5498 cpus_read_lock();
5499 rcu_read_lock();
5500 for_each_node(node) {
5501 written += scnprintf(buf + written, PAGE_SIZE - written,
5502 "%s%d:%d", delim, node,
5503 unbound_pwq_by_node(wq, node)->pool->id);
5504 delim = " ";
5505 }
5506 written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
5507 rcu_read_unlock();
5508 cpus_read_unlock();
5509
5510 return written;
5511 }
5512
5513 static ssize_t wq_nice_show(struct device *dev, struct device_attribute *attr,
5514 char *buf)
5515 {
5516 struct workqueue_struct *wq = dev_to_wq(dev);
5517 int written;
5518
5519 mutex_lock(&wq->mutex);
5520 written = scnprintf(buf, PAGE_SIZE, "%d\n", wq->unbound_attrs->nice);
5521 mutex_unlock(&wq->mutex);
5522
5523 return written;
5524 }
5525
5526
5527 static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq)
5528 {
5529 struct workqueue_attrs *attrs;
5530
5531 lockdep_assert_held(&wq_pool_mutex);
5532
5533 attrs = alloc_workqueue_attrs();
5534 if (!attrs)
5535 return NULL;
5536
5537 copy_workqueue_attrs(attrs, wq->unbound_attrs);
5538 return attrs;
5539 }
5540
5541 static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr,
5542 const char *buf, size_t count)
5543 {
5544 struct workqueue_struct *wq = dev_to_wq(dev);
5545 struct workqueue_attrs *attrs;
5546 int ret = -ENOMEM;
5547
5548 apply_wqattrs_lock();
5549
5550 attrs = wq_sysfs_prep_attrs(wq);
5551 if (!attrs)
5552 goto out_unlock;
5553
5554 if (sscanf(buf, "%d", &attrs->nice) == 1 &&
5555 attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE)
5556 ret = apply_workqueue_attrs_locked(wq, attrs);
5557 else
5558 ret = -EINVAL;
5559
5560 out_unlock:
5561 apply_wqattrs_unlock();
5562 free_workqueue_attrs(attrs);
5563 return ret ?: count;
5564 }
5565
5566 static ssize_t wq_cpumask_show(struct device *dev,
5567 struct device_attribute *attr, char *buf)
5568 {
5569 struct workqueue_struct *wq = dev_to_wq(dev);
5570 int written;
5571
5572 mutex_lock(&wq->mutex);
5573 written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
5574 cpumask_pr_args(wq->unbound_attrs->cpumask));
5575 mutex_unlock(&wq->mutex);
5576 return written;
5577 }
5578
5579 static ssize_t wq_cpumask_store(struct device *dev,
5580 struct device_attribute *attr,
5581 const char *buf, size_t count)
5582 {
5583 struct workqueue_struct *wq = dev_to_wq(dev);
5584 struct workqueue_attrs *attrs;
5585 int ret = -ENOMEM;
5586
5587 apply_wqattrs_lock();
5588
5589 attrs = wq_sysfs_prep_attrs(wq);
5590 if (!attrs)
5591 goto out_unlock;
5592
5593 ret = cpumask_parse(buf, attrs->cpumask);
5594 if (!ret)
5595 ret = apply_workqueue_attrs_locked(wq, attrs);
5596
5597 out_unlock:
5598 apply_wqattrs_unlock();
5599 free_workqueue_attrs(attrs);
5600 return ret ?: count;
5601 }
5602
5603 static ssize_t wq_numa_show(struct device *dev, struct device_attribute *attr,
5604 char *buf)
5605 {
5606 struct workqueue_struct *wq = dev_to_wq(dev);
5607 int written;
5608
5609 mutex_lock(&wq->mutex);
5610 written = scnprintf(buf, PAGE_SIZE, "%d\n",
5611 !wq->unbound_attrs->no_numa);
5612 mutex_unlock(&wq->mutex);
5613
5614 return written;
5615 }
5616
5617 static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr,
5618 const char *buf, size_t count)
5619 {
5620 struct workqueue_struct *wq = dev_to_wq(dev);
5621 struct workqueue_attrs *attrs;
5622 int v, ret = -ENOMEM;
5623
5624 apply_wqattrs_lock();
5625
5626 attrs = wq_sysfs_prep_attrs(wq);
5627 if (!attrs)
5628 goto out_unlock;
5629
5630 ret = -EINVAL;
5631 if (sscanf(buf, "%d", &v) == 1) {
5632 attrs->no_numa = !v;
5633 ret = apply_workqueue_attrs_locked(wq, attrs);
5634 }
5635
5636 out_unlock:
5637 apply_wqattrs_unlock();
5638 free_workqueue_attrs(attrs);
5639 return ret ?: count;
5640 }
5641
5642 static struct device_attribute wq_sysfs_unbound_attrs[] = {
5643 __ATTR(pool_ids, 0444, wq_pool_ids_show, NULL),
5644 __ATTR(nice, 0644, wq_nice_show, wq_nice_store),
5645 __ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store),
5646 __ATTR(numa, 0644, wq_numa_show, wq_numa_store),
5647 __ATTR_NULL,
5648 };
5649
5650 static struct bus_type wq_subsys = {
5651 .name = "workqueue",
5652 .dev_groups = wq_sysfs_groups,
5653 };
5654
5655 static ssize_t wq_unbound_cpumask_show(struct device *dev,
5656 struct device_attribute *attr, char *buf)
5657 {
5658 int written;
5659
5660 mutex_lock(&wq_pool_mutex);
5661 written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
5662 cpumask_pr_args(wq_unbound_cpumask));
5663 mutex_unlock(&wq_pool_mutex);
5664
5665 return written;
5666 }
5667
5668 static ssize_t wq_unbound_cpumask_store(struct device *dev,
5669 struct device_attribute *attr, const char *buf, size_t count)
5670 {
5671 cpumask_var_t cpumask;
5672 int ret;
5673
5674 if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL))
5675 return -ENOMEM;
5676
5677 ret = cpumask_parse(buf, cpumask);
5678 if (!ret)
5679 ret = workqueue_set_unbound_cpumask(cpumask);
5680
5681 free_cpumask_var(cpumask);
5682 return ret ? ret : count;
5683 }
5684
5685 static struct device_attribute wq_sysfs_cpumask_attr =
5686 __ATTR(cpumask, 0644, wq_unbound_cpumask_show,
5687 wq_unbound_cpumask_store);
5688
5689 static int __init wq_sysfs_init(void)
5690 {
5691 int err;
5692
5693 err = subsys_virtual_register(&wq_subsys, NULL);
5694 if (err)
5695 return err;
5696
5697 return device_create_file(wq_subsys.dev_root, &wq_sysfs_cpumask_attr);
5698 }
5699 core_initcall(wq_sysfs_init);
5700
5701 static void wq_device_release(struct device *dev)
5702 {
5703 struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
5704
5705 kfree(wq_dev);
5706 }
5707
5708
5709
5710
5711
5712
5713
5714
5715
5716
5717
5718
5719
5720
5721
5722
5723 int workqueue_sysfs_register(struct workqueue_struct *wq)
5724 {
5725 struct wq_device *wq_dev;
5726 int ret;
5727
5728
5729
5730
5731
5732
5733 if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
5734 return -EINVAL;
5735
5736 wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL);
5737 if (!wq_dev)
5738 return -ENOMEM;
5739
5740 wq_dev->wq = wq;
5741 wq_dev->dev.bus = &wq_subsys;
5742 wq_dev->dev.release = wq_device_release;
5743 dev_set_name(&wq_dev->dev, "%s", wq->name);
5744
5745
5746
5747
5748
5749 dev_set_uevent_suppress(&wq_dev->dev, true);
5750
5751 ret = device_register(&wq_dev->dev);
5752 if (ret) {
5753 put_device(&wq_dev->dev);
5754 wq->wq_dev = NULL;
5755 return ret;
5756 }
5757
5758 if (wq->flags & WQ_UNBOUND) {
5759 struct device_attribute *attr;
5760
5761 for (attr = wq_sysfs_unbound_attrs; attr->attr.name; attr++) {
5762 ret = device_create_file(&wq_dev->dev, attr);
5763 if (ret) {
5764 device_unregister(&wq_dev->dev);
5765 wq->wq_dev = NULL;
5766 return ret;
5767 }
5768 }
5769 }
5770
5771 dev_set_uevent_suppress(&wq_dev->dev, false);
5772 kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD);
5773 return 0;
5774 }
5775
5776
5777
5778
5779
5780
5781
5782 static void workqueue_sysfs_unregister(struct workqueue_struct *wq)
5783 {
5784 struct wq_device *wq_dev = wq->wq_dev;
5785
5786 if (!wq->wq_dev)
5787 return;
5788
5789 wq->wq_dev = NULL;
5790 device_unregister(&wq_dev->dev);
5791 }
5792 #else
5793 static void workqueue_sysfs_unregister(struct workqueue_struct *wq) { }
5794 #endif
5795
5796
5797
5798
5799
5800
5801
5802
5803
5804
5805
5806
5807
5808
5809
5810
5811
5812
5813 #ifdef CONFIG_WQ_WATCHDOG
5814
5815 static unsigned long wq_watchdog_thresh = 30;
5816 static struct timer_list wq_watchdog_timer;
5817
5818 static unsigned long wq_watchdog_touched = INITIAL_JIFFIES;
5819 static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES;
5820
5821 static void wq_watchdog_reset_touched(void)
5822 {
5823 int cpu;
5824
5825 wq_watchdog_touched = jiffies;
5826 for_each_possible_cpu(cpu)
5827 per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
5828 }
5829
5830 static void wq_watchdog_timer_fn(struct timer_list *unused)
5831 {
5832 unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ;
5833 bool lockup_detected = false;
5834 unsigned long now = jiffies;
5835 struct worker_pool *pool;
5836 int pi;
5837
5838 if (!thresh)
5839 return;
5840
5841 rcu_read_lock();
5842
5843 for_each_pool(pool, pi) {
5844 unsigned long pool_ts, touched, ts;
5845
5846 if (list_empty(&pool->worklist))
5847 continue;
5848
5849
5850
5851
5852
5853 kvm_check_and_clear_guest_paused();
5854
5855
5856 if (pool->cpu >= 0)
5857 touched = READ_ONCE(per_cpu(wq_watchdog_touched_cpu, pool->cpu));
5858 else
5859 touched = READ_ONCE(wq_watchdog_touched);
5860 pool_ts = READ_ONCE(pool->watchdog_ts);
5861
5862 if (time_after(pool_ts, touched))
5863 ts = pool_ts;
5864 else
5865 ts = touched;
5866
5867
5868 if (time_after(now, ts + thresh)) {
5869 lockup_detected = true;
5870 pr_emerg("BUG: workqueue lockup - pool");
5871 pr_cont_pool_info(pool);
5872 pr_cont(" stuck for %us!\n",
5873 jiffies_to_msecs(now - pool_ts) / 1000);
5874 }
5875 }
5876
5877 rcu_read_unlock();
5878
5879 if (lockup_detected)
5880 show_all_workqueues();
5881
5882 wq_watchdog_reset_touched();
5883 mod_timer(&wq_watchdog_timer, jiffies + thresh);
5884 }
5885
5886 notrace void wq_watchdog_touch(int cpu)
5887 {
5888 if (cpu >= 0)
5889 per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
5890
5891 wq_watchdog_touched = jiffies;
5892 }
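
/*
 * Illustrative note (not part of the original source): wq_watchdog_touch()
 * is normally reached through touch_softlockup_watchdog() rather than being
 * called directly, so a long-running boot-time or polling loop that already
 * pets the soft-lockup watchdog keeps this watchdog quiet as well.  A rough
 * sketch (the mydrv_* names are made up):
 *
 *	static void mydrv_slow_poll(void)
 *	{
 *		while (!mydrv_poll_done()) {
 *			touch_softlockup_watchdog();
 *			cond_resched();
 *		}
 *	}
 */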
5893
5894 static void wq_watchdog_set_thresh(unsigned long thresh)
5895 {
5896 wq_watchdog_thresh = 0;
5897 del_timer_sync(&wq_watchdog_timer);
5898
5899 if (thresh) {
5900 wq_watchdog_thresh = thresh;
5901 wq_watchdog_reset_touched();
5902 mod_timer(&wq_watchdog_timer, jiffies + thresh * HZ);
5903 }
5904 }
5905
5906 static int wq_watchdog_param_set_thresh(const char *val,
5907 const struct kernel_param *kp)
5908 {
5909 unsigned long thresh;
5910 int ret;
5911
5912 ret = kstrtoul(val, 0, &thresh);
5913 if (ret)
5914 return ret;
5915
5916 if (system_wq)
5917 wq_watchdog_set_thresh(thresh);
5918 else
5919 wq_watchdog_thresh = thresh;
5920
5921 return 0;
5922 }
5923
5924 static const struct kernel_param_ops wq_watchdog_thresh_ops = {
5925 .set = wq_watchdog_param_set_thresh,
5926 .get = param_get_ulong,
5927 };
5928
5929 module_param_cb(watchdog_thresh, &wq_watchdog_thresh_ops, &wq_watchdog_thresh,
5930 0644);
5931
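/*
 * Illustrative usage (not part of the original source): workqueue.c is
 * built in, so the parameter defined above is set either on the kernel
 * command line or at runtime through sysfs, e.g.:
 *
 *	workqueue.watchdog_thresh=60                     (boot, in seconds)
 *	echo 60 > /sys/module/workqueue/parameters/watchdog_thresh
 *
 * Writing 0 disables the watchdog; wq_watchdog_param_set_thresh() applies
 * a new value immediately once system_wq is up.
 */
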
5932 static void wq_watchdog_init(void)
5933 {
5934 timer_setup(&wq_watchdog_timer, wq_watchdog_timer_fn, TIMER_DEFERRABLE);
5935 wq_watchdog_set_thresh(wq_watchdog_thresh);
5936 }
5937
5938 #else
5939
5940 static inline void wq_watchdog_init(void) { }
5941
5942 #endif
5943
5944 static void __init wq_numa_init(void)
5945 {
5946 cpumask_var_t *tbl;
5947 int node, cpu;
5948
5949 if (num_possible_nodes() <= 1)
5950 return;
5951
5952 if (wq_disable_numa) {
5953 pr_info("workqueue: NUMA affinity support disabled\n");
5954 return;
5955 }
5956
5957 for_each_possible_cpu(cpu) {
5958 if (WARN_ON(cpu_to_node(cpu) == NUMA_NO_NODE)) {
5959 pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
5960 return;
5961 }
5962 }
5963
5964 wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs();
5965 BUG_ON(!wq_update_unbound_numa_attrs_buf);
5966
5967 /*
5968  * We want masks of possible CPUs of each node which isn't readily
5969  * available.  Build one from cpu_to_node() which should have been
5970  * initialized already.
5971  */
5972 tbl = kcalloc(nr_node_ids, sizeof(tbl[0]), GFP_KERNEL);
5973 BUG_ON(!tbl);
5974
5975 for_each_node(node)
5976 BUG_ON(!zalloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
5977 node_online(node) ? node : NUMA_NO_NODE));
5978
5979 for_each_possible_cpu(cpu) {
5980 node = cpu_to_node(cpu);
5981 cpumask_set_cpu(cpu, tbl[node]);
5982 }
5983
5984 wq_numa_possible_cpumask = tbl;
5985 wq_numa_enabled = true;
5986 }
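
/*
 * Illustrative note (not part of the original source): the per-node table
 * built above is what lets unbound workqueues use per-node pool_workqueues.
 * NUMA affinity can be disabled at boot via the wq_disable_numa knob checked
 * above, which is assumed here to be exposed as the usual module parameter:
 *
 *	workqueue.disable_numa=1
 *
 * in which case wq_numa_init() returns before allocating the table and
 * wq_numa_enabled remains false.
 */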
5987
5988 /**
5989  * workqueue_init_early - early init for workqueue subsystem
5990  *
5991  * This is the first half of two-staged workqueue subsystem initialization
5992  * and invoked as soon as the bare basics - memory allocation, cpumasks and
5993  * idr are up.  It sets up all the data structures and system workqueues
5994  * and allows early boot code to create workqueues and queue/cancel work
5995  * items.  Actual work item execution starts only after kthreads can be
5996  * created and scheduled right before early initcalls.
5997  */
5998 void __init workqueue_init_early(void)
5999 {
6000 int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL };
6001 int i, cpu;
6002
6003 BUILD_BUG_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
6004
6005 BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
6006 cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(HK_TYPE_WQ));
6007 cpumask_and(wq_unbound_cpumask, wq_unbound_cpumask, housekeeping_cpumask(HK_TYPE_DOMAIN));
6008
6009 pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
6010
6011 /* initialize CPU pools */
6012 for_each_possible_cpu(cpu) {
6013 struct worker_pool *pool;
6014
6015 i = 0;
6016 for_each_cpu_worker_pool(pool, cpu) {
6017 BUG_ON(init_worker_pool(pool));
6018 pool->cpu = cpu;
6019 cpumask_copy(pool->attrs->cpumask, cpumask_of(cpu));
6020 pool->attrs->nice = std_nice[i++];
6021 pool->node = cpu_to_node(cpu);
6022
6023 /* alloc pool ID */
6024 mutex_lock(&wq_pool_mutex);
6025 BUG_ON(worker_pool_assign_id(pool));
6026 mutex_unlock(&wq_pool_mutex);
6027 }
6028 }
6029
6030 /* create default unbound and ordered wq attrs */
6031 for (i = 0; i < NR_STD_WORKER_POOLS; i++) {
6032 struct workqueue_attrs *attrs;
6033
6034 BUG_ON(!(attrs = alloc_workqueue_attrs()));
6035 attrs->nice = std_nice[i];
6036 unbound_std_wq_attrs[i] = attrs;
6037
6038 /*
6039  * An ordered wq should have only one pwq as ordering is
6040  * guaranteed by max_active which is enforced by pwqs.
6041  * Turn off NUMA so that dfl_pwq is used for all nodes.
6042  */
6043 BUG_ON(!(attrs = alloc_workqueue_attrs()));
6044 attrs->nice = std_nice[i];
6045 attrs->no_numa = true;
6046 ordered_wq_attrs[i] = attrs;
6047 }
6048
6049 system_wq = alloc_workqueue("events", 0, 0);
6050 system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0);
6051 system_long_wq = alloc_workqueue("events_long", 0, 0);
6052 system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
6053 WQ_UNBOUND_MAX_ACTIVE);
6054 system_freezable_wq = alloc_workqueue("events_freezable",
6055 WQ_FREEZABLE, 0);
6056 system_power_efficient_wq = alloc_workqueue("events_power_efficient",
6057 WQ_POWER_EFFICIENT, 0);
6058 system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_power_efficient",
6059 WQ_FREEZABLE | WQ_POWER_EFFICIENT,
6060 0);
6061 BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
6062 !system_unbound_wq || !system_freezable_wq ||
6063 !system_power_efficient_wq ||
6064 !system_freezable_power_efficient_wq);
6065 }
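
/*
 * Illustrative sketch (not part of the original source): after
 * workqueue_init_early(), early boot code may already create workqueues and
 * queue work items; they simply sit on the pool worklists until
 * workqueue_init() spawns the initial kworkers.  The names below are made up:
 *
 *	static void early_probe_fn(struct work_struct *work)
 *	{
 *		pr_info("runs only after workqueue_init()\n");
 *	}
 *	static DECLARE_WORK(early_probe_work, early_probe_fn);
 *
 *	// somewhere after workqueue_init_early():
 *	schedule_work(&early_probe_work);
 */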
6066
6067 /**
6068  * workqueue_init - bring workqueue subsystem fully online
6069  *
6070  * This is the latter half of two-staged workqueue subsystem initialization
6071  * and invoked as soon as kthreads can be created and scheduled.
6072  * Workqueues have been created and work items queued on them, but there
6073  * are no kworkers executing the work items yet.  Populate the worker pools
6074  * with the initial workers and enable future kworker creations.
6075  */
6076 void __init workqueue_init(void)
6077 {
6078 struct workqueue_struct *wq;
6079 struct worker_pool *pool;
6080 int cpu, bkt;
6081
6082 /*
6083  * It'd be simpler to initialize NUMA in workqueue_init_early() but
6084  * CPU to node mapping may not be available that early on some
6085  * archs such as power and arm64.  As per-cpu pools created
6086  * previously could be missing node hint and unbound pools NUMA
6087  * affinity, fix them up.
6088  *
6089  * Also, while iterating workqueues, create rescuers if requested.
6090  */
6091 wq_numa_init();
6092
6093 mutex_lock(&wq_pool_mutex);
6094
6095 for_each_possible_cpu(cpu) {
6096 for_each_cpu_worker_pool(pool, cpu) {
6097 pool->node = cpu_to_node(cpu);
6098 }
6099 }
6100
6101 list_for_each_entry(wq, &workqueues, list) {
6102 wq_update_unbound_numa(wq, smp_processor_id(), true);
6103 WARN(init_rescuer(wq),
6104 "workqueue: failed to create early rescuer for %s",
6105 wq->name);
6106 }
6107
6108 mutex_unlock(&wq_pool_mutex);
6109
6110 /* create the initial workers */
6111 for_each_online_cpu(cpu) {
6112 for_each_cpu_worker_pool(pool, cpu) {
6113 pool->flags &= ~POOL_DISASSOCIATED;
6114 BUG_ON(!create_worker(pool));
6115 }
6116 }
6117
6118 hash_for_each(unbound_pool_hash, bkt, pool, hash_node)
6119 BUG_ON(!create_worker(pool));
6120
6121 wq_online = true;
6122 wq_watchdog_init();
6123 }
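
/*
 * Illustrative note (not part of the original source): the two-stage
 * initialization is driven from init/main.c - workqueue_init_early() from
 * start_kernel() once allocators and cpumasks work, and workqueue_init()
 * once kthreads can be spawned, right before early initcalls.  The exact
 * call sites below are an assumption about the surrounding boot code:
 *
 *	start_kernel()
 *	    workqueue_init_early()   // pools, default attrs, system wqs
 *	...
 *	kernel_init_freeable()
 *	    workqueue_init()         // rescuers + initial kworkers
 *	    do_pre_smp_initcalls()   // initcalls may rely on work execution
 */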
6124
6125 /*
6126  * Despite the name, this is a no-op function which is here only for
6127  * avoiding link error. Since compile-time warning may fail to catch, we
6128  * will need to emit warning from __flush_workqueue().
6129  */
6130 void __warn_flushing_systemwide_wq(void) { }
6131 EXPORT_SYMBOL(__warn_flushing_systemwide_wq);
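
/*
 * Illustrative note (not part of the original source):
 * __warn_flushing_systemwide_wq() exists only as a link-time anchor for the
 * warning that workqueue.h emits when callers flush a system-wide workqueue
 * (e.g. flush_scheduled_work()).  Such callers should flush their own
 * workqueue instead (the "mydrv" names are made up):
 *
 *	mydrv_wq = alloc_workqueue("mydrv", 0, 0);
 *	queue_work(mydrv_wq, &mydrv_work);
 *	...
 *	flush_workqueue(mydrv_wq);	// flushes only this driver's work
 *	destroy_workqueue(mydrv_wq);
 */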