// SPDX-License-Identifier: GPL-2.0
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/page_reporting.h>
#include <linux/gfp.h>
#include <linux/export.h>
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/scatterlist.h>

#include "page_reporting.h"
#include "internal.h"

unsigned int page_reporting_order = MAX_ORDER;
module_param(page_reporting_order, uint, 0644);
MODULE_PARM_DESC(page_reporting_order, "Set page reporting order");

#define PAGE_REPORTING_DELAY	(2 * HZ)
static struct page_reporting_dev_info __rcu *pr_dev_info __read_mostly;

enum {
	PAGE_REPORTING_IDLE = 0,
	PAGE_REPORTING_REQUESTED,
	PAGE_REPORTING_ACTIVE
};
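
/* Request a reporting pass; schedules the worker if it is currently idle. */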
static void
__page_reporting_request(struct page_reporting_dev_info *prdev)
{
	unsigned int state;
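
	/* Nothing to do if reporting has already been requested. */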
	state = atomic_read(&prdev->state);
	if (state == PAGE_REPORTING_REQUESTED)
		return;
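
	/*
	 * Atomically flag the request; if the old state was anything but
	 * IDLE, a pass is active or already requested and will pick the
	 * request up, so there is nothing more to schedule here.
	 */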
	state = atomic_xchg(&prdev->state, PAGE_REPORTING_REQUESTED);
	if (state != PAGE_REPORTING_IDLE)
		return;
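
	/*
	 * Delay the start of work so that a sizable queue of free pages
	 * can build up, limiting reporting runs to roughly once every
	 * couple of seconds (PAGE_REPORTING_DELAY).
	 */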
	schedule_delayed_work(&prdev->work, PAGE_REPORTING_DELAY);
}
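
/* Notify the registered device that there are free pages to report. */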
void __page_reporting_notify(void)
{
	struct page_reporting_dev_info *prdev;
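
	/*
	 * RCU protects the pr_dev_info pointer. It is almost always
	 * present, but in the unlikely case of a shutdown it will be
	 * NULL and we simply exit.
	 */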
	rcu_read_lock();
	prdev = rcu_dereference(pr_dev_info);
	if (likely(prdev))
		__page_reporting_request(prdev);

	rcu_read_unlock();
}

static void
page_reporting_drain(struct page_reporting_dev_info *prdev,
		     struct scatterlist *sgl, unsigned int nents, bool reported)
{
	struct scatterlist *sg = sgl;
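
	/*
	 * Drain the now-reported pages back into their respective free
	 * lists/areas. At least one scatterlist entry is assumed to be
	 * populated.
	 */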
	do {
		struct page *page = sg_page(sg);
		int mt = get_pageblock_migratetype(page);
		unsigned int order = get_order(sg->length);

		__putback_isolated_page(page, order, mt);
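
		/* If the report failed, skip flagging the pages as reported. */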
		if (!reported)
			continue;
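
		/*
		 * If the page was not merged with another page it can be
		 * flagged as "reported", since its contents are unchanged.
		 * Otherwise the new, larger page will be reported once we
		 * work our way up to that higher order.
		 */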
		if (PageBuddy(page) && buddy_order(page) == order)
			__SetPageReported(page);
	} while ((sg = sg_next(sg)));
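
	/* Reinitialize the scatterlist now that it is empty. */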
	sg_init_table(sgl, nents);
}
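
/*
 * A reporting cycle walks one free list in stages: fill a scatterlist
 * with unreported pages, hand it to the device to report, then drain
 * the pages back onto the free list. It repeats until a full
 * scatterlist can no longer be obtained, at which point we go idle.
 */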
static int
page_reporting_cycle(struct page_reporting_dev_info *prdev, struct zone *zone,
		     unsigned int order, unsigned int mt,
		     struct scatterlist *sgl, unsigned int *offset)
{
	struct free_area *area = &zone->free_area[order];
	struct list_head *list = &area->free_list[mt];
	unsigned int page_len = PAGE_SIZE << order;
	struct page *page, *next;
	long budget;
	int err = 0;
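
	/* Early check: skip this free list entirely if it is empty. */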
	if (list_empty(list))
		return err;

	spin_lock_irq(&zone->lock);
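
	/*
	 * Limit how many calls we will make to the reporting device for
	 * this list, so that no single list is processed for too long.
	 * The budget allows roughly a sixteenth of the list per pass,
	 * plus one extra call for pages carried over from the previous
	 * list; on an idle system this should report all pages in about
	 * 30 seconds. The division is cheap since
	 * PAGE_REPORTING_CAPACITY should always be a power of 2.
	 */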
	budget = DIV_ROUND_UP(area->nr_free, PAGE_REPORTING_CAPACITY * 16);

	/* Walk the free list, adding unreported pages to the scatterlist. */
	list_for_each_entry_safe(page, next, list, lru) {
		/* Reported pages have already been handled; skip over them. */
		if (PageReported(page))
			continue;
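
		/*
		 * If the budget is fully consumed, flag that additional
		 * processing has been requested and exit this list.
		 */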
		if (budget < 0) {
			atomic_set(&prdev->state, PAGE_REPORTING_REQUESTED);
			next = page;
			break;
		}
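
		/* Try to pull the page from the free list into the scatterlist. */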
		if (*offset) {
			if (!__isolate_free_page(page, order)) {
				next = page;
				break;
			}

			/* Record the isolated page in the scatterlist. */
			--(*offset);
			sg_set_page(&sgl[*offset], page, page_len, 0);

			continue;
		}
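
		/*
		 * The scatterlist is full. Make the first unreported page
		 * the new head of the free list before dropping the zone
		 * lock, so processing can resume from that point.
		 */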
		if (!list_is_first(&page->lru, list))
			list_rotate_to_front(&page->lru, list);

		/* Release the zone lock while waiting on report processing. */
		spin_unlock_irq(&zone->lock);

		/* Hand the full scatterlist off to the reporting device. */
		err = prdev->report(prdev, sgl, PAGE_REPORTING_CAPACITY);

		/* Reset the offset since the full scatterlist was reported. */
		*offset = PAGE_REPORTING_CAPACITY;

		/* Account for the call to the report function. */
		budget--;

		/* Reacquire the zone lock to resume processing. */
		spin_lock_irq(&zone->lock);

		/* Flush the reported pages back out of the scatterlist. */
		page_reporting_drain(prdev, sgl, PAGE_REPORTING_CAPACITY, !err);

		/*
		 * Reset next to the first entry; the old value is no longer
		 * valid since the zone lock was dropped while reporting.
		 */
		next = list_first_entry(list, struct page, lru);

		/* Exit on error. */
		if (err)
			break;
	}
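
	/* Rotate any leftover pages to the head of the free list. */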
	if (!list_entry_is_head(next, list, lru) && !list_is_first(&next->lru, list))
		list_rotate_to_front(&next->lru, list);

	spin_unlock_irq(&zone->lock);

	return err;
}

static int
page_reporting_process_zone(struct page_reporting_dev_info *prdev,
			    struct scatterlist *sgl, struct zone *zone)
{
	unsigned int order, mt, leftover, offset = PAGE_REPORTING_CAPACITY;
	unsigned long watermark;
	int err = 0;
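
	/* Generate a minimum watermark so we can guarantee forward progress. */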
	watermark = low_wmark_pages(zone) +
		    (PAGE_REPORTING_CAPACITY << page_reporting_order);
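
	/*
	 * Cancel the request if there is insufficient free memory in the
	 * zone to safely pull pages out for reporting.
	 */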
	if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA))
		return err;

	/* Process each free list, starting from the lowest order/migratetype. */
	for (order = page_reporting_order; order < MAX_ORDER; order++) {
		for (mt = 0; mt < MIGRATE_TYPES; mt++) {
			/* Never pull pages from the isolate free list. */
			if (is_migrate_isolate(mt))
				continue;

			err = page_reporting_cycle(prdev, zone, order, mt,
						   sgl, &offset);
			if (err)
				return err;
		}
	}
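
	/* Report any leftover pages before going idle. */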
	leftover = PAGE_REPORTING_CAPACITY - offset;
	if (leftover) {
		sgl = &sgl[offset];
		err = prdev->report(prdev, sgl, leftover);

		/* Flush any remaining pages out from the last report. */
		spin_lock_irq(&zone->lock);
		page_reporting_drain(prdev, sgl, leftover, !err);
		spin_unlock_irq(&zone->lock);
	}

	return err;
}

static void page_reporting_process(struct work_struct *work)
{
	struct delayed_work *d_work = to_delayed_work(work);
	struct page_reporting_dev_info *prdev =
		container_of(d_work, struct page_reporting_dev_info, work);
	int err = 0, state = PAGE_REPORTING_ACTIVE;
	struct scatterlist *sgl;
	struct zone *zone;
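
	/*
	 * Mark the state as active so we can tell whether anyone requests
	 * reporting while this pass runs. If the state is unchanged at
	 * the end of the pass we switch to idle and stop scheduling runs.
	 */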
	atomic_set(&prdev->state, state);

	/* Allocate a scatterlist to hold the pages being reported. */
	sgl = kmalloc_array(PAGE_REPORTING_CAPACITY, sizeof(*sgl), GFP_KERNEL);
	if (!sgl)
		goto err_out;

	sg_init_table(sgl, PAGE_REPORTING_CAPACITY);

	for_each_zone(zone) {
		err = page_reporting_process_zone(prdev, sgl, zone);
		if (err)
			break;
	}

	kfree(sgl);
err_out:
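	/*
	 * If the state has reverted to "requested" there may be more
	 * pages to process; defer for PAGE_REPORTING_DELAY to let more
	 * pages accumulate before the next pass.
	 */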
	state = atomic_cmpxchg(&prdev->state, state, PAGE_REPORTING_IDLE);
	if (state == PAGE_REPORTING_REQUESTED)
		schedule_delayed_work(&prdev->work, PAGE_REPORTING_DELAY);
}

static DEFINE_MUTEX(page_reporting_mutex);
DEFINE_STATIC_KEY_FALSE(page_reporting_enabled);

int page_reporting_register(struct page_reporting_dev_info *prdev)
{
	int err = 0;

	mutex_lock(&page_reporting_mutex);
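
	/* Nothing to do if a reporting device is already registered. */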
	if (rcu_access_pointer(pr_dev_info)) {
		err = -EBUSY;
		goto err_out;
	}
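
	/*
	 * Use the reporting order specified by the driver, or fall back
	 * to pageblock_order if the driver left it at zero.
	 */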
	page_reporting_order = prdev->order ? : pageblock_order;

	/* Initialize the state and work structures. */
	atomic_set(&prdev->state, PAGE_REPORTING_IDLE);
	INIT_DELAYED_WORK(&prdev->work, &page_reporting_process);

	/* Kick off an initial flush of the zones. */
	__page_reporting_request(prdev);

	/* Publish the device so notifications can reach it. */
	rcu_assign_pointer(pr_dev_info, prdev);

	/* Enable the page reporting notification static key. */
	if (!static_key_enabled(&page_reporting_enabled)) {
		static_branch_enable(&page_reporting_enabled);
		pr_info("Free page reporting enabled\n");
	}
err_out:
	mutex_unlock(&page_reporting_mutex);

	return err;
}
EXPORT_SYMBOL_GPL(page_reporting_register);

void page_reporting_unregister(struct page_reporting_dev_info *prdev)
{
	mutex_lock(&page_reporting_mutex);

	if (rcu_access_pointer(pr_dev_info) == prdev) {
		/* Unpublish the device and wait out existing RCU readers. */
		RCU_INIT_POINTER(pr_dev_info, NULL);
		synchronize_rcu();
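
		/* Flush any outstanding work so the worker can exit. */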
		cancel_delayed_work_sync(&prdev->work);
	}

	mutex_unlock(&page_reporting_mutex);
}
EXPORT_SYMBOL_GPL(page_reporting_unregister);