// SPDX-License-Identifier: MIT
/*
 * Copyright © 2018 Intel Corporation
 */

#include <linux/crc32.h>

#include "gem/i915_gem_stolen.h"

#include "i915_memcpy.h"
#include "i915_selftest.h"
#include "intel_gpu_commands.h"
#include "selftests/igt_reset.h"
#include "selftests/igt_atomic.h"
#include "selftests/igt_spinner.h"

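/*
 * Check that a GPU reset does not scribble over stolen memory: CRC every
 * page of the stolen region before and after the reset, and flag any
 * unexpected changes to pages the driver may still be using.
 */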
static int
__igt_reset_stolen(struct intel_gt *gt,
                   intel_engine_mask_t mask,
                   const char *msg)
{
    struct i915_ggtt *ggtt = gt->ggtt;
    const struct resource *dsm = &gt->i915->dsm;
    resource_size_t num_pages, page;
    struct intel_engine_cs *engine;
    intel_wakeref_t wakeref;
    enum intel_engine_id id;
    struct igt_spinner spin;
    long max, count;
    void *tmp;
    u32 *crc;
    int err;

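    /*
     * The GGTT error-capture slot is reused below as a window through
     * which each stolen page is mapped and read back.
     */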
    if (!drm_mm_node_allocated(&ggtt->error_capture))
        return 0;

    num_pages = resource_size(dsm) >> PAGE_SHIFT;
    if (!num_pages)
        return 0;

    crc = kmalloc_array(num_pages, sizeof(u32), GFP_KERNEL);
    if (!crc)
        return -ENOMEM;

    tmp = kmalloc(PAGE_SIZE, GFP_KERNEL);
    if (!tmp) {
        err = -ENOMEM;
        goto err_crc;
    }

    igt_global_reset_lock(gt);
    wakeref = intel_runtime_pm_get(gt->uncore->rpm);

    err = igt_spinner_init(&spin, gt);
    if (err)
        goto err_lock;

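    /* Keep each selected engine busy with a spinning request across the reset */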
    for_each_engine(engine, gt, id) {
        struct intel_context *ce;
        struct i915_request *rq;

        if (!(mask & engine->mask))
            continue;

        if (!intel_engine_can_store_dword(engine))
            continue;

        ce = intel_context_create(engine);
        if (IS_ERR(ce)) {
            err = PTR_ERR(ce);
            goto err_spin;
        }
        rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
        intel_context_put(ce);
        if (IS_ERR(rq)) {
            err = PTR_ERR(rq);
            goto err_spin;
        }
        i915_request_add(rq);
    }

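    /*
     * First pass: poison every stolen page not covered by an allocation,
     * then record a CRC for the whole region.
     */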
    for (page = 0; page < num_pages; page++) {
        dma_addr_t dma = (dma_addr_t)dsm->start + (page << PAGE_SHIFT);
        void __iomem *s;
        void *in;

        ggtt->vm.insert_page(&ggtt->vm, dma,
                     ggtt->error_capture.start,
                     I915_CACHE_NONE, 0);
        mb();

        s = io_mapping_map_wc(&ggtt->iomap,
                      ggtt->error_capture.start,
                      PAGE_SIZE);

        if (!__drm_mm_interval_first(&gt->i915->mm.stolen,
                         page << PAGE_SHIFT,
                         ((page + 1) << PAGE_SHIFT) - 1))
            memset_io(s, STACK_MAGIC, PAGE_SIZE);

        in = (void __force *)s;
        if (i915_memcpy_from_wc(tmp, in, PAGE_SIZE))
            in = tmp;
        crc[page] = crc32_le(0, in, PAGE_SIZE);

        io_mapping_unmap(s);
    }
    mb();
    ggtt->vm.clear_range(&ggtt->vm, ggtt->error_capture.start, PAGE_SIZE);

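    /* Trigger the reset under test: the whole GT, or each selected engine */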
    if (mask == ALL_ENGINES) {
        intel_gt_reset(gt, mask, NULL);
    } else {
        for_each_engine(engine, gt, id) {
            if (mask & engine->mask)
                intel_engine_reset(engine, NULL);
        }
    }

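    /*
     * Second pass: recompute each CRC and report unused pages whose
     * contents changed across the reset.
     */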
    max = -1;
    count = 0;
    for (page = 0; page < num_pages; page++) {
        dma_addr_t dma = (dma_addr_t)dsm->start + (page << PAGE_SHIFT);
        void __iomem *s;
        void *in;
        u32 x;

        ggtt->vm.insert_page(&ggtt->vm, dma,
                     ggtt->error_capture.start,
                     I915_CACHE_NONE, 0);
        mb();

        s = io_mapping_map_wc(&ggtt->iomap,
                      ggtt->error_capture.start,
                      PAGE_SIZE);

        in = (void __force *)s;
        if (i915_memcpy_from_wc(tmp, in, PAGE_SIZE))
            in = tmp;
        x = crc32_le(0, in, PAGE_SIZE);

        if (x != crc[page] &&
            !__drm_mm_interval_first(&gt->i915->mm.stolen,
                         page << PAGE_SHIFT,
                         ((page + 1) << PAGE_SHIFT) - 1)) {
            pr_debug("unused stolen page %pa modified by GPU reset\n",
                 &page);
            if (count++ == 0)
                igt_hexdump(in, PAGE_SIZE);
            max = page;
        }

        io_mapping_unmap(s);
    }
    mb();
    ggtt->vm.clear_range(&ggtt->vm, ggtt->error_capture.start, PAGE_SIZE);

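    /*
     * Clobbering the excluded low pages (below I915_GEM_STOLEN_BIAS) is
     * tolerated; corruption above that mark hits memory the driver may
     * hand out, and is treated as a failure.
     */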
    if (count > 0) {
        pr_info("%s reset clobbered %ld pages of stolen, last clobber at page %ld\n",
            msg, count, max);
    }
    if (max >= I915_GEM_STOLEN_BIAS >> PAGE_SHIFT) {
        pr_err("%s reset clobbered unreserved area [above %x] of stolen; may cause severe faults\n",
               msg, I915_GEM_STOLEN_BIAS);
        err = -EINVAL;
    }

err_spin:
    igt_spinner_fini(&spin);

err_lock:
    intel_runtime_pm_put(gt->uncore->rpm, wakeref);
    igt_global_reset_unlock(gt);

    kfree(tmp);
err_crc:
    kfree(crc);
    return err;
}

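/* Verify stolen memory survives a full device reset */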
static int igt_reset_device_stolen(void *arg)
{
    return __igt_reset_stolen(arg, ALL_ENGINES, "device");
}

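/* Verify stolen memory survives a reset of each individual engine */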
static int igt_reset_engines_stolen(void *arg)
{
    struct intel_gt *gt = arg;
    struct intel_engine_cs *engine;
    enum intel_engine_id id;
    int err;

    if (!intel_has_reset_engine(gt))
        return 0;

    for_each_engine(engine, gt, id) {
        err = __igt_reset_stolen(gt, engine->mask, engine->name);
        if (err)
            return err;
    }

    return 0;
}

static int igt_global_reset(void *arg)
{
    struct intel_gt *gt = arg;
    unsigned int reset_count;
    intel_wakeref_t wakeref;
    int err = 0;

    /* Check that we can issue a global GPU reset */

    igt_global_reset_lock(gt);
    wakeref = intel_runtime_pm_get(gt->uncore->rpm);

    reset_count = i915_reset_count(&gt->i915->gpu_error);

    intel_gt_reset(gt, ALL_ENGINES, NULL);

    if (i915_reset_count(&gt->i915->gpu_error) == reset_count) {
        pr_err("No GPU reset recorded!\n");
        err = -EINVAL;
    }

    intel_runtime_pm_put(gt->uncore->rpm, wakeref);
    igt_global_reset_unlock(gt);

    if (intel_gt_is_wedged(gt))
        err = -EIO;

    return err;
}

static int igt_wedged_reset(void *arg)
{
    struct intel_gt *gt = arg;
    intel_wakeref_t wakeref;

    /* Check that we can recover a wedged device with a GPU reset */

    igt_global_reset_lock(gt);
    wakeref = intel_runtime_pm_get(gt->uncore->rpm);

    intel_gt_set_wedged(gt);

    GEM_BUG_ON(!intel_gt_is_wedged(gt));
    intel_gt_reset(gt, ALL_ENGINES, NULL);

    intel_runtime_pm_put(gt->uncore->rpm, wakeref);
    igt_global_reset_unlock(gt);

    return intel_gt_is_wedged(gt) ? -EIO : 0;
}

static int igt_atomic_reset(void *arg)
{
    struct intel_gt *gt = arg;
    const typeof(*igt_atomic_phases) *p;
    int err = 0;

    /* Check that the resets are usable from atomic context */

    intel_gt_pm_get(gt);
    igt_global_reset_lock(gt);

    /* Flush any requests before we get started and check basics */
    if (!igt_force_reset(gt))
        goto unlock;

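    /*
     * Attempt the low-level reset inside each atomic phase (irqs off,
     * softirqs off, preemption off) to check it never sleeps.
     */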
    for (p = igt_atomic_phases; p->name; p++) {
        intel_engine_mask_t awake;

        GEM_TRACE("__intel_gt_reset under %s\n", p->name);

        awake = reset_prepare(gt);
        p->critical_section_begin();

        err = __intel_gt_reset(gt, ALL_ENGINES);

        p->critical_section_end();
        reset_finish(gt, awake);

        if (err) {
            pr_err("__intel_gt_reset failed under %s\n", p->name);
            break;
        }
    }

    /* As we poke around the guts, do a full reset before continuing. */
    igt_force_reset(gt);

unlock:
    igt_global_reset_unlock(gt);
    intel_gt_pm_put(gt);

    return err;
}

static int igt_atomic_engine_reset(void *arg)
{
    struct intel_gt *gt = arg;
    const typeof(*igt_atomic_phases) *p;
    struct intel_engine_cs *engine;
    enum intel_engine_id id;
    int err = 0;

    /* Check that the resets are usable from atomic context */

    if (!intel_has_reset_engine(gt))
        return 0;

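    /* Engine resets are mediated by the GuC when it owns submission */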
    if (intel_uc_uses_guc_submission(&gt->uc))
        return 0;

    intel_gt_pm_get(gt);
    igt_global_reset_lock(gt);

    /* Flush any requests before we get started and check basics */
    if (!igt_force_reset(gt))
        goto out_unlock;

    for_each_engine(engine, gt, id) {
        struct tasklet_struct *t = &engine->sched_engine->tasklet;

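        /* Keep the submission tasklet from running concurrently with the reset */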
        if (t->func)
            tasklet_disable(t);
        intel_engine_pm_get(engine);

        for (p = igt_atomic_phases; p->name; p++) {
            GEM_TRACE("intel_engine_reset(%s) under %s\n",
                  engine->name, p->name);
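            /* The softirq phase disables bottom halves itself */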
            if (strcmp(p->name, "softirq"))
                local_bh_disable();

            p->critical_section_begin();
            err = __intel_engine_reset_bh(engine, NULL);
            p->critical_section_end();

            if (strcmp(p->name, "softirq"))
                local_bh_enable();

            if (err) {
                pr_err("intel_engine_reset(%s) failed under %s\n",
                       engine->name, p->name);
                break;
            }
        }

        intel_engine_pm_put(engine);
        if (t->func) {
            tasklet_enable(t);
            tasklet_hi_schedule(t);
        }
        if (err)
            break;
    }

    /* As we poke around the guts, do a full reset before continuing. */
    igt_force_reset(gt);

out_unlock:
    igt_global_reset_unlock(gt);
    intel_gt_pm_put(gt);

    return err;
}

int intel_reset_live_selftests(struct drm_i915_private *i915)
{
    static const struct i915_subtest tests[] = {
        SUBTEST(igt_global_reset), /* attempt to recover GPU first */
        SUBTEST(igt_reset_device_stolen),
        SUBTEST(igt_reset_engines_stolen),
        SUBTEST(igt_wedged_reset),
        SUBTEST(igt_atomic_reset),
        SUBTEST(igt_atomic_engine_reset),
    };
    struct intel_gt *gt = to_gt(i915);

    if (!intel_has_gpu_reset(gt))
        return 0;

    if (intel_gt_is_wedged(gt))
        return -EIO; /* we're long past hope of a successful reset */

    return intel_gt_live_subtests(tests, gt);
}