// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include "i915_drv.h"
#include "intel_context.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gtt.h"
#include "intel_migrate.h"
#include "intel_ring.h"

struct insert_pte_data {
	u64 offset;
};

#define CHUNK_SZ SZ_8M /* ~1ms at 8GiB/s */

#define GET_CCS_BYTES(i915, size)	(HAS_FLAT_CCS(i915) ? \
					 DIV_ROUND_UP(size, NUM_BYTES_PER_CCS_BYTE) : 0)

static bool engine_supports_migration(struct intel_engine_cs *engine)
{
	if (!engine)
		return false;

	/*
	 * We need the ability to prevent arbitration (MI_ARB_ON_OFF),
	 * the ability to write PTEs using inline data (MI_STORE_DATA)
	 * and of course the ability to do the block transfer (blits).
	 */
	GEM_BUG_ON(engine->class != COPY_ENGINE_CLASS);

	return true;
}

static void xehpsdv_toggle_pdes(struct i915_address_space *vm,
				struct i915_page_table *pt,
				void *data)
{
	struct insert_pte_data *d = data;

	/*
	 * Insert a dummy LMEM PTE into each page-table covering the window
	 * so that its PDE is marked as a compact 64K page-table before the
	 * window is used for local memory.
	 */
	vm->insert_page(vm, 0, d->offset, I915_CACHE_NONE, PTE_LM);
	GEM_BUG_ON(!pt->is_compact);
	d->offset += SZ_2M;
}

static void xehpsdv_insert_pte(struct i915_address_space *vm,
			       struct i915_page_table *pt,
			       void *data)
{
	struct insert_pte_data *d = data;

	/*
	 * Map the page-table page itself into the PTE window so that the
	 * GPU can later rewrite its entries with MI_STORE_DATA_IMM (see
	 * emit_pte()). The backing allocation is 64K aligned on 64K-page
	 * platforms, so each page-table consumes one 64K slot in the
	 * window, regardless of how many entries the hardware actually
	 * reads from it.
	 */
	vm->insert_page(vm, px_dma(pt), d->offset, I915_CACHE_NONE, PTE_LM);
	d->offset += SZ_64K;
}

static void insert_pte(struct i915_address_space *vm,
		       struct i915_page_table *pt,
		       void *data)
{
	struct insert_pte_data *d = data;

	vm->insert_page(vm, px_dma(pt), d->offset, I915_CACHE_NONE,
			i915_gem_object_is_lmem(pt->base) ? PTE_LM : 0);
	d->offset += PAGE_SIZE;
}

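/*
 * Construct the private ppGTT that every migration context on this GT
 * shares. It is kept pinned so that a blit can be launched at any time,
 * even under memory pressure.
 */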
static struct i915_address_space *migrate_vm(struct intel_gt *gt)
{
	struct i915_vm_pt_stash stash = {};
	struct i915_ppgtt *vm;
	int err;
	int i;

	/*
	 * Each copy-engine instance gets its own 4GiB slice of this vm
	 * (base = instance << 32) so that several engines can blit
	 * concurrently without stepping on each other's PTEs.
	 *
	 * Within a slice the layout is:
	 *
	 *   [0, CHUNK_SZ)            source window
	 *   [CHUNK_SZ, 2 * CHUNK_SZ) destination window
	 *   [2 * CHUNK_SZ, ...)      the page-tables backing those windows
	 *
	 * On 64K-page platforms a third CHUNK_SZ window is added before the
	 * page-table window: window 0 maps system memory with 4K PTEs while
	 * windows 1 and 2 map local memory using the compact 64K layout
	 * (see the matching offsets in emit_pte() and
	 * intel_context_migrate_copy()).
	 *
	 * The page-tables for the windows are mapped into the vm itself so
	 * that the copy engine can rewrite the PTEs for each chunk inline
	 * with the blits, using MI_STORE_DATA_IMM, instead of having to
	 * update them from the CPU. All blits therefore use the same fixed
	 * addresses, with only the backing pages being redirected on the
	 * fly.
	 */
	vm = i915_ppgtt_create(gt, I915_BO_ALLOC_PM_EARLY);
	if (IS_ERR(vm))
		return ERR_CAST(vm);

	if (!vm->vm.allocate_va_range || !vm->vm.foreach) {
		err = -ENODEV;
		goto err_vm;
	}

	if (HAS_64K_PAGES(gt->i915))
		stash.pt_sz = I915_GTT_PAGE_SIZE_64K;

	for (i = 0; i < ARRAY_SIZE(gt->engine_class[COPY_ENGINE_CLASS]); i++) {
		struct intel_engine_cs *engine;
		u64 base = (u64)i << 32;
		struct insert_pte_data d = {};
		struct i915_gem_ww_ctx ww;
		u64 sz;

		engine = gt->engine_class[COPY_ENGINE_CLASS][i];
		if (!engine_supports_migration(engine))
			continue;

		/*
		 * We copy in 8MiB chunks. Each PDE covers 2MiB, so we need
		 * 4x2 page directories for source/destination.
		 */
		if (HAS_64K_PAGES(gt->i915))
			sz = 3 * CHUNK_SZ;
		else
			sz = 2 * CHUNK_SZ;
		d.offset = base + sz;

		/*
		 * We need another page directory setup so that we can write
		 * the 8x512 PTE in each chunk.
		 */
		if (HAS_64K_PAGES(gt->i915))
			sz += (sz / SZ_2M) * SZ_64K;
		else
			sz += (sz >> 12) * sizeof(u64);

		err = i915_vm_alloc_pt_stash(&vm->vm, &stash, sz);
		if (err)
			goto err_vm;

		for_i915_gem_ww(&ww, err, true) {
			err = i915_vm_lock_objects(&vm->vm, &ww);
			if (err)
				continue;
			err = i915_vm_map_pt_stash(&vm->vm, &stash);
			if (err)
				continue;

			vm->vm.allocate_va_range(&vm->vm, &stash, base, sz);
		}
		i915_vm_free_pt_stash(&vm->vm, &stash);
		if (err)
			goto err_vm;

		/* Now allow the GPU to rewrite the PTE via its own ppGTT */
		if (HAS_64K_PAGES(gt->i915)) {
			vm->vm.foreach(&vm->vm, base, d.offset - base,
				       xehpsdv_insert_pte, &d);
			d.offset = base + CHUNK_SZ;
			vm->vm.foreach(&vm->vm,
				       d.offset,
				       2 * CHUNK_SZ,
				       xehpsdv_toggle_pdes, &d);
		} else {
			vm->vm.foreach(&vm->vm, base, d.offset - base,
				       insert_pte, &d);
		}
	}

	return &vm->vm;

err_vm:
	i915_vm_put(&vm->vm);
	return ERR_PTR(err);
}

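/* Pick the first copy engine on this GT that can back the migration context. */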
static struct intel_engine_cs *first_copy_engine(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	int i;

	for (i = 0; i < ARRAY_SIZE(gt->engine_class[COPY_ENGINE_CLASS]); i++) {
		engine = gt->engine_class[COPY_ENGINE_CLASS][i];
		if (engine_supports_migration(engine))
			return engine;
	}

	return NULL;
}

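/*
 * Create the always-pinned migration context: a pinned context on the first
 * suitable copy engine, using the special migrate_vm() address space and its
 * own hardware status page offset (I915_GEM_HWS_MIGRATE).
 */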
static struct intel_context *pinned_context(struct intel_gt *gt)
{
	static struct lock_class_key key;
	struct intel_engine_cs *engine;
	struct i915_address_space *vm;
	struct intel_context *ce;

	engine = first_copy_engine(gt);
	if (!engine)
		return ERR_PTR(-ENODEV);

	vm = migrate_vm(gt);
	if (IS_ERR(vm))
		return ERR_CAST(vm);

	ce = intel_engine_create_pinned_context(engine, vm, SZ_512K,
						I915_GEM_HWS_MIGRATE,
						&key, "migrate");
	i915_vm_put(vm);
	return ce;
}

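/*
 * Initialise the migration state for @gt: create the pinned migration
 * context (and with it the shared migrate vm) used for all copy and clear
 * work on this GT.
 */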
int intel_migrate_init(struct intel_migrate *m, struct intel_gt *gt)
{
	struct intel_context *ce;

	memset(m, 0, sizeof(*m));

	ce = pinned_context(gt);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	m->context = ce;
	return 0;
}

static int random_index(unsigned int max)
{
	return upper_32_bits(mul_u32_u32(get_random_u32(), max));
}

static struct intel_context *__migrate_engines(struct intel_gt *gt)
{
	struct intel_engine_cs *engines[MAX_ENGINE_INSTANCE];
	struct intel_engine_cs *engine;
	unsigned int count, i;

	count = 0;
	for (i = 0; i < ARRAY_SIZE(gt->engine_class[COPY_ENGINE_CLASS]); i++) {
		engine = gt->engine_class[COPY_ENGINE_CLASS][i];
		if (engine_supports_migration(engine))
			engines[count++] = engine;
	}

	return intel_context_create(engines[random_index(count)]);
}

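/*
 * Create a fresh context for a single migration operation. The context is
 * returned unpinned; callers pin it (see intel_migrate_copy()) before
 * submitting requests on it.
 */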
struct intel_context *intel_migrate_create_context(struct intel_migrate *m)
{
	struct intel_context *ce;

	/*
	 * We randomly distribute contexts across the engines upon
	 * construction, as they all share the same pinned vm, and so in
	 * order to allow multiple blits to run in parallel, we must
	 * construct each blit to use a different range of the vm for its
	 * GTT. This has to be known at construction, so we can not use the
	 * late greedy load balancing of the virtual-engine.
	 */
	ce = __migrate_engines(m->context->engine->gt);
	if (IS_ERR(ce))
		return ce;

	ce->ring = NULL;
	ce->ring_size = SZ_256K;

	i915_vm_put(ce->vm);
	ce->vm = i915_vm_get(m->context->vm);

	return ce;
}

static inline struct sgt_dma sg_sgt(struct scatterlist *sg)
{
	dma_addr_t addr = sg_dma_address(sg);

	return (struct sgt_dma){ sg, addr, addr + sg_dma_len(sg) };
}

static int emit_no_arbitration(struct i915_request *rq)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* Explicitly disable preemption for this request. */
	*cs++ = MI_ARB_ON_OFF;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	return 0;
}

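/*
 * Fill the PTE window for @offset with entries for the pages described by
 * @it, writing them with MI_STORE_DATA_IMM directly from the copy engine.
 * Returns the number of bytes of @it that were mapped, or a negative error
 * code, so the caller knows how large a blit it may emit against the window.
 */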
static int emit_pte(struct i915_request *rq,
		    struct sgt_dma *it,
		    enum i915_cache_level cache_level,
		    bool is_lmem,
		    u64 offset,
		    int length)
{
	bool has_64K_pages = HAS_64K_PAGES(rq->engine->i915);
	const u64 encode = rq->context->vm->pte_encode(0, cache_level,
						       is_lmem ? PTE_LM : 0);
	struct intel_ring *ring = rq->ring;
	int pkt, dword_length;
	u32 total = 0;
	u32 page_size;
	u32 *hdr, *cs;

	GEM_BUG_ON(GRAPHICS_VER(rq->engine->i915) < 8);

	page_size = I915_GTT_PAGE_SIZE;
	dword_length = 0x400;

	/* Compute the page directory offset for the target address range */
	if (has_64K_pages) {
		GEM_BUG_ON(!IS_ALIGNED(offset, SZ_2M));

		offset /= SZ_2M;
		offset *= SZ_64K;
		offset += 3 * CHUNK_SZ;

		if (is_lmem) {
			page_size = I915_GTT_PAGE_SIZE_64K;
			dword_length = 0x40;
		}
	} else {
		offset >>= 12;
		offset *= sizeof(u64);
		offset += 2 * CHUNK_SZ;
	}

	offset += (u64)rq->engine->instance << 32;

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* Pack as many PTE updates as possible into a single MI command */
	pkt = min_t(int, dword_length, ring->space / sizeof(u32) + 5);
	pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5);

	hdr = cs;
	*cs++ = MI_STORE_DATA_IMM | REG_BIT(21); /* as qword elements */
	*cs++ = lower_32_bits(offset);
	*cs++ = upper_32_bits(offset);

	do {
		if (cs - hdr >= pkt) {
			int dword_rem;

			*hdr += cs - hdr - 2;
			*cs++ = MI_NOOP;

			ring->emit = (void *)cs - ring->vaddr;
			intel_ring_advance(rq, cs);
			intel_ring_update_space(ring);

			cs = intel_ring_begin(rq, 6);
			if (IS_ERR(cs))
				return PTR_ERR(cs);

			dword_rem = dword_length;
			if (has_64K_pages) {
				if (IS_ALIGNED(total, SZ_2M)) {
					offset = round_up(offset, SZ_64K);
				} else {
					dword_rem = SZ_2M - (total & (SZ_2M - 1));
					dword_rem /= page_size;
					dword_rem *= 2;
				}
			}

			pkt = min_t(int, dword_rem, ring->space / sizeof(u32) + 5);
			pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5);

			hdr = cs;
			*cs++ = MI_STORE_DATA_IMM | REG_BIT(21);
			*cs++ = lower_32_bits(offset);
			*cs++ = upper_32_bits(offset);
		}

		GEM_BUG_ON(!IS_ALIGNED(it->dma, page_size));

		*cs++ = lower_32_bits(encode | it->dma);
		*cs++ = upper_32_bits(encode | it->dma);

		offset += 8;
		total += page_size;

		it->dma += page_size;
		if (it->dma >= it->max) {
			it->sg = __sg_next(it->sg);
			if (!it->sg || sg_dma_len(it->sg) == 0)
				break;

			it->dma = sg_dma_address(it->sg);
			it->max = it->dma + sg_dma_len(it->sg);
		}
	} while (total < length);

	*hdr += cs - hdr - 2;
	*cs++ = MI_NOOP;

	ring->emit = (void *)cs - ring->vaddr;
	intel_ring_advance(rq, cs);
	intel_ring_update_space(ring);

	return total;
}

/* Wa_1209644611:icl,ehl */
static bool wa_1209644611_applies(int ver, u32 size)
{
	u32 height = size >> PAGE_SHIFT;

	if (ver != 11)
		return false;

	return height % 4 == 3 && height <= 8;
}

/**
 * DOC: Flat-CCS - Memory compression for Local memory
 *
 * On Xe-HP and later devices, we use dedicated compression control state
 * (CCS) stored in local memory for each surface, to support the 3D and media
 * compression formats.
 *
 * The memory required for the CCS of the entire local memory is 1/256 of the
 * local memory size. Before the kernel boots, this memory is reserved for the
 * CCS data and a secure register is programmed with the CCS base address.
 *
 * Flat CCS data needs to be cleared when a lmem object is allocated, and CCS
 * data can be copied in and out of the CCS region through
 * XY_CTRL_SURF_COPY_BLT. The CPU cannot access the CCS data directly.
 *
 * i915 supports Flat-CCS only on lmem-only objects: if a compressed object
 * ever had to be migrated to smem, i915 would need to decompress it, and it
 * does not have the information required to do so.
 *
 * When lmem is exhausted, the backing store of Flat-CCS capable objects can
 * still be temporarily evicted to smem along with their CCS state, and
 * potentially swapped out later. If userspace then touches an evicted
 * object, the corresponding CCS data is restored before the object is marked
 * as fully restored.
 */

static inline u32 *i915_flush_dw(u32 *cmd, u32 flags)
{
	*cmd++ = MI_FLUSH_DW | flags;
	*cmd++ = 0;
	*cmd++ = 0;

	return cmd;
}

static u32 calc_ctrl_surf_instr_size(struct drm_i915_private *i915, int size)
{
	u32 num_cmds, num_blks, total_size;

	if (!GET_CCS_BYTES(i915, size))
		return 0;

	/*
	 * XY_CTRL_SURF_COPY_BLT transfers CCS in 256 byte blocks, and one
	 * XY_CTRL_SURF_COPY_BLT command can transfer up to 1024 blocks.
	 */
	num_blks = DIV_ROUND_UP(GET_CCS_BYTES(i915, size),
				NUM_CCS_BYTES_PER_BLOCK);
	num_cmds = DIV_ROUND_UP(num_blks, NUM_CCS_BLKS_PER_XFER);
	total_size = XY_CTRL_SURF_INSTR_SIZE * num_cmds;

	/*
	 * Account for a flush before and after the XY_CTRL_SURF_COPY_BLT.
	 */
	total_size += 2 * MI_FLUSH_DW_SIZE;

	return total_size;
}

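/*
 * Emit a XY_CTRL_SURF_COPY_BLT that copies the compression control state
 * (CCS) associated with @size bytes of data to or from the flat-CCS region,
 * depending on the direct/indirect access types, bracketed by MI_FLUSH_DW
 * flushes of the CCS caches.
 */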
static int emit_copy_ccs(struct i915_request *rq,
			 u32 dst_offset, u8 dst_access,
			 u32 src_offset, u8 src_access, int size)
{
	struct drm_i915_private *i915 = rq->engine->i915;
	int mocs = rq->engine->gt->mocs.uc_index << 1;
	u32 num_ccs_blks, ccs_ring_size;
	u32 *cs;

	ccs_ring_size = calc_ctrl_surf_instr_size(i915, size);
	WARN_ON(!ccs_ring_size);

	cs = intel_ring_begin(rq, round_up(ccs_ring_size, 2));
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	num_ccs_blks = DIV_ROUND_UP(GET_CCS_BYTES(i915, size),
				    NUM_CCS_BYTES_PER_BLOCK);
	GEM_BUG_ON(num_ccs_blks > NUM_CCS_BLKS_PER_XFER);
	cs = i915_flush_dw(cs, MI_FLUSH_DW_LLC | MI_FLUSH_DW_CCS);

	/*
	 * The XY_CTRL_SURF_COPY_BLT instruction is used to copy the CCS
	 * data in and out of the CCS region.
	 *
	 * We can copy at most 1024 blocks of 256 bytes using one
	 * XY_CTRL_SURF_COPY_BLT instruction.
	 *
	 * In case we need to copy more than 1024 blocks, we need to add
	 * another instruction to the same batch buffer.
	 *
	 * 1024 blocks of 256 bytes of CCS represent a total 256KB of CCS.
	 *
	 * 256KB of CCS represents 256 * 256KB = 64MB of LMEM.
	 */
	*cs++ = XY_CTRL_SURF_COPY_BLT |
		src_access << SRC_ACCESS_TYPE_SHIFT |
		dst_access << DST_ACCESS_TYPE_SHIFT |
		((num_ccs_blks - 1) & CCS_SIZE_MASK) << CCS_SIZE_SHIFT;
	*cs++ = src_offset;
	*cs++ = rq->engine->instance |
		FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs);
	*cs++ = dst_offset;
	*cs++ = rq->engine->instance |
		FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs);

	cs = i915_flush_dw(cs, MI_FLUSH_DW_LLC | MI_FLUSH_DW_CCS);
	if (ccs_ring_size & 1)
		*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	return 0;
}

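/*
 * Emit the main-memory blit for one chunk: a page-wide copy of @size bytes
 * from the source window to the destination window, using the fastest XY
 * copy command available for this generation.
 */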
static int emit_copy(struct i915_request *rq,
		     u32 dst_offset, u32 src_offset, int size)
{
	const int ver = GRAPHICS_VER(rq->engine->i915);
	u32 instance = rq->engine->instance;
	u32 *cs;

	cs = intel_ring_begin(rq, ver >= 8 ? 10 : 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	if (ver >= 9 && !wa_1209644611_applies(ver, size)) {
		*cs++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2);
		*cs++ = BLT_DEPTH_32 | PAGE_SIZE;
		*cs++ = 0;
		*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
		*cs++ = dst_offset;
		*cs++ = instance;
		*cs++ = 0;
		*cs++ = PAGE_SIZE;
		*cs++ = src_offset;
		*cs++ = instance;
	} else if (ver >= 8) {
		*cs++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2);
		*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
		*cs++ = 0;
		*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
		*cs++ = dst_offset;
		*cs++ = instance;
		*cs++ = 0;
		*cs++ = PAGE_SIZE;
		*cs++ = src_offset;
		*cs++ = instance;
	} else {
		GEM_BUG_ON(instance);
		*cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
		*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
		*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE;
		*cs++ = dst_offset;
		*cs++ = PAGE_SIZE;
		*cs++ = src_offset;
	}

	intel_ring_advance(rq, cs);
	return 0;
}

static u64 scatter_list_length(struct scatterlist *sg)
{
	u64 len = 0;

	while (sg && sg_dma_len(sg)) {
		len += sg_dma_len(sg);
		sg = sg_next(sg);
	}

	return len;
}

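/* How much of the main payload to map for the next pass of the copy loop. */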
static int
calculate_chunk_sz(struct drm_i915_private *i915, bool src_is_lmem,
		   u64 bytes_to_cpy, u64 ccs_bytes_to_cpy)
{
	if (ccs_bytes_to_cpy && !src_is_lmem)
		/*
		 * When copying from smem on a Flat-CCS platform, the smem
		 * object carries extra pages for the CCS data on top of the
		 * main-memory payload, so limit this pass to the remaining
		 * payload size instead of blindly taking a full CHUNK_SZ.
		 */
		return min_t(u64, bytes_to_cpy, CHUNK_SZ);
	else
		return CHUNK_SZ;
}

/*
 * Advance @it by @bytes_to_cpy to skip over the main payload and land on the
 * CCS pages appended to the smem scatterlist.
 */
static void get_ccs_sg_sgt(struct sgt_dma *it, u64 bytes_to_cpy)
{
	u64 len;

	do {
		GEM_BUG_ON(!it->sg || !sg_dma_len(it->sg));
		len = it->max - it->dma;
		if (len > bytes_to_cpy) {
			it->dma += bytes_to_cpy;
			break;
		}

		bytes_to_cpy -= len;

		it->sg = __sg_next(it->sg);
		it->dma = sg_dma_address(it->sg);
		it->max = it->dma + sg_dma_len(it->sg);
	} while (bytes_to_cpy);
}

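/*
 * Copy the contents of @src to @dst using the blitter, one CHUNK_SZ window
 * at a time: each request rewrites the PTE windows for the next chunk, blits
 * the data and, on Flat-CCS platforms, moves or overwrites the associated
 * CCS state. The last emitted request is returned in @out.
 */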
int
intel_context_migrate_copy(struct intel_context *ce,
			   const struct i915_deps *deps,
			   struct scatterlist *src,
			   enum i915_cache_level src_cache_level,
			   bool src_is_lmem,
			   struct scatterlist *dst,
			   enum i915_cache_level dst_cache_level,
			   bool dst_is_lmem,
			   struct i915_request **out)
{
	struct sgt_dma it_src = sg_sgt(src), it_dst = sg_sgt(dst), it_ccs;
	struct drm_i915_private *i915 = ce->engine->i915;
	u64 ccs_bytes_to_cpy = 0, bytes_to_cpy;
	enum i915_cache_level ccs_cache_level;
	u32 src_offset, dst_offset;
	u8 src_access, dst_access;
	struct i915_request *rq;
	u64 src_sz, dst_sz;
	bool ccs_is_src, overwrite_ccs;
	int err;

	GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
	GEM_BUG_ON(IS_DGFX(ce->engine->i915) && (!src_is_lmem && !dst_is_lmem));
	*out = NULL;

	GEM_BUG_ON(ce->ring->size < SZ_64K);

	src_sz = scatter_list_length(src);
	bytes_to_cpy = src_sz;

	if (HAS_FLAT_CCS(i915) && src_is_lmem ^ dst_is_lmem) {
		src_access = !src_is_lmem && dst_is_lmem;
		dst_access = !src_access;

		dst_sz = scatter_list_length(dst);
		if (src_is_lmem) {
			it_ccs = it_dst;
			ccs_cache_level = dst_cache_level;
			ccs_is_src = false;
		} else if (dst_is_lmem) {
			bytes_to_cpy = dst_sz;
			it_ccs = it_src;
			ccs_cache_level = src_cache_level;
			ccs_is_src = true;
		}

		/*
		 * When the CCS state has to be evicted along with the data,
		 * the smem side of the transfer carries extra pages for the
		 * CCS, so a src/dst size mismatch is what tells us that CCS
		 * data needs to be moved as well.
		 *
		 * TO-DO: we would like to turn the size-mismatch check into
		 * a WARN_ON, but some smem -> lmem requests still arrive
		 * with equal sizes; that needs to be fixed first.
		 */
		ccs_bytes_to_cpy = src_sz != dst_sz ? GET_CCS_BYTES(i915, bytes_to_cpy) : 0;
		if (ccs_bytes_to_cpy)
			get_ccs_sg_sgt(&it_ccs, bytes_to_cpy);
	}

	overwrite_ccs = HAS_FLAT_CCS(i915) && !ccs_bytes_to_cpy && dst_is_lmem;

	src_offset = 0;
	dst_offset = CHUNK_SZ;
	if (HAS_64K_PAGES(ce->engine->i915)) {
		src_offset = 0;
		dst_offset = 0;
		if (src_is_lmem)
			src_offset = CHUNK_SZ;
		if (dst_is_lmem)
			dst_offset = 2 * CHUNK_SZ;
	}

	do {
		int len;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_ce;
		}

		if (deps) {
			err = i915_request_await_deps(rq, deps);
			if (err)
				goto out_rq;

			if (rq->engine->emit_init_breadcrumb) {
				err = rq->engine->emit_init_breadcrumb(rq);
				if (err)
					goto out_rq;
			}

			deps = NULL;
		}

		/* The PTE updates + copy must not be interrupted. */
		err = emit_no_arbitration(rq);
		if (err)
			goto out_rq;

		src_sz = calculate_chunk_sz(i915, src_is_lmem,
					    bytes_to_cpy, ccs_bytes_to_cpy);

		len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem,
			       src_offset, src_sz);
		if (!len) {
			err = -EINVAL;
			goto out_rq;
		}
		if (len < 0) {
			err = len;
			goto out_rq;
		}

		err = emit_pte(rq, &it_dst, dst_cache_level, dst_is_lmem,
			       dst_offset, len);
		if (err < 0)
			goto out_rq;
		if (err < len) {
			err = -EINVAL;
			goto out_rq;
		}

		err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
		if (err)
			goto out_rq;

		err = emit_copy(rq, dst_offset, src_offset, len);
		if (err)
			goto out_rq;

		bytes_to_cpy -= len;

		if (ccs_bytes_to_cpy) {
			int ccs_sz;

			err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
			if (err)
				goto out_rq;

			ccs_sz = GET_CCS_BYTES(i915, len);
			err = emit_pte(rq, &it_ccs, ccs_cache_level, false,
				       ccs_is_src ? src_offset : dst_offset,
				       ccs_sz);
			if (err < 0)
				goto out_rq;
			if (err < ccs_sz) {
				err = -EINVAL;
				goto out_rq;
			}

			err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
			if (err)
				goto out_rq;

			err = emit_copy_ccs(rq, dst_offset, dst_access,
					    src_offset, src_access, len);
			if (err)
				goto out_rq;

			err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
			if (err)
				goto out_rq;
			ccs_bytes_to_cpy -= ccs_sz;
		} else if (overwrite_ccs) {
			err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
			if (err)
				goto out_rq;

			/*
			 * While we can't always restore/manage the CCS state,
			 * we still need to ensure we don't leak the CCS state
			 * from the previous user, so make sure we overwrite it
			 * with something.
			 */
			err = emit_copy_ccs(rq, dst_offset, INDIRECT_ACCESS,
					    dst_offset, DIRECT_ACCESS, len);
			if (err)
				goto out_rq;

			err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
			if (err)
				goto out_rq;
		}

		/* Arbitration is re-enabled between requests. */
out_rq:
		if (*out)
			i915_request_put(*out);
		*out = i915_request_get(rq);
		i915_request_add(rq);

		if (err)
			break;

		if (!bytes_to_cpy && !ccs_bytes_to_cpy) {
			if (src_is_lmem)
				WARN_ON(it_src.sg && sg_dma_len(it_src.sg));
			else
				WARN_ON(it_dst.sg && sg_dma_len(it_dst.sg));
			break;
		}

		if (WARN_ON(!it_src.sg || !sg_dma_len(it_src.sg) ||
			    !it_dst.sg || !sg_dma_len(it_dst.sg) ||
			    (ccs_bytes_to_cpy && (!it_ccs.sg ||
						  !sg_dma_len(it_ccs.sg))))) {
			err = -EINVAL;
			break;
		}

		cond_resched();
	} while (1);

out_ce:
	return err;
}

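/*
 * Emit a fill of @size bytes at @offset with @value, using XY_FAST_COLOR_BLT
 * on Flat-CCS capable gen12+ parts and XY_COLOR_BLT otherwise.
 */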
static int emit_clear(struct i915_request *rq, u32 offset, int size,
		      u32 value, bool is_lmem)
{
	struct drm_i915_private *i915 = rq->engine->i915;
	int mocs = rq->engine->gt->mocs.uc_index << 1;
	const int ver = GRAPHICS_VER(i915);
	int ring_sz;
	u32 *cs;

	GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);

	if (HAS_FLAT_CCS(i915) && ver >= 12)
		ring_sz = XY_FAST_COLOR_BLT_DW;
	else if (ver >= 8)
		ring_sz = 8;
	else
		ring_sz = 6;

	cs = intel_ring_begin(rq, ring_sz);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	if (HAS_FLAT_CCS(i915) && ver >= 12) {
		*cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
			(XY_FAST_COLOR_BLT_DW - 2);
		*cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) |
			(PAGE_SIZE - 1);
		*cs++ = 0;
		*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
		*cs++ = offset;
		*cs++ = rq->engine->instance;
		*cs++ = !is_lmem << XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT;
		/* BG7 */
		*cs++ = value;
		*cs++ = 0;
		*cs++ = 0;
		*cs++ = 0;
		/* BG11 */
		*cs++ = 0;
		*cs++ = 0;
		/* BG13 */
		*cs++ = 0;
		*cs++ = 0;
		*cs++ = 0;
	} else if (ver >= 8) {
		*cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2);
		*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
		*cs++ = 0;
		*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
		*cs++ = offset;
		*cs++ = rq->engine->instance;
		*cs++ = value;
		*cs++ = MI_NOOP;
	} else {
		*cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
		*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
		*cs++ = 0;
		*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
		*cs++ = offset;
		*cs++ = value;
	}

	intel_ring_advance(rq, cs);
	return 0;
}

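/*
 * Clear the pages described by @sg to @value, one CHUNK_SZ window per
 * request. On Flat-CCS platforms a zero-fill of lmem also scrubs the
 * associated CCS state so that stale compression metadata is never handed
 * to a new user.
 */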
int
intel_context_migrate_clear(struct intel_context *ce,
			    const struct i915_deps *deps,
			    struct scatterlist *sg,
			    enum i915_cache_level cache_level,
			    bool is_lmem,
			    u32 value,
			    struct i915_request **out)
{
	struct drm_i915_private *i915 = ce->engine->i915;
	struct sgt_dma it = sg_sgt(sg);
	struct i915_request *rq;
	u32 offset;
	int err;

	GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
	*out = NULL;

	GEM_BUG_ON(ce->ring->size < SZ_64K);

	offset = 0;
	if (HAS_64K_PAGES(i915) && is_lmem)
		offset = CHUNK_SZ;

	do {
		int len;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_ce;
		}

		if (deps) {
			err = i915_request_await_deps(rq, deps);
			if (err)
				goto out_rq;

			if (rq->engine->emit_init_breadcrumb) {
				err = rq->engine->emit_init_breadcrumb(rq);
				if (err)
					goto out_rq;
			}

			deps = NULL;
		}

		/* The PTE updates + clear must not be interrupted. */
		err = emit_no_arbitration(rq);
		if (err)
			goto out_rq;

		len = emit_pte(rq, &it, cache_level, is_lmem, offset, CHUNK_SZ);
		if (len <= 0) {
			err = len;
			goto out_rq;
		}

		err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
		if (err)
			goto out_rq;

		err = emit_clear(rq, offset, len, value, is_lmem);
		if (err)
			goto out_rq;

		if (HAS_FLAT_CCS(i915) && is_lmem && !value) {
			/*
			 * Copy the (zeroed) main memory content into the
			 * corresponding CCS surface so no stale compression
			 * state is left behind.
			 */
			err = emit_copy_ccs(rq, offset, INDIRECT_ACCESS, offset,
					    DIRECT_ACCESS, len);
			if (err)
				goto out_rq;
		}

		err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);

		/* Arbitration is re-enabled between requests. */
out_rq:
		if (*out)
			i915_request_put(*out);
		*out = i915_request_get(rq);
		i915_request_add(rq);
		if (err || !it.sg || !sg_dma_len(it.sg))
			break;

		cond_resched();
	} while (1);

out_ce:
	return err;
}

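/*
 * Convenience wrapper around intel_context_migrate_copy(): create a fresh
 * migration context (falling back to the pinned one on failure), pin it
 * under the caller's ww context, and emit the copy. intel_migrate_clear()
 * below follows the same pattern for clears.
 */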
int intel_migrate_copy(struct intel_migrate *m,
		       struct i915_gem_ww_ctx *ww,
		       const struct i915_deps *deps,
		       struct scatterlist *src,
		       enum i915_cache_level src_cache_level,
		       bool src_is_lmem,
		       struct scatterlist *dst,
		       enum i915_cache_level dst_cache_level,
		       bool dst_is_lmem,
		       struct i915_request **out)
{
	struct intel_context *ce;
	int err;

	*out = NULL;
	if (!m->context)
		return -ENODEV;

	ce = intel_migrate_create_context(m);
	if (IS_ERR(ce))
		ce = intel_context_get(m->context);
	GEM_BUG_ON(IS_ERR(ce));

	err = intel_context_pin_ww(ce, ww);
	if (err)
		goto out;

	err = intel_context_migrate_copy(ce, deps,
					 src, src_cache_level, src_is_lmem,
					 dst, dst_cache_level, dst_is_lmem,
					 out);

	intel_context_unpin(ce);
out:
	intel_context_put(ce);
	return err;
}

int
intel_migrate_clear(struct intel_migrate *m,
		    struct i915_gem_ww_ctx *ww,
		    const struct i915_deps *deps,
		    struct scatterlist *sg,
		    enum i915_cache_level cache_level,
		    bool is_lmem,
		    u32 value,
		    struct i915_request **out)
{
	struct intel_context *ce;
	int err;

	*out = NULL;
	if (!m->context)
		return -ENODEV;

	ce = intel_migrate_create_context(m);
	if (IS_ERR(ce))
		ce = intel_context_get(m->context);
	GEM_BUG_ON(IS_ERR(ce));

	err = intel_context_pin_ww(ce, ww);
	if (err)
		goto out;

	err = intel_context_migrate_clear(ce, deps, sg, cache_level,
					  is_lmem, value, out);

	intel_context_unpin(ce);
out:
	intel_context_put(ce);
	return err;
}

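/*
 * Tear down the migration state, destroying the pinned migration context
 * created by intel_migrate_init().
 */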
void intel_migrate_fini(struct intel_migrate *m)
{
	struct intel_context *ce;

	ce = fetch_and_zero(&m->context);
	if (!ce)
		return;

	intel_engine_destroy_pinned_context(ce);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_migrate.c"
#endif